diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 48c3d920003103dfc5ba9d4fc919213fdf67518c..6b8a4ef468ce00cc84637c8953939dd74910dbfa 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -27,11 +27,6 @@ build_macos:
    tags:
       - macos
 
-build_ubuntu1604:
-   extends: .build_template_linux
-   tags:
-      - ubuntu1604
-
 build_ubuntu1804:
    extends: .build_template_linux
    tags:
@@ -47,6 +42,11 @@ build_ubuntu1804-gcc8:
    tags:
       - ubuntu1804-gcc8
 
+build_ubuntu2004:
+   extends: .build_template_linux
+   tags:
+      - ubuntu2004
+
 build_vc191x:
    extends: .build_template
    tags:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 10a86aa00ee6472c0763dd90f33cb5f5944b8629..7d8b3d739475db9ea920c6a9f90fd74684722892 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,22 +52,6 @@ set_property( GLOBAL PROPERTY USE_FOLDERS ON )
 # Include a utility module providing functions, macros, and settings
 include( ${CMAKE_SOURCE_DIR}/cmake/CMakeBuild/cmake/modules/BBuildEnv.cmake )
 
-# Enable multithreading
-bb_multithreading()
-
-find_package(OpenMP)
-
-if( OpenMP_FOUND )
-  set( CMAKE_C_FLAGS          "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
-  set( CMAKE_CXX_FLAGS        "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" )
-  set( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}" )
-  
-  set( SET_ENABLE_SPLIT_PARALLELISM OFF CACHE BOOL "Set ENABLE_SPLIT_PARALLELISM as a compiler flag" )
-  set( ENABLE_SPLIT_PARALLELISM     OFF CACHE BOOL "If SET_ENABLE_SPLIT_PARALLELISM is on, it will be set to this value" )
-  set( SET_ENABLE_WPP_PARALLELISM   OFF CACHE BOOL "Set ENABLE_WPP_PARALLELISM as a compiler flag" )
-  set( ENABLE_WPP_PARALLELISM       OFF CACHE BOOL "If SET_ENABLE_WPP_PARALLELISM is on, it will be set to this value" )
-endif()
-
 # Enable warnings for some generators and toolsets.
 # bb_enable_warnings( gcc warnings-as-errors -Wno-sign-compare )
 # bb_enable_warnings( gcc -Wno-unused-variable )
diff --git a/COPYING b/COPYING
index a328b7da34cb542dd0137b58b0380080de67f97a..9d1ebfbff2b061cf6ae696ef9f6c4abfaea7e506 100644
--- a/COPYING
+++ b/COPYING
@@ -3,7 +3,7 @@ License, included below. This software may be subject to other third party
 and contributor rights, including patent rights, and no such rights are
 granted under this license.  Â 
 
-Copyright (c) 2010-2020, ITU/ISO/IEC
+Copyright (c) 2010-2021, ITU/ISO/IEC
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
diff --git a/README.md b/README.md
index d2853b793a91c0f9554b6f081da57508b242888e..fa74ff6f7b0206286a62c16177715f86ec5e08c7 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,20 @@
 VTM reference software for VVC
 ==============================
 
-This software package is the reference software for Versatile Video Coding (VVC). The reference software includes both encoder and decoder functionality.
+This software package is the reference software for Rec. ITU-T H.266 | ISO/IEC 23090-3 Versatile Video Coding (VVC). The reference software includes both encoder and decoder functionality.
 
 Reference software is useful in aiding users of a video coding standard to establish and test conformance and interoperability, and to educate users and demonstrate the capabilities of the standard. For these purposes, this software is provided as an aid for the study and implementation of Versatile Video Coding.
 
-The software has been jointly developed by the ITU-T Video Coding Experts Group (VCEG, Question 6 of ITU-T Study Group 16) and the ISO/IEC Moving Picture Experts Group (MPEG, Working Group 11 of Subcommittee 29 of ISO/IEC Joint Technical Committee 1).
+The software has been jointly developed by the ITU-T Video Coding Experts Group (VCEG, Question 6 of ITU-T Study Group 16) and the ISO/IEC Moving Picture Experts Group (MPEG Joint Video Coding Team(s) with ITU-T SG 16, Working Group 5 of Subcommittee 29 of ISO/IEC Joint Technical Committee 1).
 
 A software manual, which contains usage instructions, can be found in the "doc" subdirectory of this software package.
 
+The source code is stored in a Git repository. The most recent version can be retrieved using the following commands:
+```bash
+git clone https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM.git
+cd VVCSoftware_VTM
+```
+
 Build instructions
 ==================
 
diff --git a/cfg/encoder_intra_vtm.cfg b/cfg/encoder_intra_vtm.cfg
index 4509605d9beee1255c1699fac1f2cb88cc040b26..d3c22b385077f61a254dc2fd6330d6fd8f9222bc 100644
--- a/cfg/encoder_intra_vtm.cfg
+++ b/cfg/encoder_intra_vtm.cfg
@@ -31,15 +31,15 @@ RDOQ                          : 1           # RDOQ
 RDOQTS                        : 1           # RDOQ for transform skip
 
 #=========== Deblock Filter ============
-LoopFilterOffsetInPPS         : 1           # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param)
-LoopFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
-LoopFilterBetaOffset_div2     : 0           # base_param: -12 ~ 12
-LoopFilterTcOffset_div2       : 0           # base_param: -12 ~ 12
-LoopFilterCbBetaOffset_div2   : 0           # base_param: -12 ~ 12
-LoopFilterCbTcOffset_div2     : 0           # base_param: -12 ~ 12
-LoopFilterCrBetaOffset_div2   : 0           # base_param: -12 ~ 12
-LoopFilterCrTcOffset_div2     : 0           # base_param: -12 ~ 12
-DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0)
+DeblockingFilterOffsetInPPS         : 1           # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param)
+DeblockingFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
+DeblockingFilterBetaOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterTcOffset_div2       : 0           # base_param: -12 ~ 12
+DeblockingFilterCbBetaOffset_div2   : 0           # base_param: -12 ~ 12
+DeblockingFilterCbTcOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterCrBetaOffset_div2   : 0           # base_param: -12 ~ 12
+DeblockingFilterCrTcOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (DeblockingFilterOffsetInPPS and DeblockingFilterDisable must be 0)
 
 #=========== Misc. ============
 InternalBitDepth              : 10          # codec operating bit-depth
@@ -110,9 +110,10 @@ FastLFNST                    : 1
 
 # Encoder optimization tools
 AffineAmvrEncOpt             : 0
-ALFStrength                  : 1.0
 ALFAllowPredefinedFilters    : 1
-CCALFStrength                : 1.0
+ALFStrengthTargetLuma        : 1.0
+ALFStrengthTargetChroma      : 1.0
+CCALFStrengthTarget          : 1.0
 ### DO NOT ADD ANYTHING BELOW THIS LINE ###
 ### DO NOT DELETE THE EMPTY LINE BELOW ###
 
diff --git a/cfg/encoder_lowdelay_P_vtm.cfg b/cfg/encoder_lowdelay_P_vtm.cfg
index 35b2291f4762f1177be73214459901519073c59d..5c1c492a5038ca71cd8d05555c305a3d7d0d1993 100644
--- a/cfg/encoder_lowdelay_P_vtm.cfg
+++ b/cfg/encoder_lowdelay_P_vtm.cfg
@@ -43,15 +43,15 @@ RDOQ                          : 1           # RDOQ
 RDOQTS                        : 1           # RDOQ for transform skip
 
 #=========== Deblock Filter ============
-LoopFilterOffsetInPPS         : 1           # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param)
-LoopFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
-LoopFilterBetaOffset_div2     : 0           # base_param: -12 ~ 12
-LoopFilterTcOffset_div2       : 0           # base_param: -12 ~ 12
-LoopFilterCbBetaOffset_div2   : 0           # base_param: -12 ~ 12
-LoopFilterCbTcOffset_div2     : 0           # base_param: -12 ~ 12
-LoopFilterCrBetaOffset_div2   : 0           # base_param: -12 ~ 12
-LoopFilterCrTcOffset_div2     : 0           # base_param: -12 ~ 12
-DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0)
+DeblockingFilterOffsetInPPS         : 1           # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param)
+DeblockingFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
+DeblockingFilterBetaOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterTcOffset_div2       : 0           # base_param: -12 ~ 12
+DeblockingFilterCbBetaOffset_div2   : 0           # base_param: -12 ~ 12
+DeblockingFilterCbTcOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterCrBetaOffset_div2   : 0           # base_param: -12 ~ 12
+DeblockingFilterCrTcOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (DeblockingFilterOffsetInPPS and DeblockingFilterDisable must be 0)
 
 #=========== Misc. ============
 InternalBitDepth              : 10          # codec operating bit-depth
@@ -136,9 +136,10 @@ FastLocalDualTreeMode        : 2
 # Encoder optimization tools
 AffineAmvrEncOpt             : 0
 MmvdDisNum                   : 6
-ALFStrength                  : 1.0
 ALFAllowPredefinedFilters    : 1
-CCALFStrength                : 1.0
+ALFStrengthTargetLuma        : 1.0
+ALFStrengthTargetChroma      : 1.0
+CCALFStrengthTarget          : 1.0
 ### DO NOT ADD ANYTHING BELOW THIS LINE ###
 ### DO NOT DELETE THE EMPTY LINE BELOW ###
 
diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg
index 5818fb1f7ad45e3d7a5c6e2cac34fa77ca7a4406..d4643fc94027cc79cf126be1fe7672f09e300187 100644
--- a/cfg/encoder_lowdelay_vtm.cfg
+++ b/cfg/encoder_lowdelay_vtm.cfg
@@ -43,15 +43,15 @@ RDOQ                          : 1           # RDOQ
 RDOQTS                        : 1           # RDOQ for transform skip
 
 #=========== Deblock Filter ============
-LoopFilterOffsetInPPS         : 1           # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param)
-LoopFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
-LoopFilterBetaOffset_div2     : 0           # base_param: -12 ~ 12
-LoopFilterTcOffset_div2       : 0           # base_param: -12 ~ 12
-LoopFilterCbBetaOffset_div2   : 0           # base_param: -12 ~ 12
-LoopFilterCbTcOffset_div2     : 0           # base_param: -12 ~ 12
-LoopFilterCrBetaOffset_div2   : 0           # base_param: -12 ~ 12
-LoopFilterCrTcOffset_div2     : 0           # base_param: -12 ~ 12
-DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0)
+DeblockingFilterOffsetInPPS         : 1           # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param)
+DeblockingFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
+DeblockingFilterBetaOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterTcOffset_div2       : 0           # base_param: -12 ~ 12
+DeblockingFilterCbBetaOffset_div2   : 0           # base_param: -12 ~ 12
+DeblockingFilterCbTcOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterCrBetaOffset_div2   : 0           # base_param: -12 ~ 12
+DeblockingFilterCrTcOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (DeblockingFilterOffsetInPPS and DeblockingFilterDisable must be 0)
 
 #=========== Misc. ============
 InternalBitDepth              : 10          # codec operating bit-depth
@@ -140,9 +140,10 @@ FastLocalDualTreeMode        : 2
 # Encoder optimization tools
 AffineAmvrEncOpt             : 0
 MmvdDisNum                   : 6
-ALFStrength                  : 1.0
 ALFAllowPredefinedFilters    : 1
-CCALFStrength                : 1.0
+ALFStrengthTargetLuma        : 1.0
+ALFStrengthTargetChroma      : 1.0
+CCALFStrengthTarget          : 1.0
 ### DO NOT ADD ANYTHING BELOW THIS LINE ###
 ### DO NOT DELETE THE EMPTY LINE BELOW ###
 
diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg
index 9ca9d6f62d2c0744312a6c126f95badcd4e381e5..82064508dcf8fcd4b219b0523f77ea4fe82ca825 100644
--- a/cfg/encoder_randomaccess_vtm.cfg
+++ b/cfg/encoder_randomaccess_vtm.cfg
@@ -17,7 +17,7 @@ GOPSize                       : 32          # GOP Size (number of B slice = GOPS
 IntraQPOffset                 : -3
 LambdaFromQpEnable            : 1           # see JCTVC-X0038 for suitable parameters for IntraQPOffset, QPoffset, QPOffsetModelOff, QPOffsetModelScale when enabled
 #        Type POC QPoffset QPOffsetModelOff QPOffsetModelScale CbQPoffset CrQPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 CbTcOffsetDiv2 CbBetaOffsetDiv2 CrTcOffsetDiv2 CrBetaOffsetDiv2 temporal_id #ref_pics_active_L0 #ref_pics_L0   reference_pictures_L0 #ref_pics_active_L1 #ref_pics_L1   reference_pictures_L1
-Frame1                        : B   32  -1        0.0                      0.0            0          0          1.0      0            0                0             0                0               0              0             2                3          32 48 64                    2                2           32 48 
+Frame1                        : B   32  -1        0.0                      0.0            0          0          1.0      0            0                0             0                0               0              0             2                3          32 64 48                    1                2           32 48 
 Frame2                        : B   16   0       -4.9309                   0.2265         0          0          1.0      0            0                0             0                0               0              1             2                2          16 32                       2                2           -16 16 
 Frame3                        : B    8   0       -4.5000                   0.2353         0          0          1.0      0            0                0             0                0               0              2             2                2          8 24                        2                2           -8 -24 
 Frame4                        : B    4   3       -5.4095                   0.2571         0          0          1.0      0            0                0             0                0               0              3             2                2          4 20                        2                3           -4 -12 -28 
@@ -69,15 +69,15 @@ RDOQ                          : 1           # RDOQ
 RDOQTS                        : 1           # RDOQ for transform skip
 
 #=========== Deblock Filter ============
-LoopFilterOffsetInPPS         : 1           # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param)
-LoopFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
-LoopFilterBetaOffset_div2     : 0           # base_param: -12 ~ 12
-LoopFilterTcOffset_div2       : 0           # base_param: -12 ~ 12
-LoopFilterCbBetaOffset_div2   : 0           # base_param: -12 ~ 12
-LoopFilterCbTcOffset_div2     : 0           # base_param: -12 ~ 12
-LoopFilterCrBetaOffset_div2   : 0           # base_param: -12 ~ 12
-LoopFilterCrTcOffset_div2     : 0           # base_param: -12 ~ 12
-DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0)
+DeblockingFilterOffsetInPPS         : 1           # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param)
+DeblockingFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
+DeblockingFilterBetaOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterTcOffset_div2       : 0           # base_param: -12 ~ 12
+DeblockingFilterCbBetaOffset_div2   : 0           # base_param: -12 ~ 12
+DeblockingFilterCbTcOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterCrBetaOffset_div2   : 0           # base_param: -12 ~ 12
+DeblockingFilterCrTcOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (DeblockingFilterOffsetInPPS and DeblockingFilterDisable must be 0)
 
 #=========== Misc. ============
 InternalBitDepth              : 10          # codec operating bit-depth
@@ -89,12 +89,6 @@ TransformSkipFast             : 1           # Fast Transform skipping (0: OFF, 1
 TransformSkipLog2MaxSize      : 5
 SAOLcuBoundary                : 0           # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON)
 
-#=========== TemporalFilter =================
-TemporalFilter                : 0           # Enable/disable GOP Based Temporal Filter
-TemporalFilterFutureReference : 1           # Enable/disable reading future frames
-TemporalFilterStrengthFrame8  : 0.95        # Enable filter at every 8th frame with given strength
-TemporalFilterStrengthFrame16 : 1.5         # Enable filter at every 16th frame with given strength, longer intervals has higher priority
-
 #============ Rate Control ======================
 RateControl                         : 0                # Rate control: enable rate control
 TargetBitrate                       : 1000000          # Rate control: target bitrate, in bps
@@ -172,10 +166,14 @@ ChromaTS                     : 1
 # Encoder optimization tools
 AffineAmvrEncOpt             : 1
 MmvdDisNum                   : 6
-TemporalFilter               : 1
-ALFStrength                  : 1.0
 ALFAllowPredefinedFilters    : 1
-CCALFStrength                : 1.0
+ALFStrengthTargetLuma        : 1.0
+ALFStrengthTargetChroma      : 1.0
+CCALFStrengthTarget          : 1.0
+TemporalFilter                : 1           # Enable/disable GOP Based Temporal Filter
+TemporalFilterFutureReference : 1           # Enable/disable reading future frames
+TemporalFilterStrengthFrame8  : 0.95        # Enable filter at every 8th frame with given strength
+TemporalFilterStrengthFrame16 : 1.5         # Enable filter at every 16th frame with given strength, longer intervals have higher priority
 ### DO NOT ADD ANYTHING BELOW THIS LINE ###
 ### DO NOT DELETE THE EMPTY LINE BELOW ###
 
diff --git a/cfg/encoder_randomaccess_vtm_gop16.cfg b/cfg/encoder_randomaccess_vtm_gop16.cfg
index e44197cf949c9058899effc6d5516501bc2c0921..48f3b2d0032a8e361dc13ee8f4dd15592dead4c7 100644
--- a/cfg/encoder_randomaccess_vtm_gop16.cfg
+++ b/cfg/encoder_randomaccess_vtm_gop16.cfg
@@ -53,15 +53,15 @@ RDOQ                          : 1           # RDOQ
 RDOQTS                        : 1           # RDOQ for transform skip
 
 #=========== Deblock Filter ============
-LoopFilterOffsetInPPS         : 1           # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param)
-LoopFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
-LoopFilterBetaOffset_div2     : 0           # base_param: -12 ~ 12
-LoopFilterTcOffset_div2       : 0           # base_param: -12 ~ 12
-LoopFilterCbBetaOffset_div2   : 0           # base_param: -12 ~ 12
-LoopFilterCbTcOffset_div2     : 0           # base_param: -12 ~ 12
-LoopFilterCrBetaOffset_div2   : 0           # base_param: -12 ~ 12
-LoopFilterCrTcOffset_div2     : 0           # base_param: -12 ~ 12
-DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0)
+DeblockingFilterOffsetInPPS         : 1           # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param)
+DeblockingFilterDisable             : 0           # Disable deblocking filter (0=Filter, 1=No Filter)
+DeblockingFilterBetaOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterTcOffset_div2       : 0           # base_param: -12 ~ 12
+DeblockingFilterCbBetaOffset_div2   : 0           # base_param: -12 ~ 12
+DeblockingFilterCbTcOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterCrBetaOffset_div2   : 0           # base_param: -12 ~ 12
+DeblockingFilterCrTcOffset_div2     : 0           # base_param: -12 ~ 12
+DeblockingFilterMetric        : 0           # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (DeblockingFilterOffsetInPPS and DeblockingFilterDisable must be 0)
 
 #=========== Misc. ============
 InternalBitDepth              : 10          # codec operating bit-depth
@@ -73,12 +73,6 @@ TransformSkipFast             : 1           # Fast Transform skipping (0: OFF, 1
 TransformSkipLog2MaxSize      : 5
 SAOLcuBoundary                : 0           # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON)
 
-#=========== TemporalFilter =================
-TemporalFilter                : 0           # Enable/disable GOP Based Temporal Filter
-TemporalFilterFutureReference : 1           # Enable/disable reading future frames
-TemporalFilterStrengthFrame8  : 0.95        # Enable filter at every 8th frame with given strength
-TemporalFilterStrengthFrame16 : 1.5         # Enable filter at every 16th frame with given strength, longer intervals has higher priority
-
 #============ Rate Control ======================
 RateControl                         : 0                # Rate control: enable rate control
 TargetBitrate                       : 1000000          # Rate control: target bitrate, in bps
@@ -156,10 +150,14 @@ ChromaTS                     : 1
 # Encoder optimization tools
 AffineAmvrEncOpt             : 1
 MmvdDisNum                   : 6
-TemporalFilter               : 1
-ALFStrength                  : 1.0
 ALFAllowPredefinedFilters    : 1
-CCALFStrength                : 1.0
+ALFStrengthTargetLuma        : 1.0
+ALFStrengthTargetChroma      : 1.0
+CCALFStrengthTarget          : 1.0
+TemporalFilter                : 1           # Enable/disable GOP Based Temporal Filter
+TemporalFilterFutureReference : 1           # Enable/disable reading future frames
+TemporalFilterStrengthFrame8  : 0.95        # Enable filter at every 8th frame with given strength
+TemporalFilterStrengthFrame16 : 1.5         # Enable filter at every 16th frame with given strength, longer intervals have higher priority
 ### DO NOT ADD ANYTHING BELOW THIS LINE ###
 ### DO NOT DELETE THE EMPTY LINE BELOW ###
 
diff --git a/cfg/examples_SEI_CTI/readMe.txt b/cfg/examples_SEI_CTI/readMe.txt
new file mode 100644
index 0000000000000000000000000000000000000000..04fb12dbe6d84e3e0a5b28d296ade17b583db3cc
--- /dev/null
+++ b/cfg/examples_SEI_CTI/readMe.txt
@@ -0,0 +1,7 @@
+example encoding command line
+encoder -c encoder_randomaccess_vtm.cfg -c classH1.cfg -c H1_BalloonFestival.cfg -c seiCti_hdrPq_to_sdr1.cfg
+        -i BalloonFestival_1920x1080p_24_10b_pq_709_ct2020_420_rev1.yuv -ip 32 -fs 0 -f 33 -q 22 
+        -b BalloonFestival_1920x1080p_24_10b_pq_709_ct2020_420_rev1.bin -o /dev/null --InternalBitDepth=10 --OutputBitDepth=10 
+
+example decoding command line
+decoder -b BalloonFestival_1920x1080p_24_10b_pq_709_ct2020_420_rev1.bin -o dec.yuv --SEICTIFilename=dec_cti.yuv
\ No newline at end of file
diff --git a/cfg/examples_SEI_CTI/seiCti_hdrPq_to_sdr1.cfg b/cfg/examples_SEI_CTI/seiCti_hdrPq_to_sdr1.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..8ac807a9a52d4167ccd4b233cc599662e6c585ea
--- /dev/null
+++ b/cfg/examples_SEI_CTI/seiCti_hdrPq_to_sdr1.cfg
@@ -0,0 +1,24 @@
+# example mapping BT.2100 HDR-PQ Std range to SDR-like BT.2020 std range
+# this is provided as indicative example, but in principle the mapping curve should be dynamically tuned depending on the content
+
+SEICTIEnabled                 : 1
+SEICTIId                      : 1
+
+SEICTISignalInfoFlag          : 1
+SEICTIFullRangeFlag           : 0
+SEICTIPrimaries               : 9
+SEICTITransferFunction        : 14
+SEICTIMatrixCoefs             : 9
+
+SEICTICrossCompFlag           : 1
+SEICTICrossCompInferred       : 1
+SEICTILut0                    : 64 00 41 41 41 41 44 50 56 62 70 78 87 97 109 79 00 # Lut Y 
+
+SEICTIChromaOffset            : 0      # chroma scaling offset
+
+### DO NOT ADD ANYTHING BELOW THIS LINE ###
+### DO NOT DELETE THE EMPTY LINE BELOW ###
+
+
+
+
diff --git a/cfg/examples_SEI_CTI/seiCti_hdrPq_to_sdr2.cfg b/cfg/examples_SEI_CTI/seiCti_hdrPq_to_sdr2.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..715d8756ed5ac71f863711ca6fe3dfdc65a4008a
--- /dev/null
+++ b/cfg/examples_SEI_CTI/seiCti_hdrPq_to_sdr2.cfg
@@ -0,0 +1,21 @@
+# example mapping BT.2100 HDR-PQ Std range to SDR-like BT.2020 std range
+# this is provided as indicative example, but in principle the mapping curve should be dynamically tuned depending on the content
+
+SEICTIEnabled                 : 1
+SEICTIId                      : 1
+
+SEICTISignalInfoFlag          : 1
+SEICTIFullRangeFlag           : 0
+SEICTIPrimaries               : 9
+SEICTITransferFunction        : 14
+SEICTIMatrixCoefs             : 9
+
+SEICTICrossCompFlag           : 1      # cross-component scaling mode enabled or not
+SEICTICrossCompInferred       : 0      # chroma LUT inferred (1) or not (0) from luma LUT
+SEICTINbChromaLut             : 1      # nb of chroma LUT (1 or 2)
+SEICTILut0                    : 64 00 41 41 41 41 44 50 56 62 70 78 87 97 109 79 00 # Lut Y 
+SEICTILut1                    : 64 56 47 47 47 48 51 56 60 66 72 78 85 93 87  70 64 # Lut chroma 
+SEICTIChromaOffset            : 0      # chroma scaling offset
+
+### DO NOT ADD ANYTHING BELOW THIS LINE ###
+### DO NOT DELETE THE EMPTY LINE BELOW ###
diff --git a/cfg/examples_SEI_CTI/seiCti_sdr_to_hdrPq1.cfg b/cfg/examples_SEI_CTI/seiCti_sdr_to_hdrPq1.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..08efe740602e3424fd938576b07349fccd306db8
--- /dev/null
+++ b/cfg/examples_SEI_CTI/seiCti_sdr_to_hdrPq1.cfg
@@ -0,0 +1,22 @@
+# example mapping SDR BT.2020 std range to BT.2100 HDR-PQ-like Std range
+# this is provided as indicative example, but in principle the mapping curve should be dynamically tuned depending on the content
+
+SEICTIEnabled                 : 1
+SEICTIId                      : 1
+
+SEICTISignalInfoFlag          : 1
+SEICTIFullRangeFlag           : 0
+SEICTIPrimaries               : 9
+SEICTITransferFunction        : 16
+SEICTIMatrixCoefs             : 9
+
+SEICTICrossCompFlag           : 1
+SEICTICrossCompInferred       : 1
+SEICTILut0                    : 64 00 103 101 97 79 72 67 58 54 47 46 42 39 41 50 00 # Lut Y 
+SEICTIChromaOffset            : 0      # chroma scaling offset
+
+### DO NOT ADD ANYTHING BELOW THIS LINE ###
+### DO NOT DELETE THE EMPTY LINE BELOW ###
+
+
+
diff --git a/cfg/hbd/12-bit.cfg b/cfg/hbd/12-bit.cfg
index 55af5e790589e631f493f5b08d10d6ac79752d62..9f9bba34cde4c55bd9cebf781d04a70e75cd3c4e 100755
--- a/cfg/hbd/12-bit.cfg
+++ b/cfg/hbd/12-bit.cfg
@@ -1,3 +1,2 @@
 InternalBitDepth:  12
-MaxBitDepthConstraint: 12
-ExtendedPrecision: 0
+MaxBitDepthConstraint: 12
\ No newline at end of file
diff --git a/cfg/hbd/16-bit.cfg b/cfg/hbd/16-bit.cfg
index c9254b5bebe095dd14bd6aeedba4f35ee99cf8f4..ea71247641057c099dffe35d65bfd09e419123db 100755
--- a/cfg/hbd/16-bit.cfg
+++ b/cfg/hbd/16-bit.cfg
@@ -1,3 +1,2 @@
 InternalBitDepth:  16
-MaxBitDepthConstraint: 16
-ExtendedPrecision: 1
+MaxBitDepthConstraint: 16
\ No newline at end of file
diff --git a/cfg/hbd/hbd.cfg b/cfg/hbd/hbd.cfg
index 0a984507a39e87c0bbc35dbcf95e7c6a8870bc5d..88af1fdb6aad5081ff378a12bb84151edd7089d1 100755
--- a/cfg/hbd/hbd.cfg
+++ b/cfg/hbd/hbd.cfg
@@ -5,4 +5,20 @@ MaxTTLumaISlice:  16
 MaxTTNonISlice:   16
 LFNST:            0
 TemporalFilter:   0
-Profile:          none
+Profile:          auto
+#Updates for JVET-U2018
+LMCSEnable:       0
+BCW:              0
+MMVD:             0
+SMVD:             0
+Geo:              0
+BIO:              0
+DMVR:             0
+Affine:           0
+ISP:              0
+TSRCRicePresent:  1
+
+ExtendedPrecision: 1
+ExtendedRiceRRC               : 1
+GolombRiceParameterAdaptation : 1
+ReverseLastSigCoeff : 1
diff --git a/cfg/lossless/lossless.cfg b/cfg/lossless/lossless.cfg
index 793b2bedbd16f9300826026cd8bb27cf2024e94a..42c6884baaf19eaaab9f53f09f2024feeb84ef65 100644
--- a/cfg/lossless/lossless.cfg
+++ b/cfg/lossless/lossless.cfg
@@ -11,7 +11,7 @@ ISP                          : 0
 MTS                          : 0
 LFNST                        : 0
 JointCbCr                    : 0
-LoopFilterDisable            : 1
+DeblockingFilterDisable      : 1
 SAO                          : 0
 ALF                          : 0
 CCALF                        : 0
diff --git a/cfg/lossless/lossless_mixed.cfg b/cfg/lossless/lossless_mixed.cfg
index 5fbdd0bed8bedf255f7b256e6321abd1e2682d5a..e7d38327f923c69f18325572d17fc921bffd3e20 100644
--- a/cfg/lossless/lossless_mixed.cfg
+++ b/cfg/lossless/lossless_mixed.cfg
@@ -11,7 +11,7 @@ ISP                          : 0
 MTS                          : 0
 LFNST                        : 0
 JointCbCr                    : 0
-LoopFilterDisable            : 0
+DeblockingFilterDisable      : 0
 SAO                          : 1
 ALF                          : 1
 CCALF                        : 0
diff --git a/cfg/per-class/classH1.cfg b/cfg/per-class/classH1.cfg
index a6b09ffa1f9acffa0b742fe305e391b30c4a5283..5a2c188cd9388a3a9384742cd2bd209d76e29186 100644
--- a/cfg/per-class/classH1.cfg
+++ b/cfg/per-class/classH1.cfg
@@ -25,3 +25,5 @@ VerCollocatedChroma           : 1
 
 #======== HDR Metrics ============
 CalculateHdrMetrics           : 1           # Calculate HDR metrics for Class H1 (PQ) content
+
+PrintWPSNR                    : 1
\ No newline at end of file
diff --git a/cfg/per-sequence-HBD/H1_BalloonFestival_420_12bit.cfg b/cfg/per-sequence-HBD/H1_BalloonFestival_420_12bit.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..e6588836d83d5057063399de08b23654b0542278
--- /dev/null
+++ b/cfg/per-sequence-HBD/H1_BalloonFestival_420_12bit.cfg
@@ -0,0 +1,11 @@
+#======== File I/O ===============
+InputFile                     : BalloonFestival_1920x1080p_24_12b_pq_709_ct2020_420.yuv
+InputBitDepth                 : 12          # Input bitdepth
+InputChromaFormat             : 420         # Ratio of luminance to chrominance samples
+FrameRate                     : 24          # Frame Rate per second
+FrameSkip                     : 0           # Number of frames to be skipped in input
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080         # Input  frame height
+FramesToBeEncoded             : 240         # Number of frames to be coded
+
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H1_FireEater2_420_12bit.cfg b/cfg/per-sequence-HBD/H1_FireEater2_420_12bit.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..92f264ecdc7024b9a83139c690132460546c3e0b
--- /dev/null
+++ b/cfg/per-sequence-HBD/H1_FireEater2_420_12bit.cfg
@@ -0,0 +1,11 @@
+#======== File I/O ===============
+InputFile                     : FireEater2Clip4000r1_1920x1080p_25_12b_pq_709_ct2020_420.yuv
+InputBitDepth                 : 12          # Input bitdepth
+InputChromaFormat             : 420         # Ratio of luminance to chrominance samples
+FrameRate                     : 25          # Frame Rate per second
+FrameSkip                     : 0           # Number of frames to be skipped in input
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
+FramesToBeEncoded             : 200         # Number of frames to be coded
+
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H1_Hurdles_420_12bit.cfg b/cfg/per-sequence-HBD/H1_Hurdles_420_12bit.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..7aec9ed6abec8258f52c1dc42ab141a6757401e6
--- /dev/null
+++ b/cfg/per-sequence-HBD/H1_Hurdles_420_12bit.cfg
@@ -0,0 +1,11 @@
+#======== File I/O ===============
+InputFile                     : EBU_04_Hurdles_1920x1080p_50_12b_pq_709_ct2020_420.yuv
+InputBitDepth                 : 12          # Input bitdepth
+InputChromaFormat             : 420         # Ratio of luminance to chrominance samples
+FrameRate                     : 50          # Frame Rate per second
+FrameSkip                     : 0           # Number of frames to be skipped in input
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
+FramesToBeEncoded             : 500         # Number of frames to be coded
+
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H1_Market3_420_12bit.cfg b/cfg/per-sequence-HBD/H1_Market3_420_12bit.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..d4c493d3733fd572d657c254fdb8f0d95ea7370b
--- /dev/null
+++ b/cfg/per-sequence-HBD/H1_Market3_420_12bit.cfg
@@ -0,0 +1,11 @@
+#======== File I/O ===============
+InputFile                     : Market3Clip4000r2_1920x1080p_50_12b_pq_709_ct2020_420.yuv
+InputBitDepth                 : 12          # Input bitdepth
+InputChromaFormat             : 420         # Ratio of luminance to chrominance samples
+FrameRate                     : 50          # Frame Rate per second
+FrameSkip                     : 0           # Number of frames to be skipped in input
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
+FramesToBeEncoded             : 400         # Number of frames to be coded
+
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H1_Starting_420_12bit.cfg b/cfg/per-sequence-HBD/H1_Starting_420_12bit.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..bc8e6af00a100b0d442e3387625441b4fe284ae0
--- /dev/null
+++ b/cfg/per-sequence-HBD/H1_Starting_420_12bit.cfg
@@ -0,0 +1,11 @@
+#======== File I/O ===============
+InputFile                     : EBU_06_Starting_1920x1080p_50_12b_pq_709_ct2020_420.yuv
+InputBitDepth                 : 12          # Input bitdepth
+InputChromaFormat             : 420         # Ratio of luminance to chrominance samples
+FrameRate                     : 50          # Frame Rate per second
+FrameSkip                     : 0           # Number of frames to be skipped in input
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
+FramesToBeEncoded             : 500         # Number of frames to be coded
+
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H2_DayStreet_420_12bit.cfg b/cfg/per-sequence-HBD/H2_DayStreet_420_12bit.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..3bdaf26adb32838947755f7c11cf0f6020493b0c
--- /dev/null
+++ b/cfg/per-sequence-HBD/H2_DayStreet_420_12bit.cfg
@@ -0,0 +1,11 @@
+#======== File I/O ===============
+InputFile                     : DayStreet_1920x1080_60p_12bit_420_hlg.yuv
+InputBitDepth                 : 12          # Input bitdepth
+InputChromaFormat             : 420         # Ratio of luminance to chrominance samples
+FrameRate                     : 60          # Frame Rate per second
+FrameSkip                     : 0           # Number of frames to be skipped in input
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
+FramesToBeEncoded             : 600         # Number of frames to be coded
+
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H2_DayStreet_422_12bit.cfg b/cfg/per-sequence-HBD/H2_DayStreet_422_12bit.cfg
index d0688d68ac132fae2fab6895c9f683ea7a89cf0e..3fd3b59f19eb827319cf24bafbfbc941a5dd397c 100755
--- a/cfg/per-sequence-HBD/H2_DayStreet_422_12bit.cfg
+++ b/cfg/per-sequence-HBD/H2_DayStreet_422_12bit.cfg
@@ -1,11 +1,11 @@
 #======== File I/O ===============
-InputFile                     : DayStreet_3840x2160_60p_12bit_422_hlg.yuv
+InputFile                     : DayStreet_1920x1080_60p_12bit_422_hlg.yuv
 InputBitDepth                 : 12          # Input bitdepth
 InputChromaFormat             : 422         # Ratio of luminance to chrominance samples
 FrameRate                     : 60          # Frame Rate per second
 FrameSkip                     : 0           # Number of frames to be skipped in input
-SourceWidth                   : 3840        # Input  frame width
-SourceHeight                  : 2160        # Input  frame height
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 600         # Number of frames to be coded
 
-Level                         : 5.1
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H2_DayStreet_444_12bit.cfg b/cfg/per-sequence-HBD/H2_DayStreet_444_12bit.cfg
index cab0cf34b742ec43b814660f17f89634412d0964..ed95d68b2c30b4826667fbe9bfccf8efca5833d7 100755
--- a/cfg/per-sequence-HBD/H2_DayStreet_444_12bit.cfg
+++ b/cfg/per-sequence-HBD/H2_DayStreet_444_12bit.cfg
@@ -1,11 +1,11 @@
 #======== File I/O ===============
-InputFile                     : DayStreet_3840x2160_60p_12bit_444_hlg.yuv
+InputFile                     : DayStreet_1920x1080_60p_12bit_444_hlg.yuv
 InputBitDepth                 : 12          # Input bitdepth
 InputChromaFormat             : 444         # Ratio of luminance to chrominance samples
 FrameRate                     : 60          # Frame Rate per second
 FrameSkip                     : 0           # Number of frames to be skipped in input
-SourceWidth                   : 3840        # Input  frame width
-SourceHeight                  : 2160        # Input  frame height
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 600         # Number of frames to be coded
 
-Level                         : 5.1
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H2_NightStreet_420_12bit.cfg b/cfg/per-sequence-HBD/H2_NightStreet_420_12bit.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..5a7133629418c62e11e74602103b98178c0b7486
--- /dev/null
+++ b/cfg/per-sequence-HBD/H2_NightStreet_420_12bit.cfg
@@ -0,0 +1,11 @@
+#======== File I/O ===============
+InputFile                     : NightStreet_1920x1080_60p_12bit_420_hlg.yuv
+InputBitDepth                 : 12          # Input bitdepth
+InputChromaFormat             : 420         # Ratio of luminance to chrominance samples
+FrameRate                     : 60          # Frame Rate per second
+FrameSkip                     : 0           # Number of frames to be skipped in input
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
+FramesToBeEncoded             : 600         # Number of frames to be coded
+
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H2_NightStreet_422_12bit.cfg b/cfg/per-sequence-HBD/H2_NightStreet_422_12bit.cfg
index 22ea3fae873dfd5dd5d23c4abac2dbd14bf7e0f4..c5e44eafc4737e97eb5ff8d0777957ab49b403a5 100755
--- a/cfg/per-sequence-HBD/H2_NightStreet_422_12bit.cfg
+++ b/cfg/per-sequence-HBD/H2_NightStreet_422_12bit.cfg
@@ -1,11 +1,11 @@
 #======== File I/O ===============
-InputFile                     : NightStreet_3840x2160_60p_12bit_422_hlg.yuv
+InputFile                     : NightStreet_1920x1080_60p_12bit_422_hlg.yuv
 InputBitDepth                 : 12          # Input bitdepth
 InputChromaFormat             : 422         # Ratio of luminance to chrominance samples
 FrameRate                     : 60          # Frame Rate per second
 FrameSkip                     : 0           # Number of frames to be skipped in input
-SourceWidth                   : 3840        # Input  frame width
-SourceHeight                  : 2160        # Input  frame height
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 600         # Number of frames to be coded
 
-Level                         : 5.1
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H2_NightStreet_444_12bit.cfg b/cfg/per-sequence-HBD/H2_NightStreet_444_12bit.cfg
index 23e48c4603c856ee988fab361193b5e2cdfac919..a5fa0026186a9163f09e6b8657a161ebcbb87a5b 100755
--- a/cfg/per-sequence-HBD/H2_NightStreet_444_12bit.cfg
+++ b/cfg/per-sequence-HBD/H2_NightStreet_444_12bit.cfg
@@ -1,11 +1,11 @@
 #======== File I/O ===============
-InputFile                     : NightStreet_3840x2160_60p_12bit_444_hlg.yuv
+InputFile                     : NightStreet_1920x1080_60p_12bit_444_hlg.yuv
 InputBitDepth                 : 12          # Input bitdepth
 InputChromaFormat             : 444         # Ratio of luminance to chrominance samples
 FrameRate                     : 60          # Frame Rate per second
 FrameSkip                     : 0           # Number of frames to be skipped in input
-SourceWidth                   : 3840        # Input  frame width
-SourceHeight                  : 2160        # Input  frame height
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 600         # Number of frames to be coded
 
-Level                         : 5.1
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H2_PeopleInShoppingCenter_420_12bit.cfg b/cfg/per-sequence-HBD/H2_PeopleInShoppingCenter_420_12bit.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..1c78855b4a79d912be809699276d88e9971354a8
--- /dev/null
+++ b/cfg/per-sequence-HBD/H2_PeopleInShoppingCenter_420_12bit.cfg
@@ -0,0 +1,11 @@
+#======== File I/O ===============
+InputFile                     : PeopleInShoppingCenter_1920x1080_60p_12bit_420_hlg.yuv
+InputBitDepth                 : 12          # Input bitdepth
+InputChromaFormat             : 420         # Ratio of luminance to chrominance samples
+FrameRate                     : 60          # Frame Rate per second
+FrameSkip                     : 0           # Number of frames to be skipped in input
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
+FramesToBeEncoded             : 600         # Number of frames to be coded
+
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H2_PeopleInShoppingCenter_422_12bit.cfg b/cfg/per-sequence-HBD/H2_PeopleInShoppingCenter_422_12bit.cfg
index 3041cb9301741e6eeec435ab93a47f4cfcb1ab4e..5f7b2fa3b8efeb46f0edc5d28940de50fc3fd966 100755
--- a/cfg/per-sequence-HBD/H2_PeopleInShoppingCenter_422_12bit.cfg
+++ b/cfg/per-sequence-HBD/H2_PeopleInShoppingCenter_422_12bit.cfg
@@ -1,11 +1,11 @@
 #======== File I/O ===============
-InputFile                     : PeopleInShoppingCenter_3840x2160_60p_12bit_422_hlg.yuv
+InputFile                     : PeopleInShoppingCenter_1920x1080_60p_12bit_422_hlg.yuv
 InputBitDepth                 : 12          # Input bitdepth
 InputChromaFormat             : 422         # Ratio of luminance to chrominance samples
 FrameRate                     : 60          # Frame Rate per second
 FrameSkip                     : 0           # Number of frames to be skipped in input
-SourceWidth                   : 3840        # Input  frame width
-SourceHeight                  : 2160        # Input  frame height
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 600         # Number of frames to be coded
 
-Level                         : 5.1
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H2_PeopleInShoppingCenter_444_12bit.cfg b/cfg/per-sequence-HBD/H2_PeopleInShoppingCenter_444_12bit.cfg
index 103fe4a0ab206f776270a6f230753b8dc36f1d96..d1f9d02b831745b1a4173ce6133586ba5b269f29 100755
--- a/cfg/per-sequence-HBD/H2_PeopleInShoppingCenter_444_12bit.cfg
+++ b/cfg/per-sequence-HBD/H2_PeopleInShoppingCenter_444_12bit.cfg
@@ -1,11 +1,11 @@
 #======== File I/O ===============
-InputFile                     : PeopleInShoppingCenter_3840x2160_60p_12bit_444_hlg.yuv
+InputFile                     : PeopleInShoppingCenter_1920x1080_60p_12bit_444_hlg.yuv
 InputBitDepth                 : 12          # Input bitdepth
 InputChromaFormat             : 444         # Ratio of luminance to chrominance samples
 FrameRate                     : 60          # Frame Rate per second
 FrameSkip                     : 0           # Number of frames to be skipped in input
-SourceWidth                   : 3840        # Input  frame width
-SourceHeight                  : 2160        # Input  frame height
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 600         # Number of frames to be coded
 
-Level                         : 5.1
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H2_StainedGlass_420_12bit.cfg b/cfg/per-sequence-HBD/H2_StainedGlass_420_12bit.cfg
new file mode 100755
index 0000000000000000000000000000000000000000..76af78772b5994929f02f474197cf28c85b305a9
--- /dev/null
+++ b/cfg/per-sequence-HBD/H2_StainedGlass_420_12bit.cfg
@@ -0,0 +1,11 @@
+#======== File I/O ===============
+InputFile                     : StainedGlass_1920x1080_60p_12bit_420_hlg.yuv
+InputBitDepth                 : 12          # Input bitdepth
+InputChromaFormat             : 420         # Ratio of luminance to chrominance samples
+FrameRate                     : 60          # Frame Rate per second
+FrameSkip                     : 0           # Number of frames to be skipped in input
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
+FramesToBeEncoded             : 600         # Number of frames to be coded
+
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H2_StainedGlass_422_12bit.cfg b/cfg/per-sequence-HBD/H2_StainedGlass_422_12bit.cfg
index 2a4f9157db11b63ec314def596b88c1ab7ab4c06..8be6ffdf5662d70f7cf47d66896d283a1ff38aa3 100755
--- a/cfg/per-sequence-HBD/H2_StainedGlass_422_12bit.cfg
+++ b/cfg/per-sequence-HBD/H2_StainedGlass_422_12bit.cfg
@@ -1,11 +1,11 @@
 #======== File I/O ===============
-InputFile                     : StainedGlass_3840x2160_60p_12bit_422_hlg.yuv
+InputFile                     : StainedGlass_1920x1080_60p_12bit_422_hlg.yuv
 InputBitDepth                 : 12          # Input bitdepth
 InputChromaFormat             : 422         # Ratio of luminance to chrominance samples
 FrameRate                     : 60          # Frame Rate per second
 FrameSkip                     : 0           # Number of frames to be skipped in input
-SourceWidth                   : 3840        # Input  frame width
-SourceHeight                  : 2160        # Input  frame height
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 600         # Number of frames to be coded
 
-Level                         : 5.1
+Level                         : 4.1
diff --git a/cfg/per-sequence-HBD/H2_StainedGlass_444_12bit.cfg b/cfg/per-sequence-HBD/H2_StainedGlass_444_12bit.cfg
index 64dc9d0a2a85f34e648117573267e233547b497a..3ed647d9886935c6b7b6d189c8f1d2b82a73398a 100755
--- a/cfg/per-sequence-HBD/H2_StainedGlass_444_12bit.cfg
+++ b/cfg/per-sequence-HBD/H2_StainedGlass_444_12bit.cfg
@@ -1,11 +1,11 @@
 #======== File I/O ===============
-InputFile                     : StainedGlass_3840x2160_60p_12bit_444_hlg.yuv
+InputFile                     : StainedGlass_1920x1080_60p_12bit_444_hlg.yuv
 InputBitDepth                 : 12          # Input bitdepth
 InputChromaFormat             : 444         # Ratio of luminance to chrominance samples
 FrameRate                     : 60          # Frame Rate per second
 FrameSkip                     : 0           # Number of frames to be skipped in input
-SourceWidth                   : 3840        # Input  frame width
-SourceHeight                  : 2160        # Input  frame height
+SourceWidth                   : 1920        # Input  frame width
+SourceHeight                  : 1080        # Input  frame height
 FramesToBeEncoded             : 600         # Number of frames to be coded
 
-Level                         : 5.1
+Level                         : 4.1
diff --git a/cfg/sei_vui/alpha_channel_info.cfg b/cfg/sei_vui/alpha_channel_info.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..de33a96649d834a3b4e6e0078b5889fa497cd295
--- /dev/null
+++ b/cfg/sei_vui/alpha_channel_info.cfg
@@ -0,0 +1,10 @@
+#======== Alpha Channel Information SEI message =====================
+SEIACIEnabled                               : 1
+SEIACICancelFlag                            : 0
+SEIACIUseIdc                                : 0
+SEIACIBitDepthMinus8                        : 0
+SEIACITransparentValue                      : 0
+SEIACIOpaqueValue                           : 255
+SEIACIIncrFlag                              : 0
+SEIACIClipFlag                              : 0
+SEIACIClipTypeFlag                          : 0
diff --git a/cfg/sei_vui/annotated_regions/anno_reg_0.txt b/cfg/sei_vui/annotated_regions/anno_reg_0.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8e8c985328f067e678c4832c0a8a047f5a456cc6
--- /dev/null
+++ b/cfg/sei_vui/annotated_regions/anno_reg_0.txt
@@ -0,0 +1,48 @@
+SEIArCancelFlag: 0
+SEIArNotOptForViewingFlag: 0
+SEIArTrueMotionFlag: 0
+SEIArOccludedObjsFlag: 0
+SEIArPartialObjsFlagPresentFlag: 0
+SEIArObjLabelPresentFlag: 1
+SEIArObjConfInfoPresentFlag: 1
+SEIArObjDetConfLength: 7
+SEIArObjLabelLangPresentFlag: 1
+SEIArLabelLanguage: ENGLISH
+SEIArNumLabelUpdates: 2
+SEIArLabelIdc[c]: 0
+SEIArLabelCancelFlag[c]: 0
+SEIArLabel[c]: car
+SEIArLabelIdc[c]: 1
+SEIArLabelCancelFlag[c]: 0
+SEIArLabel[c]: person
+SEIArNumObjUpdates: 3
+SEIArObjIdx[c]: 0
+SEIArObjCancelFlag[c]: 0
+SEIArObjLabelUpdateFlag[c]: 1
+SEIArObjectLabelIdc[c]: 0
+SEIArBoundBoxUpdateFlag[c]: 1
+SEIArObjTop[c]: 10
+SEIArObjLeft[c]: 10
+SEIArObjWidth[c]: 50
+SEIArObjHeight[c]: 50
+SEIArObjDetConf[c]: 90
+SEIArObjIdx[c]: 1
+SEIArObjCancelFlag[c]: 0
+SEIArObjLabelUpdateFlag[c]: 1
+SEIArObjectLabelIdc[c]: 0
+SEIArBoundBoxUpdateFlag[c]: 1
+SEIArObjTop[c]: 100
+SEIArObjLeft[c]: 100
+SEIArObjWidth[c]: 50
+SEIArObjHeight[c]: 50
+SEIArObjDetConf[c]: 90
+SEIArObjIdx[c]: 2
+SEIArObjCancelFlag[c]: 0
+SEIArObjLabelUpdateFlag[c]: 1
+SEIArObjectLabelIdc[c]: 1
+SEIArBoundBoxUpdateFlag[c]: 1
+SEIArObjTop[c]: 200
+SEIArObjLeft[c]: 200
+SEIArObjWidth[c]: 80
+SEIArObjHeight[c]: 100
+SEIArObjDetConf[c]: 85
diff --git a/cfg/sei_vui/annotated_regions/anno_reg_1.txt b/cfg/sei_vui/annotated_regions/anno_reg_1.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5508a22a77ae5728f72ed6ef4f62291716081ca7
--- /dev/null
+++ b/cfg/sei_vui/annotated_regions/anno_reg_1.txt
@@ -0,0 +1,31 @@
+SEIArCancelFlag: 0
+SEIArNotOptForViewingFlag: 0
+SEIArTrueMotionFlag: 0
+SEIArOccludedObjsFlag: 0
+SEIArPartialObjsFlagPresentFlag: 0
+SEIArObjLabelPresentFlag: 1
+SEIArObjConfInfoPresentFlag: 1
+SEIArObjDetConfLength: 7
+SEIArObjLabelLangPresentFlag: 1
+SEIArLabelLanguage: ENGLISH
+SEIArNumLabelUpdates: 0
+SEIArNumObjUpdates: 2
+SEIArObjIdx[c]: 0
+SEIArObjCancelFlag[c]: 0
+SEIArObjLabelUpdateFlag[c]: 0
+SEIArBoundBoxUpdateFlag[c]: 1
+SEIArObjTop[c]: 20
+SEIArObjLeft[c]: 20
+SEIArObjWidth[c]: 50
+SEIArObjHeight[c]: 50
+SEIArObjDetConf[c]: 90
+SEIArObjIdx[c]: 3
+SEIArObjCancelFlag[c]: 0
+SEIArObjLabelUpdateFlag[c]: 1
+SEIArObjectLabelIdc[c]: 0
+SEIArBoundBoxUpdateFlag[c]: 1
+SEIArObjTop[c]: 300
+SEIArObjLeft[c]: 300
+SEIArObjWidth[c]: 80
+SEIArObjHeight[c]: 100
+SEIArObjDetConf[c]: 90
diff --git a/cfg/sei_vui/annotated_regions/anno_reg_2.txt b/cfg/sei_vui/annotated_regions/anno_reg_2.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fd334b850b1e93987b6f15ecb7eaaa4dc3527879
--- /dev/null
+++ b/cfg/sei_vui/annotated_regions/anno_reg_2.txt
@@ -0,0 +1,43 @@
+SEIArCancelFlag: 0
+SEIArNotOptForViewingFlag: 0
+SEIArTrueMotionFlag: 0
+SEIArOccludedObjsFlag: 0
+SEIArPartialObjsFlagPresentFlag: 0
+SEIArObjLabelPresentFlag: 1
+SEIArObjConfInfoPresentFlag: 1
+SEIArObjDetConfLength: 7
+SEIArObjLabelLangPresentFlag: 1
+SEIArLabelLanguage: ENGLISH
+SEIArNumLabelUpdates: 1
+SEIArLabelIdc[c]: 2
+SEIArLabelCancelFlag[c]: 0
+SEIArLabel[c]: dog
+SEIArNumObjUpdates: 3
+SEIArObjIdx[c]: 1
+SEIArObjCancelFlag[c]: 0
+SEIArObjLabelUpdateFlag[c]: 0
+SEIArBoundBoxUpdateFlag[c]: 1
+SEIArObjTop[c]: 150
+SEIArObjLeft[c]: 150
+SEIArObjWidth[c]: 50
+SEIArObjHeight[c]: 50
+SEIArObjDetConf[c]: 90
+SEIArObjIdx[c]: 2
+SEIArObjCancelFlag[c]: 0
+SEIArObjLabelUpdateFlag[c]: 0
+SEIArBoundBoxUpdateFlag[c]: 1
+SEIArObjTop[c]: 220
+SEIArObjLeft[c]: 220
+SEIArObjWidth[c]: 80
+SEIArObjHeight[c]: 100
+SEIArObjDetConf[c]: 85
+SEIArObjIdx[c]: 4
+SEIArObjCancelFlag[c]: 0
+SEIArObjLabelUpdateFlag[c]: 1
+SEIArObjectLabelIdc[c]: 2
+SEIArBoundBoxUpdateFlag[c]: 1
+SEIArObjTop[c]: 400
+SEIArObjLeft[c]: 400
+SEIArObjWidth[c]: 30
+SEIArObjHeight[c]: 60
+SEIArObjDetConf[c]: 25
diff --git a/cfg/sei_vui/annotated_regions/anno_reg_3.txt b/cfg/sei_vui/annotated_regions/anno_reg_3.txt
new file mode 100644
index 0000000000000000000000000000000000000000..49236846207aeb2c8c8dc9af53cf1fede5e5fffc
--- /dev/null
+++ b/cfg/sei_vui/annotated_regions/anno_reg_3.txt
@@ -0,0 +1,25 @@
+SEIArCancelFlag: 0
+SEIArNotOptForViewingFlag: 0
+SEIArTrueMotionFlag: 0
+SEIArOccludedObjsFlag: 0
+SEIArPartialObjsFlagPresentFlag: 0
+SEIArObjLabelPresentFlag: 1
+SEIArObjConfInfoPresentFlag: 1
+SEIArObjDetConfLength: 7
+SEIArObjLabelLangPresentFlag: 1
+SEIArLabelLanguage: ENGLISH
+SEIArNumLabelUpdates: 1
+SEIArLabelIdc[c]: 2
+SEIArLabelCancelFlag[c]: 1
+SEIArNumObjUpdates: 2
+SEIArObjIdx[c]: 0
+SEIArObjCancelFlag[c]: 0
+SEIArObjLabelUpdateFlag[c]: 0
+SEIArBoundBoxUpdateFlag[c]: 1
+SEIArObjTop[c]: 30
+SEIArObjLeft[c]: 30
+SEIArObjWidth[c]: 50
+SEIArObjHeight[c]: 50
+SEIArObjDetConf[c]: 90
+SEIArObjIdx[c]: 4
+SEIArObjCancelFlag[c]: 1
diff --git a/cfg/sei_vui/annotated_regions/anno_reg_4.txt b/cfg/sei_vui/annotated_regions/anno_reg_4.txt
new file mode 100644
index 0000000000000000000000000000000000000000..86406d191f1e592cd2a1ff5d00fd0983a1944fad
--- /dev/null
+++ b/cfg/sei_vui/annotated_regions/anno_reg_4.txt
@@ -0,0 +1,23 @@
+SEIArCancelFlag: 0
+SEIArNotOptForViewingFlag: 0
+SEIArTrueMotionFlag: 0
+SEIArOccludedObjsFlag: 0
+SEIArPartialObjsFlagPresentFlag: 0
+SEIArObjLabelPresentFlag: 1
+SEIArObjConfInfoPresentFlag: 1
+SEIArObjDetConfLength: 7
+SEIArObjLabelLangPresentFlag: 1
+SEIArLabelLanguage: ENGLISH
+SEIArNumLabelUpdates: 0
+SEIArNumObjUpdates: 2
+SEIArObjIdx[c]: 1
+SEIArObjCancelFlag[c]: 0
+SEIArObjLabelUpdateFlag[c]: 0
+SEIArBoundBoxUpdateFlag[c]: 1
+SEIArObjTop[c]: 180
+SEIArObjLeft[c]: 180
+SEIArObjWidth[c]: 50
+SEIArObjHeight[c]: 50
+SEIArObjDetConf[c]: 90
+SEIArObjIdx[c]: 3
+SEIArObjCancelFlag[c]: 1
diff --git a/cfg/sei_vui/depth_representation_info.cfg b/cfg/sei_vui/depth_representation_info.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..74d2eac3559566f634f899ec864ab2f0ec873d1e
--- /dev/null
+++ b/cfg/sei_vui/depth_representation_info.cfg
@@ -0,0 +1,14 @@
+#======== Depth Representation Information SEI message =====================
+SEIDRIEnabled                                   : 1
+SEIDRIZNearFlag                                 : 1
+SEIDRIZFarFlag                                  : 1
+SEIDRIDMinFlag                                  : 1
+SEIDRIDMaxFlag                                  : 1
+SEIDRIDepthRepresentationType                   : 3
+SEIDRIDisparityRefViewId                        : 0
+SEIDRIZNear                                     : 448.251214
+SEIDRIZFar                                      : 11206.280350
+SEIDRIDMin                                      : 1.891
+SEIDRIDMax                                      : 16.28
+SEIDRINonlinearNumMinus1                        : 4
+SEIDRINonlinearModel                            : 1  3 4 12 5
\ No newline at end of file
diff --git a/cfg/sei_vui/display_orientation.cfg b/cfg/sei_vui/display_orientation.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..bac5e6ea66a07ffd04a5e19ee723fc6c90374f62
--- /dev/null
+++ b/cfg/sei_vui/display_orientation.cfg
@@ -0,0 +1,6 @@
+#======== Display orientation SEI message =====================
+SEIDisplayOrientationEnabled                           : 1
+SEIDisplayOrientationCancelFlag                        : 0
+SEIDisplayOrientationPersistenceFlag                   : 1
+SEIDisplayOrientationTransformType                     : 0
+
diff --git a/cfg/sei_vui/multiview_acquisition_info.cfg b/cfg/sei_vui/multiview_acquisition_info.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..cfde992c55e6ee557b76d5a4611a1146668e5445
--- /dev/null
+++ b/cfg/sei_vui/multiview_acquisition_info.cfg
@@ -0,0 +1,26 @@
+#======== Multiview Acquisition Information SEI message =====================
+SEIMAIEnabled                         : 1
+SEIMAIIntrinsicParamFlag              : 1
+SEIMAIExtrinsicParamFlag              : 1             # see software manual for list format
+SEIMAINumViewsMinus1                  : 0
+SEIMAIIntrinsicParamsEqualFlag        : 1
+SEIMAIPrecFocalLength                 : 31
+SEIMAIPrecPrincipalPoint              : 31
+SEIMAIPrecSkewFactor                  : 31
+SEIMAISignFocalLengthX                : 0
+SEIMAIExponentFocalLengthX            : 0
+SEIMAIMantissaFocalLengthX            : 0
+SEIMAISignFocalLengthY                : 0
+SEIMAIExponentFocalLengthY            : 0
+SEIMAIMantissaFocalLengthY            : 0
+SEIMAISignPrincipalPointX             : 0
+SEIMAIExponentPrincipalPointX         : 0
+SEIMAIMantissaPrincipalPointX         : 0
+SEIMAISignPrincipalPointY             : 0
+SEIMAIExponentPrincipalPointY         : 0
+SEIMAIMantissaPrincipalPointY         : 0
+SEIMAISignSkewFactor                  : 0
+SEIMAIExponentSkewFactor              : 0
+SEIMAIMantissaSkewFactor              : 0
+SEIMAIPrecRotationParam               : 31
+SEIMAIPrecTranslationParam            : 31
\ No newline at end of file
diff --git a/cfg/sei_vui/multiview_view_position.cfg b/cfg/sei_vui/multiview_view_position.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..bec090b0e794cc531ceaee2209feffb5262034fe
--- /dev/null
+++ b/cfg/sei_vui/multiview_view_position.cfg
@@ -0,0 +1,4 @@
+#======== Multiview View Position SEI message =====================
+SEIMVPEnabled                         : 1
+SEIMVPNumViewsMinus1                  : 0
+SEIMVPViewPosition                    : 0
diff --git a/cfg/sei_vui/scalability_dimension_info.cfg b/cfg/sei_vui/scalability_dimension_info.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..17701479fd6dccc68949b17db8bfbb629cbcf8b9
--- /dev/null
+++ b/cfg/sei_vui/scalability_dimension_info.cfg
@@ -0,0 +1,10 @@
+#======== Scalability Dimension Information SEI message =====================
+SEISDIEnabled                               : 1
+SEISDIMaxLayersMinus1                       : 1
+SEISDIAuxiliaryInfoFlag                     : 1
+SEISDIMultiviewInfoFlag                     : 1
+SEISDIViewIdLenMinus1                       : 3
+SEISDILayerId                               : 0 1
+SEISDIViewIdVal                             : 0 1
+SEISDIAuxId                                 : 0 1
+SEISDINumAssociatedPrimaryLayersMinus1      : 0 1
\ No newline at end of file
diff --git a/doc/jvetdoc.cls b/doc/jvetdoc.cls
index f766488489577b40d4f1401611e89fe51e994dcb..6480d8aaca52f17fe44ed15966c8de42c5c170c9 100644
--- a/doc/jvetdoc.cls
+++ b/doc/jvetdoc.cls
@@ -106,7 +106,7 @@
 	\IfFileExists{logos/iec}{\includegraphics[height=0.74cm]{logos/iec}}{}
 	\\
 	\textbf{Joint Video Experts Team (JVET)}\\[0ex]
-	\textbf{of ITU-T SG16 WP3 and ISO/IEC JTC1/SC29/WG11}
+	\textbf{of ITU-T SG16 WP3 and ISO/IEC JTC1/SC29/WG5}
 		\hfill Document: JVET-\@jvetdocnum\\[0ex]
 	\@jvetmeeting
 
diff --git a/doc/mainpage.h b/doc/mainpage.h
index 69186d6f62bbf41661e8bee5da088368b6f5ef98..51667fa84d62449baa189f296ffdb3f49063195d 100644
--- a/doc/mainpage.h
+++ b/doc/mainpage.h
@@ -24,7 +24,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/doc/software-manual.pdf b/doc/software-manual.pdf
index fdc5c89e55e95d511a0eea1cab4f0a66de2321be..d1ea637e32e7a984f99112f89c9190acb94f0cdd 100644
Binary files a/doc/software-manual.pdf and b/doc/software-manual.pdf differ
diff --git a/doc/software-manual.tex b/doc/software-manual.tex
index 6031fb5c0885ac604bdae0559f9270933a11dc48..43d41e89f50d67fe25f072c5bf9351ffb2769fc8 100644
--- a/doc/software-manual.tex
+++ b/doc/software-manual.tex
@@ -1,4 +1,4 @@
-ï»¿\documentclass[a4paper,11pt]{jvetdoc}
+\documentclass[a4paper,11pt]{jvetdoc}
 
 \usepackage{geometry}[2010/02/12]
 
@@ -205,7 +205,7 @@ fontsize=\footnotesize}
 \maketitle
 \begin{abstract}
 This document is a user manual describing usage of the VTM reference software
-for the VVC project. It applies to version 10.1 of the software.
+for the VVC project. It applies to version 14.0 of the software.
 \end{abstract}
 
 \tableofcontents
@@ -216,7 +216,7 @@ for the VVC project. It applies to version 10.1 of the software.
 Reference software is being made available to provide a reference
 implementation of the HEVC standard being developed by the Joint 
 Video Experts Team (JVET) regrouping experts from
-ITU-T SG 16 and ISO/IEC SC29 WG11. One of the main goals of the
+ITU-T SG 16 and ISO/IEC SC29 WG5. One of the main goals of the
 reference software is to provide a basis upon which to conduct
 experiments in order to determine which coding tools provide desired
 coding performance. It is not meant to be a particularly efficient
@@ -258,7 +258,7 @@ be available in older compilers.
  \thead{Versions} \\
 \hline
 MS Visual Studio  & 2017 and 2019 \\
-GCC               & 5.4, 7.3 and 8.3\\
+GCC               & 7.3, 8.3 and 9.3\\
 Xcode/clang       & latest \\
 \hline
 \end{tabular}
@@ -453,32 +453,32 @@ range is between
 0.3 and 1.
 
 \item[]\textbf{tcOffsetDiv2}: An in-loop deblocking filter parameter for luma component, tcOffsetDiv2 
-is added to the base parameter LoopFilterTcOffset_div2 to set the final tc_offset_div2 
+is added to the base parameter DeblockingFilterTcOffset_div2 to set the final tc_offset_div2 
 parameter for this picture signalled in the slice segment header. The final 
 value of tc_offset_div2 shall be an integer number in the range $-12..12$.
 
 \item[]\textbf{betaOffsetDiv2}: An in-loop deblocking filter parameter for luma component, betaOffsetDiv2 
-is added to the base parameter LoopFilterBetaOffset_div2 to set the final beta_offset_div2 
+is added to the base parameter DeblockingFilterBetaOffset_div2 to set the final beta_offset_div2 
 parameter for this picture signalled in the slice segment header. The final 
 value of beta_offset_div2 shall be an integer number in the range $-12..12$.
 
 \item[]\textbf{CbTcOffsetDiv2}: An in-loop deblocking filter parameter for Cb component, CbTcOffsetDiv2 
-is added to the base parameter LoopFilterCbTcOffset_div2 to set the final tc_offset_div2 
+is added to the base parameter DeblockingFilterCbTcOffset_div2 to set the final tc_offset_div2 
 parameter for this picture signalled in the slice segment header. The final 
 value of tc_offset_div2 shall be an integer number in the range $-12..12$.
 
 \item[]\textbf{CbBetaOffsetDiv2}: An in-loop deblocking filter parameter for Cb component, CbBetaOffsetDiv2 
-is added to the base parameter LoopFilterCbBetaOffset_div2 to set the final beta_offset_div2 
+is added to the base parameter DeblockingFilterCbBetaOffset_div2 to set the final beta_offset_div2 
 parameter for this picture signalled in the slice segment header. The final 
 value of beta_offset_div2 shall be an integer number in the range $-12..12$.
 
 \item[]\textbf{CrTcOffsetDiv2}: An in-loop deblocking filter parameter for Cr component, CrTcOffsetDiv2 
-is added to the base parameter LoopFilterCrTcOffset_div2 to set the final tc_offset_div2 
+is added to the base parameter DeblockingFilterCrTcOffset_div2 to set the final tc_offset_div2 
 parameter for this picture signalled in the slice segment header. The final 
 value of tc_offset_div2 shall be an integer number in the range $-12..12$.
 
 \item[]\textbf{CrBetaOffsetDiv2}: An in-loop deblocking filter parameter for Cr component, CrBetaOffsetDiv2 
-is added to the base parameter LoopFilterCrBetaOffset_div2 to set the final beta_offset_div2 
+is added to the base parameter DeblockingFilterCrBetaOffset_div2 to set the final beta_offset_div2 
 parameter for this picture signalled in the slice segment header. The final 
 value of beta_offset_div2 shall be an integer number in the range $-12..12$.
 
@@ -752,6 +752,12 @@ When 1, the Mean Square Error (MSE) values of each frame will also be output alo
 When 1, the Mean Square Error (MSE) values of the entire sequence will also be output alongside the default PSNR values.
 \\
 
+\Option{PrintWPSNR} &
+%\ShortOption{\None} &
+\Default{false} &
+When 1, weighted PSNR (wPSNR) values of the entire sequence will also be output.
+\\
+
 \Option{SummaryOutFilename} &
 %\ShortOption{\None} &
 \Default{false} &
@@ -778,7 +784,7 @@ When 1, CABAC zero word padding will be enabled. This is currently not the defau
 
 \Option{ConformanceWindowMode} &
 %\ShortOption{\None} &
-\Default{0} &
+\Default{1} &
 Specifies how the parameters related to the conformance window are interpreted (cropping/padding).
 The following modes are available:
 \par
@@ -949,6 +955,16 @@ Strength for every * frame in GOP based temporal filter, where * is an integer.
 enable GOP based temporal filter at every 8th frame with strength 0.95. Longer intervals overrides shorter when there are
 multiple matches.
 \\
+\Option{AlfTrueOrg} &
+%\ShortOption{\None} &
+\Default{true} &
+When GOP based temporal filter is enabled, enable or disable using true original samples for ALF optimization.
+\\
+\Option{SaoTrueOrg} &
+%\ShortOption{\None} &
+\Default{false} &
+When GOP based temporal filter is enabled, enable or disable using true original samples for SAO optimization.
+\\
 \end{OptionTableNoShorthand}
 
 %%
@@ -1737,6 +1753,13 @@ Specifies the DRAP period in frames.
 Dependent RAP indication SEI messages are disabled if DRAPPeriod is 0.
 \\
 
+\Option{EDRAPPeriod} &
+%\ShortOption{\None} &
+\Default{0} &
+Specifies the EDRAP period in frames.
+Extended DRAP indication SEI messages are disabled if EDRAPPeriod is 0.
+\\
+
 \Option{GOPSize (-g)} &
 %\ShortOption{-g} &
 \Default{1} &
@@ -2189,6 +2212,60 @@ For Cb component with BT.2020 container use 1.14; for BT.709 material and 1.04 f
 For Cr component with BT.2020 container use 1.79; for BT.709 material and 1.39 for P3 material.
 \\
 
+\Option{SmoothQPReductionEnable} &
+\Default{0} &
+Enable QP reduction for smooth blocks according to a QP reduction model:
+$Clip3(SmoothQPReductionLimit, 0, SmoothQPReductionModelScale*QP+SmoothQPReductionModelOffset)$.
+The QP reduction model is used when SAD is less than SmoothQPReductionThreshold * number of samples in block. Separate parameters for intra and inter pictures.
+Where SAD is defined as the sum of absolute differences between original luma samples and luma samples predicted by a 2nd order polynomial model.
+The model parameters are determined by a least square fit to original luma samples on a granularity of 64x64 samples.
+\\
+
+\Option{SmoothQPReductionThresholdIntra} &
+\Default{3.0} &
+Threshold parameter for smoothness for intra pictures.
+\\
+
+\Option{SmoothQPReductionModelScaleIntra} &
+\Default{-1.0} &
+Scale parameter of the QP reduction model for intra pictures.
+\\
+
+\Option{SmoothQPReductionModelOffsetIntra} &
+\Default{27.0} &
+Offset parameter of the QP reduction model for intra pictures.
+\\
+
+\Option{SmoothQPReductionLimitIntra} &
+\Default{-16.0} &
+Threshold parameter for controlling amount of QP reduction by the QP reduction model for intra pictures.
+\\
+
+\Option{SmoothQPReductionThresholdInter} &
+\Default{3.0} &
+Threshold parameter for smoothness for inter pictures.
+\\
+
+\Option{SmoothQPReductionModelScaleInter} &
+\Default{-1.0} &
+Scale parameter of the QP reduction model for inter pictures.
+\\
+
+\Option{SmoothQPReductionModelOffsetInter} &
+\Default{27.0} &
+Offset parameter of the QP reduction model for inter pictures.
+\\
+
+\Option{SmoothQPReductionLimitInter} &
+\Default{-16.0} &
+Threshold parameter for controlling amount of QP reduction by the QP reduction model for inter pictures.
+\\
+
+\Option{SmoothQPReductionPeriodicity} &
+\Default{1} &
+Periodicity parameter for application of the QP reduction model. 1: all frames, 0: only intra pictures, 2: every second frame, etc.
+\\
+
 \Option{SliceChromaQPOffsetPeriodicity} &
 \Default{0} &
 Defines the periodicity for inter slices that use the slice-level chroma QP offsets, as defined by SliceCbQpOffsetIntraOrPeriodic and SliceCrQpOffsetIntraOrPeriodic. A value of 0 disables the periodicity. It is intended to be used in low-delay configurations where an regular intra period is not defined.
@@ -2200,11 +2277,11 @@ Defines the periodicity for inter slices that use the slice-level chroma QP offs
 Defines the slice-level QP offset to be used for intra slices, or once every 'SliceChromaQPOffsetPeriodicity' pictures.
 \\
 
-\Option{MaxCuDQPDepth (-dqd)} &
+\Option{MaxCuDQPSubdiv (-dqd)} &
 %\ShortOption{\None} &
 \Default{0} &
-Defines maximum depth of a minimum CuDQP for sub-LCU-level delta QP.
-MaxCuDQPDepth shall be greater than or equal to SliceGranularity.
+Defines maximum CTU subdivision level defining luma Quantization Groups. A quantization group contains at most one luma QP delta (carried by the first coded TU), and all CUs inside a QG share the same luma QP predictor.
+``Subdivision level'' means how many times the number of samples of the CTU is divided by two, e.g. a binary split increases subdiv by 1 and a quad split increases subdiv by 2.
 \\
 
 \Option{RDOQ} &
@@ -2324,10 +2401,26 @@ Specifies whether scaling matrices are disabled to blocks when the colour space
 Indicates if the designated colour space of scaling matrices is equal to the original colour space.
 \\
 
-\Option{MaxCUChromaQpAdjustmentDepth} &
+\Option{MaxCuChromaQpOffsetSubdiv} &
 %\ShortOption{\None} &
-\Default{-1} &
-Specifies the maximum depth for CU chroma QP adjustment; if negative, CU chroma QP adjustment is disabled.
+\Default{0} &
+Specifies the maximum subdiv for CU chroma QP adjustment. Has no effect if CbQpOffsetList, etc. are left empty.
+\\
+
+\Option{SliceCuChromaQpOffsetEnabled} &
+%\ShortOption{\None} &
+\Default{true} &
+Specifies whether CU chroma QP adjustment is enabled at slice level. Has no effect if CbQpOffsetList, etc. are left empty.
+\\
+
+\Option{CbQpOffsetList}%
+\Option{CrQpOffsetList}%
+\Option{CbCrQpOffsetList} &
+%\ShortOption{\None} &
+\Default{\NotSet} &
+Comma-separated value lists specifying the Cb/Cr/CbCr QP offsets for each chroma QP adjustment index. Each list shall be the same length.
+CbCrQpOffsetList may be omitted when CbQpOffsetList and CrQpOffsetList are specified, in which case it is filled with zeros.
+Note that when CbCrQpOffset and CbCrQpOffsetList values are all zero, pps_joint_cbcr_qp_offset_present_flag will be automatically set to zero.
 \\
 
 \end{OptionTableNoShorthand}
@@ -2530,13 +2623,13 @@ Target subpic index for target output layers that containing multiple subpicture
 %% In-loop filtering parameters
 %%
 \begin{OptionTableNoShorthand}{In-loop filtering parameters}{tab:inloop-filter}
-\Option{LoopFilterDisable} &
+\Option{DeblockingFilterDisable} &
 %\ShortOption{\None} &
 \Default{false} &
 Enables or disables the in-loop deblocking filter.
 \\
 
-\Option{LoopFilterOffsetInPPS}&
+\Option{DeblockingFilterOffsetInPPS}&
 %\ShortOption{\None}&
 \Default{false}&
 If enabled, the in-loop deblocking filter control parameters are sent in PPS. 
@@ -2549,42 +2642,42 @@ In this case, the final value of the deblocking filter parameter sent for a cert
 (base parameter + GOP parameter for this picture). Intra-pictures use the base parameters values.
 \\
 
-\Option{LoopFilterTcOffset_div2}&
+\Option{DeblockingFilterTcOffset_div2}&
 %\ShortOption{\None}&
 \Default{0}&
 Specifies the base value for the in-loop deblocking filter parameter tc_offset_div2 for luma component. The final value of tc_offset_div2 
 shall be an integer number in the range $-12..12$.
 \\
 
-\Option{LoopFilterBetaOffset_div2}&
+\Option{DeblockingFilterBetaOffset_div2}&
 %\ShortOption{\None}&
 \Default{0}&
 Specifies the base value for the in-loop deblocking filter parameter beta_offset_div2 for luma component. The final value of beta_offset_div2 
 shall be an integer number in the range $-12..12$.
 \\
 
-\Option{LoopFilterCbTcOffset_div2}&
+\Option{DeblockingFilterCbTcOffset_div2}&
 %\ShortOption{\None}&
 \Default{0}&
 Specifies the base value for the in-loop deblocking filter parameter tc_offset_div2 for Cb component. The final value of tc_offset_div2 
 shall be an integer number in the range $-12..12$.
 \\
 
-\Option{LoopFilterCbBetaOffset_div2}&
+\Option{DeblockingFilterCbBetaOffset_div2}&
 %\ShortOption{\None}&
 \Default{0}&
 Specifies the base value for the in-loop deblocking filter parameter beta_offset_div2 for Cb component. The final value of beta_offset_div2 
 shall be an integer number in the range $-12..12$.
 \\
 
-\Option{LoopFilterCrTcOffset_div2}&
+\Option{DeblockingFilterCrTcOffset_div2}&
 %\ShortOption{\None}&
 \Default{0}&
 Specifies the base value for the in-loop deblocking filter parameter tc_offset_div2 for Cr component. The final value of tc_offset_div2 
 shall be an integer number in the range $-12..12$.
 \\
 
-\Option{LoopFilterCrBetaOffset_div2}&
+\Option{DeblockingFilterCrBetaOffset_div2}&
 %\ShortOption{\None}&
 \Default{0}&
 Specifies the base value for the in-loop deblocking filter parameter beta_offset_div2 for Cr component. The final value of beta_offset_div2 
@@ -2888,10 +2981,28 @@ switched at CTB level. Set to 1 to disable alternative chroma filters.
 Value shall be in the range 1..8.
 \\
 
-\Option{ALFStrength} &
+\Option{ALFStrengthLuma} &
 %\ShortOption{\None} &
 \Default{1.0} &
-Enables control of ALF filter strength. The parameter scales the magnitudes of the ALF filter coefficients for both luma and chroma. Valid values are in the range 0.0 to 1.0. NOTE: Refinement of quantized filter coefficents is not used when ALFStrength is different from 1.0. To ensure reduced filter strength the parameter ALFAllowPredefinedFilters should also be set to false.  
+Enables control of ALF filter strength for luma. The parameter scales the magnitudes of the ALF filter coefficients for luma. Valid values are in the range 0.0 to 1.0. NOTE: Refinement of quantized filter coefficients is not used when ALFStrengthLuma is different from 1.0. To ensure reduced filter strength the parameter ALFAllowPredefinedFilters should also be set to false.
+\\
+
+\Option{ALFStrengthChroma} &
+%\ShortOption{\None} &
+\Default{1.0} &
+Enables control of ALF filter strength for chroma. The parameter scales the magnitudes of the ALF filter coefficients for chroma. Valid values are in the range 0.0 to 1.0.
+\\
+
+\Option{ALFStrengthTargetLuma} &
+%\ShortOption{\None} &
+\Default{1.0} &
+Enables control of ALF filter strength target for luma filter optimization. The parameter scales the auto-correlation matrix E and the cross-correlation vector y for luma. Valid values are in the range 0.0 to 1.0.
+\\
+
+\Option{ALFStrengthTargetChroma} &
+%\ShortOption{\None} &
+\Default{1.0} &
+Enables control of ALF filter strength target for chroma filter optimization. The parameter scales the auto-correlation matrix E and the cross-correlation vector y for chroma. Valid values are in the range 0.0 to 1.0.
 \\
 
 \Option{ALFAllowPredefinedFilters} &
@@ -2918,6 +3029,12 @@ QP threshold above which the encoder reduces cross-component ALF usage.
 Enables control of CCALF filter strength. The parameter scales the magnitudes of the CCALF filter coefficients. Valid values are in the range 0.0 to 1.0. NOTE: Refinement of quantized filter coefficents is not used when CCALFStrength is different from 1.0.
 \\
 
+\Option{CCALFStrengthTarget} &
+%\ShortOption{\None} &
+\Default{1.0} &
+Enables control of CCALF filter strength target in filter optimization. The parameter scales the auto-correlation matrix E and the cross-correlation vector y for CCALF. Valid values are in the range 0.0 to 1.0.
+\\
+
 \Option{SMVD} &
 %\ShortOption{\None} &
 \Default{false} &
@@ -3130,6 +3247,43 @@ RCInitialCpbFullness should be smaller than or equal to 1.
 \\
 \end{OptionTableNoShorthand}
 
+%%
+%% GDR parameters
+%%
+\begin{OptionTableNoShorthand}{GDR parameters}{tab:gdr}
+
+
+\Option{GdrEnabled} &
+%\ShortOption{\None} &
+\Default{false} &
+Enables or disables the use of GDR (Gradual Decoding Refresh)
+\\
+
+\Option{GdrPocStart} &
+%\ShortOption{\None} &
+\Default{-1} &
+Specifies poc number of first GDR
+\\
+
+\Option{GdrPeriod} &
+%\ShortOption{\None} &
+\Default{-1} &
+Specifies number of frames between GDR picture to the next GDR picture
+\\
+
+\Option{GdrInterval} &
+%\ShortOption{\None} &
+\Default{-1} &
+Specifies number of frames from GDR picture to the recovery point picture (note: ph_recovery_poc_cnt will be (GdrInterval - 1))
+\\
+
+\Option{GdrNoHash} &
+%\ShortOption{\None} &
+\Default{true} &
+Specifies not to generate picture hash SEI for GDR/recovering pictures 
+\\
+
+\end{OptionTableNoShorthand}
 
 
 %%
@@ -3302,6 +3456,11 @@ Specifies the use of extended_precision_processing flag. Note that unless the HI
 This setting is only valid for the 16-bit RExt profiles.
 \\
 
+\Option{TSRCRicePresent} &
+\Default{false} &
+When true, specifies that the extension of the Golomb-Rice parameter derivation for TSRC is used. Version 1 profiles require this to be false and some Version 2 (RExt) profiles may require this to be true.
+\\
+
 \Option{HighPrecisionPredictionWeighting} &
 \Default{false} &
 Specifies the value of high_precision_prediction_weighting_flag. This setting is only valid for the 16-bit or 4:4:4 RExt profiles.
@@ -3328,11 +3487,21 @@ When true, specifies the use of the residual rotation tool. Version 1 and some V
 When true, specifies the use of a single significance map context for transform-skipped and transquant-bypassed TUs. Version 1 and some Version 2 (RExt) profiles require this to be false.
 \\
 
+\Option{ExtendedRiceRRC} &
+\Default{false} &
+When true, specifies that the extension of the Golomb-Rice parameter derivation for RRC is used. Version 1 profiles require this to be false and some Version 2 (RExt) profiles may require this to be true.
+\\
+
 \Option{GolombRiceParameterAdaptation} &
 \Default{false} &
 When true, enable the adaptation of the Golomb-Rice parameter over the course of each slice. Version 1 and some Version 2 (RExt) profiles require this to be false.
 \\
 
+\Option{ReverseLastSigCoeff} &
+\Default{false} &
+When true, enable reverse last significant coefficient position in RRC. Version 1 and some Version 2 (RExt) profiles require this to be false.
+\\
+
 \Option{AlignCABACBeforeBypass} &
 \Default{false} &
 When true, align the CABAC engine to a defined fraction of a bit prior to coding bypass data (including sign bits) when coeff_abs_level_remaining syntax elements are present in the group.
@@ -3381,7 +3550,7 @@ The table below lists the SEI messages defined for Version 1 and Range-Extension
   139 & Temporal motion-constrained tile sets    & Table \ref{tab:sei-tmcts} \\
   140 & Chroma resampling filter hint            & Table \ref{tab:chroma-resampling-filter-hint} \\
   141 & Knee function information                & Table \ref{tab:sei-knee-function} \\
-  142 & Colour remapping information             & Table \ref{tab:sei-colour-remapping}\\
+  142 & Colour transform information             & Table \ref{tab:sei-colour-transform}\\
   143 & Deinterlaced field identification        & (Not handled)\\
   144 & Content light level info                 & Table \ref{tab:sei-content-light-level}\\
   147 & Alternative transfer characteristics     & Table \ref{tab:sei-alternative-transfer-characteristics}\\
@@ -3392,9 +3561,16 @@ The table below lists the SEI messages defined for Version 1 and Range-Extension
   154 & Sphere rotation                          & Table \ref{tab:sei-sphere-rotation} \\
   155 & Region-wise packing                      & Table \ref{tab:sei-rwp} \\
   156 & Omni viewport                            & Table \ref{tab:sei-omni-viewport} \\  
+  165 & Alpha Channel Information                & Table \ref{tab:sei-aci} \\
   168 & Frame-field information                  & Table \ref{tab:sei-frame-field} \\  
+  177 & Depth Representation Information         & Table \ref{tab:sei-dri} \\
+  179 & Multiview Acquisition Information        & Table \ref{tab:sei-mai} \\
+  180 & Multiview View Position                  & Table \ref{tab:sei-mvp} \\
+  202 & Annotated regions information            & Table \ref{tab:sei-annotated-regions} \\  
   203 & Subpicture Level Information             & Table \ref{tab:sei-subpic-level} \\  
   204 & Sample Aspect Ratio Information          & Table \ref{tab:sei-sari} \\  
+  205 & Scalability Dimension Information        & Table \ref{tab:sei-sdi} \\
+  207 & Constrained RASL encoding                & Table \ref{tab:sei-constrained-rasl-encoding} \\
 \end{SEIListTable}
 %%
 %% SEI messages
@@ -3705,17 +3881,21 @@ disabled.
 
 
 \begin{OptionTableNoShorthand}{Display orientation SEI message encoder parameters}{tab:sei-display-orientation}
-\Option{SEIDisplayOrientation} &
+\Option{SEIDisplayOrientationEnabled} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of the Display orientation SEI messages.
+\\
+\Option{SEIDisplayOrientationCancelFlag} &
+\Default{true} &
+Indicates whether the display orientation SEI message cancels the persistence of any previous display orientation SEI message (true) or display orientation information follows (false).
+\\
+\Option{SEIDisplayOrientationPersistenceFlag} &
+\Default{false} &
+Specifies the persistence of the display orientation SEI message.
+\\
+\Option{SEIDisplayOrientationTransformType} &
 \Default{0} &
-Enables or disables the insertion of the Display orientation
-SEI messages.
-\par
-\begin{tabular}{cp{0.20\textwidth}}
-  0 & Disabled \\
-  N: $0 < N < (2^{16} - 1)$ & Enable display orientation SEI message with 
-  \mbox{anticlockwise_rotation = N} 
-  and \mbox{display_orientation_repetition_period = 1} \\
-\end{tabular}
+Specifies the rotation and mirroring to be applied to the picture.
 \\
 \end{OptionTableNoShorthand}
 
@@ -4057,12 +4237,63 @@ Array of output knee point. Default table can be set to the following:
 \end{OptionTableNoShorthand}
 
 
-\begin{OptionTableNoShorthand}{Colour remapping SEI message encoder parameters}{tab:sei-colour-remapping}
-\Option{SEIColourRemappingInfoFileRoot (-cri)} &
-\Default{\NotSet} &
-Specifies the prefix of input Colour Remapping Information file. Prefix is completed by ``_x.txt'' where x is the  POC number.
-The contents of the file are a list of the SEI message's syntax element names (in decoding order) immediately followed by a `:' and then the associated value.
-An example file can be found in cfg/misc/example_colour_remapping_sei_encoder_0.txt.
+\begin{OptionTableNoShorthand}{Colour transform information SEI message encoder parameters}{tab:sei-colour-transform}
+\Option{SEICTIEnabled} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of colour transform information (CTI) SEI message.
+Example configuration files for CTI can be found in folder cfg/examples_SEI_CTI.
+\\
+\Option{SEICTIId} &
+\Default{0} &
+Specifies the ID of the CTI SEI message.
+\\
+\Option{SEICTISignalInfoFlag} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of output signal information after applying the colour transform.
+\\
+\Option{SEICTIFullRangeFlag} &
+\Default{false} &
+Specifies the range (true:full, false:limited) of the output signal after applying the colour transform.
+\\
+\Option{SEICTIPrimaries} &
+\Default{0} &
+Specifies the colour primaries of the output signal after applying the colour transform.
+\\
+\Option{SEICTITransferFunction} &
+\Default{0} &
+Specifies the transfer function (characteristics) of the output signal after applying the colour transform.
+\\
+\Option{SEICTIMatrixCoefs} &
+\Default{0} &
+Specifies the matrix coefficients type of the output signal after applying the colour transform.
+\\
+\Option{SEICTICrossCompFlag} &
+\Default{true} &
+Enables (true) or disables (false) the cross-component scaling for applying the colour transform.
+\\
+\Option{SEICTICrossCompInferred} &
+\Default{true} &
+Infers (true) or signals (false) the cross-component scaling tables for the colour transform.
+\\
+\Option{SEICTINbChromaLut} &
+\Default{0} &
+Specifies the number of chroma tables (1 or 2) for the colour transform (only used when SEICTICrossCompInferred = false).
+\\
+\Option{SEICTILut0} &
+\Default{0} &
+Specifies the transform table for colour component 0.
+\\
+\Option{SEICTILut1} &
+\Default{0} &
+Specifies the transform table for colour component 1 (only used when SEICTICrossCompFlag = false).
+\\
+\Option{SEICTILut2} &
+\Default{0} &
+Specifies the transform table for colour component 2 (only used when SEICTINbChromaLut = 2).
+\\
+\Option{SEICTIChromaOffset} &
+\Default{0} &
+Specifies the offset to be added to the values of the cross-component scaling tables (only used when SEICTICrossCompInferred = false).
 \\
 \end{OptionTableNoShorthand}
 
@@ -4402,6 +4633,257 @@ Specifies the vertical size of the sample aspect ratio, if SEISARIAspectRatioIdc
 \\
 \end{OptionTableNoShorthand}
 
+\begin{OptionTableNoShorthand}{Scalability Dimension Information SEI message encoder parameters}{tab:sei-sdi}
+\Option{SEISDIEnabled} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of Scalability Dimension Information SEI message.
+\\
+\Option{SEISDIMaxLayersMinus1} &
+\Default{0} &
+Specifies the maximum number of layers minus 1 in the current CVS.
+\\
+\Option{SEISDIMultiviewInfoFlag} &
+\Default{false} &
+Specifies that the current CVS may have multiple views and the sdi_view_id_val[ ] syntax elements are present in the scalability dimension information SEI message.
+\\
+\Option{SEISDIAuxiliaryInfoFlag} &
+\Default{false} &
+Specifies that one or more layers in the current CVS may be auxiliary layers, which carry auxiliary information, and the sdi_aux_id[ ] syntax elements are present in the scalability dimension information SEI message.
+\\
+\Option{SEISDIViewIdLenMinus1} &
+\Default{0} &
+Specifies the length, in bits, of the sdi_view_id_val[ i ] syntax element minus 1 in the scalability dimension information SEI message.
+\\
+\Option{SEISDILayerId} &
+\Default{""} &
+List of the layer identifiers that may be present in the scalability dimension information SEI message in the current CVS.
+\\
+\Option{SEISDIViewIdVal} &
+\Default{""} &
+List of the view identifiers in the scalability dimension information SEI message.
+\\
+\Option{SEISDIAuxId} &
+\Default{""} &
+List of the auxiliary identifiers in the scalability dimension information SEI message.
+\\
+\Option{SEISDINumAssociatedPrimaryLayersMinus1} &
+\Default{""} &
+List of the numbers of associated primary layers of i-th layer, which is an auxiliary layer.
+\\
+\end{OptionTableNoShorthand}
+
+\begin{OptionTableNoShorthand}{Alpha Channel Information SEI message encoder parameters}{tab:sei-aci}
+\Option{SEIACIEnabled} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of Alpha Channel Information SEI message.
+\\
+\Option{SEIACICancelFlag} &
+\Default{false} &
+Specifies the persistence of any previous alpha channel information SEI message in output order.
+\\
+\Option{SEIACIUseIdc} &
+\Default{0} &
+Specifies the usage of the auxiliary picture in the alpha channel information SEI message.
+\\
+\Option{SEIACIBitDepthMinus8} &
+\Default{0} &
+Specifies the bit depth of the samples of the auxiliary picture in the alpha channel information SEI message.
+\\
+\Option{SEIACITransparentValue} &
+\Default{0} &
+Specifies the interpretation sample value of an auxiliary coded picture luma sample for which the associated luma and chroma samples of the primary coded picture are considered transparent for purposes of alpha blending in the alpha channel information SEI message.
+\\
+\Option{SEIACIOpaqueValue} &
+\Default{0} &
+Specifies the interpretation sample value of an auxiliary coded picture luma sample for which the associated luma and chroma samples of the primary coded picture are considered opaque for purposes of alpha blending in the alpha channel information SEI message.
+\\
+\Option{SEIACIIncrFlag} &
+\Default{false} &
+Specifies the interpretation sample value for each decoded auxiliary picture luma sample value is equal to the decoded auxiliary picture sample value for purposes of alpha blending in the alpha channel information SEI message.
+\\
+\Option{SEIACIClipFlag} &
+\Default{false} &
+Specifies whether clipping operation is applied in the alpha channel information SEI message.
+\\
+\Option{SEIACIClipTypeFlag} &
+\Default{false} &
+Specifies the type of clipping operation in the alpha channel information SEI message.
+\\
+\end{OptionTableNoShorthand}
+
+\begin{OptionTableNoShorthand}{Depth Representation Information SEI message encoder parameters}{tab:sei-dri}
+\Option{SEIDRIEnabled} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of Depth Representation Information SEI message.
+\\
+\Option{SEIDRIZNearFlag} &
+\Default{false} &
+Specifies the presence of the nearest depth value in the depth representation information SEI message.
+\\
+\Option{SEIDRIZFarFlag} &
+\Default{false} &
+Specifies the presence of the farthest depth value in the depth representation information SEI message.
+\\
+\Option{SEIDRIDMinFlag} &
+\Default{false} &
+Specifies the presence of the minimum disparity value in the depth representation information SEI message.
+\\
+\Option{SEIDRIDMaxFlag} &
+\Default{false} &
+Specifies the presence of the maximum disparity value in the depth representation information SEI message.
+\\
+\Option{SEIDRIZNear} &
+\Default{0.0} &
+Specifies the nearest depth value in the depth representation information SEI message.
+\\
+\Option{SEIDRIZFar} &
+\Default{0.0} &
+Specifies the farthest depth value in the depth representation information SEI message.
+\\
+\Option{SEIDRIDMin} &
+\Default{0.0} &
+Specifies the minimum disparity value in the depth representation information SEI message.
+\\
+\Option{SEIDRIDMax} &
+\Default{0.0} &
+Specifies the maximum disparity value in the depth representation information SEI message.
+\\
+\Option{SEIDRIDepthRepresentationType} &
+\Default{0} &
+Specifies the representation definition of decoded luma samples of auxiliary pictures in the depth representation information SEI message.
+\\
+\Option{SEIDRIDisparityRefViewId} &
+\Default{0} &
+Specifies the ViewId value against which the disparity values are derived in the depth representation information SEI message.
+\\
+\Option{SEIDRINonlinearNumMinus1} &
+\Default{0} &
+Specifies the number of piece-wise linear segments minus 2 for mapping of depth values to a scale that is uniformly quantized in terms of disparity  in the depth representation information SEI message.
+\\
+\Option{SEIDRINonlinearModel} &
+\Default{""} &
+List of the piece-wise linear segments for mapping of decoded luma sample values of an auxiliary picture to a scale that is uniformly quantized in terms of disparity in the depth representation information SEI message.
+\\
+\end{OptionTableNoShorthand}
+
+\begin{OptionTableNoShorthand}{Multiview Acquisition Information SEI message encoder parameters}{tab:sei-mai}
+\Option{SEIMAIEnabled} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of Multiview Acquisition Information SEI message.
+\\
+\Option{SEIMAIIntrinsicParamFlag} &
+\Default{false} &
+Specifies the presence of intrinsic camera parameters in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIExtrinsicParamFlag} &
+\Default{false} &
+Specifies the presence of extrinsic camera parameters in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAINumViewsMinus1} &
+\Default{0} &
+Specifies the number of views minus 1 in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIIntrinsicParamsEqualFlag} &
+\Default{false} &
+Specifies the intrinsic camera parameters are equal for all cameras in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIPrecFocalLength} &
+\Default{0} &
+Specifies the exponent of the maximum allowable truncation error for focal_length_x[i] and focal_length_y[i] in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIPrecPrincipalPoint} &
+\Default{0} &
+Specifies the exponent of the maximum allowable truncation error for principal_point_x[i] and principal_point_y[i] in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIPrecSkewFactor} &
+\Default{0} &
+Specifies the exponent of the maximum allowable truncation error for skew factor in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAISignFocalLengthX} &
+\Default{""} &
+List of the signs of the focal length of the camera in the horizontal direction in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIExponentFocalLengthX} &
+\Default{""} &
+List of the exponent parts of the focal length of the camera in the horizontal direction in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIMantissaFocalLengthX} &
+\Default{""} &
+List of the mantissa parts of the focal length of the camera in the horizontal direction in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAISignFocalLengthY} &
+\Default{""} &
+List of the signs of the focal length of the camera in the vertical direction in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIExponentFocalLengthY} &
+\Default{""} &
+List of the exponent parts of the focal length of the camera in the vertical direction in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIMantissaFocalLengthY} &
+\Default{""} &
+List of the mantissa parts of the focal length of the camera in the vertical direction in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAISignPrincipalPointX} &
+\Default{""} &
+List of the signs of the principal point of the camera in the horizontal direction in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIExponentPrincipalPointX} &
+\Default{""} &
+List of the exponent parts of the principal point of the camera in the horizontal direction in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIMantissaPrincipalPointX} &
+\Default{""} &
+List of the mantissa parts of the principal point of the camera in the horizontal direction in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAISignPrincipalPointY} &
+\Default{""} &
+List of the signs of the principal point of the camera in the vertical direction in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIExponentPrincipalPointY} &
+\Default{""} &
+List of the exponent parts of the principal point of the camera in the vertical direction in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIMantissaPrincipalPointY} &
+\Default{""} &
+List of the mantissa parts of the principal point of the camera in the vertical direction in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAISignSkewFactor} &
+\Default{""} &
+List of the signs of the skew factor of the camera in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIExponentSkewFactor} &
+\Default{""} &
+List of the exponent parts of the skew factor of the camera in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIMantissaSkewFactor} &
+\Default{""} &
+List of the mantissa parts of the skew factor of the camera in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIPrecRotationParam} &
+\Default{0} &
+Specifies the exponent of the maximum allowable truncation error for rotation in the multiview acquisition information SEI message.
+\\
+\Option{SEIMAIPrecTranslationParam} &
+\Default{0} &
+Specifies the exponent of the maximum allowable truncation error for translation in the multiview acquisition information SEI message.
+\\
+\end{OptionTableNoShorthand}
+
+\begin{OptionTableNoShorthand}{Multiview View Position SEI message encoder parameters}{tab:sei-mvp}
+\Option{SEIMVPEnabled} &
+\Default{false} &
+Enables (true) or disables (false) the insertion of Multiview View Position SEI message.
+\\
+\Option{SEIMVPNumViewsMinus1} &
+\Default{0} &
+Specifies the number of views minus 1 in the multiview view position SEI message.
+\\
+\Option{SEIMVPViewPosition} &
+\Default{""} &
+List of the view position in the multiview view position SEI message.
+\\
+\end{OptionTableNoShorthand}
+
 \begin{OptionTableNoShorthand}{Frame-Field Information SEI message encoder parameters}{tab:sei-frame-field}
 \Option{SEIFrameFieldInfo} &
 \Default{false} &
@@ -4409,6 +4891,15 @@ Enables (true) or disables (false) the insertion of Frame-Field Information SEI
 \\
 \end{OptionTableNoShorthand}
 
+\begin{OptionTableNoShorthand}{Annotated Regions SEI message encoder parameters}{tab:sei-annotated-regions}
+\Option{SEIAnnotatedRegionsFileRoot (-cri)} &
+\Default{\NotSet} &
+Specifies the prefix of the input Annotated Regions file. The prefix is completed by ``_x.txt'' where x is the POC number.
+The contents of the file are a list of the SEI message's syntax element names (in decoding order) immediately followed by a `:' and then the associated value.
+An example file can be found in cfg/sei_vui/annotated_regions/anno_reg_0.txt.
+\\
+\end{OptionTableNoShorthand}
+
 \begin{OptionTableNoShorthand}{Subpicture Level Information SEI message encoder parameters}{tab:sei-subpic-level}
 \Option{SEISubpictLevelInfoEnabled} &
 \Default{false} &
@@ -4439,7 +4930,7 @@ Enable signalling of level information for each sublayer
 \\
 \Option{SEISubpicLevelInfoNonSubpicLayersFractions} &
 \Default{""} &
-List of fraction of levels to be signalled for non-subpicture layers.
+List of fractions of levels to be signalled for non-subpicture layers. Each value in the list shall be in the range 0 to 255.
 \par
 \begin{tabular}{p{0.49\columnwidth}}
 When sli_sublayer_info_present_flag = 0, the number of input elements shall be equal to numReflevels. List is ordered by level.\\
@@ -4457,7 +4948,7 @@ When sli_sublayer_info_present_flag = 1, the number of input elements shall be e
 \\
 \Option{SEISubpicLevelInfoRefLevelFractions} &
 \Default{""} &
-List of fractions of levels to be signalled.
+List of fractions of levels to be signalled. Each value in the list shall be in the range 0 to 255.
 \par
 \begin{tabular}{p{0.49\columnwidth}}
 When sli_sublayer_info_present_flag = 0, the number of input elements shall be equal to numSubpics * numReflevels. List is ordered by subpicture then level.\\
@@ -4582,6 +5073,13 @@ specifies the CCV avg luminance value in the content colour volume SEI message.
 \end{OptionTableNoShorthand}
 
 
+\begin{OptionTableNoShorthand}{Constrained RASL encoding for bitstream switching}{tab:sei-constrained-rasl-encoding}
+\Option{SEIConstrainedRASL} &
+\Default{false} &
+When true (non-zero), the SEI enables several restrictions for encoding RASL frames: CCLM estimation is skipped in intra search, TMVP is disabled and PH syntax ph_dmvr_disabled_flag is set to 1.
+\\
+\end{OptionTableNoShorthand}
+
 
 
 %\Option{SEITimeCode} &
@@ -4774,13 +5272,20 @@ has the following behaviour:
 When a non-empty file name is specified, information regarding any decoded SEI messages will be output to the indicated file. If the file name is '-', then stdout is used instead.
 \\
 
-\Option{SEIColourRemappingInfoFilename} &
+\Option{SEICTIFilename} &
 %\ShortOption{\None} &
 \Default{\NotSet} &
-Specifies that the colour remapping SEI message should be applied to the output video, with the output written to this file.
+Specifies that the colour transform information (CTI) SEI message should be applied to the output video, with the output written to this file.
 If no value is specified, the SEI message is ignored and no mapping is applied.
 \\
 
+\Option{SEIAnnotatedRegionsInfoFilename} &
+%\ShortOption{\None} &
+\Default{\NotSet} &
+When a non-empty file name is specified, object information using the decoded SEI messages will be output to the indicated file.
+If no value is specified, the SEI message will not be output.
+\\
+
 \Option{OutputColourSpaceConvert} &
 \Default{\NotSet} &
 Specifies the colour space conversion to apply to 444 video. Permitted values are:
@@ -5047,10 +5552,19 @@ DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL,
 \label{sec:stream-merge-tool}
 
 The StreamMergeApp tool takes multiple single-layer (singe nuh_layer_id) bistreams 
-as inputs and merge them into a multi-layer bistream by interleaving the NALUs 
+as inputs and merges them into a multi-layer bitstream by interleaving the Picture Units
 from the input single layer bistreams. During the merge, the tool assigns a new unique
-nuh_layer_id for each input bitstream. Then the decoder could specify which layer 
-bitstream to be decoded through the command line option "-p nuh_layer_id". 
+nuh_layer_id for each input bitstream as well as unique parameter set identifiers for each layer.
+The decoder can then specify which layer bitstream is to be decoded through the command line option "-p nuh_layer_id".
+
+Some current limitations of the tool:
+\begin{itemize}
+\item All input bitstreams are single layer and thus all layers in the output bitstream are independent layers.
+\item Each layer in the output bitstream is arbitrarily put in an individual OLS and is also an output layer.
+\item All parameter sets from the input bitstreams are treated as different parameter sets. There is thus no parameter set sharing in the output bitstream.
+\item The slice header in the input bitstreams shall contain no picture header structure and no ALF information.
+\end{itemize}
+
 
 \subsection{Usage}
 \label{sec:stream-merge-usage}
diff --git a/source/App/BitstreamExtractorApp/BitstreamExtractorApp.cpp b/source/App/BitstreamExtractorApp/BitstreamExtractorApp.cpp
index 15009db24e9c9fe261458cd3d85f0053b05f4856..b4bd86f8ab0380e630d83337db4e6cee50a31e4c 100644
--- a/source/App/BitstreamExtractorApp/BitstreamExtractorApp.cpp
+++ b/source/App/BitstreamExtractorApp/BitstreamExtractorApp.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -116,7 +116,7 @@ Slice BitstreamExtractorApp::xParseSliceHeader(InputNALUnit &nalu)
   slice.setTLayer(nalu.m_temporalId);
 
   m_hlSynaxReader.parseSliceHeader(&slice, &m_picHeader, &m_parameterSetManager, m_prevTid0Poc, m_prevPicPOC);
-  
+
   return slice;
 }
 
@@ -172,6 +172,30 @@ bool BitstreamExtractorApp::xCheckSliceSubpicture(InputNALUnit &nalu, int target
 }
 #endif
 
+#if JVET_S0154_ASPECT9_AND_S0158_ASPECT4
+bool BitstreamExtractorApp::xCheckSEIFiller(SEIMessages SEIs, int targetSubPicId, bool &rmAllFillerInSubpicExt, bool lastSliceWritten)
+{
+  for (auto sei : SEIs)
+  {
+    if (sei->payloadType() == SEI::SUBPICTURE_LEVEL_INFO)
+    {
+      SEISubpicureLevelInfo *seiSLI = (SEISubpicureLevelInfo *)sei;
+      if (!seiSLI->m_cbrConstraintFlag)
+      {
+        rmAllFillerInSubpicExt = true;
+      }
+    }
+  }
+  for (auto sei : SEIs)
+  {
+    if (sei->payloadType() == SEI::FILLER_PAYLOAD)
+    {
+      return (rmAllFillerInSubpicExt ? false : lastSliceWritten);
+    }
+  }
+  return true;
+}
+#else
 bool BitstreamExtractorApp::xCheckSeiSubpicture(SEIMessages SEIs, int targetSubPicId, bool &rmAllFillerInSubpicExt, bool lastSliceWritten, bool isVclNalUnitRemoved)
 {
   bool isWriteSeiNalUnitToStream = true;
@@ -242,6 +266,7 @@ bool BitstreamExtractorApp::xCheckSeiSubpicture(SEIMessages SEIs, int targetSubP
 
   return isWriteSeiNalUnitToStream;
 }
+#endif
 
 void BitstreamExtractorApp::xRewriteSPS (SPS &targetSPS, const SPS &sourceSPS, SubPic &subPic)
 {
@@ -267,6 +292,47 @@ void BitstreamExtractorApp::xRewriteSPS (SPS &targetSPS, const SPS &sourceSPS, S
   conf.setWindowRightOffset(subpicConfWinRightOffset);
   conf.setWindowTopOffset(subpicConfWinTopOffset);
   conf.setWindowBottomOffset(subpicConfWinBottomOffset);
+
+#if JVET_S0117_VB  // Virtual boundaries rewriting
+  if (sourceSPS.getVirtualBoundariesEnabledFlag() && sourceSPS.getVirtualBoundariesPresentFlag())
+  { 
+    targetSPS.setNumVerVirtualBoundaries(0);
+    for (int i = 0; i < sourceSPS.getNumVerVirtualBoundaries() ; i ++)
+    {
+      int subPicLeftX = subPic.getSubPicCtuTopLeftX() * sourceSPS.getCTUSize();
+      int subPicRightX = (subPic.getSubPicCtuTopLeftX() + subPic.getSubPicWidthInCTUs()) * sourceSPS.getCTUSize();
+      if (subPicRightX > sourceSPS.getMaxPicWidthInLumaSamples())
+      {
+        subPicRightX = sourceSPS.getMaxPicWidthInLumaSamples();
+      }
+      if ( sourceSPS.getVirtualBoundariesPosX(i) > subPicLeftX && sourceSPS.getVirtualBoundariesPosX(i) < subPicRightX)
+      {
+        targetSPS.setVirtualBoundariesPosX(targetSPS.getNumVerVirtualBoundaries(), sourceSPS.getVirtualBoundariesPosX(i) - subPicLeftX);
+        targetSPS.setNumVerVirtualBoundaries(targetSPS.getNumVerVirtualBoundaries() + 1);
+      }
+    }
+
+    targetSPS.setNumHorVirtualBoundaries(0);
+    for (int i = 0; i < sourceSPS.getNumHorVirtualBoundaries(); i++)
+    {
+      int subPicTopY = subPic.getSubPicCtuTopLeftY() * sourceSPS.getCTUSize();
+      int subPicBottomY = (subPic.getSubPicCtuTopLeftY() + subPic.getSubPicHeightInCTUs()) * sourceSPS.getCTUSize();
+      if (subPicBottomY > sourceSPS.getMaxPicHeightInLumaSamples())
+      {
+        subPicBottomY = sourceSPS.getMaxPicHeightInLumaSamples();
+      }
+      if (sourceSPS.getVirtualBoundariesPosY(i) > subPicTopY && sourceSPS.getVirtualBoundariesPosY(i) < subPicBottomY)
+      {
+        targetSPS.setVirtualBoundariesPosY(targetSPS.getNumHorVirtualBoundaries(), sourceSPS.getVirtualBoundariesPosY(i) - subPicTopY);
+        targetSPS.setNumHorVirtualBoundaries(targetSPS.getNumHorVirtualBoundaries() + 1);
+      }
+    }
+    if (targetSPS.getNumVerVirtualBoundaries() == 0 && targetSPS.getNumHorVirtualBoundaries() == 0)
+    {
+      targetSPS.setVirtualBoundariesEnabledFlag(0);
+    }
+  }
+#endif
 }
 
 void BitstreamExtractorApp::xRewritePPS(PPS &targetPPS, const PPS &sourcePPS, const SPS &sourceSPS, SubPic &subPic)
@@ -282,7 +348,7 @@ void BitstreamExtractorApp::xRewritePPS(PPS &targetPPS, const PPS &sourcePPS, co
   // picture size
   targetPPS.setPicWidthInLumaSamples(subPic.getSubPicWidthInLumaSample());
   targetPPS.setPicHeightInLumaSamples(subPic.getSubPicHeightInLumaSample());
-  // todo: Conformance window
+  // todo: Conformance window (conf window rewriting is not needed per JVET-S0117)
 
   int subWidthC = SPS::getWinUnitX(sourceSPS.getChromaFormatIdc());
   int subHeightC = SPS::getWinUnitY(sourceSPS.getChromaFormatIdc());
@@ -479,7 +545,11 @@ bool BitstreamExtractorApp::xCheckNumSubLayers(InputNALUnit &nalu, VPS *vps)
   return retval;
 }
 
+#if JVET_S0154_ASPECT9_AND_S0158_ASPECT4
+bool BitstreamExtractorApp::xCheckSEIsSubPicture(SEIMessages& SEIs, InputNALUnit& nalu, std::ostream& out, int subpicId, VPS *vps)
+#else
 bool BitstreamExtractorApp::xCheckSEIsSubPicture(SEIMessages& SEIs, InputNALUnit& nalu, std::ostream& out, int subpicId)
+#endif
 {
   SEIMessages scalableNestingSEIs = getSeisByType(SEIs, SEI::SCALABLE_NESTING);
   if (scalableNestingSEIs.size())
@@ -496,12 +566,20 @@ bool BitstreamExtractorApp::xCheckSEIsSubPicture(SEIMessages& SEIs, InputNALUnit
     }
     if (std::find(sei->m_snSubpicId.begin(), sei->m_snSubpicId.end(), subpicId) != sei->m_snSubpicId.end())
     {
-      // applies to target subpicture -> extract
-      OutputNALUnit outNalu( nalu.m_nalUnitType, nalu.m_nuhLayerId, nalu.m_temporalId );
-      m_seiWriter.writeSEImessages(outNalu.m_Bitstream, sei->m_nestedSEIs, m_hrd, false, nalu.m_temporalId);
-      NALUnitEBSP naluWithHeader(outNalu);
-      writeAnnexBNalUnit(out, naluWithHeader, true);
-      return false;
+#if JVET_S0154_ASPECT9_AND_S0158_ASPECT4
+      // C.7 step 7.c
+      if (sei->m_snOlsFlag || vps->getNumLayersInOls(m_targetOlsIdx) == 1)
+      {
+#endif
+        // applies to target subpicture -> extract
+        OutputNALUnit outNalu( nalu.m_nalUnitType, nalu.m_nuhLayerId, nalu.m_temporalId );
+        m_seiWriter.writeSEImessages(outNalu.m_Bitstream, sei->m_nestedSEIs, m_hrd, false, nalu.m_temporalId);
+        NALUnitEBSP naluWithHeader(outNalu);
+        writeAnnexBNalUnit(out, naluWithHeader, true);
+        return false;
+#if JVET_S0154_ASPECT9_AND_S0158_ASPECT4
+      }
+#endif
     }
     else
     {
@@ -554,6 +632,86 @@ bool BitstreamExtractorApp::xCheckScalableNestingSEI(SEIScalableNesting *seiNest
   return nestingAppliedInTargetOlsLayerId;
 }
 
+#if JVET_S0154_ASPECT9_AND_S0158_ASPECT4
+bool BitstreamExtractorApp::xIsTargetOlsIncludeAllVclLayers()
+{
+  std::ifstream bitstreamFileIn(m_bitstreamFileNameIn.c_str(), std::ifstream::in | std::ifstream::binary);
+  if (!bitstreamFileIn)
+  {
+    EXIT("failed to open bitstream file " << m_bitstreamFileNameIn.c_str() << " for reading");
+  }
+
+  InputByteStream bytestream(bitstreamFileIn);
+
+  bitstreamFileIn.clear();
+  bitstreamFileIn.seekg(0, std::ios::beg);
+
+  if (m_targetOlsIdx >= 0)
+  {
+    std::vector<int> layerIdInTargetOls;
+    std::vector<int> layerIdInVclNalu;
+    while (!!bitstreamFileIn)
+    {
+      AnnexBStats stats = AnnexBStats();
+      InputNALUnit nalu;
+      byteStreamNALUnit(bytestream, nalu.getBitstream().getFifo(), stats);
+
+      // call actual decoding function
+      if (nalu.getBitstream().getFifo().empty())
+      {
+        msg(WARNING, "Warning: Attempt to decode an empty NAL unit");
+      }
+      else
+      {
+        read(nalu);
+
+        if (nalu.m_nalUnitType == NAL_UNIT_VPS)
+        {
+          VPS* vps = new VPS();
+          m_hlSynaxReader.setBitstream(&nalu.getBitstream());
+          m_hlSynaxReader.parseVPS(vps);
+          int vpsId = vps->getVPSId();
+          // note: storeVPS may invalidate the vps pointer!
+          m_parameterSetManager.storeVPS(vps, nalu.getBitstream().getFifo());
+          // get VPS back
+          vps = m_parameterSetManager.getVPS(vpsId);
+          m_vpsId = vps->getVPSId();
+        }
+
+        VPS *vps = nullptr;
+        if (m_vpsId > 0)
+        {
+          vps = m_parameterSetManager.getVPS(m_vpsId);
+          layerIdInTargetOls = vps->getLayerIdsInOls(m_targetOlsIdx);
+          if (NALUnit::isVclNalUnitType(nalu.m_nalUnitType))
+          {
+            if (layerIdInVclNalu.size() == 0 || nalu.m_nuhLayerId >= layerIdInVclNalu[layerIdInVclNalu.size() - 1])
+            {
+              layerIdInVclNalu.push_back(nalu.m_nuhLayerId);
+            }
+            else
+            {
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    //When LayerIdInOls[ targetOlsIdx ] does not include all values of nuh_layer_id in all VCL NAL units in the bitstream inBitstream
+    for (int i = 0; i < layerIdInVclNalu.size(); i++)
+    {
+      bool vclLayerIncludedInTargetOls = std::find(layerIdInTargetOls.begin(), layerIdInTargetOls.end(), layerIdInVclNalu[i]) != layerIdInTargetOls.end();
+      if (!vclLayerIncludedInTargetOls)
+      {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+#endif
+
 uint32_t BitstreamExtractorApp::decode()
 {
   std::ifstream bitstreamFileIn(m_bitstreamFileNameIn.c_str(), std::ifstream::in | std::ifstream::binary);
@@ -585,6 +743,10 @@ uint32_t BitstreamExtractorApp::decode()
   bool isMultiSubpicLayer[MAX_VPS_LAYERS] = { false };
   bool rmAllFillerInSubpicExt[MAX_VPS_LAYERS] = { false };
 
+#if JVET_S0154_ASPECT9_AND_S0158_ASPECT4
+  bool targetOlsIncludeAllVclLayers = xIsTargetOlsIncludeAllVclLayers();
+#endif
+
   while (!!bitstreamFileIn)
   {
     AnnexBStats stats = AnnexBStats();
@@ -643,11 +805,7 @@ uint32_t BitstreamExtractorApp::decode()
         }
         // Remove NAL units with nal_unit_type not equal to any of VPS_NUT, DPS_NUT, and EOB_NUT and with nuh_layer_id not included in the list LayerIdInOls[targetOlsIdx].
         NalUnitType t = nalu.m_nalUnitType;
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
         bool isSpecialNalTypes = t == NAL_UNIT_OPI || t == NAL_UNIT_VPS || t == NAL_UNIT_DCI || t == NAL_UNIT_EOB;
-#else
-        bool isSpecialNalTypes = t == NAL_UNIT_VPS || t == NAL_UNIT_DCI || t == NAL_UNIT_EOB;
-#endif
         vps = m_parameterSetManager.getVPS(m_vpsId);
         if (m_vpsId == 0)
         {
@@ -816,27 +974,55 @@ uint32_t BitstreamExtractorApp::decode()
               {
                 writeInpuNalUnitToStream &= xCheckScalableNestingSEI(seiNesting, nalu, vps);
               }
+#if JVET_S0154_ASPECT9_AND_S0158_ASPECT4
+              // C.6 step 9.c
+              if (writeInpuNalUnitToStream && !targetOlsIncludeAllVclLayers && !seiNesting->m_snSubpicFlag)
+              {
+                if (seiNesting->m_snOlsFlag || vps->getNumLayersInOls(m_targetOlsIdx) == 1)
+                {
+                  OutputNALUnit outNalu(nalu.m_nalUnitType, nalu.m_nuhLayerId, nalu.m_temporalId);
+                  m_seiWriter.writeSEImessages(outNalu.m_Bitstream, seiNesting->m_nestedSEIs, m_hrd, false, nalu.m_temporalId);
+                  NALUnitEBSP naluWithHeader(outNalu);
+                  writeAnnexBNalUnit(bitstreamFileOut, naluWithHeader, true);
+                  writeInpuNalUnitToStream = false;
+                }
+              }
+#endif
             }
             // remove unqualified timing related SEI
             if (sei->payloadType() == SEI::BUFFERING_PERIOD || (m_removeTimingSEI && sei->payloadType() == SEI::PICTURE_TIMING) || sei->payloadType() == SEI::DECODING_UNIT_INFO || sei->payloadType() == SEI::SUBPICTURE_LEVEL_INFO)
             {
+#if JVET_S0154_ASPECT9_AND_S0158_ASPECT4
+              writeInpuNalUnitToStream &= targetOlsIncludeAllVclLayers;
+#else
               bool targetOlsIdxGreaterThanZero = m_targetOlsIdx > 0;
               writeInpuNalUnitToStream &= !targetOlsIdxGreaterThanZero;
+#endif
             }
           }
+#if !JVET_S0154_ASPECT9_AND_S0158_ASPECT4
           if (m_subPicIdx >= 0 && writeInpuNalUnitToStream)
           {
             writeInpuNalUnitToStream = xCheckSeiSubpicture(SEIs, subpicIdTarget[nalu.m_nuhLayerId], rmAllFillerInSubpicExt[nalu.m_nuhLayerId], lastSliceWritten, isVclNalUnitRemoved[nalu.m_nuhLayerId]);
           }
+#endif
           if (m_vpsId == -1)
           {
             delete vps;
           }
         }
+#if JVET_S0154_ASPECT9_AND_S0158_ASPECT4
+        writeInpuNalUnitToStream &= xCheckSEIFiller(SEIs, subpicIdTarget[nalu.m_nuhLayerId], rmAllFillerInSubpicExt[nalu.m_nuhLayerId], lastSliceWritten);
+        if (writeInpuNalUnitToStream && isVclNalUnitRemoved[nalu.m_nuhLayerId] && m_subPicIdx >= 0)
+        {
+          writeInpuNalUnitToStream &= xCheckSEIsSubPicture(SEIs, nalu, bitstreamFileOut, subpicIdTarget[nalu.m_nuhLayerId], vps);
+        }
+#else
         if (m_subPicIdx >= 0)
         {
           writeInpuNalUnitToStream &= xCheckSEIsSubPicture(SEIs, nalu, bitstreamFileOut, subpicIdTarget[nalu.m_nuhLayerId]);
         }
+#endif
       }
 
 #if JVET_R0107_BITSTREAM_EXTACTION
@@ -846,9 +1032,15 @@ uint32_t BitstreamExtractorApp::decode()
          slice = xParseSliceHeader(nalu);
       }
 #endif
+#if JVET_S0154_ASPECT9_AND_S0158_ASPECT4
+      if (isMultiSubpicLayer[nalu.m_nuhLayerId] && writeInpuNalUnitToStream)
+      {
+        if (m_subPicIdx >= 0 && nalu.isSlice())
+#else
       if (m_subPicIdx >= 0 && isMultiSubpicLayer[nalu.m_nuhLayerId] && writeInpuNalUnitToStream)
       {
         if (nalu.isSlice())
+#endif
         {
           writeInpuNalUnitToStream = xCheckSliceSubpicture(slice, subpicIdTarget[nalu.m_nuhLayerId]);
           if (!writeInpuNalUnitToStream)
diff --git a/source/App/BitstreamExtractorApp/BitstreamExtractorApp.h b/source/App/BitstreamExtractorApp/BitstreamExtractorApp.h
index 345aac94a1eb289a166b6c607d8d8e15c01a533a..1ff853487d70ec482a50e8b2e664a55856edaf52 100644
--- a/source/App/BitstreamExtractorApp/BitstreamExtractorApp.h
+++ b/source/App/BitstreamExtractorApp/BitstreamExtractorApp.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -65,7 +65,11 @@ protected:
   void xPrintSubPicInfo (PPS *pps);
   void xRewriteSPS (SPS &targetSPS, const SPS &sourceSPS, SubPic &subPic);
   void xRewritePPS (PPS &targetPPS, const PPS &sourcePPS, const SPS &sourceSPS, SubPic &subPic);
+#if JVET_S0154_ASPECT9_AND_S0158_ASPECT4
+  bool xCheckSEIFiller(SEIMessages SEIs, int targetSubPicId, bool &rmAllFillerInSubpicExt, bool lastSliceWritten);
+#else
   bool xCheckSeiSubpicture(SEIMessages SEIs, int targetSubPicId, bool &rmAllFillerInSubpicExt, bool lastSliceWritten, bool isVclNalUnitRemoved);
+#endif
 
 #if JVET_R0107_BITSTREAM_EXTACTION
   Slice xParseSliceHeader(InputNALUnit &nalu);
@@ -74,7 +78,12 @@ protected:
   bool xCheckSliceSubpicture(InputNALUnit &nalu, int subPicId);
 #endif
   void xReadPicHeader(InputNALUnit &nalu);
+#if JVET_S0154_ASPECT9_AND_S0158_ASPECT4
+  bool xIsTargetOlsIncludeAllVclLayers();
+  bool xCheckSEIsSubPicture(SEIMessages& SEIs, InputNALUnit& nalu, std::ostream& out, int subpicId, VPS *vps);
+#else
   bool xCheckSEIsSubPicture(SEIMessages& SEIs, InputNALUnit& nalu, std::ostream& out, int subpicId);
+#endif
   bool xCheckScalableNestingSEI(SEIScalableNesting *seiNesting, InputNALUnit& nalu, VPS *vps);
 
   void xSetSPSUpdated(int spsId)   { return m_updatedSPSList.push_back(spsId); }
diff --git a/source/App/BitstreamExtractorApp/BitstreamExtractorAppCfg.cpp b/source/App/BitstreamExtractorApp/BitstreamExtractorAppCfg.cpp
index 318049b444eaab4910d80d8fb4e7f440c88f4246..8da48167a0f2f734015fc8d2929a053ca3914fc3 100644
--- a/source/App/BitstreamExtractorApp/BitstreamExtractorAppCfg.cpp
+++ b/source/App/BitstreamExtractorApp/BitstreamExtractorAppCfg.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/BitstreamExtractorApp/BitstreamExtractorAppCfg.h b/source/App/BitstreamExtractorApp/BitstreamExtractorAppCfg.h
index b76c0905a249d06af77cd5e561595a0d9659717c..a8a55d0e83384cd533a58bf78e709c186ed9b17b 100644
--- a/source/App/BitstreamExtractorApp/BitstreamExtractorAppCfg.h
+++ b/source/App/BitstreamExtractorApp/BitstreamExtractorAppCfg.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/BitstreamExtractorApp/CMakeLists.txt b/source/App/BitstreamExtractorApp/CMakeLists.txt
index a299cf5cca769c71f83b9c683bcfae461625ee79..c6145774e950d78575f976899d885e7d153d1b7a 100644
--- a/source/App/BitstreamExtractorApp/CMakeLists.txt
+++ b/source/App/BitstreamExtractorApp/CMakeLists.txt
@@ -33,32 +33,12 @@ if( SET_ENABLE_TRACING )
   endif()
 endif()
 
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-else()
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
-
 if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC )
   set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ )
   target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 )
 endif()
 
-target_link_libraries( ${EXE_NAME} CommonLib EncoderLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS} )
+target_link_libraries( ${EXE_NAME} CommonLib EncoderLib DecoderLib Utilities ${ADDITIONAL_LIBS} )
 
 # lldb custom data formatters
 if( XCODE )
diff --git a/source/App/BitstreamExtractorApp/bitstreamextractormain.cpp b/source/App/BitstreamExtractorApp/bitstreamextractormain.cpp
index e57f05d414d2133702f2cf367d50a37d4b88857d..3e7b381a66bcdb05c2d33572fe0f4a44b4750337 100644
--- a/source/App/BitstreamExtractorApp/bitstreamextractormain.cpp
+++ b/source/App/BitstreamExtractorApp/bitstreamextractormain.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/DecoderAnalyserApp/CMakeLists.txt b/source/App/DecoderAnalyserApp/CMakeLists.txt
index ad272ca1f34efd94444f2a9c1a750c7d4c0fecc3..7b23371f76cd2fc58d2bb8b801548463262402b6 100644
--- a/source/App/DecoderAnalyserApp/CMakeLists.txt
+++ b/source/App/DecoderAnalyserApp/CMakeLists.txt
@@ -35,32 +35,12 @@ if( SET_ENABLE_TRACING )
   endif()
 endif()
 
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-else()
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
-
 if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC )
   set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ )
   target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 )
 endif()
 
-target_link_libraries( ${EXE_NAME} CommonAnalyserLib DecoderAnalyserLib Utilities Threads::Threads ${ADDITIONAL_LIBS} )
+target_link_libraries( ${EXE_NAME} CommonAnalyserLib DecoderAnalyserLib Utilities ${ADDITIONAL_LIBS} )
 
 # lldb custom data formatters
 if( XCODE )
diff --git a/source/App/DecoderApp/CMakeLists.txt b/source/App/DecoderApp/CMakeLists.txt
index 4e71c5c1e139ad10e15b9a973624f2fa2ea70274..958d2db0dc58327a15bdeadd0ad3084325be13de 100644
--- a/source/App/DecoderApp/CMakeLists.txt
+++ b/source/App/DecoderApp/CMakeLists.txt
@@ -33,32 +33,12 @@ if( SET_ENABLE_TRACING )
   endif()
 endif()
 
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-else()
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
-
 if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC )
   set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ )
   target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 )
 endif()
 
-target_link_libraries( ${EXE_NAME} CommonLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS} )
+target_link_libraries( ${EXE_NAME} CommonLib DecoderLib Utilities ${ADDITIONAL_LIBS} )
 
 # lldb custom data formatters
 if( XCODE )
diff --git a/source/App/DecoderApp/DecApp.cpp b/source/App/DecoderApp/DecApp.cpp
index c2f20c7e8a383816600d90911963692ea900f8d4..100720131d4cbc9c568ac9d999673a934c193c2c 100644
--- a/source/App/DecoderApp/DecApp.cpp
+++ b/source/App/DecoderApp/DecApp.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -123,15 +123,24 @@ uint32_t DecApp::decode()
     }
   }
 
+  // clear contents of annotated-Regions-SEI output file
+  if (!m_annotatedRegionsSEIFileName.empty())
+  {
+    std::ofstream ofile(m_annotatedRegionsSEIFileName.c_str());
+    if (!ofile.good() || !ofile.is_open())
+    {
+      fprintf(stderr, "\nUnable to open file '%s' for writing annotated-Regions-SEI\n", m_annotatedRegionsSEIFileName.c_str());
+      exit(EXIT_FAILURE);
+    }
+  }
+
   // main decoder loop
   bool loopFiltered[MAX_VPS_LAYERS] = { false };
 
   bool bPicSkipped = false;
 
   bool isEosPresentInPu = false;
-#if JVET_S0078_NOOUTPUTPRIORPICFLAG
-  bool firstSliceInAU = true;
-#endif
+  bool isEosPresentInLastPu = false;
 
   bool outputPicturePresentInBitstream = false;
   auto setOutputPicturePresentInStream = [&]()
@@ -150,10 +159,11 @@ uint32_t DecApp::decode()
     }
   };
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
     m_cDecLib.setHTidExternalSetFlag(m_mTidExternalSet);
     m_cDecLib.setTOlsIdxExternalFlag(m_tOlsIdxTidExternalSet);
-#endif
+
+  bool gdrRecoveryPeriod[MAX_NUM_LAYER_IDS] = { false };
+  bool prevPicSkipped = true;
 
   while (!!bitstreamFile)
   {
@@ -188,21 +198,31 @@ uint32_t DecApp::decode()
             (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL ||
              nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP))
         {
-          m_newCLVS[nalu.m_nuhLayerId] = true;   // An IDR picture starts a new CLVS
-#if !JVET_S0078_NOOUTPUTPRIORPICFLAG
-          xFlushOutput(pcListPic, nalu.m_nuhLayerId);
-#endif
+          if (!m_cDecLib.getMixedNaluTypesInPicFlag())
+          {
+            m_newCLVS[nalu.m_nuhLayerId] = true;   // An IDR picture starts a new CLVS
+            xFlushOutput(pcListPic, nalu.m_nuhLayerId);
+          }
+          else
+          {
+            m_newCLVS[nalu.m_nuhLayerId] = false;
+          }
         }
-        if (m_cDecLib.getFirstSliceInPicture() && nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA && isEosPresentInPu)
+        else if (m_cDecLib.getFirstSliceInPicture() && nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA && isEosPresentInLastPu)
         {
           // A CRA that is immediately preceded by an EOS is a CLVSS
           m_newCLVS[nalu.m_nuhLayerId] = true;
+          xFlushOutput(pcListPic, nalu.m_nuhLayerId);
         }
-        else if (m_cDecLib.getFirstSliceInPicture() && nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA && !isEosPresentInPu)
+        else if (m_cDecLib.getFirstSliceInPicture() && nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA && !isEosPresentInLastPu)
         {
           // A CRA that is not immediately precede by an EOS is not a CLVSS
           m_newCLVS[nalu.m_nuhLayerId] = false;
         }
+        else if(m_cDecLib.getFirstSliceInPicture() && !isEosPresentInLastPu)
+        {
+          m_newCLVS[nalu.m_nuhLayerId] = false;
+        }
 
         // parse NAL unit syntax if within target decoding layer
         if( ( m_iMaxTemporalLayer < 0 || nalu.m_temporalId <= m_iMaxTemporalLayer ) && xIsNaluWithinTargetDecLayerIdSet( &nalu ) )
@@ -225,8 +245,40 @@ uint32_t DecApp::decode()
               bPicSkipped = false;
             }
           }
+
+          int skipFrameCounter = m_iSkipFrame;
           m_cDecLib.decode(nalu, m_iSkipFrame, m_iPOCLastDisplay, m_targetOlsIdx);
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
+
+          if ( prevPicSkipped && nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_GDR )
+          {
+            gdrRecoveryPeriod[nalu.m_nuhLayerId] = true;
+          }
+
+          if ( skipFrameCounter == 1 && ( nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_GDR  || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA ))
+          {
+            skipFrameCounter--;
+          }
+
+          if ( m_iSkipFrame < skipFrameCounter  &&
+              ((nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_TRAIL) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_STSA) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_RASL) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_RADL) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA) || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_GDR)))
+          {
+            if (m_cDecLib.isSliceNaluFirstInAU(true, nalu))
+            {
+              m_cDecLib.checkSeiInPictureUnit();
+              m_cDecLib.resetPictureSeiNalus();
+              m_cDecLib.checkAPSInPictureUnit();
+              m_cDecLib.resetPictureUnitNals();
+              m_cDecLib.resetAccessUnitSeiTids();
+              m_cDecLib.checkSEIInAccessUnit();
+              m_cDecLib.resetAccessUnitSeiPayLoadTypes();
+              m_cDecLib.resetAccessUnitNals();
+              m_cDecLib.resetAccessUnitApsNals();
+              m_cDecLib.resetAccessUnitPicInfo();
+            }
+            bPicSkipped = true;
+            m_iSkipFrame++;   // skipFrame count restore, the real decrement occur at the begin of next frame
+          }
+
           if (nalu.m_nalUnitType == NAL_UNIT_OPI)
           {
             if (!m_cDecLib.getHTidExternalSetFlag() && m_cDecLib.getOPI()->getHtidInfoPresentFlag())
@@ -235,14 +287,9 @@ uint32_t DecApp::decode()
             }
             m_cDecLib.setHTidOpiSetFlag(m_cDecLib.getOPI()->getHtidInfoPresentFlag());
           }
-#endif
           if (nalu.m_nalUnitType == NAL_UNIT_VPS)
           {
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
             m_cDecLib.deriveTargetOutputLayerSet( m_cDecLib.getVPS()->m_targetOlsIdx );
-#else
-            m_cDecLib.deriveTargetOutputLayerSet( m_targetOlsIdx );
-#endif
             m_targetDecLayerIdSet = m_cDecLib.getVPS()->m_targetLayerIdSet;
             m_targetOutputLayerIdSet = m_cDecLib.getVPS()->m_targetOutputLayerIdSet;
           }
@@ -252,6 +299,12 @@ uint32_t DecApp::decode()
           bPicSkipped = true;
         }
       }
+
+      if( nalu.isSlice() && nalu.m_nalUnitType != NAL_UNIT_CODED_SLICE_RASL)
+      {
+        prevPicSkipped = bPicSkipped;
+      }
+
       // once an EOS NAL unit appears in the current PU, mark the variable isEosPresentInPu as true
       if (nalu.m_nalUnitType == NAL_UNIT_EOS)
       {
@@ -281,6 +334,14 @@ uint32_t DecApp::decode()
       m_cDecLib.updateAssociatedIRAP();
       m_cDecLib.updatePrevGDRInSameLayer();
       m_cDecLib.updatePrevIRAPAndGDRSubpic();
+
+      if ( gdrRecoveryPeriod[nalu.m_nuhLayerId] )
+      {
+        if ( m_cDecLib.getGDRRecoveryPocReached() )
+        {
+          gdrRecoveryPeriod[nalu.m_nuhLayerId] = false;
+        }
+      }
     }
     else if ( (bNewPicture || !bitstreamFile || nalu.m_nalUnitType == NAL_UNIT_EOS ) &&
       m_cDecLib.getFirstSliceInSequence(nalu.m_nuhLayerId))
@@ -290,6 +351,19 @@ uint32_t DecApp::decode()
 
     if( pcListPic )
     {
+      if ( gdrRecoveryPeriod[nalu.m_nuhLayerId] ) // Suppress YUV and OPL output during GDR recovery
+      {
+        PicList::iterator iterPic = pcListPic->begin();
+        while (iterPic != pcListPic->end())
+        {
+          Picture *pcPic = *(iterPic++);
+          if (pcPic->layerId == nalu.m_nuhLayerId)
+          {
+            pcPic->neededForOutput = false;
+          }
+        }
+      }
+
       if( !m_reconFileName.empty() && !m_cVideoIOYuvReconFile[nalu.m_nuhLayerId].isOpen() )
       {
         const BitDepths &bitDepths=pcListPic->front()->cs->sps->getBitDepths(); // use bit depths of first reconstructed picture.
@@ -336,6 +410,44 @@ uint32_t DecApp::decode()
           m_cVideoIOYuvReconFile[nalu.m_nuhLayerId].setBitdepthShift(channelType, reconBitdepth - fileBitdepth);
         }
       }
+      if (!m_SEICTIFileName.empty() && !m_cVideoIOYuvSEICTIFile[nalu.m_nuhLayerId].isOpen())
+      {
+        const BitDepths& bitDepths = pcListPic->front()->cs->sps->getBitDepths(); // use bit depths of first reconstructed picture.
+        for (uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++)
+        {
+          if (m_outputBitDepth[channelType] == 0)
+          {
+            m_outputBitDepth[channelType] = bitDepths.recon[channelType];
+          }
+        }
+
+        if (m_packedYUVMode && (m_outputBitDepth[CH_L] != 10 && m_outputBitDepth[CH_L] != 12))
+        {
+          EXIT("Invalid output bit-depth for packed YUV output, aborting\n");
+        }
+
+        std::string SEICTIFileName = m_SEICTIFileName;
+        if (m_SEICTIFileName.compare("/dev/null") && m_cDecLib.getVPS() != nullptr && m_cDecLib.getVPS()->getMaxLayers() > 1 && xIsNaluWithinTargetOutputLayerIdSet(&nalu))
+        {
+          size_t pos = SEICTIFileName.find_last_of('.');
+          if (pos != string::npos)
+          {
+            SEICTIFileName.insert(pos, std::to_string(nalu.m_nuhLayerId));
+          }
+          else
+          {
+            SEICTIFileName.append(std::to_string(nalu.m_nuhLayerId));
+          }
+        }
+        if ((m_cDecLib.getVPS() != nullptr && (m_cDecLib.getVPS()->getMaxLayers() == 1 || xIsNaluWithinTargetOutputLayerIdSet(&nalu))) || m_cDecLib.getVPS() == nullptr)
+        {
+          m_cVideoIOYuvSEICTIFile[nalu.m_nuhLayerId].open(SEICTIFileName, true, m_outputBitDepth, m_outputBitDepth, bitDepths.recon); // write mode
+        }
+      }
+      if (!m_annotatedRegionsSEIFileName.empty())
+      {
+        xOutputAnnotatedRegions(pcListPic);
+      }
       // write reconstruction to file
       if( bNewPicture )
       {
@@ -344,74 +456,28 @@ uint32_t DecApp::decode()
       }
       if (nalu.m_nalUnitType == NAL_UNIT_EOS)
       {
+        if (!m_annotatedRegionsSEIFileName.empty() && bNewPicture)
+        {
+          xOutputAnnotatedRegions(pcListPic);
+        }
         setOutputPicturePresentInStream();
         xWriteOutput( pcListPic, nalu.m_temporalId );
         m_cDecLib.setFirstSliceInPicture (false);
       }
       // write reconstruction to file -- for additional bumping as defined in C.5.2.3
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
       if (!bNewPicture && ((nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_TRAIL && nalu.m_nalUnitType <= NAL_UNIT_RESERVED_IRAP_VCL_11)
         || (nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && nalu.m_nalUnitType <= NAL_UNIT_CODED_SLICE_GDR)))
-#else
-      if (!bNewPicture && ((nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_TRAIL && nalu.m_nalUnitType <= NAL_UNIT_RESERVED_IRAP_VCL_12)
-        || (nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && nalu.m_nalUnitType <= NAL_UNIT_CODED_SLICE_GDR)))
-#endif
       {
         setOutputPicturePresentInStream();
-#if JVET_S0078_NOOUTPUTPRIORPICFLAG
-        bool firstPicInCVSAUThatIsNotAU0 = false;
-        if (firstSliceInAU)
-        {
-          if (m_targetDecLayerIdSet.size() > 0)
-          {
-            if (m_cDecLib.getAudIrapOrGdrAuFlag())
-            {
-              firstPicInCVSAUThatIsNotAU0 = true;
-            }
-          }
-          else
-          {
-            if (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL
-                || nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP
-                || (nalu.m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA && isEosPresentInPu))
-            {
-              firstPicInCVSAUThatIsNotAU0 = true;
-            }
-          }
-        }
-        if (firstPicInCVSAUThatIsNotAU0)
-        {
-          xFlushOutput(pcListPic, NOT_VALID, m_cDecLib.getNoOutputPriorPicsFlag());
-        }
-        else
-        {
-          xWriteOutput(pcListPic, nalu.m_temporalId);
-        }
-#else
         xWriteOutput( pcListPic, nalu.m_temporalId );
-#endif
       }
     }
-#if JVET_S0078_NOOUTPUTPRIORPICFLAG
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
-    if ((nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_TRAIL && nalu.m_nalUnitType <= NAL_UNIT_OPI)
-        || (nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && nalu.m_nalUnitType <= NAL_UNIT_CODED_SLICE_GDR))
-    {
-      firstSliceInAU = false;
-    }
-#else
-    if ((nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_TRAIL && nalu.m_nalUnitType <= NAL_UNIT_RESERVED_IRAP_VCL_12)
-        || (nalu.m_nalUnitType >= NAL_UNIT_CODED_SLICE_IDR_W_RADL && nalu.m_nalUnitType <= NAL_UNIT_CODED_SLICE_GDR))
-    {
-      firstSliceInAU = false;
-    }
-#endif
-#endif
     if( bNewPicture )
     {
       m_cDecLib.checkSeiInPictureUnit();
       m_cDecLib.resetPictureSeiNalus();
       // reset the EOS present status for the next PU check
+      isEosPresentInLastPu = isEosPresentInPu;
       isEosPresentInPu = false;
     }
     if (bNewPicture || !bitstreamFile || nalu.m_nalUnitType == NAL_UNIT_EOS)
@@ -443,11 +509,12 @@ uint32_t DecApp::decode()
       m_cDecLib.resetAccessUnitNals();
       m_cDecLib.resetAccessUnitApsNals();
       m_cDecLib.resetAccessUnitPicInfo();
-#if JVET_S0078_NOOUTPUTPRIORPICFLAG
-      firstSliceInAU = true;
-#endif
     }
   }
+  if (!m_annotatedRegionsSEIFileName.empty())
+  {
+    xOutputAnnotatedRegions(pcListPic);
+  }
   // May need to check again one more time as in case one the bitstream has only one picture, the first check may miss it
   setOutputPicturePresentInStream();
   CHECK(!outputPicturePresentInBitstream, "It is required that there shall be at least one picture with PictureOutputFlag equal to 1 in the bitstream")
@@ -530,6 +597,9 @@ void DecApp::xCreateDecLib()
 #endif
   m_cDecLib.m_targetSubPicIdx = this->m_targetSubPicIdx;
   m_cDecLib.initScalingList();
+#if GDR_LEAK_TEST
+  m_cDecLib.m_gdrPocRandomAccess = this->m_gdrPocRandomAccess;
+#endif // GDR_LEAK_TEST
 }
 
 void DecApp::xDestroyDecLib()
@@ -541,6 +611,13 @@ void DecApp::xDestroyDecLib()
       recFile.second.close();
     }
   }
+  if (!m_SEICTIFileName.empty())
+  {
+    for (auto& recFile : m_cVideoIOYuvSEICTIFile)
+    {
+      recFile.second.close();
+    }
+  }
 
   // destroy decoder class
   m_cDecLib.destroy();
@@ -582,7 +659,7 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId )
   while (iterPic != pcListPic->end())
   {
     Picture* pcPic = *(iterPic);
-    if(pcPic->neededForOutput && pcPic->getPOC() > m_iPOCLastDisplay)
+    if(pcPic->neededForOutput && pcPic->getPOC() >= m_iPOCLastDisplay)
     {
        numPicsNotYetDisplayed++;
       dpbFullness++;
@@ -667,7 +744,7 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId )
     {
       pcPic = *(iterPic);
 
-      if(pcPic->neededForOutput && pcPic->getPOC() > m_iPOCLastDisplay &&
+      if(pcPic->neededForOutput && pcPic->getPOC() >= m_iPOCLastDisplay &&
         (numPicsNotYetDisplayed >  maxNumReorderPicsHighestTid || dpbFullness > maxDecPicBufferingHighestTid))
       {
         // write to file
@@ -699,6 +776,27 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId )
                                         NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
             }
         }
+        // Perform CTI on decoded frame and write to output CTI file
+        if (!m_SEICTIFileName.empty())
+        {
+          const Window& conf = pcPic->getConformanceWindow();
+          const SPS* sps = pcPic->cs->sps;
+          ChromaFormat chromaFormatIDC = sps->getChromaFormatIdc();
+          if (m_upscaledOutput)
+          {
+            m_cVideoIOYuvSEICTIFile[pcPic->layerId].writeUpscaledPicture(*sps, *pcPic->cs->pps, pcPic->getDisplayBuf(), m_outputColourSpaceConvert, m_packedYUVMode, m_upscaledOutput, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range);
+          }
+          else
+          {
+            m_cVideoIOYuvSEICTIFile[pcPic->layerId].write(pcPic->getRecoBuf().get(COMPONENT_Y).width, pcPic->getRecoBuf().get(COMPONENT_Y).height, pcPic->getDisplayBuf(),
+              m_outputColourSpaceConvert, m_packedYUVMode,
+              conf.getWindowLeftOffset() * SPS::getWinUnitX(chromaFormatIDC),
+              conf.getWindowRightOffset() * SPS::getWinUnitX(chromaFormatIDC),
+              conf.getWindowTopOffset() * SPS::getWinUnitY(chromaFormatIDC),
+              conf.getWindowBottomOffset() * SPS::getWinUnitY(chromaFormatIDC),
+              NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range);
+          }
+        }
         writeLineToOutputLog(pcPic);
 
         // update POC of display order
@@ -719,11 +817,7 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId )
 
 /** \param pcListPic list of pictures to be written to file
  */
-#if JVET_S0078_NOOUTPUTPRIORPICFLAG
-void DecApp::xFlushOutput( PicList *pcListPic, const int layerId, bool noOutputOfPriorPicsFlag)
-#else
 void DecApp::xFlushOutput( PicList* pcListPic, const int layerId )
-#endif
 {
   if(!pcListPic || pcListPic->empty())
   {
@@ -752,10 +846,6 @@ void DecApp::xFlushOutput( PicList* pcListPic, const int layerId )
 
       if ( pcPicTop->neededForOutput && pcPicBottom->neededForOutput && !(pcPicTop->getPOC()%2) && (pcPicBottom->getPOC() == pcPicTop->getPOC()+1) )
       {
-#if JVET_S0078_NOOUTPUTPRIORPICFLAG
-        if (!noOutputOfPriorPicsFlag)
-        {
-#endif
           // write to file
           if ( !m_reconFileName.empty() )
           {
@@ -773,9 +863,6 @@ void DecApp::xFlushOutput( PicList* pcListPic, const int layerId )
           }
           writeLineToOutputLog(pcPicTop);
           writeLineToOutputLog(pcPicBottom);
-#if JVET_S0078_NOOUTPUTPRIORPICFLAG
-        }
-#endif
         // update POC of display order
         m_iPOCLastDisplay = pcPicBottom->getPOC();
 
@@ -820,10 +907,6 @@ void DecApp::xFlushOutput( PicList* pcListPic, const int layerId )
 
       if (pcPic->neededForOutput)
       {
-#if JVET_S0078_NOOUTPUTPRIORPICFLAG
-        if (!noOutputOfPriorPicsFlag)
-        {
-#endif
           // write to file
           if (!m_reconFileName.empty())
           {
@@ -846,10 +929,28 @@ void DecApp::xFlushOutput( PicList* pcListPic, const int layerId )
                                         NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
               }
           }
+          // Perform CTI on decoded frame and write to output CTI file
+          if (!m_SEICTIFileName.empty())
+          {
+            const Window& conf = pcPic->getConformanceWindow();
+            const SPS* sps = pcPic->cs->sps;
+            ChromaFormat chromaFormatIDC = sps->getChromaFormatIdc();
+            if (m_upscaledOutput)
+            {
+              m_cVideoIOYuvSEICTIFile[pcPic->layerId].writeUpscaledPicture(*sps, *pcPic->cs->pps, pcPic->getDisplayBuf(), m_outputColourSpaceConvert, m_packedYUVMode, m_upscaledOutput, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range);
+            }
+            else
+            {
+              m_cVideoIOYuvSEICTIFile[pcPic->layerId].write(pcPic->getRecoBuf().get(COMPONENT_Y).width, pcPic->getRecoBuf().get(COMPONENT_Y).height, pcPic->getDisplayBuf(),
+                m_outputColourSpaceConvert, m_packedYUVMode,
+                conf.getWindowLeftOffset() * SPS::getWinUnitX(chromaFormatIDC),
+                conf.getWindowRightOffset() * SPS::getWinUnitX(chromaFormatIDC),
+                conf.getWindowTopOffset() * SPS::getWinUnitY(chromaFormatIDC),
+                conf.getWindowBottomOffset() * SPS::getWinUnitY(chromaFormatIDC),
+                NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range);
+            }
+          }
           writeLineToOutputLog(pcPic);
-#if JVET_S0078_NOOUTPUTPRIORPICFLAG
-        }
-#endif
         // update POC of display order
         m_iPOCLastDisplay = pcPic->getPOC();
 
@@ -880,6 +981,153 @@ void DecApp::xFlushOutput( PicList* pcListPic, const int layerId )
   m_iPOCLastDisplay = -MAX_INT;
 }
 
+/** Update the tracked annotated-region state from the SEIs of every picture still needed for
+ *  output and, per such picture, append the tracked objects to m_annotatedRegionsSEIFileName.
+ *  \param pcListPic list of pictures to scan; nothing is done when it is null or empty
+ */
+void DecApp::xOutputAnnotatedRegions(PicList* pcListPic)
+{
+  if(!pcListPic || pcListPic->empty())
+  {
+    return;
+  }
+
+  for (Picture* pcPic : *pcListPic)
+  {
+    if (pcPic->neededForOutput)
+    {
+      // Check if any annotated region SEI has arrived
+      SEIMessages annotatedRegionSEIs = getSeisByType(pcPic->SEIs, SEI::ANNOTATED_REGIONS);
+      for(auto it=annotatedRegionSEIs.begin(); it!=annotatedRegionSEIs.end(); it++)
+      {
+        const SEIAnnotatedRegions &seiAnnotatedRegions = *(SEIAnnotatedRegions*)(*it);
+
+        if (seiAnnotatedRegions.m_hdr.m_cancelFlag)
+        {
+          m_arObjects.clear();
+          m_arLabels.clear();
+        }
+        else
+        {
+          if (m_arHeader.m_receivedSettingsOnce)
+          {
+            // validate those settings that must stay constant are constant.
+            assert(m_arHeader.m_occludedObjectFlag              == seiAnnotatedRegions.m_hdr.m_occludedObjectFlag);
+            assert(m_arHeader.m_partialObjectFlagPresentFlag    == seiAnnotatedRegions.m_hdr.m_partialObjectFlagPresentFlag);
+            assert(m_arHeader.m_objectConfidenceInfoPresentFlag == seiAnnotatedRegions.m_hdr.m_objectConfidenceInfoPresentFlag);
+            assert((!m_arHeader.m_objectConfidenceInfoPresentFlag) || m_arHeader.m_objectConfidenceLength == seiAnnotatedRegions.m_hdr.m_objectConfidenceLength);
+          }
+          else
+          {
+            // The whole-header assignment also overwrites m_receivedSettingsOnce, so the flag
+            m_arHeader=seiAnnotatedRegions.m_hdr; // copy the settings.
+            m_arHeader.m_receivedSettingsOnce=true; // must be set after the copy, not before it.
+          }
+          // Process label updates
+          if (seiAnnotatedRegions.m_hdr.m_objectLabelPresentFlag)
+          {
+            for(auto srcIt=seiAnnotatedRegions.m_annotatedLabels.begin(); srcIt!=seiAnnotatedRegions.m_annotatedLabels.end(); srcIt++)
+            {
+              const uint32_t labIdx = srcIt->first;
+              if (srcIt->second.labelValid)
+              {
+                m_arLabels[labIdx] = srcIt->second.label;
+              }
+              else
+              {
+                m_arLabels.erase(labIdx);
+              }
+            }
+          }
+
+          // Process object updates
+          for(auto srcIt=seiAnnotatedRegions.m_annotatedRegions.begin(); srcIt!=seiAnnotatedRegions.m_annotatedRegions.end(); srcIt++)
+          {
+            uint32_t objIdx = srcIt->first;
+            const SEIAnnotatedRegions::AnnotatedRegionObject &src =srcIt->second;
+
+            if (src.objectCancelFlag)
+            {
+              m_arObjects.erase(objIdx);
+            }
+            else
+            {
+              auto destIt = m_arObjects.find(objIdx);
+
+              if (destIt == m_arObjects.end())
+              {
+                //New object arrived, needs to be appended to the map of tracked objects
+                m_arObjects[objIdx] = src;
+              }
+              else //Existing object, modifications to be done
+              {
+                SEIAnnotatedRegions::AnnotatedRegionObject &dst=destIt->second;
+
+                if (seiAnnotatedRegions.m_hdr.m_objectLabelPresentFlag && src.objectLabelValid)
+                {
+                  dst.objectLabelValid=true;
+                  dst.objLabelIdx = src.objLabelIdx;
+                }
+                if (src.boundingBoxValid)
+                {
+                  dst.boundingBoxTop    = src.boundingBoxTop   ;
+                  dst.boundingBoxLeft   = src.boundingBoxLeft  ;
+                  dst.boundingBoxWidth  = src.boundingBoxWidth ;
+                  dst.boundingBoxHeight = src.boundingBoxHeight;
+                  if (seiAnnotatedRegions.m_hdr.m_partialObjectFlagPresentFlag)
+                  {
+                    dst.partialObjectFlag = src.partialObjectFlag;
+                  }
+                  if (seiAnnotatedRegions.m_hdr.m_objectConfidenceInfoPresentFlag)
+                  {
+                    dst.objectConfidence = src.objectConfidence;
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+
+      if (!m_arObjects.empty())
+      {
+        FILE *fpPersist = fopen(m_annotatedRegionsSEIFileName.c_str(), "ab");
+        if (fpPersist == nullptr)
+        {
+          std::cout << "Not able to open file for writing persist SEI messages" << std::endl;
+        }
+        else
+        {
+          fprintf(fpPersist, "\n");
+          fprintf(fpPersist, "Number of objects = %d\n", (int)m_arObjects.size());
+          for (auto it = m_arObjects.begin(); it != m_arObjects.end(); ++it)
+          {
+            fprintf(fpPersist, "Object Idx = %u\n",    it->first); // key is uint32_t, so print it unsigned
+            fprintf(fpPersist, "Object Top = %d\n",    it->second.boundingBoxTop);
+            fprintf(fpPersist, "Object Left = %d\n",   it->second.boundingBoxLeft);
+            fprintf(fpPersist, "Object Width = %d\n",  it->second.boundingBoxWidth);
+            fprintf(fpPersist, "Object Height = %d\n", it->second.boundingBoxHeight);
+            if (it->second.objectLabelValid)
+            {
+              auto labelIt=m_arLabels.find(it->second.objLabelIdx);
+              fprintf(fpPersist, "Object Label = %s\n", labelIt!=m_arLabels.end() ? (labelIt->second.c_str()) : "<UNKNOWN>");
+            }
+            if (m_arHeader.m_partialObjectFlagPresentFlag)
+            {
+              fprintf(fpPersist, "Object Partial = %d\n", it->second.partialObjectFlag?1:0);
+            }
+            if (m_arHeader.m_objectConfidenceInfoPresentFlag)
+            {
+              fprintf(fpPersist, "Object Conf = %d\n", it->second.objectConfidence);
+            }
+          }
+          fclose(fpPersist);
+        }
+      }
+    }
+  }
+}
+
 /** \param nalu Input nalu to check whether its LayerId is within targetDecLayerIdSet
  */
 bool DecApp::xIsNaluWithinTargetDecLayerIdSet( const InputNALUnit* nalu ) const
diff --git a/source/App/DecoderApp/DecApp.h b/source/App/DecoderApp/DecApp.h
index 11f88ed5ee82192c166f8b60967612d8d2b46705..33ffaa51912863ce9b30fa3ff00b92e4059cd3f9 100644
--- a/source/App/DecoderApp/DecApp.h
+++ b/source/App/DecoderApp/DecApp.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -61,6 +61,7 @@ private:
   // class interface
   DecLib          m_cDecLib;                     ///< decoder class
   std::unordered_map<int, VideoIOYuv>      m_cVideoIOYuvReconFile;        ///< reconstruction YUV class
+  std::unordered_map<int, VideoIOYuv>      m_cVideoIOYuvSEICTIFile;       ///< reconstruction YUV with CTI class
 
   // for output control
   int             m_iPOCLastDisplay;              ///< last POC in display order
@@ -70,6 +71,9 @@ private:
 
   bool            m_newCLVS[MAX_NUM_LAYER_IDS];   ///< used to record a new CLVSS
 
+  SEIAnnotatedRegions::AnnotatedRegionHeader                 m_arHeader; ///< AR header
+  std::map<uint32_t, SEIAnnotatedRegions::AnnotatedRegionObject> m_arObjects; ///< AR object pool
+  std::map<uint32_t, std::string>                                m_arLabels; ///< AR label pool
 
 private:
   bool  xIsNaluWithinTargetDecLayerIdSet( const InputNALUnit* nalu ) const; ///< check whether given Nalu is within targetDecLayerIdSet
@@ -85,15 +89,12 @@ private:
   void  xCreateDecLib     (); ///< create internal classes
   void  xDestroyDecLib    (); ///< destroy internal classes
   void  xWriteOutput      ( PicList* pcListPic , uint32_t tId); ///< write YUV to file
-#if JVET_S0078_NOOUTPUTPRIORPICFLAG
-  void  xFlushOutput( PicList *pcListPic, const int layerId = NOT_VALID, bool noOutputOfPriorPicsFlag = false );   ///< flush all remaining decoded pictures to file
-#else
   void  xFlushOutput( PicList* pcListPic, const int layerId = NOT_VALID ); ///< flush all remaining decoded pictures to file
-#endif
   bool  isNewPicture(ifstream *bitstreamFile, class InputByteStream *bytestream);  ///< check if next NAL unit will be the first NAL unit from a new picture
   bool  isNewAccessUnit(bool newPicture, ifstream *bitstreamFile, class InputByteStream *bytestream);  ///< check if next NAL unit will be the first NAL unit from a new access unit
 
   void  writeLineToOutputLog(Picture * pcPic);
+  void xOutputAnnotatedRegions(PicList* pcListPic);
 
 };
 
diff --git a/source/App/DecoderApp/DecAppCfg.cpp b/source/App/DecoderApp/DecAppCfg.cpp
index d96c20493f7fe9759ff8016909921467fff3796b..616d65fd10938aa22e5b7812c90f78455f6f01ca 100644
--- a/source/App/DecoderApp/DecAppCfg.cpp
+++ b/source/App/DecoderApp/DecAppCfg.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -88,19 +88,16 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
   ("OutputBitDepth,d",          m_outputBitDepth[CHANNEL_TYPE_LUMA],   0,          "bit depth of YUV output luma component (default: use 0 for native depth)")
   ("OutputBitDepthC,d",         m_outputBitDepth[CHANNEL_TYPE_CHROMA], 0,          "bit depth of YUV output chroma component (default: use luma output bit-depth)")
   ("OutputColourSpaceConvert",  outputColourSpaceConvert,              string(""), "Colour space conversion to apply to input 444 video. Permitted values are (empty string=UNCHANGED) " + getListOfColourSpaceConverts(false))
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   ("MaxTemporalLayer,t",        m_iMaxTemporalLayer,                   500,    "Maximum Temporal Layer to be decoded. -1 to decode all layers")
   ("TargetOutputLayerSet,p",    m_targetOlsIdx,                        500,    "Target output layer set index")
-#else
-  ("MaxTemporalLayer,t",        m_iMaxTemporalLayer,                   -1,         "Maximum Temporal Layer to be decoded. -1 to decode all layers")
-  ("TargetOutputLayerSet,p",    m_targetOlsIdx,                          -1,       "Target output layer set index")
-#endif
   ("SEIDecodedPictureHash,-dph",m_decodedPictureHashSEIEnabled,        1,          "Control handling of decoded picture hash SEI messages\n"
                                                                                    "\t1: check hash in SEI messages if available in the bitstream\n"
                                                                                    "\t0: ignore SEI message")
   ("SEINoDisplay",              m_decodedNoDisplaySEIEnabled,          true,       "Control handling of decoded no display SEI messages")
   ("TarDecLayerIdSetFile,l",    cfg_TargetDecLayerIdSetFile,           string(""), "targetDecLayerIdSet file name. The file should include white space separated LayerId values to be decoded. Omitting the option or a value of -1 in the file decodes all layers.")
   ("SEIColourRemappingInfoFilename",  m_colourRemapSEIFileName,        string(""), "Colour Remapping YUV output file name. If empty, no remapping is applied (ignore SEI message)\n")
+  ("SEICTIFilename",            m_SEICTIFileName,                      string(""), "CTI YUV output file name. If empty, no Colour Transform is applied (ignore SEI message)\n")
+  ("SEIAnnotatedRegionsInfoFilename",  m_annotatedRegionsSEIFileName,   string(""), "Annotated regions output file name. If empty, no object information will be saved (ignore SEI message)\n")
   ("OutputDecodedSEIMessagesFilename",  m_outputDecodedSEIMessagesFilename,    string(""), "When non empty, output decoded SEI messages to the indicated file. If file is '-', then output to stdout\n")
 #if JVET_S0257_DUMP_360SEI_MESSAGE
   ("360DumpFile",  m_outputDecoded360SEIMessagesFilename, string(""), "When non empty, output decoded 360 SEI messages to the indicated file.\n")
@@ -125,6 +122,9 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
   ("MCTSCheck",                m_mctsCheck,                           false,       "If enabled, the decoder checks for violations of mc_exact_sample_value_match_flag in Temporal MCTS ")
   ("targetSubPicIdx",          m_targetSubPicIdx,                     0,           "Specify which subpicture shall be written to output, using subpic index, 0: disabled, subpicIdx=m_targetSubPicIdx-1 \n" )
   ( "UpscaledOutput",          m_upscaledOutput,                          0,       "Upscaled output for RPR" )
+#if GDR_LEAK_TEST
+  ("RandomAccessPos",          m_gdrPocRandomAccess,                    0,         "POC of GDR Random access picture\n" )
+#endif // GDR_LEAK_TEST
   ;
 
   po::setDefaults(opts);
@@ -225,7 +225,6 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
       msg( ERROR, "File %s could not be opened. Using all LayerIds as default.\n", cfg_TargetDecLayerIdSetFile.c_str() );
     }
   }
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   if (m_iMaxTemporalLayer != 500)
   {
     m_mTidExternalSet = true;
@@ -242,7 +241,6 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
   {
     m_targetOlsIdx = -1;
   }
-#endif
   return true;
 }
 
@@ -256,13 +254,13 @@ DecAppCfg::DecAppCfg()
 , m_outputColourSpaceConvert(IPCOLOURSPACE_UNCHANGED)
 , m_targetOlsIdx(0)
 , m_iMaxTemporalLayer(-1)
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
 , m_mTidExternalSet(false)
 , m_tOlsIdxTidExternalSet(false)
-#endif
 , m_decodedPictureHashSEIEnabled(0)
 , m_decodedNoDisplaySEIEnabled(false)
 , m_colourRemapSEIFileName()
+, m_SEICTIFileName()
+, m_annotatedRegionsSEIFileName()
 , m_targetDecLayerIdSet()
 , m_outputDecodedSEIMessagesFilename()
 #if JVET_S0257_DUMP_360SEI_MESSAGE
diff --git a/source/App/DecoderApp/DecAppCfg.h b/source/App/DecoderApp/DecAppCfg.h
index ba7c0338ef6372fc153146f77902783e3953a852..c8a735ca5ee195f2dda4db70e3992cb0e458efc0 100644
--- a/source/App/DecoderApp/DecAppCfg.h
+++ b/source/App/DecoderApp/DecAppCfg.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -67,13 +67,13 @@ protected:
   int           m_targetOlsIdx;                       ///< target output layer set
   std::vector<int> m_targetOutputLayerIdSet;          ///< set of LayerIds to be outputted
   int           m_iMaxTemporalLayer;                  ///< maximum temporal layer to be decoded
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   bool          m_mTidExternalSet;                    ///< maximum temporal layer set externally
   bool          m_tOlsIdxTidExternalSet;              ///< target output layer set index externally set
-#endif
   int           m_decodedPictureHashSEIEnabled;       ///< Checksum(3)/CRC(2)/MD5(1)/disable(0) acting on decoded picture hash SEI message
   bool          m_decodedNoDisplaySEIEnabled;         ///< Enable(true)/disable(false) writing only pictures that get displayed based on the no display SEI message
   std::string   m_colourRemapSEIFileName;             ///< output Colour Remapping file name
+  std::string   m_SEICTIFileName;                     ///< output Recon with CTI file name
+  std::string   m_annotatedRegionsSEIFileName;        ///< annotated regions file name
   std::vector<int> m_targetDecLayerIdSet;             ///< set of LayerIds to be included in the sub-bitstream extraction process.
   std::string   m_outputDecodedSEIMessagesFilename;   ///< filename to output decoded SEI messages to. If '-', then use stdout. If empty, do not output details.
 #if JVET_S0257_DUMP_360SEI_MESSAGE
@@ -89,6 +89,9 @@ protected:
 
   int          m_upscaledOutput;                     ////< Output upscaled (2), decoded but in full resolution buffer (1) or decoded cropped (0, default) picture for RPR.
   int           m_targetSubPicIdx;                    ///< Specify which subpicture shall be write to output, using subpicture index
+#if GDR_LEAK_TEST
+  int           m_gdrPocRandomAccess;                   ///< POC used as the random-access point for the GDR leak test (presumed from name; TODO confirm)
+#endif // GDR_LEAK_TEST
 public:
   DecAppCfg();
   virtual ~DecAppCfg();
diff --git a/source/App/DecoderApp/decmain.cpp b/source/App/DecoderApp/decmain.cpp
index c8a6e3bd7070cdcbd867a95d3f39c27cca6739e1..94264a0aec2a806000d50c0d29caa2d0ee6b334a 100644
--- a/source/App/DecoderApp/decmain.cpp
+++ b/source/App/DecoderApp/decmain.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/EncoderApp/CMakeLists.txt b/source/App/EncoderApp/CMakeLists.txt
index dd87e52d1f8244c607c34d42359dd0595e63cd88..960790142b807902a6bfa0e58ed002f5e3c35a96 100644
--- a/source/App/EncoderApp/CMakeLists.txt
+++ b/source/App/EncoderApp/CMakeLists.txt
@@ -35,32 +35,12 @@ if( SET_ENABLE_TRACING )
   endif()
 endif()
 
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-else()
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
-
 if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC )
   set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ )
   target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 )
 endif()
 
-target_link_libraries( ${EXE_NAME} CommonLib EncoderLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS} )
+target_link_libraries( ${EXE_NAME} CommonLib EncoderLib DecoderLib Utilities ${ADDITIONAL_LIBS} )
 
 if( EXTENSION_360_VIDEO )
   target_link_libraries( ${EXE_NAME} Lib360 AppEncHelper360 )
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 67be5dd888deebcf0e3f1d41b2e28ccab3f317a7..70e980d7552a00d42905220cd94334a0bf5c2f4c 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -76,7 +76,6 @@ EncApp::~EncApp()
 void EncApp::xInitLibCfg()
 {
   VPS& vps = *m_cEncLib.getVPS();
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   if (m_targetOlsIdx != 500)
   {
     vps.m_targetOlsIdx = m_targetOlsIdx;
@@ -85,9 +84,6 @@ void EncApp::xInitLibCfg()
   {
     vps.m_targetOlsIdx = -1;
   }
-#else
-  vps.m_targetOlsIdx = m_targetOlsIdx;
-#endif
 
   vps.setMaxLayers( m_maxLayers );
 
@@ -128,9 +124,7 @@ void EncApp::xInitLibCfg()
     }
   }
 
-#if JVET_R0193
   m_cfgVPSParameters.m_maxTidILRefPicsPlus1.resize(vps.getMaxLayers(), std::vector<uint32_t>(vps.getMaxLayers(), MAX_TLAYER));
-#endif
   for (int i = 0; i < vps.getMaxLayers(); i++)
   {
     vps.setGeneralLayerIdx( m_layerId[i], i );
@@ -155,7 +149,6 @@ void EncApp::xInitLibCfg()
             vps.setDirectRefLayerFlag(i, j, false);
           }
         }
-#if JVET_R0193
         string::size_type beginStr = m_maxTidILRefPicsPlus1Str[i].find_first_not_of(" ", 0);
         string::size_type endStr = m_maxTidILRefPicsPlus1Str[i].find_first_of(" ", beginStr);
         int t = 0;
@@ -165,7 +158,6 @@ void EncApp::xInitLibCfg()
           beginStr = m_maxTidILRefPicsPlus1Str[i].find_first_not_of(" ", endStr);
           endStr = m_maxTidILRefPicsPlus1Str[i].find_first_of(" ", beginStr);
         }
-#endif
       }
     }
   }
@@ -228,10 +220,19 @@ void EncApp::xInitLibCfg()
   ptls[0].setTierFlag                                            ( m_levelTier );
   ptls[0].setFrameOnlyConstraintFlag                             ( m_frameOnlyConstraintFlag);
   ptls[0].setMultiLayerEnabledFlag                               ( m_multiLayerEnabledFlag);
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  CHECK((m_profile == Profile::MAIN_10 || m_profile == Profile::MAIN_10_444 || \
+         m_profile == Profile::MAIN_10_STILL_PICTURE || m_profile == Profile::MAIN_10_444_STILL_PICTURE || \
+         m_profile == Profile::MAIN_12 || m_profile == Profile::MAIN_12_INTRA || m_profile == Profile::MAIN_12_STILL_PICTURE || \
+         m_profile == Profile::MAIN_12_444 || m_profile == Profile::MAIN_12_444_INTRA || m_profile == Profile::MAIN_12_444_STILL_PICTURE || \
+         m_profile == Profile::MAIN_16_444 || m_profile == Profile::MAIN_16_444_INTRA || m_profile == Profile::MAIN_16_444_STILL_PICTURE) \
+          && m_multiLayerEnabledFlag, "ptl_multilayer_enabled_flag shall be equal to 0 for non-multilayer profiles");
+#else
   CHECK((m_profile == Profile::MAIN_10 || m_profile == Profile::MAIN_10_444
          || m_profile == Profile::MAIN_10_STILL_PICTURE || m_profile == Profile::MAIN_10_444_STILL_PICTURE)
           && m_multiLayerEnabledFlag,
         "ptl_multilayer_enabled_flag shall be equal to 0 for non-multilayer profiles");
+#endif
   ptls[0].setNumSubProfile                                       ( m_numSubProfile );
   for (int i = 0; i < m_numSubProfile; i++)
   {
@@ -258,13 +259,14 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setPrintHexPsnr(m_printHexPsnr);
   m_cEncLib.setPrintSequenceMSE                                  ( m_printSequenceMSE);
   m_cEncLib.setPrintMSSSIM                                       ( m_printMSSSIM );
+  m_cEncLib.setPrintWPSNR                                        ( m_printWPSNR );
   m_cEncLib.setCabacZeroWordPaddingEnabled                       ( m_cabacZeroWordPaddingEnabled );
 
   m_cEncLib.setFrameRate                                         ( m_iFrameRate );
   m_cEncLib.setFrameSkip                                         ( m_FrameSkip );
   m_cEncLib.setTemporalSubsampleRatio                            ( m_temporalSubsampleRatio );
-  m_cEncLib.setSourceWidth                                       ( m_iSourceWidth );
-  m_cEncLib.setSourceHeight                                      ( m_iSourceHeight );
+  m_cEncLib.setSourceWidth                                       ( m_sourceWidth );
+  m_cEncLib.setSourceHeight                                      ( m_sourceHeight );
   m_cEncLib.setConformanceWindow                                 ( m_confWinLeft / SPS::getWinUnitX( m_InputChromaFormatIDC ), m_confWinRight / SPS::getWinUnitX( m_InputChromaFormatIDC ), m_confWinTop / SPS::getWinUnitY( m_InputChromaFormatIDC ), m_confWinBottom / SPS::getWinUnitY( m_InputChromaFormatIDC ) );
   m_cEncLib.setScalingRatio                                      ( m_scalingRatioHor, m_scalingRatioVer );
   m_cEncLib.setRprEnabled                                        (m_rprEnabledFlag);
@@ -457,6 +459,13 @@ void EncApp::xInitLibCfg()
     CHECK(m_noVirtualBoundaryConstraintFlag && m_virtualBoundariesEnabledFlag, "Virtuall boundaries shall be deactivated when m_noVirtualBoundaryConstraintFlag is equal to 1");
     m_cEncLib.setNoChromaQpOffsetConstraintFlag(m_noChromaQpOffsetConstraintFlag);
     CHECK(m_noChromaQpOffsetConstraintFlag && m_cuChromaQpOffsetSubdiv, "Chroma Qp offset shall be 0 when m_noChromaQpOffsetConstraintFlag is equal to 1");
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    m_cEncLib.setGeneralLowerBitRateConstraintFlag(m_generalLowerBitRateConstraintFlag);
+    if (m_profile == Profile::MAIN_12 || m_profile == Profile::MAIN_12_444 || m_profile == Profile::MAIN_16_444)
+    {
+      CHECK(m_generalLowerBitRateConstraintFlag==0, "generalLowerBitRateConstraintFlag shall be 1 when non-Intra/Still Picture operation range extension profiles are used");
+    }
+#endif
   }
   else
   {
@@ -522,13 +531,24 @@ void EncApp::xInitLibCfg()
     m_cEncLib.setNoActConstraintFlag(false);
     m_cEncLib.setNoLmcsConstraintFlag(false);
     m_cEncLib.setNoChromaQpOffsetConstraintFlag(false);
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    m_cEncLib.setGeneralLowerBitRateConstraintFlag(false);
+#endif
   }
 
   //====== Coding Structure ========
   m_cEncLib.setIntraPeriod                                       ( m_iIntraPeriod );
+#if GDR_ENABLED
+  m_cEncLib.setGdrEnabled                                        ( m_gdrEnabled );
+  m_cEncLib.setGdrPeriod                                         ( m_gdrPeriod );
+  m_cEncLib.setGdrPocStart                                       ( m_gdrPocStart );
+  m_cEncLib.setGdrInterval                                       ( m_gdrInterval);
+  m_cEncLib.setGdrNoHash                                         ( m_gdrNoHash );
+#endif
   m_cEncLib.setDecodingRefreshType                               ( m_iDecodingRefreshType );
   m_cEncLib.setGOPSize                                           ( m_iGOPSize );
   m_cEncLib.setDrapPeriod                                        ( m_drapPeriod );
+  m_cEncLib.setEdrapPeriod                                       ( m_edrapPeriod );
   m_cEncLib.setReWriteParamSets                                  ( m_rewriteParamSets );
   m_cEncLib.setRPLList0                                          ( m_RPLList0);
   m_cEncLib.setRPLList1                                          ( m_RPLList1);
@@ -553,9 +573,9 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setIntraQPOffset                                     ( m_intraQPOffset );
   m_cEncLib.setLambdaFromQPEnable                                ( m_lambdaFromQPEnable );
 #endif
-  m_cEncLib.setChromaQpMappingTableParams                         (m_chromaQpMappingTableParams);
+  m_cEncLib.setChromaQpMappingTableParams                        (m_chromaQpMappingTableParams);
 
-  m_cEncLib.setPad                                               ( m_aiPad );
+  m_cEncLib.setSourcePadding                                     ( m_sourcePadding );
 
   m_cEncLib.setAccessUnitDelimiter                               ( m_AccessUnitDelimiter );
   m_cEncLib.setEnablePictureHeaderInSliceHeader                  ( m_enablePictureHeaderInSliceHeader );
@@ -565,14 +585,14 @@ void EncApp::xInitLibCfg()
   //===== Slice ========
 
   //====== Loop/Deblock Filter ========
-  m_cEncLib.setLoopFilterDisable                                 ( m_bLoopFilterDisable       );
-  m_cEncLib.setLoopFilterOffsetInPPS                             ( m_loopFilterOffsetInPPS );
-  m_cEncLib.setLoopFilterBetaOffset                              ( m_loopFilterBetaOffsetDiv2  );
-  m_cEncLib.setLoopFilterTcOffset                                ( m_loopFilterTcOffsetDiv2    );
-  m_cEncLib.setLoopFilterCbBetaOffset                            ( m_loopFilterCbBetaOffsetDiv2  );
-  m_cEncLib.setLoopFilterCbTcOffset                              ( m_loopFilterCbTcOffsetDiv2    );
-  m_cEncLib.setLoopFilterCrBetaOffset                            ( m_loopFilterCrBetaOffsetDiv2  );
-  m_cEncLib.setLoopFilterCrTcOffset                              ( m_loopFilterCrTcOffsetDiv2    );
+  m_cEncLib.setDeblockingFilterDisable                           ( m_deblockingFilterDisable           );
+  m_cEncLib.setDeblockingFilterOffsetInPPS                       ( m_deblockingFilterOffsetInPPS       );
+  m_cEncLib.setDeblockingFilterBetaOffset                        ( m_deblockingFilterBetaOffsetDiv2    );
+  m_cEncLib.setDeblockingFilterTcOffset                          ( m_deblockingFilterTcOffsetDiv2      );
+  m_cEncLib.setDeblockingFilterCbBetaOffset                      ( m_deblockingFilterCbBetaOffsetDiv2  );
+  m_cEncLib.setDeblockingFilterCbTcOffset                        ( m_deblockingFilterCbTcOffsetDiv2    );
+  m_cEncLib.setDeblockingFilterCrBetaOffset                      ( m_deblockingFilterCrBetaOffsetDiv2  );
+  m_cEncLib.setDeblockingFilterCrTcOffset                        ( m_deblockingFilterCrTcOffsetDiv2    );
 #if W0038_DB_OPT
   m_cEncLib.setDeblockingFilterMetric                            ( m_deblockingFilterMetric );
 #else
@@ -593,6 +613,8 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setMaxDeltaQP                                        ( m_iMaxDeltaQP  );
   m_cEncLib.setCuQpDeltaSubdiv                                   ( m_cuQpDeltaSubdiv );
   m_cEncLib.setCuChromaQpOffsetSubdiv                            ( m_cuChromaQpOffsetSubdiv );
+  m_cEncLib.setCuChromaQpOffsetList                              ( m_cuChromaQpOffsetList );
+  m_cEncLib.setCuChromaQpOffsetEnabled                           ( m_cuChromaQpOffsetEnabled );
   m_cEncLib.setChromaCbQpOffset                                  ( m_cbQpOffset     );
   m_cEncLib.setChromaCrQpOffset                                  ( m_crQpOffset  );
   m_cEncLib.setChromaCbQpOffsetDualTree                          ( m_cbQpOffsetDualTree );
@@ -613,6 +635,11 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setUseWPSNR                                          ( m_bUseWPSNR );
 #endif
   m_cEncLib.setExtendedPrecisionProcessingFlag                   ( m_extendedPrecisionProcessingFlag );
+  m_cEncLib.setRrcRiceExtensionEnableFlag                        ( m_rrcRiceExtensionEnableFlag );
+  m_cEncLib.setTSRCRicePresentFlag                               ( m_tsrcRicePresentFlag);
+#if JVET_W0046_RLSCP
+  m_cEncLib.setReverseLastSigCoeffEnabledFlag                    ( m_reverseLastSigCoeffEnabledFlag );
+#endif
   m_cEncLib.setHighPrecisionOffsetsEnabledFlag                   ( m_highPrecisionOffsetsEnabledFlag );
 
   m_cEncLib.setWeightedPredictionMethod( m_weightedPredictionMethod );
@@ -620,6 +647,24 @@ void EncApp::xInitLibCfg()
   //====== Tool list ========
 #if SHARP_LUMA_DELTA_QP
   m_cEncLib.setLumaLevelToDeltaQPControls                        ( m_lumaLevelToDeltaQPMapping );
+#endif
+  m_cEncLib.setSmoothQPReductionEnable                           (m_smoothQPReductionEnable);
+#if JVET_W0043
+  m_cEncLib.setSmoothQPReductionPeriodicity                      (m_smoothQPReductionPeriodicity);
+  m_cEncLib.setSmoothQPReductionThresholdIntra                   (m_smoothQPReductionThresholdIntra);
+  m_cEncLib.setSmoothQPReductionModelScaleIntra                  (m_smoothQPReductionModelScaleIntra);
+  m_cEncLib.setSmoothQPReductionModelOffsetIntra                 (m_smoothQPReductionModelOffsetIntra);
+  m_cEncLib.setSmoothQPReductionLimitIntra                       (m_smoothQPReductionLimitIntra);
+  m_cEncLib.setSmoothQPReductionThresholdInter                   (m_smoothQPReductionThresholdInter);
+  m_cEncLib.setSmoothQPReductionModelScaleInter                  (m_smoothQPReductionModelScaleInter);
+  m_cEncLib.setSmoothQPReductionModelOffsetInter                 (m_smoothQPReductionModelOffsetInter);
+  m_cEncLib.setSmoothQPReductionLimitInter                       (m_smoothQPReductionLimitInter);
+#else
+  m_cEncLib.setSmoothQPReductionThreshold                        (m_smoothQPReductionThreshold);
+  m_cEncLib.setSmoothQPReductionModelScale                       (m_smoothQPReductionModelScale);
+  m_cEncLib.setSmoothQPReductionModelOffset                      (m_smoothQPReductionModelOffset);
+  m_cEncLib.setSmoothQPReductionPeriodicity                      (m_smoothQPReductionPeriodicity);
+  m_cEncLib.setSmoothQPReductionLimit                            (m_smoothQPReductionLimit);
 #endif
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
   m_cEncLib.setDeltaQpRD( (m_costMode==COST_LOSSLESS_CODING) ? 0 : m_uiDeltaQpRD );
@@ -774,6 +819,7 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setUseBDPCM                                          ( m_useBDPCM );
   m_cEncLib.setTransformSkipRotationEnabledFlag                  ( m_transformSkipRotationEnabledFlag );
   m_cEncLib.setTransformSkipContextEnabledFlag                   ( m_transformSkipContextEnabledFlag   );
+  m_cEncLib.setRrcRiceExtensionEnableFlag(m_rrcRiceExtensionEnableFlag);
   m_cEncLib.setPersistentRiceAdaptationEnabledFlag               ( m_persistentRiceAdaptationEnabledFlag );
   m_cEncLib.setCabacBypassAlignmentEnabledFlag                   ( m_cabacBypassAlignmentEnabledFlag );
   m_cEncLib.setLog2MaxTransformSkipBlockSize                     ( m_log2MaxTransformSkipBlockSize  );
@@ -830,6 +876,9 @@ void EncApp::xInitLibCfg()
   //====== Sub-picture and Slices ========
   m_cEncLib.setSingleSlicePerSubPicFlagFlag                      ( m_singleSlicePerSubPicFlag );
   m_cEncLib.setUseSAO                                            ( m_bUseSAO );
+#if JVET_W0129_ENABLE_ALF_TRUEORG
+  m_cEncLib.setSaoTrueOrg                                        ( m_saoTrueOrg );
+#endif
   m_cEncLib.setTestSAODisableAtPictureLevel                      ( m_bTestSAODisableAtPictureLevel );
   m_cEncLib.setSaoEncodingRate                                   ( m_saoEncodingRate );
   m_cEncLib.setSaoEncodingRateChroma                             ( m_saoEncodingRateChroma );
@@ -842,6 +891,7 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setDecodedPictureHashSEIType                         ( m_decodedPictureHashSEIType );
   m_cEncLib.setSubpicDecodedPictureHashType                      ( m_subpicDecodedPictureHashType );
   m_cEncLib.setDependentRAPIndicationSEIEnabled                  ( m_drapPeriod > 0 );
+  m_cEncLib.setEdrapIndicationSEIEnabled                         ( m_edrapPeriod > 0 );
   m_cEncLib.setBufferingPeriodSEIEnabled                         ( m_bufferingPeriodSEIEnabled );
   m_cEncLib.setPictureTimingSEIEnabled                           ( m_pictureTimingSEIEnabled );
   m_cEncLib.setFrameFieldInfoSEIEnabled                          ( m_frameFieldInfoSEIEnabled );
@@ -854,6 +904,10 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setFramePackingArrangementSEIId                      ( m_framePackingSEIId );
   m_cEncLib.setFramePackingArrangementSEIQuincunx                ( m_framePackingSEIQuincunx );
   m_cEncLib.setFramePackingArrangementSEIInterpretation          ( m_framePackingSEIInterpretation );
+  m_cEncLib.setDoSEIEnabled                                      ( m_doSEIEnabled );
+  m_cEncLib.setDoSEICancelFlag                                   ( m_doSEICancelFlag );
+  m_cEncLib.setDoSEIPersistenceFlag                              ( m_doSEIPersistenceFlag);
+  m_cEncLib.setDoSEITransformType                                ( m_doSEITransformType);
   m_cEncLib.setParameterSetsInclusionIndicationSEIEnabled        (m_parameterSetsInclusionIndicationSEIEnabled);
   m_cEncLib.setSelfContainedClvsFlag                             (m_selfContainedClvsFlag);
   m_cEncLib.setErpSEIEnabled                                     ( m_erpSEIEnabled );
@@ -879,6 +933,7 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setOmniViewportSEITiltCentre                         ( m_omniViewportSEITiltCentre );
   m_cEncLib.setOmniViewportSEIHorRange                           ( m_omniViewportSEIHorRange );
   m_cEncLib.setOmniViewportSEIVerRange                           ( m_omniViewportSEIVerRange );
+  m_cEncLib.setAnnotatedRegionSEIFileRoot                        (m_arSEIFileRoot);
   m_cEncLib.setRwpSEIEnabled                                     (m_rwpSEIEnabled);
   m_cEncLib.setRwpSEIRwpCancelFlag                               (m_rwpSEIRwpCancelFlag);
   m_cEncLib.setRwpSEIRwpPersistenceFlag                          (m_rwpSEIRwpPersistenceFlag);
@@ -952,6 +1007,22 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setAmbientViewingEnvironmentSEIIlluminance           (m_aveSEIAmbientIlluminance);
   m_cEncLib.setAmbientViewingEnvironmentSEIAmbientLightX         ((uint16_t)m_aveSEIAmbientLightX);
   m_cEncLib.setAmbientViewingEnvironmentSEIAmbientLightY         ((uint16_t)m_aveSEIAmbientLightY);
+  // colour transform information sei
+  m_cEncLib.setCtiSEIEnabled(m_ctiSEIEnabled);
+  m_cEncLib.setCtiSEIId(m_ctiSEIId);
+  m_cEncLib.setCtiSEISignalInfoFlag(m_ctiSEISignalInfoFlag);
+  m_cEncLib.setCtiSEIFullRangeFlag(m_ctiSEIFullRangeFlag);
+  m_cEncLib.setCtiSEIPrimaries(m_ctiSEIPrimaries);
+  m_cEncLib.setCtiSEITransferFunction(m_ctiSEITransferFunction);
+  m_cEncLib.setCtiSEIMatrixCoefs(m_ctiSEIMatrixCoefs);
+  m_cEncLib.setCtiSEICrossComponentFlag(m_ctiSEICrossComponentFlag);
+  m_cEncLib.setCtiSEICrossComponentInferred(m_ctiSEICrossComponentInferred);
+  m_cEncLib.setCtiSEINbChromaLut(m_ctiSEINumberChromaLut);
+  m_cEncLib.setCtiSEIChromaOffset(m_ctiSEIChromaOffset);
+  for (int i = 0; i < MAX_NUM_COMPONENT; i++) 
+  {
+    m_cEncLib.setCtiSEILut(m_ctiSEILut[i], i);
+  }
   // content colour volume SEI
   m_cEncLib.setCcvSEIEnabled                                     (m_ccvSEIEnabled);
   m_cEncLib.setCcvSEICancelFlag                                  (m_ccvSEICancelFlag);
@@ -970,6 +1041,71 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setCcvSEIMinLuminanceValue                           (m_ccvSEIMinLuminanceValue);
   m_cEncLib.setCcvSEIMaxLuminanceValue                           (m_ccvSEIMaxLuminanceValue);
   m_cEncLib.setCcvSEIAvgLuminanceValue                           (m_ccvSEIAvgLuminanceValue);
+  // scalability dimension information sei
+  m_cEncLib.setSdiSEIEnabled                                     (m_sdiSEIEnabled);
+  m_cEncLib.setSdiSEIMaxLayersMinus1                             (m_sdiSEIMaxLayersMinus1);
+  m_cEncLib.setSdiSEIMultiviewInfoFlag                           (m_sdiSEIMultiviewInfoFlag);
+  m_cEncLib.setSdiSEIAuxiliaryInfoFlag                           (m_sdiSEIAuxiliaryInfoFlag);
+  m_cEncLib.setSdiSEIViewIdLenMinus1                             (m_sdiSEIViewIdLenMinus1);
+  m_cEncLib.setSdiSEILayerId                                     (m_sdiSEILayerId);
+  m_cEncLib.setSdiSEIViewIdVal                                   (m_sdiSEIViewIdVal);
+  m_cEncLib.setSdiSEIAuxId                                       (m_sdiSEIAuxId);
+  m_cEncLib.setSdiSEINumAssociatedPrimaryLayersMinus1            (m_sdiSEINumAssociatedPrimaryLayersMinus1);
+  // multiview acquisition information sei
+  m_cEncLib.setMaiSEIEnabled                                     (m_maiSEIEnabled);
+  m_cEncLib.setMaiSEIIntrinsicParamFlag                          (m_maiSEIIntrinsicParamFlag);
+  m_cEncLib.setMaiSEIExtrinsicParamFlag                          (m_maiSEIExtrinsicParamFlag);
+  m_cEncLib.setMaiSEINumViewsMinus1                              (m_maiSEINumViewsMinus1);
+  m_cEncLib.setMaiSEIIntrinsicParamsEqualFlag                    (m_maiSEIIntrinsicParamsEqualFlag);
+  m_cEncLib.setMaiSEIPrecFocalLength                             (m_maiSEIPrecFocalLength);
+  m_cEncLib.setMaiSEIPrecPrincipalPoint                          (m_maiSEIPrecPrincipalPoint);
+  m_cEncLib.setMaiSEIPrecSkewFactor                              (m_maiSEIPrecSkewFactor);
+  m_cEncLib.setMaiSEISignFocalLengthX                            (m_maiSEISignFocalLengthX);
+  m_cEncLib.setMaiSEIExponentFocalLengthX                        (m_maiSEIExponentFocalLengthX);
+  m_cEncLib.setMaiSEIMantissaFocalLengthX                        (m_maiSEIMantissaFocalLengthX);
+  m_cEncLib.setMaiSEISignFocalLengthY                            (m_maiSEISignFocalLengthY);
+  m_cEncLib.setMaiSEIExponentFocalLengthY                        (m_maiSEIExponentFocalLengthY);
+  m_cEncLib.setMaiSEIMantissaFocalLengthY                        (m_maiSEIMantissaFocalLengthY);
+  m_cEncLib.setMaiSEISignPrincipalPointX                         (m_maiSEISignPrincipalPointX);
+  m_cEncLib.setMaiSEIExponentPrincipalPointX                     (m_maiSEIExponentPrincipalPointX);
+  m_cEncLib.setMaiSEIMantissaPrincipalPointX                     (m_maiSEIMantissaPrincipalPointX);
+  m_cEncLib.setMaiSEISignPrincipalPointY                         (m_maiSEISignPrincipalPointY);
+  m_cEncLib.setMaiSEIExponentPrincipalPointY                     (m_maiSEIExponentPrincipalPointY);
+  m_cEncLib.setMaiSEIMantissaPrincipalPointY                     (m_maiSEIMantissaPrincipalPointY);
+  m_cEncLib.setMaiSEISignSkewFactor                              (m_maiSEISignSkewFactor);
+  m_cEncLib.setMaiSEIExponentSkewFactor                          (m_maiSEIExponentSkewFactor);
+  m_cEncLib.setMaiSEIMantissaSkewFactor                          (m_maiSEIMantissaSkewFactor);
+  m_cEncLib.setMaiSEIPrecRotationParam                           (m_maiSEIPrecRotationParam);
+  m_cEncLib.setMaiSEIPrecTranslationParam                        (m_maiSEIPrecTranslationParam);
+#if JVET_W0078_MVP_SEI 
+  m_cEncLib.setMvpSEIEnabled(m_mvpSEIEnabled);
+  m_cEncLib.setMvpSEINumViewsMinus1(m_mvpSEINumViewsMinus1);
+  m_cEncLib.setMvpSEIViewPosition(m_mvpSEIViewPosition);
+#endif
+  // alpha channel information sei
+  m_cEncLib.setAciSEIEnabled                                     (m_aciSEIEnabled);
+  m_cEncLib.setAciSEICancelFlag                                  (m_aciSEICancelFlag);
+  m_cEncLib.setAciSEIUseIdc                                      (m_aciSEIUseIdc);
+  m_cEncLib.setAciSEIBitDepthMinus8                              (m_aciSEIBitDepthMinus8);
+  m_cEncLib.setAciSEITransparentValue                            (m_aciSEITransparentValue);
+  m_cEncLib.setAciSEIOpaqueValue                                 (m_aciSEIOpaqueValue);
+  m_cEncLib.setAciSEIIncrFlag                                    (m_aciSEIIncrFlag);
+  m_cEncLib.setAciSEIClipFlag                                    (m_aciSEIClipFlag);
+  m_cEncLib.setAciSEIClipTypeFlag                                (m_aciSEIClipTypeFlag);
+  // depth representation information sei
+  m_cEncLib.setDriSEIEnabled                                     (m_driSEIEnabled);
+  m_cEncLib.setDriSEIZNearFlag                                   (m_driSEIZNearFlag);
+  m_cEncLib.setDriSEIZFarFlag                                    (m_driSEIZFarFlag);
+  m_cEncLib.setDriSEIDMinFlag                                    (m_driSEIDMinFlag);
+  m_cEncLib.setDriSEIDMaxFlag                                    (m_driSEIDMaxFlag);
+  m_cEncLib.setDriSEIZNear                                       (m_driSEIZNear);
+  m_cEncLib.setDriSEIZFar                                        (m_driSEIZFar);
+  m_cEncLib.setDriSEIDMin                                        (m_driSEIDMin);
+  m_cEncLib.setDriSEIDMax                                        (m_driSEIDMax);
+  m_cEncLib.setDriSEIDepthRepresentationType                     (m_driSEIDepthRepresentationType);
+  m_cEncLib.setDriSEIDisparityRefViewId                          (m_driSEIDisparityRefViewId);
+  m_cEncLib.setDriSEINonlinearNumMinus1                          (m_driSEINonlinearNumMinus1);
+  m_cEncLib.setDriSEINonlinearModel                              (m_driSEINonlinearModel);
   m_cEncLib.setEntropyCodingSyncEnabledFlag                      ( m_entropyCodingSyncEnabledFlag );
   m_cEncLib.setEntryPointPresentFlag                             ( m_entryPointPresentFlag );
   m_cEncLib.setTMVPModeId                                        ( m_TMVPModeId );
@@ -990,6 +1126,11 @@ void EncApp::xInitLibCfg()
   {
     m_cEncLib.setScalingMatrixDesignatedColourSpace(m_scalingMatrixDesignatedColourSpace);
   }
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  m_cEncLib.setConstrainedRaslencoding                           ( m_constrainedRaslEncoding );
+  m_cEncLib.setCraAPSreset                                       ( m_craAPSreset );
+  m_cEncLib.setRprRASLtoolSwitch                                 ( m_rprRASLtoolSwitch );
+#endif
   m_cEncLib.setDepQuantEnabledFlag                               ( m_depQuantEnabledFlag);
   m_cEncLib.setSignDataHidingEnabledFlag                         ( m_signDataHidingEnabledFlag);
   m_cEncLib.setUseRateCtrl                                       ( m_RCEnableRateControl );
@@ -1027,8 +1168,9 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setOverscanInfoPresentFlag                           ( m_overscanInfoPresentFlag );
   m_cEncLib.setOverscanAppropriateFlag                           ( m_overscanAppropriateFlag );
   m_cEncLib.setVideoFullRangeFlag                                ( m_videoFullRangeFlag );
-  m_cEncLib.setEfficientFieldIRAPEnabled                         ( m_bEfficientFieldIRAPEnabled );
-  m_cEncLib.setHarmonizeGopFirstFieldCoupleEnabled               ( m_bHarmonizeGopFirstFieldCoupleEnabled );
+  m_cEncLib.setFieldSeqFlag                                      ( m_isField );
+  m_cEncLib.setEfficientFieldIRAPEnabled                         ( m_efficientFieldIRAPEnabled );
+  m_cEncLib.setHarmonizeGopFirstFieldCoupleEnabled               ( m_harmonizeGopFirstFieldCoupleEnabled );
   m_cEncLib.setSummaryOutFilename                                ( m_summaryOutFilename );
   m_cEncLib.setSummaryPicFilenameBase                            ( m_summaryPicFilenameBase );
   m_cEncLib.setSummaryVerboseness                                ( m_summaryVerboseness );
@@ -1043,16 +1185,19 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setStopAfterFFtoPOC                                  ( m_stopAfterFFtoPOC );
   m_cEncLib.setBs2ModPOCAndType                                  ( m_bs2ModPOCAndType );
   m_cEncLib.setDebugCTU                                          ( m_debugCTU );
-#if ENABLE_SPLIT_PARALLELISM
-  m_cEncLib.setNumSplitThreads                                   ( m_numSplitThreads );
-  m_cEncLib.setForceSingleSplitThread                            ( m_forceSplitSequential );
-#endif
   m_cEncLib.setUseALF                                            ( m_alf );
-#if JVET_T0064
-  m_cEncLib.setALFStrength                                       (m_alfStrength);
+#if JVET_W0129_ENABLE_ALF_TRUEORG
+  m_cEncLib.setAlfTrueOrg                                        ( m_alfTrueOrg );
+#else
+  m_cEncLib.setAlfSaoTrueOrg                                     ( m_alfSaoTrueOrg );
+#endif
+  m_cEncLib.setALFStrengthLuma                                   (m_alfStrengthLuma);
   m_cEncLib.setCCALFStrength                                     (m_ccalfStrength);
   m_cEncLib.setALFAllowPredefinedFilters                         (m_alfAllowPredefinedFilters);
-#endif
+  m_cEncLib.setALFStrengthChroma                                 (m_alfStrengthChroma);
+  m_cEncLib.setALFStrengthTargetLuma                             (m_alfStrengthTargetLuma);
+  m_cEncLib.setALFStrengthTargetChroma                           (m_alfStrengthTargetChroma);
+  m_cEncLib.setCCALFStrengthTarget                               (m_ccalfStrengthTarget);
   m_cEncLib.setUseCCALF                                          ( m_ccalf );
   m_cEncLib.setCCALFQpThreshold                                  ( m_ccalfQpThreshold );
   m_cEncLib.setLmcs                                              ( m_lmcsEnabled );
@@ -1081,7 +1226,6 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setCropOffsetBottom                                  (m_cropOffsetBottom);
   m_cEncLib.setCalculateHdrMetrics                               (m_calculateHdrMetrics);
 #endif
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   m_cEncLib.setOPIEnabled                                         ( m_OPIEnabled );
   if (m_OPIEnabled)
   {
@@ -1094,7 +1238,6 @@ void EncApp::xInitLibCfg()
       m_cEncLib.setTargetOlsIdx                                   (m_targetOlsIdx);
     }
   }
-#endif
   m_cEncLib.setGopBasedTemporalFilterEnabled(m_gopBasedTemporalFilterEnabled);
   m_cEncLib.setNumRefLayers                                       ( m_numRefLayers );
 
@@ -1108,18 +1251,18 @@ void EncApp::xCreateLib( std::list<PelUnitBuf*>& recBufList, const int layerId )
 #if EXTENSION_360_VIDEO
   m_cVideoIOYuvInputFile.skipFrames(m_FrameSkip, m_inputFileWidth, m_inputFileHeight, m_InputChromaFormatIDC);
 #else
-  const int sourceHeight = m_isField ? m_iSourceHeightOrg : m_iSourceHeight;
-  m_cVideoIOYuvInputFile.skipFrames(m_FrameSkip, m_iSourceWidth - m_aiPad[0], sourceHeight - m_aiPad[1], m_InputChromaFormatIDC);
+  const int sourceHeight = m_isField ? m_iSourceHeightOrg : m_sourceHeight;
+  m_cVideoIOYuvInputFile.skipFrames(m_FrameSkip, m_sourceWidth - m_sourcePadding[0], sourceHeight - m_sourcePadding[1], m_InputChromaFormatIDC);
 #endif
   if (!m_reconFileName.empty())
   {
     if (m_packedYUVMode && ((m_outputBitDepth[CH_L] != 10 && m_outputBitDepth[CH_L] != 12)
-        || ((m_iSourceWidth & (1 + (m_outputBitDepth[CH_L] & 3))) != 0)))
+        || ((m_sourceWidth & (1 + (m_outputBitDepth[CH_L] & 3))) != 0)))
     {
       EXIT ("Invalid output bit-depth or image width for packed YUV output, aborting\n");
     }
     if (m_packedYUVMode && (m_chromaFormatIDC != CHROMA_400) && ((m_outputBitDepth[CH_C] != 10 && m_outputBitDepth[CH_C] != 12)
-        || (((m_iSourceWidth / SPS::getWinUnitX (m_chromaFormatIDC)) & (1 + (m_outputBitDepth[CH_C] & 3))) != 0)))
+        || (((m_sourceWidth / SPS::getWinUnitX (m_chromaFormatIDC)) & (1 + (m_outputBitDepth[CH_C] & 3))) != 0)))
     {
       EXIT ("Invalid chroma output bit-depth or image width for packed YUV output, aborting\n");
     }
@@ -1160,9 +1303,9 @@ void EncApp::xDestroyLib()
   m_cEncLib.destroy();
 }
 
-void EncApp::xInitLib(bool isFieldCoding)
+void EncApp::xInitLib()
 {
-  m_cEncLib.init(isFieldCoding, this );
+  m_cEncLib.init(this);
 }
 
 // ====================================================================================================================
@@ -1171,8 +1314,8 @@ void EncApp::xInitLib(bool isFieldCoding)
 
 void EncApp::createLib( const int layerIdx )
 {
-  const int sourceHeight = m_isField ? m_iSourceHeightOrg : m_iSourceHeight;
-  UnitArea unitArea( m_chromaFormatIDC, Area( 0, 0, m_iSourceWidth, sourceHeight ) );
+  const int sourceHeight = m_isField ? m_iSourceHeightOrg : m_sourceHeight;
+  UnitArea unitArea( m_chromaFormatIDC, Area( 0, 0, m_sourceWidth, sourceHeight ) );
 
   m_orgPic = new PelStorage;
   m_trueOrgPic = new PelStorage;
@@ -1197,7 +1340,7 @@ void EncApp::createLib( const int layerIdx )
   xInitLibCfg();
   const int layerId = m_cEncLib.getVPS() == nullptr ? 0 : m_cEncLib.getVPS()->getLayerId( layerIdx );
   xCreateLib( m_recBufList, layerId );
-  xInitLib( m_isField );
+  xInitLib();
 
   printChromaFormat();
 
@@ -1207,8 +1350,8 @@ void EncApp::createLib( const int layerIdx )
 
   if( m_gopBasedTemporalFilterEnabled )
   {
-    m_temporalFilter.init( m_FrameSkip, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth, m_iSourceWidth, m_iSourceHeight,
-      m_aiPad, m_bClipInputVideoToRec709Range, m_inputFileName, m_chromaFormatIDC,
+    m_temporalFilter.init( m_FrameSkip, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth, m_sourceWidth, sourceHeight,
+      m_sourcePadding, m_bClipInputVideoToRec709Range, m_inputFileName, m_chromaFormatIDC,
       m_inputColourSpaceConvert, m_iQP, m_gopBasedTemporalFilterStrengths,
       m_gopBasedTemporalFilterFutureReference );
   }
@@ -1266,10 +1409,10 @@ bool EncApp::encodePrep( bool& eos )
   }
   else
   {
-    m_cVideoIOYuvInputFile.read( *m_orgPic, *m_trueOrgPic, ipCSC, m_aiPad, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range );
+    m_cVideoIOYuvInputFile.read( *m_orgPic, *m_trueOrgPic, ipCSC, m_sourcePadding, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range );
   }
 #else
-  m_cVideoIOYuvInputFile.read( *m_orgPic, *m_trueOrgPic, ipCSC, m_aiPad, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range );
+  m_cVideoIOYuvInputFile.read( *m_orgPic, *m_trueOrgPic, ipCSC, m_sourcePadding, m_InputChromaFormatIDC, m_bClipInputVideoToRec709Range );
 #endif
 
   if( m_gopBasedTemporalFilterEnabled )
@@ -1340,8 +1483,8 @@ bool EncApp::encode()
 #if EXTENSION_360_VIDEO
       m_cVideoIOYuvInputFile.skipFrames( m_temporalSubsampleRatio - 1, m_inputFileWidth, m_inputFileHeight, m_InputChromaFormatIDC );
 #else
-    const int sourceHeight = m_isField ? m_iSourceHeightOrg : m_iSourceHeight;
-    m_cVideoIOYuvInputFile.skipFrames( m_temporalSubsampleRatio - 1, m_iSourceWidth - m_aiPad[0], sourceHeight - m_aiPad[1], m_InputChromaFormatIDC );
+    const int sourceHeight = m_isField ? m_iSourceHeightOrg : m_sourceHeight;
+    m_cVideoIOYuvInputFile.skipFrames( m_temporalSubsampleRatio - 1, m_sourceWidth - m_sourcePadding[0], sourceHeight - m_sourcePadding[1], m_InputChromaFormatIDC );
 #endif
     }
   }
@@ -1440,9 +1583,7 @@ void EncApp::rateStatsAccum(const AccessUnit& au, const std::vector<uint32_t>& a
     case NAL_UNIT_CODED_SLICE_GDR:
     case NAL_UNIT_CODED_SLICE_RADL:
     case NAL_UNIT_CODED_SLICE_RASL:
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
     case NAL_UNIT_OPI:
-#endif
     case NAL_UNIT_DCI:
     case NAL_UNIT_VPS:
     case NAL_UNIT_SPS:
diff --git a/source/App/EncoderApp/EncApp.h b/source/App/EncoderApp/EncApp.h
index 0efa6935e5fc94b6c211999065fe9bc0c8ddad8a..93323155cbd315d53df070b81d48de64bbf6ae3d 100644
--- a/source/App/EncoderApp/EncApp.h
+++ b/source/App/EncoderApp/EncApp.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -83,7 +83,7 @@ private:
   // initialization
   void xCreateLib( std::list<PelUnitBuf*>& recBufList, const int layerId );         ///< create files & encoder class
   void xInitLibCfg ();                           ///< initialize internal variables
-  void xInitLib    (bool isFieldCoding);         ///< initialize encoder class
+  void xInitLib();                               ///< initialize encoder class
   void xDestroyLib ();                           ///< destroy encoder class
 
   // file I/O
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index b38001ebc7d825596bfc05e3253bb7dcd021f7d9..b7d50a7e5df9bfc0d5c6d1ece744deddd8fc7f7c 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -69,6 +69,17 @@ enum ExtendedProfileName   // this is used for determining profile strings, wher
   MULTILAYER_MAIN_10_STILL_PICTURE,
   MULTILAYER_MAIN_10_444,
   MULTILAYER_MAIN_10_444_STILL_PICTURE,
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  MAIN_12,
+  MAIN_12_444,
+  MAIN_16_444,
+  MAIN_12_INTRA,
+  MAIN_12_444_INTRA,
+  MAIN_16_444_INTRA,
+  MAIN_12_STILL_PICTURE,
+  MAIN_12_444_STILL_PICTURE,
+  MAIN_16_444_STILL_PICTURE,
+#endif
   AUTO = -1
 };
 
@@ -100,6 +111,7 @@ EncAppCfg::~EncAppCfg()
 
 #if ENABLE_TRACING
   tracing_uninit(g_trace_ctx);
+  g_trace_ctx = nullptr;
 #endif
 }
 
@@ -178,6 +190,17 @@ static const struct MapStrToProfile
   { "multilayer_main_10_444", Profile::MULTILAYER_MAIN_10_444 },
   { "multilayer_main_10_still_picture", Profile::MULTILAYER_MAIN_10_STILL_PICTURE },
   { "multilayer_main_10_444_still_picture", Profile::MULTILAYER_MAIN_10_444_STILL_PICTURE },
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  { "main_12", Profile::MAIN_12 },
+  { "main_12_444", Profile::MAIN_12_444 },
+  { "main_16_444", Profile::MAIN_16_444 },
+  { "main_12_intra", Profile::MAIN_12_INTRA },
+  { "main_12_444_intra", Profile::MAIN_12_444_INTRA },
+  { "main_16_444_intra", Profile::MAIN_16_444_INTRA },
+  { "main_12_still_picture", Profile::MAIN_12_STILL_PICTURE },
+  { "main_12_444_still_picture", Profile::MAIN_12_444_STILL_PICTURE },
+  { "main_16_444_still_picture", Profile::MAIN_16_444_STILL_PICTURE },
+#endif
 };
 
 static const struct MapStrToExtendedProfile
@@ -194,6 +217,17 @@ static const struct MapStrToExtendedProfile
   { "multilayer_main_10_444", MULTILAYER_MAIN_10_444 },
   { "multilayer_main_10_still_picture", MULTILAYER_MAIN_10_STILL_PICTURE },
   { "multilayer_main_10_444_still_picture", MULTILAYER_MAIN_10_444_STILL_PICTURE },
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  { "main_12", MAIN_12 },
+  { "main_12_444", MAIN_12_444 },
+  { "main_16_444", MAIN_16_444 },
+  { "main_12_intra", MAIN_12_INTRA },
+  { "main_12_444_intra", MAIN_12_444_INTRA },
+  { "main_16_444_intra", MAIN_16_444_INTRA },
+  { "main_12_still_picture", MAIN_12_STILL_PICTURE },
+  { "main_12_444_still_picture", MAIN_12_444_STILL_PICTURE },
+  { "main_16_444_still_picture", MAIN_16_444_STILL_PICTURE },
+#endif
   { "auto", AUTO },
 };
 
@@ -229,9 +263,7 @@ strToLevel[] =
   {"6",   Level::LEVEL6},
   {"6.1", Level::LEVEL6_1},
   {"6.2", Level::LEVEL6_2},
-#if JVET_T0065_LEVEL_6_3
   {"6.3", Level::LEVEL6_3},
-#endif
   {"15.5", Level::LEVEL15_5},
 };
 
@@ -338,6 +370,8 @@ static inline istream& operator >> (istream &in, ScalingListMode &mode)
 template <class T>
 struct SMultiValueInput
 {
+  static_assert(!std::is_same<T, uint8_t>::value, "SMultiValueInput<uint8_t> is not supported");
+  static_assert(!std::is_same<T, int8_t>::value, "SMultiValueInput<int8_t> is not supported");
   const T              minValIncl;
   const T              maxValIncl;
   const std::size_t    minNumValuesIncl;
@@ -487,16 +521,11 @@ static uint32_t getMaxTileColsByLevel( Level::Name level )
     case Level::LEVEL6:
     case Level::LEVEL6_1:
     case Level::LEVEL6_2:
-#if !JVET_T0065_LEVEL_6_3
-    default:
-#endif
       return 20;
-#if JVET_T0065_LEVEL_6_3
     case Level::LEVEL6_3:
       return 30;
     default:
       return MAX_TILE_COLS;
-#endif
   }
 }
 
@@ -522,16 +551,11 @@ static uint32_t getMaxTileRowsByLevel( Level::Name level )
     case Level::LEVEL6:
     case Level::LEVEL6_1:
     case Level::LEVEL6_2:
-#if !JVET_T0065_LEVEL_6_3
-    default:
-#endif
       return 22;
-#if JVET_T0065_LEVEL_6_3
     case Level::LEVEL6_3:
       return 33;
     default:
       return MAX_TILES / MAX_TILE_COLS;
-#endif
   }
 }
 
@@ -558,16 +582,11 @@ static uint32_t getMaxSlicesByLevel( Level::Name level )
     case Level::LEVEL6:
     case Level::LEVEL6_1:
     case Level::LEVEL6_2:
-#if !JVET_T0065_LEVEL_6_3
-    default:
-#endif
       return 600;
-#if JVET_T0065_LEVEL_6_3
     case Level::LEVEL6_3:
       return 1000;
     default:
       return MAX_SLICES;
-#endif
   }
 }
 
@@ -623,15 +642,30 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   SMultiValueInput<int> cfg_qpOutValCr                  (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, zeroVector, 1);
   SMultiValueInput<int> cfg_qpInValCbCr                 (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, zeroVector, 1);
   SMultiValueInput<int> cfg_qpOutValCbCr                (MIN_QP_VALUE_FOR_16_BIT, MAX_QP, 0, MAX_NUM_QP_VALUES, zeroVector, 1);
+  const int cQpOffsets[] = { 6 };
+  SMultiValueInput<int> cfg_cbQpOffsetList              (-12, 12, 0, 6, cQpOffsets, 0);
+  SMultiValueInput<int> cfg_crQpOffsetList              (-12, 12, 0, 6, cQpOffsets, 0);
+  SMultiValueInput<int> cfg_cbCrQpOffsetList            (-12, 12, 0, 6, cQpOffsets, 0);
+
   const uint32_t defaultInputKneeCodes[3]  = { 600, 800, 900 };
   const uint32_t defaultOutputKneeCodes[3] = { 100, 250, 450 };
   SMultiValueInput<uint32_t> cfg_kneeSEIInputKneePointValue      (1,  999, 0, 999, defaultInputKneeCodes,  sizeof(defaultInputKneeCodes )/sizeof(uint32_t));
   SMultiValueInput<uint32_t> cfg_kneeSEIOutputKneePointValue     (0, 1000, 0, 999, defaultOutputKneeCodes, sizeof(defaultOutputKneeCodes)/sizeof(uint32_t));
   const int defaultPrimaryCodes[6]     = { 0,50000, 0,0, 50000,0 };
   const int defaultWhitePointCode[2]   = { 16667, 16667 };
+
   SMultiValueInput<int>  cfg_DisplayPrimariesCode            (0, 50000, 6, 6, defaultPrimaryCodes,   sizeof(defaultPrimaryCodes  )/sizeof(int));
   SMultiValueInput<int>  cfg_DisplayWhitePointCode           (0, 50000, 2, 2, defaultWhitePointCode, sizeof(defaultWhitePointCode)/sizeof(int));
 
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+  SMultiValueInput<Pel>  cfg_SEICTILut0(0, ((1 << (2 + 16 - 1)) - 1), 0, MAX_CTI_LUT_SIZE + 1);
+  SMultiValueInput<Pel>  cfg_SEICTILut1(0, ((1 << (2 + 16 - 1)) - 1), 0, MAX_CTI_LUT_SIZE + 1);
+  SMultiValueInput<Pel>  cfg_SEICTILut2(0, ((1 << (2 + 16 - 1)) - 1), 0, MAX_CTI_LUT_SIZE + 1);
+#else
+  SMultiValueInput<Pel>  cfg_SEICTILut0(0, ((1 << (2 + 12 - 1)) - 1), 0, MAX_CTI_LUT_SIZE + 1);
+  SMultiValueInput<Pel>  cfg_SEICTILut1(0, ((1 << (2 + 12 - 1)) - 1), 0, MAX_CTI_LUT_SIZE + 1);
+  SMultiValueInput<Pel>  cfg_SEICTILut2(0, ((1 << (2 + 12 - 1)) - 1), 0, MAX_CTI_LUT_SIZE + 1);
+#endif
   SMultiValueInput<bool> cfg_timeCodeSeiTimeStampFlag        (0,  1, 0, MAX_TIMECODE_SEI_SETS);
   SMultiValueInput<bool> cfg_timeCodeSeiNumUnitFieldBasedFlag(0,  1, 0, MAX_TIMECODE_SEI_SETS);
   SMultiValueInput<int>  cfg_timeCodeSeiCountingType         (0,  6, 0, MAX_TIMECODE_SEI_SETS);
@@ -674,6 +708,30 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   SMultiValueInput<uint32_t>   cfg_gcmpSEIFunctionUAffectedByVFlag   (0, 1, 5, 6);
   SMultiValueInput<double>     cfg_gcmpSEIFunctionCoeffV             (0.0, 1.0, 5, 6);
   SMultiValueInput<uint32_t>   cfg_gcmpSEIFunctionVAffectedByUFlag   (0, 1, 5, 6);
+  SMultiValueInput<uint32_t>        cfg_sdiSEILayerId                  (0, 63, 0, 63);
+  SMultiValueInput<uint32_t>        cfg_sdiSEIViewIdVal                (0, 63, 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>        cfg_sdiSEIAuxId                    (0, 255, 0, 63);
+  SMultiValueInput<uint32_t>        cfg_sdiSEINumAssociatedPrimaryLayersMinus1 (0, 63, 0, 63);
+  SMultiValueInput<bool>            cfg_maiSEISignFocalLengthX         (0, 1,   0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>        cfg_maiSEIExponentFocalLengthX     (0, 63, 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>        cfg_maiSEIMantissaFocalLengthX     (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<bool>            cfg_maiSEISignFocalLengthY         (0, 1,   0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>        cfg_maiSEIExponentFocalLengthY     (0, 63, 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>        cfg_maiSEIMantissaFocalLengthY     (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<bool>            cfg_maiSEISignPrincipalPointX      (0, 1,   0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>        cfg_maiSEIExponentPrincipalPointX  (0, 63, 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>        cfg_maiSEIMantissaPrincipalPointX  (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<bool>            cfg_maiSEISignPrincipalPointY      (0, 1,   0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>        cfg_maiSEIExponentPrincipalPointY  (0, 63, 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>        cfg_maiSEIMantissaPrincipalPointY  (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<bool>            cfg_maiSEISignSkewFactor           (0, 1,   0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>        cfg_maiSEIExponentSkewFactor       (0, 63, 0, std::numeric_limits<uint32_t>::max());
+  SMultiValueInput<uint32_t>        cfg_maiSEIMantissaSkewFactor       (0, std::numeric_limits<uint32_t>::max(), 0, std::numeric_limits<uint32_t>::max());
+#if JVET_W0078_MVP_SEI 
+  SMultiValueInput<uint32_t>        cfg_mvpSEIViewPosition             (0, 63, 0, std::numeric_limits<uint32_t>::max());
+#endif
+
+  SMultiValueInput<uint32_t>        cfg_driSEINonlinearModel           (0, 31, 0, std::numeric_limits<uint32_t>::max());
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   const int defaultLadfQpOffset[3] = { 1, 0, 1 };
   const int defaultLadfIntervalLowerBound[2] = { 350, 833 };
@@ -683,7 +741,8 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   SMultiValueInput<unsigned> cfg_virtualBoundariesPosX       (0, std::numeric_limits<uint32_t>::max(), 0, 3);
   SMultiValueInput<unsigned> cfg_virtualBoundariesPosY       (0, std::numeric_limits<uint32_t>::max(), 0, 3);
 
-  SMultiValueInput<uint8_t> cfg_SubProfile(0, std::numeric_limits<uint8_t>::max(), 0, std::numeric_limits<uint8_t>::max());
+  SMultiValueInput<uint32_t>  cfg_SubProfile(0, std::numeric_limits<uint8_t>::max(), 0,
+                                            std::numeric_limits<uint8_t>::max());
   SMultiValueInput<uint32_t>  cfg_subPicCtuTopLeftX(0, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS);
   SMultiValueInput<uint32_t>  cfg_subPicCtuTopLeftY(0, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS);
   SMultiValueInput<uint32_t>  cfg_subPicWidth(1, std::numeric_limits<uint32_t>::max(), 0, MAX_NUM_SUB_PICS);
@@ -692,8 +751,8 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   SMultiValueInput<bool>      cfg_loopFilterAcrossSubpicEnabledFlag(0, 1, 0, MAX_NUM_SUB_PICS);
   SMultiValueInput<uint32_t>  cfg_subPicId(0, std::numeric_limits<uint16_t>::max(), 0, MAX_NUM_SUB_PICS);
 
-  SMultiValueInput<int>       cfg_sliFractions(0, 100, 0, std::numeric_limits<int>::max());
-  SMultiValueInput<int>       cfg_sliNonSubpicLayersFractions(0, 100, 0, std::numeric_limits<int>::max());
+  SMultiValueInput<int>       cfg_sliFractions(0, 255, 0, std::numeric_limits<int>::max());
+  SMultiValueInput<int>       cfg_sliNonSubpicLayersFractions(0, 255, 0, std::numeric_limits<int>::max());
 
   SMultiValueInput<Level::Name>  cfg_sliRefLevels(Level::NONE, Level::LEVEL15_5, 0, 8 * MAX_VPS_SUBLAYERS);
 
@@ -725,8 +784,8 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("InputPathPrefix,-ipp",                            inputPathPrefix,                             string(""), "pathname to prepend to input filename")
   ("BitstreamFile,b",                                 m_bitstreamFileName,                         string(""), "Bitstream output file name")
   ("ReconFile,o",                                     m_reconFileName,                             string(""), "Reconstructed YUV output file name")
-  ("SourceWidth,-wdt",                                m_iSourceWidth,                                       0, "Source picture width")
-  ("SourceHeight,-hgt",                               m_iSourceHeight,                                      0, "Source picture height")
+  ("SourceWidth,-wdt",                                m_sourceWidth,                                       0, "Source picture width")
+  ("SourceHeight,-hgt",                               m_sourceHeight,                                      0, "Source picture height")
   ("InputBitDepth",                                   m_inputBitDepth[CHANNEL_TYPE_LUMA],                   8, "Bit-depth of input file")
   ("OutputBitDepth",                                  m_outputBitDepth[CHANNEL_TYPE_LUMA],                  0, "Bit-depth of output file (default:InternalBitDepth)")
   ("MSBExtendedBitDepth",                             m_MSBExtendedBitDepth[CHANNEL_TYPE_LUMA],             0, "bit depth of luma component after addition of MSBs of value 0 (used for synthesising High Dynamic Range source material). (default:InputBitDepth)")
@@ -735,6 +794,10 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("OutputBitDepthC",                                 m_outputBitDepth[CHANNEL_TYPE_CHROMA],                0, "As per OutputBitDepth but for chroma component. (default: use luma output bit-depth)")
   ("MSBExtendedBitDepthC",                            m_MSBExtendedBitDepth[CHANNEL_TYPE_CHROMA],           0, "As per MSBExtendedBitDepth but for chroma component. (default:MSBExtendedBitDepth)")
   ("ExtendedPrecision",                               m_extendedPrecisionProcessingFlag,                false, "Increased internal accuracies to support high bit depths (not valid in V1 profiles)")
+  ("TSRCRicePresent",                                 m_tsrcRicePresentFlag,                            false, "Indicate that TSRC Rice information is present in slice header (not valid in V1 profiles)")
+#if JVET_W0046_RLSCP
+  ("ReverseLastSigCoeff",                             m_reverseLastSigCoeffEnabledFlag,                 false, "enable reverse last significant coefficient position in RRC (not valid in V1 profiles)")
+#endif
   ("HighPrecisionPredictionWeighting",                m_highPrecisionOffsetsEnabledFlag,                false, "Use high precision option for weighted prediction (not valid in V1 profiles)")
   ("InputColourSpaceConvert",                         inputColourSpaceConvert,                     string(""), "Colour space conversion to apply to input video. Permitted values are (empty string=UNCHANGED) " + getListOfColourSpaceConverts(true))
   ("SNRInternalColourSpace",                          m_snrInternalColourSpace,                         false, "If true, then no colour space conversion is applied prior to SNR, otherwise inverse of input is applied.")
@@ -745,16 +808,12 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("PrintFrameMSE",                                   m_printFrameMSE,                                  false, "0 (default) emit only bit count and PSNRs for each frame, 1 = also emit MSE values")
   ("PrintSequenceMSE",                                m_printSequenceMSE,                               false, "0 (default) emit only bit rate and PSNRs for the whole sequence, 1 = also emit MSE values")
   ("PrintMSSSIM",                                     m_printMSSSIM,                                    false, "0 (default) do not print MS-SSIM scores, 1 = print MS-SSIM scores for each frame and for the whole sequence")
+  ("PrintWPSNR",                                      m_printWPSNR,                                     false, "0 (default) do not print HDR-PQ based wPSNR, 1 = print HDR-PQ based wPSNR")
   ("CabacZeroWordPaddingEnabled",                     m_cabacZeroWordPaddingEnabled,                     true, "0 do not add conforming cabac-zero-words to bit streams, 1 (default) = add cabac-zero-words as required")
   ("ChromaFormatIDC,-cf",                             tmpChromaFormat,                                      0, "ChromaFormatIDC (400|420|422|444 or set 0 (default) for same as InputChromaFormat)")
-  ("ConformanceMode",                                 m_conformanceWindowMode,                              0, "Deprecated alias of ConformanceWindowMode")
-  ("ConformanceWindowMode",                           m_conformanceWindowMode,                              0, "Window conformance mode (0: no window, 1:automatic padding, 2:padding, 3:conformance")
-  ("HorizontalPadding,-pdx",                          m_aiPad[0],                                           0, "Horizontal source padding for conformance window mode 2")
-  ("VerticalPadding,-pdy",                            m_aiPad[1],                                           0, "Vertical source padding for conformance window mode 2")
-  ("ConfLeft",                                        m_confWinLeft,                                        0, "Deprecated alias of ConfWinLeft")
-  ("ConfRight",                                       m_confWinRight,                                       0, "Deprecated alias of ConfWinRight")
-  ("ConfTop",                                         m_confWinTop,                                         0, "Deprecated alias of ConfWinTop")
-  ("ConfBottom",                                      m_confWinBottom,                                      0, "Deprecated alias of ConfWinBottom")
+  ("ConformanceWindowMode",                           m_conformanceWindowMode,                              1, "Window conformance mode (0: no window, 1:automatic padding (default), 2:padding parameters specified, 3:conformance window parameters specified)")
+  ("HorizontalPadding,-pdx",                          m_sourcePadding[0],                                   0, "Horizontal source padding for conformance window mode 2")
+  ("VerticalPadding,-pdy",                            m_sourcePadding[1],                                   0, "Vertical source padding for conformance window mode 2")
   ("ConfWinLeft",                                     m_confWinLeft,                                        0, "Left offset for window conformance mode 3")
   ("ConfWinRight",                                    m_confWinRight,                                       0, "Right offset for window conformance mode 3")
   ("ConfWinTop",                                      m_confWinTop,                                         0, "Top offset for window conformance mode 3")
@@ -793,8 +852,8 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   //Field coding parameters
   ("FieldCoding",                                     m_isField,                                        false, "Signals if it's a field based coding")
   ("TopFieldFirst, Tff",                              m_isTopFieldFirst,                                false, "In case of field based coding, signals whether if it's a top field first or not")
-  ("EfficientFieldIRAPEnabled",                       m_bEfficientFieldIRAPEnabled,                      true, "Enable to code fields in a specific, potentially more efficient, order.")
-  ("HarmonizeGopFirstFieldCoupleEnabled",             m_bHarmonizeGopFirstFieldCoupleEnabled,            true, "Enables harmonization of Gop first field couple")
+  ("EfficientFieldIRAPEnabled",                       m_efficientFieldIRAPEnabled,                      true, "Enable to code fields in a specific, potentially more efficient, order.")
+  ("HarmonizeGopFirstFieldCoupleEnabled",             m_harmonizeGopFirstFieldCoupleEnabled,            true, "Enables harmonization of Gop first field couple")
 
   // Profile and level
   ("Profile",                                         extendedProfile,              ExtendedProfileName::NONE, "Profile name to use for encoding. Use [multilayer_]main_10[_444][_still_picture], auto, or none")
@@ -873,6 +932,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("NoLmcsConstraintFlag",                            m_noLmcsConstraintFlag,                           false, "Indicate that LMCS is deactivated")
   ("NoLadfConstraintFlag",                            m_noLadfConstraintFlag,                          false, "Indicate that LADF is deactivated")
   ("NoVirtualBoundaryConstraintFlag",                 m_noVirtualBoundaryConstraintFlag,                false, "Indicate that virtual boundary is deactivated")
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  ("GeneralLowerBitRateConstraintFlag",               m_generalLowerBitRateConstraintFlag,              false, "Indicate whether lower bitrate constraint is used")
+#endif
 
   ("CTUSize",                                         m_uiCTUSize,                                       128u, "CTUSize (specifies the CTU size if QTBT is on) [default: 128]")
   ("Log2MinCuSize",                                   m_log2MinCuSize,                                     2u, "Log2 min CU size")
@@ -1012,9 +1074,17 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 
   // Coding structure paramters
   ("IntraPeriod,-ip",                                 m_iIntraPeriod,                                      -1, "Intra period in frames, (-1: only first frame)")
+#if GDR_ENABLED
+  ("GdrEnabled",                                      m_gdrEnabled,                                     false, "GDR enabled")
+  ("GdrPocStart",                                     m_gdrPocStart,                                       -1, "GDR poc start")
+  ("GdrPeriod",                                       m_gdrPeriod,                                         -1, "Number of frames between GDR picture to the next GDR picture")
+  ("GdrInterval",                                     m_gdrInterval,                                       -1, "Number of frames from GDR picture to the recovery point picture")
+  ("GdrNoHash",                                       m_gdrNoHash,                                       true, "Do not generate decode picture hash SEI messages for GDR and recovering pictures")
+#endif
   ("DecodingRefreshType,-dr",                         m_iDecodingRefreshType,                               0, "Intra refresh type (0:none 1:CRA 2:IDR 3:RecPointSEI)")
   ("GOPSize,g",                                       m_iGOPSize,                                           1, "GOP size of temporal structure")
   ("DRAPPeriod",                                      m_drapPeriod,                                         0, "DRAP period in frames (0: disable Dependent RAP indication SEI messages)")
+  ("EDRAPPeriod",                                     m_edrapPeriod,                                        0, "EDRAP period in frames (0: disable Extended Dependent RAP indication SEI messages)")
   ("ReWriteParamSets",                                m_rewriteParamSets,                           false, "Enable rewriting of Parameter sets before every (intra) random access point")
   ("IDRRefParamList",                                 m_idrRefParamList,                            false, "Enable indication of reference picture list syntax elements in slice headers of IDR pictures")
   // motion search options
@@ -1056,7 +1126,8 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("DeltaQpRD,-dqr",                                  m_uiDeltaQpRD,                                       0u, "max dQp offset for slice")
   ("MaxDeltaQP,d",                                    m_iMaxDeltaQP,                                        0, "max dQp offset for block")
   ("MaxCuDQPSubdiv,-dqd",                             m_cuQpDeltaSubdiv,                                    0, "Maximum subdiv for CU luma Qp adjustment")
-  ("MaxCuChromaQpOffsetSubdiv",                       m_cuChromaQpOffsetSubdiv,                            -1, "Maximum subdiv for CU chroma Qp adjustment - set less than 0 to disable")
+  ("MaxCuChromaQpOffsetSubdiv",                       m_cuChromaQpOffsetSubdiv,                             0, "Maximum subdiv for CU chroma Qp adjustment")
+  ("SliceCuChromaQpOffsetEnabled",                    m_cuChromaQpOffsetEnabled,                         true, "Enable local chroma QP offsets (slice level flag)")
   ("FastDeltaQP",                                     m_bFastDeltaQP,                                   false, "Fast Delta QP Algorithm")
 #if SHARP_LUMA_DELTA_QP
   ("LumaLevelToDeltaQPMode",                          lumaLevelToDeltaQPMode,                              0u, "Luma based Delta QP 0(default): not used. 1: Based on CTU average, 2: Based on Max luma in CTU")
@@ -1066,7 +1137,25 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("LumaLevelToDeltaQPMappingLuma",                   cfg_lumaLeveltoDQPMappingLuma,  cfg_lumaLeveltoDQPMappingLuma, "Luma to Delta QP Mapping - luma thresholds")
   ("LumaLevelToDeltaQPMappingDQP",                    cfg_lumaLeveltoDQPMappingQP,  cfg_lumaLeveltoDQPMappingQP, "Luma to Delta QP Mapping - DQP values")
 #endif
-  ("UseIdentityTableForNon420Chroma",                 m_useIdentityTableForNon420Chroma,                 true, "True: Indicates that 422/444 chroma uses identity chroma QP mapping tables; False: explicit Qp table may be specified in config")
+  ("SmoothQPReductionEnable",                         m_smoothQPReductionEnable,                         false, "Enable QP reduction for smooth blocks according to: Clip3(SmoothQPReductionLimit, 0, SmoothQPReductionModelScale*baseQP+SmoothQPReductionModelOffset)")
+#if JVET_W0043
+  ("SmoothQPReductionPeriodicity",                    m_smoothQPReductionPeriodicity,                        0, "Periodicity parameter of the QP reduction model, 1: all frames, 0: only intra pictures, 2: every second frame, etc")
+  ("SmoothQPReductionThresholdIntra",                 m_smoothQPReductionThresholdIntra,                   3.0, "Threshold parameter for smoothness for intra pictures (SmoothQPReductionThresholdIntra * number of samples in block)")
+  ("SmoothQPReductionModelScaleIntra",                m_smoothQPReductionModelScaleIntra,                 -1.0, "Scale parameter of the QP reduction model for intra pictures ")
+  ("SmoothQPReductionModelOffsetIntra",               m_smoothQPReductionModelOffsetIntra,                27.0, "Offset parameter of the QP reduction model for intra pictures ")
+  ("SmoothQPReductionLimitIntra",                     m_smoothQPReductionLimitIntra,                       -16, "Threshold parameter for controlling maximum amount of QP reduction by the QP reduction model for intra pictures ")
+  ("SmoothQPReductionThresholdInter",                 m_smoothQPReductionThresholdInter,                   3.0, "Threshold parameter for smoothness for inter pictures (SmoothQPReductionThresholdInter * number of samples in block)")
+  ("SmoothQPReductionModelScaleInter",                m_smoothQPReductionModelScaleInter,                 -1.0, "Scale parameter of the QP reduction model for inter pictures")
+  ("SmoothQPReductionModelOffsetInter",               m_smoothQPReductionModelOffsetInter,                27.0, "Offset parameter of the QP reduction model for inter pictures")
+  ("SmoothQPReductionLimitInter",                     m_smoothQPReductionLimitInter,                        -4, "Threshold parameter for controlling maximum amount of QP reduction by the QP reduction model for inter pictures")
+#else
+  ("SmoothQPReductionThreshold",                      m_smoothQPReductionThreshold,                        3.0, "Threshold parameter for smoothness (SmoothQPReductionThreshold * number of samples in block)")
+  ("SmoothQPReductionModelScale",                     m_smoothQPReductionModelScale,                      -1.0, "Scale parameter of the QP reduction model")
+  ("SmoothQPReductionModelOffset",                    m_smoothQPReductionModelOffset,                     27.0, "Offset parameter of the QP reduction model")
+  ("SmoothQPReductionLimit",                          m_smoothQPReductionLimit,                            -16, "Threshold parameter for controlling maximum amount of QP reduction by the QP reduction model")
+  ("SmoothQPReductionPeriodicity",                    m_smoothQPReductionPeriodicity,                        1, "Periodicity parameter of the QP reduction model, 1: all frames, 0: only intra pictures, 2: every second frame, etc")
+#endif
+  ("UseIdentityTableForNon420Chroma",                 m_useIdentityTableForNon420Chroma,                  true, "True: Indicates that 422/444 chroma uses identity chroma QP mapping tables; False: explicit Qp table may be specified in config")
   ("SameCQPTablesForAllChroma",                       m_chromaQpMappingTableParams.m_sameCQPTableForAllChromaFlag,                        true, "0: Different tables for Cb, Cr and joint Cb-Cr components, 1 (default): Same tables for all three chroma components")
   ("QpInValCb",                                       cfg_qpInValCb,                            cfg_qpInValCb, "Input coordinates for the QP table for Cb component")
   ("QpOutValCb",                                      cfg_qpOutValCb,                          cfg_qpOutValCb, "Output coordinates for the QP table for Cb component")
@@ -1092,6 +1181,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("SliceCbQpOffsetIntraOrPeriodic",                  m_sliceChromaQpOffsetIntraOrPeriodic[0],              0, "Chroma Cb QP Offset at slice level for I slice or for periodic inter slices as defined by SliceChromaQPOffsetPeriodicity. Replaces offset in the GOP table.")
   ("SliceCrQpOffsetIntraOrPeriodic",                  m_sliceChromaQpOffsetIntraOrPeriodic[1],              0, "Chroma Cr QP Offset at slice level for I slice or for periodic inter slices as defined by SliceChromaQPOffsetPeriodicity. Replaces offset in the GOP table.")
 #endif
+  ("CbQpOffsetList",                                  cfg_cbQpOffsetList,                  cfg_cbQpOffsetList, "Chroma Cb QP offset list for local adjustment")
+  ("CrQpOffsetList",                                  cfg_crQpOffsetList,                  cfg_crQpOffsetList, "Chroma Cr QP offset list for local adjustment")
+  ("CbCrQpOffsetList",                                cfg_cbCrQpOffsetList,              cfg_cbCrQpOffsetList, "Chroma joint Cb-Cr QP offset list for local adjustment")
 
   ("AdaptiveQP,-aq",                                  m_bUseAdaptiveQP,                                 false, "QP adaptation based on a psycho-visual model")
   ("MaxQPAdaptationRange,-aqr",                       m_iQPAdaptationRange,                                 6, "QP adaptation range")
@@ -1108,14 +1200,14 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("RDpenalty",                                       m_rdPenalty,                                          0, "RD-penalty for 32x32 TU for intra in non-intra slices. 0:disabled  1:RD-penalty  2:maximum RD-penalty")
 
   // Deblocking filter parameters
-  ("LoopFilterDisable",                               m_bLoopFilterDisable,                             false)
-  ("LoopFilterOffsetInPPS",                           m_loopFilterOffsetInPPS,                           true)
-  ("LoopFilterBetaOffset_div2",                       m_loopFilterBetaOffsetDiv2,                           0)
-  ("LoopFilterTcOffset_div2",                         m_loopFilterTcOffsetDiv2,                             0)
-  ("LoopFilterCbBetaOffset_div2",                     m_loopFilterCbBetaOffsetDiv2,                         0)
-  ("LoopFilterCbTcOffset_div2",                       m_loopFilterCbTcOffsetDiv2,                           0)
-  ("LoopFilterCrBetaOffset_div2",                     m_loopFilterCrBetaOffsetDiv2,                         0)
-  ("LoopFilterCrTcOffset_div2",                       m_loopFilterCrTcOffsetDiv2,                           0)
+  ("DeblockingFilterDisable",                         m_deblockingFilterDisable,                        false)
+  ("DeblockingFilterOffsetInPPS",                     m_deblockingFilterOffsetInPPS,                     true)
+  ("DeblockingFilterBetaOffset_div2",                 m_deblockingFilterBetaOffsetDiv2,                     0)
+  ("DeblockingFilterTcOffset_div2",                   m_deblockingFilterTcOffsetDiv2,                       0)
+  ("DeblockingFilterCbBetaOffset_div2",               m_deblockingFilterCbBetaOffsetDiv2,                   0)
+  ("DeblockingFilterCbTcOffset_div2",                 m_deblockingFilterCbTcOffsetDiv2,                     0)
+  ("DeblockingFilterCrBetaOffset_div2",               m_deblockingFilterCrBetaOffsetDiv2,                   0)
+  ("DeblockingFilterCrTcOffset_div2",                 m_deblockingFilterCrTcOffsetDiv2,                     0)
 #if W0038_DB_OPT
   ("DeblockingFilterMetric",                          m_deblockingFilterMetric,                             0)
 #else
@@ -1131,9 +1223,13 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("ISPFast",                                         m_useFastISP,                                     false, "Fast encoder search for ISP")
   ("ResidualRotation",                                m_transformSkipRotationEnabledFlag,               false, "Enable rotation of transform-skipped and transquant-bypassed TUs through 180 degrees prior to entropy coding (not valid in V1 profiles)")
   ("SingleSignificanceMapContext",                    m_transformSkipContextEnabledFlag,                false, "Enable, for transform-skipped and transquant-bypassed TUs, the selection of a single significance map context variable for all coefficients (not valid in V1 profiles)")
+  ("ExtendedRiceRRC",                                 m_rrcRiceExtensionEnableFlag,                     false, "Enable the extension of the Golomb-Rice parameter derivation for RRC")
   ("GolombRiceParameterAdaptation",                   m_persistentRiceAdaptationEnabledFlag,            false, "Enable the adaptation of the Golomb-Rice parameter over the course of each slice")
   ("AlignCABACBeforeBypass",                          m_cabacBypassAlignmentEnabledFlag,                false, "Align the CABAC engine to a defined fraction of a bit prior to coding bypass data. Must be 1 in high bit rate profile, 0 otherwise")
   ("SAO",                                             m_bUseSAO,                                         true, "Enable Sample Adaptive Offset")
+#if JVET_W0129_ENABLE_ALF_TRUEORG
+  ("SaoTrueOrg",                                      m_saoTrueOrg,                                     false, "Using true original samples for SAO optimization when MCTF is enabled\n")
+#endif
   ("TestSAODisableAtPictureLevel",                    m_bTestSAODisableAtPictureLevel,                  false, "Enables the testing of disabling SAO at the picture level after having analysed all blocks")
   ("SaoEncodingRate",                                 m_saoEncodingRate,                                 0.75, "When >0 SAO early picture termination is enabled for luma and chroma")
   ("SaoEncodingRateChroma",                           m_saoEncodingRateChroma,                            0.5, "The SAO early picture termination rate to use for chroma (when m_SaoEncodingRate is >0). If <=0, use results for luma")
@@ -1249,6 +1345,10 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
                                                                                                                "\t0: unspecified\n"
                                                                                                                "\t1: stereo pair, frame0 represents left view\n"
                                                                                                                "\t2: stereo pair, frame0 represents right view")
+  ("SEIDisplayOrientationEnabled",                    m_doSEIEnabled,                                   false, "Controls if display orientation packing SEI message enabled")
+  ("SEIDisplayOrientationCancelFlag",                 m_doSEICancelFlag,                                 true, "Specifies the persistence of any previous display orientation SEI message in output order.")
+  ("SEIDisplayOrientationPersistenceFlag",            m_doSEIPersistenceFlag,                           false, "Specifies the persistence of the display orientation packing SEI message for the current layer.")
+  ("SEIDisplayOrientationTransformType",              m_doSEITransformType,                                 0, "specifies the rotation and mirroring to be applied to the picture.")
   ("SEIParameterSetsInclusionIndication",             m_parameterSetsInclusionIndicationSEIEnabled,      false, "Control generation of Parameter sets inclusion indication SEI messages")
   ("SEISelfContainedClvsFlag",                        m_selfContainedClvsFlag,                               0, "Self contained CLVS indication flag value")
   ("SEIMasteringDisplayColourVolume",                 m_masteringDisplay.colourVolumeSEIEnabled,         false, "Control generation of mastering display colour volume SEI messages")
@@ -1327,6 +1427,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("SEISubpicLevelInfoRefLevels",                     cfg_sliRefLevels,                                  cfg_sliRefLevels, "List of reference levels for Subpicture Level Information SEI messages")
   ("SEISubpicLevelInfoExplicitFraction",              m_cfgSubpictureLevelInfoSEI.m_explicitFraction,    false,            "Enable sending of explicit fractions in Subpicture Level Information SEI messages")
   ("SEISubpicLevelInfoNumSubpics",                    m_cfgSubpictureLevelInfoSEI.m_numSubpictures,      1,                "Number of subpictures for Subpicture Level Information SEI messages")
+  ("SEIAnnotatedRegionsFileRoot,-ar",                 m_arSEIFileRoot,                                 string(""), "Annotated region SEI parameters root file name (wo num ext); only the file name base is to be added. Underscore and POC would be automatically added to it. E.g. \"-ar ar\" will search for files ar_0.txt, ar_1.txt, ...")
   ("SEISubpicLevelInfoMaxSublayers",                  m_cfgSubpictureLevelInfoSEI.m_sliMaxSublayers,               1,                    "Number of sublayers for Subpicture Level Information SEI messages")
   ("SEISubpicLevelInfoSublayerInfoPresentFlag",       m_cfgSubpictureLevelInfoSEI.m_sliSublayerInfoPresentFlag,    false,                "Enable sending of level information for all sublayers in Subpicture Level Information SEI messages")
   ("SEISubpicLevelInfoRefLevelFractions",             cfg_sliFractions,                                  cfg_sliFractions, "List of subpicture level fractions for Subpicture Level Information SEI messages")
@@ -1368,6 +1469,21 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("SEIAVEAmbientIlluminance",                        m_aveSEIAmbientIlluminance,                      100000u, "Specifies the environmental illluminance of the ambient viewing environment in units of 1/10000 lux for the ambient viewing environment SEI message")
   ("SEIAVEAmbientLightX",                             m_aveSEIAmbientLightX,                            15635u, "Specifies the normalized x chromaticity coordinate of the environmental ambient light in the nominal viewing enviornment according to the CIE 1931 definition in units of 1/50000 lux for the ambient viewing enviornment SEI message")
   ("SEIAVEAmbientLightY",                             m_aveSEIAmbientLightY,                            16450u, "Specifies the normalized y chromaticity coordinate of the environmental ambient light in the nominal viewing enviornment according to the CIE 1931 definition in units of 1/50000 lux for the ambient viewing enviornment SEI message")
+// colour transform information SEI
+  ("SEICTIEnabled",                                   m_ctiSEIEnabled,                                   false, "Control generation of the Colour transform information SEI message")
+  ("SEICTIId",                                        m_ctiSEIId,                                           0u, "Id of the Colour transform information SEI message")
+  ("SEICTISignalInfoFlag",                            m_ctiSEISignalInfoFlag,                            false, "indicates if signal information are present in the Colour transform information SEI message")
+  ("SEICTIFullRangeFlag",                             m_ctiSEIFullRangeFlag,                             false, "specifies signal range after applying the Colour transform information SEI message")
+  ("SEICTIPrimaries",                                 m_ctiSEIPrimaries,                                    0u, "indicates the signal primaries after applying the Colour transform information SEI message")
+  ("SEICTITransferFunction",                          m_ctiSEITransferFunction,                             0u, "indicates the signal transfer function after applying the Colour transform information SEI message")
+  ("SEICTIMatrixCoefs",                               m_ctiSEIMatrixCoefs,                                  0u, "indicates the signal matrix coefficients after applying the Colour transform information SEI message")
+  ("SEICTICrossCompFlag",                             m_ctiSEICrossComponentFlag,                         true, "Specifies if cross-component transform mode is enabled in SEI CTI")
+  ("SEICTICrossCompInferred",                         m_ctiSEICrossComponentInferred,                     true, "Specifies if cross-component transform LUT is inferred in SEI CTI")
+  ("SEICTINbChromaLut",                               m_ctiSEINumberChromaLut,                              0u, "Specifies the number of chroma LUTs in SEI CTI")
+  ("SEICTIChromaOffset",                              m_ctiSEIChromaOffset,                                  0, "Specifies the chroma offset of SEI CTI")
+  ("SEICTILut0",                                      cfg_SEICTILut0,                           cfg_SEICTILut0, "slope values for component 0 of SEI CTI")
+  ("SEICTILut1",                                      cfg_SEICTILut1,                           cfg_SEICTILut1, "slope values for component 1 of SEI CTI")
+  ("SEICTILut2",                                      cfg_SEICTILut2,                           cfg_SEICTILut2, "slope values for component 2 of SEI CTI")
 // content colour volume SEI
   ("SEICCVEnabled",                                   m_ccvSEIEnabled,                                   false, "Control generation of the Content Colour Volume SEI message")
   ("SEICCVCancelFlag",                                m_ccvSEICancelFlag,                                 true, "Specifies the persistence of any previous content colour volume SEI message in output order.")
@@ -1385,7 +1501,76 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("SEICCVMaxLuminanceValue",                         m_ccvSEIMaxLuminanceValue,                           0.1, "specifies the CCV max luminance value  in the content colour volume SEI message")
   ("SEICCVAvgLuminanceValuePresent",                  m_ccvSEIAvgLuminanceValuePresentFlag,               true, "Specifies whether the CCV avg luminance value is present in the content colour volume SEI message")
   ("SEICCVAvgLuminanceValue",                         m_ccvSEIAvgLuminanceValue,                          0.01, "specifies the CCV avg luminance value  in the content colour volume SEI message")
-
+  // scalability dimension information SEI
+  ("SEISDIEnabled",                                   m_sdiSEIEnabled,                          false, "Control generation of scalability dimension information SEI message")
+  ("SEISDIMaxLayersMinus1",                           m_sdiSEIMaxLayersMinus1,                      0, "Specifies the maximum number of layers minus 1 in the current CVS")
+  ("SEISDIMultiviewInfoFlag",                         m_sdiSEIMultiviewInfoFlag,                false, "Specifies the current CVS may have multiple views and the sdi_view_id_val[ ] syntax elements are present in the scalability dimension information SEI message")
+  ("SEISDIAuxiliaryInfoFlag",                         m_sdiSEIAuxiliaryInfoFlag,                false, "Specifies that one or more layers in the current CVS may be auxiliary layers, which carry auxiliary information, and the sdi_aux_id[ ] syntax elements are present in the scalability dimension information SEI message")
+  ("SEISDIViewIdLenMinus1",                           m_sdiSEIViewIdLenMinus1,                      0, "Specifies the length, in bits, of the sdi_view_id_val[ i ] syntax element minus 1 in the scalability dimension information SEI message")
+  ("SEISDILayerId",                                   cfg_sdiSEILayerId,            cfg_sdiSEILayerId, "List of the layer identifiers that may be present in the scalability dimension information SEI message in the current CVS")
+  ("SEISDIViewIdVal",                                 cfg_sdiSEIViewIdVal,        cfg_sdiSEIViewIdVal, "List of the view identifiers in the scalability dimension information SEI message")
+  ("SEISDIAuxId",                                     cfg_sdiSEIAuxId,                cfg_sdiSEIAuxId, "List of the auxiliary identifiers in the scalability dimension information SEI message")
+  ("SEISDINumAssociatedPrimaryLayersMinus1",          cfg_sdiSEINumAssociatedPrimaryLayersMinus1, cfg_sdiSEINumAssociatedPrimaryLayersMinus1, "List of the numbers of associated primary layers of i-th layer, which is an auxiliary layer.")
+  // multiview acquisition information SEI
+  ("SEIMAIEnabled",                                   m_maiSEIEnabled,                                    false, "Control generation of multiview acquisition information SEI message")
+  ("SEIMAIIntrinsicParamFlag",                        m_maiSEIIntrinsicParamFlag,                         false, "Specifies the presence of intrinsic camera parameters in the multiview acquisition information SEI message")
+  ("SEIMAIExtrinsicParamFlag",                        m_maiSEIExtrinsicParamFlag,                         false, "Specifies the presence of extrinsic camera parameters in the multiview acquisition information SEI message")
+  ("SEIMAINumViewsMinus1",                            m_maiSEINumViewsMinus1,                                 0, "Specifies the number of views minus 1 in the multiview acquisition information SEI message")
+  ("SEIMAIIntrinsicParamsEqualFlag",                  m_maiSEIIntrinsicParamsEqualFlag,                   false, "Specifies the intrinsic camera parameters are equal for all cameras in the multiview acquisition information SEI message")
+  ("SEIMAIPrecFocalLength",                           m_maiSEIPrecFocalLength,                                0, "Specifies the exponent of the maximum allowable truncation error for focal_length_x[i] and focal_length_y[i] in the multiview acquisition information SEI message")
+  ("SEIMAIPrecPrincipalPoint",                        m_maiSEIPrecPrincipalPoint,                             0, "Specifies the exponent of the maximum allowable truncation error for principal_point_x[i] and principal_point_y[i] in the multiview acquisition information SEI message")
+  ("SEIMAIPrecSkewFactor",                            m_maiSEIPrecSkewFactor,                                 0, "Specifies the exponent of the maximum allowable truncation error for skew factor in the multiview acquisition information SEI message")
+  ("SEIMAISignFocalLengthX",                          cfg_maiSEISignFocalLengthX,    cfg_maiSEISignFocalLengthX, "List of the signs of the focal length of the camera in the horizontal direction in the multiview acquisition information SEI message")
+  ("SEIMAIExponentFocalLengthX",                      cfg_maiSEIExponentFocalLengthX, cfg_maiSEIExponentFocalLengthX, "List of the exponent parts of the focal length of the camera in the horizontal direction. in the multiview acquisition information SEI message")
+  ("SEIMAIMantissaFocalLengthX",                      cfg_maiSEIMantissaFocalLengthX, cfg_maiSEIMantissaFocalLengthX, "List of the mantissa parts of the focal length of the camera in the horizontal direction in the multiview acquisition information SEI message")
+  ("SEIMAISignFocalLengthY",                          cfg_maiSEISignFocalLengthY,    cfg_maiSEISignFocalLengthY, "List of the signs of the focal length of the camera in the vertical direction in the multiview acquisition information SEI message")
+  ("SEIMAIExponentFocalLengthY",                      cfg_maiSEIExponentFocalLengthY, cfg_maiSEIExponentFocalLengthY, "List of the exponent parts of the focal length of the camera in the vertical direction in the multiview acquisition information SEI message")
+  ("SEIMAIMantissaFocalLengthY",                      cfg_maiSEIMantissaFocalLengthY, cfg_maiSEIMantissaFocalLengthY, "List of the mantissa parts of the focal length of the camera in the vertical direction in the multiview acquisition information SEI message")
+  ("SEIMAISignPrincipalPointX",                       cfg_maiSEISignPrincipalPointX, cfg_maiSEISignPrincipalPointX, "List of the signs of the principal point of the camera in the horizontal direction in the multiview acquisition information SEI message")
+  ("SEIMAIExponentPrincipalPointX",                   cfg_maiSEIExponentPrincipalPointX, cfg_maiSEIExponentPrincipalPointX, "List of the exponent parts of the principal point of the camera in the horizontal direction in the multiview acquisition information SEI message")
+  ("SEIMAIMantissaPrincipalPointX",                   cfg_maiSEIMantissaPrincipalPointX, cfg_maiSEIMantissaPrincipalPointX, "List of the mantissa parts of the principal point of the camera in the horizontal direction in the multiview acquisition information SEI message")
+  ("SEIMAISignPrincipalPointY",                       cfg_maiSEISignPrincipalPointY, cfg_maiSEISignPrincipalPointY, "List of the signs of the principal point of the camera in the vertical direction in the multiview acquisition information SEI message")
+  ("SEIMAIExponentPrincipalPointY",                   cfg_maiSEIExponentPrincipalPointY, cfg_maiSEIExponentPrincipalPointY, "List of the exponent parts of the principal point of the camera in the vertical direction in the multiview acquisition information SEI message")
+  ("SEIMAIMantissaPrincipalPointY",                   cfg_maiSEIMantissaPrincipalPointY, cfg_maiSEIMantissaPrincipalPointY, "List of the mantissa parts of the principal point of the camera in the vertical direction in the multiview acquisition information SEI message")
+  ("SEIMAISignSkewFactor",                            cfg_maiSEISignSkewFactor,     cfg_maiSEISignSkewFactor, "List of the signs of the skew factor of the camera in the multiview acquisition information SEI message")
+  ("SEIMAIExponentSkewFactor",                        cfg_maiSEIExponentSkewFactor, cfg_maiSEIExponentSkewFactor, "List of the exponent parts of the skew factor of the camera in the multiview acquisition information SEI message")
+  ("SEIMAIMantissaSkewFactor",                        cfg_maiSEIMantissaSkewFactor, cfg_maiSEIMantissaSkewFactor, "List of the mantissa parts of the skew factor of the camera in the multiview acquisition information SEI message")
+  ("SEIMAIPrecRotationParam",                         m_maiSEIPrecRotationParam,                            0, "Specifies the exponent of the maximum allowable truncation error for rotation in the multiview acquisition information SEI message")
+  ("SEIMAIPrecTranslationParam",                      m_maiSEIPrecTranslationParam,                         0, "Specifies the exponent of the maximum allowable truncation error for translation in the multiview acquisition information SEI message")
+#if JVET_W0078_MVP_SEI 
+// multiview view position SEI
+  ("SEIMVPEnabled",                                   m_mvpSEIEnabled,                                  false, "Control generation of multiview view position SEI message")
+  ("SEIMVPNumViewsMinus1",                            m_mvpSEINumViewsMinus1,                               0, "Specifies the number of views minus 1 in the multiview view position SEI message")
+  ("SEIMVPViewPosition",                              cfg_mvpSEIViewPosition,           cfg_mvpSEIViewPosition, "List of View Positions in the multiview view position SEI message")
+#endif
+// alpha channel information SEI
+  ("SEIACIEnabled",                                   m_aciSEIEnabled,                                   false, "Control generation of alpha channel information SEI message")
+  ("SEIACICancelFlag",                                m_aciSEICancelFlag,                                false, "Specifies the persistence of any previous alpha channel information SEI message in output order")
+  ("SEIACIUseIdc",                                    m_aciSEIUseIdc,                                        0, "Specifies the usage of the auxiliary picture in the alpha channel information SEI message")
+  ("SEIACIBitDepthMinus8",                            m_aciSEIBitDepthMinus8,                                0, "Specifies the bit depth of the samples of the auxiliary picture in the alpha channel information SEI message")
+  ("SEIACITransparentValue",                          m_aciSEITransparentValue,                              0, "Specifies the interpretation sample value of an auxiliary coded picture luma sample for which the associated luma and chroma samples of the primary coded picture are considered transparent for purposes of alpha blending in the alpha channel information SEI message")
+  ("SEIACIOpaqueValue",                               m_aciSEIOpaqueValue,                                   0, "Specifies the interpretation sample value of an auxiliary coded picture luma sample for which the associated luma and chroma samples of the primary coded picture are considered opaque for purposes of alpha blending in the alpha channel information SEI message")
+  ("SEIACIIncrFlag",                                  m_aciSEIIncrFlag,                                  false, "Specifies the interpretation sample value for each decoded auxiliary picture luma sample value is equal to the decoded auxiliary picture sample value for purposes of alpha blending in the alpha channel information SEI message")
+  ("SEIACIClipFlag",                                  m_aciSEIClipFlag,                                  false, "Specifies whether clipping operation is applied in the alpha channel information SEI message")
+  ("SEIACIClipTypeFlag",                              m_aciSEIClipTypeFlag,                              false, "Specifies the type of clipping operation in the alpha channel information SEI message")
+  // depth representation information SEI
+  ("SEIDRIEnabled",                                   m_driSEIEnabled,                                   false, "Control generation of depth representation information SEI message")
+  ("SEIDRIZNearFlag",                                 m_driSEIZNearFlag,                                 false, "Specifies the presence of the nearest depth value in the depth representation information SEI message")
+  ("SEIDRIZFarFlag",                                  m_driSEIZFarFlag,                                  false, "Specifies the presence of the farthest depth value in the depth representation information SEI message")
+  ("SEIDRIDMinFlag",                                  m_driSEIDMinFlag,                                  false, "Specifies the presence of the minimum disparity value in the depth representation information SEI message")
+  ("SEIDRIDMaxFlag",                                  m_driSEIDMaxFlag,                                  false, "Specifies the presence of the maximum disparity value in the depth representation information SEI message")
+  ("SEIDRIZNear",                                     m_driSEIZNear,                                       0.0, "Specifies the nearest depth value in the depth representation information SEI message")
+  ("SEIDRIZFar",                                      m_driSEIZFar,                                        0.0, "Specifies the farthest depth value in the depth representation information SEI message")
+  ("SEIDRIDMin",                                      m_driSEIDMin,                                        0.0, "Specifies the minimum disparity value in the depth representation information SEI message")
+  ("SEIDRIDMax",                                      m_driSEIDMax,                                        0.0, "Specifies the maximum disparity value in the depth representation information SEI message")
+  ("SEIDRIDepthRepresentationType",                   m_driSEIDepthRepresentationType,                       0, "Specifies the representation definition of decoded luma samples of auxiliary pictures in the depth representation information SEI message")
+  ("SEIDRIDisparityRefViewId",                        m_driSEIDisparityRefViewId,                            0, "Specifies the ViewId value against which the disparity values are derived in the depth representation information SEI message")
+  ("SEIDRINonlinearNumMinus1",                        m_driSEINonlinearNumMinus1,                            0, "Specifies the number of piece-wise linear segments minus 2 for mapping of depth values to a scale that is uniformly quantized in terms of disparity  in the depth representation information SEI message")
+  ("SEIDRINonlinearModel",                            cfg_driSEINonlinearModel,       cfg_driSEINonlinearModel, "List of the piece-wise linear segments for mapping of decoded luma sample values of an auxiliary picture to a scale that is uniformly quantized in terms of disparity in the depth representation information SEI message")
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  ("SEIConstrainedRASL",                              m_constrainedRaslEncoding,                         false, "Control generation of constrained RASL encoding SEI message")
+#endif
+  
   ("DebugBitstream",                                  m_decodeBitstreams[0],             string( "" ), "Assume the frames up to POC DebugPOC will be the same as in this bitstream. Load those frames from the bitstream instead of encoding them." )
   ("DebugPOC",                                        m_switchPOC,                                 -1, "If DebugBitstream is present, load frames up to this POC from this bitstream. Starting with DebugPOC, return to normal encoding." )
   ("DecodeBitstream1",                                m_decodeBitstreams[0],             string( "" ), "Assume the frames up to POC DebugPOC will be the same as in this bitstream. Load those frames from the bitstream instead of encoding them." )
@@ -1396,18 +1581,21 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("StopAfterFFtoPOC",                                m_stopAfterFFtoPOC,                       false, "If using fast forward to POC, after the POC of interest has been hit, stop further encoding.")
   ("ForceDecodeBitstream1",                           m_forceDecodeBitstream1,                  false, "force decoding of bitstream 1 - use this only if you are realy sure about what you are doing ")
   ("DecodeBitstream2ModPOCAndType",                   m_bs2ModPOCAndType,                       false, "Modify POC and NALU-type of second input bitstream, to use second BS as closing I-slice")
-  ("NumSplitThreads",                                 m_numSplitThreads,                            1, "Number of threads used to parallelize splitting")
-  ("ForceSingleSplitThread",                          m_forceSplitSequential,                   false, "Force single thread execution even if taking the parallelized path")
-  ("NumWppThreads",                                   m_numWppThreads,                              1, "Number of threads used to run WPP-style parallelization")
-  ("NumWppExtraLines",                                m_numWppExtraLines,                           0, "Number of additional wpp lines to switch when threads are blocked")
+
   ("DebugCTU",                                        m_debugCTU,                                  -1, "If DebugBitstream is present, load frames up to this POC from this bitstream. Starting with DebugPOC-frame at CTUline containin debug CTU.")
-  ("EnsureWppBitEqual",                               m_ensureWppBitEqual,                      false, "Ensure the results are equal to results with WPP-style parallelism, even if WPP is off")
+#if JVET_W0129_ENABLE_ALF_TRUEORG
+  ("AlfTrueOrg",                                      m_alfTrueOrg,                              true, "Using true original samples for ALF optimization when MCTF is enabled\n")
+#else
+  ("AlfSaoTrueOrg",                                    m_alfSaoTrueOrg,                         false, "Using true original samples for ALF and SAO optimization when MCTF is enabled\n")
+#endif
   ( "ALF",                                             m_alf,                                    true, "Adaptive Loop Filter\n" )
-#if JVET_T0064
-  ("ALFStrength",                                      m_alfStrength,                             1.0, "Adaptive Loop Filter strength. The parameter scales the magnitudes of the ALF filter coefficients for both luma and chroma. Valid range is 0.0 <= ALFStrength <= 1.0")
+  ("ALFStrengthLuma",                                  m_alfStrengthLuma,                         1.0, "Adaptive Loop Filter strength for luma. The parameter scales the magnitudes of the ALF filter coefficients for luma. Valid range is 0.0 <= ALFStrengthLuma <= 1.0")
   ("ALFAllowPredefinedFilters",                        m_alfAllowPredefinedFilters,              true, "Allow use of predefined filters for ALF")
   ("CCALFStrength",                                    m_ccalfStrength,                           1.0, "Cross-component Adaptive Loop Filter strength. The parameter scales the magnitudes of the CCALF filter coefficients. Valid range is 0.0 <= CCALFStrength <= 1.0")
-#endif
+  ("ALFStrengthChroma",                                m_alfStrengthChroma,                       1.0, "Adaptive Loop Filter strength for chroma. The parameter scales the magnitudes of the ALF filter coefficients for chroma. Valid range is 0.0 <= ALFStrengthChroma <= 1.0")
+  ("ALFStrengthTargetLuma",                            m_alfStrengthTargetLuma,                   1.0, "Adaptive Loop Filter strength target for ALF luma filter optimization. The parameter scales the auto-correlation matrix E and the cross-correlation vector y for luma. Valid range is 0.0 <= ALFStrengthTargetLuma <= 1.0")
+  ("ALFStrengthTargetChroma",                          m_alfStrengthTargetChroma,                 1.0, "Adaptive Loop Filter strength target for ALF chroma filter optimization. The parameter scales the auto-correlation matrix E and the cross-correlation vector y for chroma. Valid range is 0.0 <= ALFStrengthTargetChroma <= 1.0")
+  ("CCALFStrengthTarget",                              m_ccalfStrengthTarget,                     1.0, "Cross-component Adaptive Loop Filter strength target for filter optimization. The parameter scales the auto-correlation matrix E and the cross-correlation vector y. Valid range is 0.0 <= CCALFStrengthTarget <= 1.0")
   ( "CCALF",                                           m_ccalf,                                  true, "Cross-component Adaptive Loop Filter" )
   ( "CCALFQpTh",                                       m_ccalfQpThreshold,                         37, "QP threshold above which encoder reduces CCALF usage")
   ( "RPR",                                            m_rprEnabledFlag,                          true, "Reference Sample Resolution" )
@@ -1417,13 +1605,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ( "SwitchPocPeriod",                                m_switchPocPeriod,                            0, "Switch POC period for RPR" )
   ( "UpscaledOutput",                                 m_upscaledOutput,                             0, "Output upscaled (2), decoded but in full resolution buffer (1) or decoded cropped (0, default) picture for RPR" )
   ( "MaxLayers",                                      m_maxLayers,                                  1, "Max number of layers" )
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   ( "EnableOperatingPointInformation",                m_OPIEnabled,                             false, "Enables writing of Operating Point Information (OPI)" )
   ( "MaxTemporalLayer",                               m_maxTemporalLayer,                         500, "Maximum temporal layer to be signalled in OPI" )
   ( "TargetOutputLayerSet",                           m_targetOlsIdx,                             500, "Target output layer set index to be signalled in OPI" )
-#else
-  ( "TargetOutputLayerSet,p",                         m_targetOlsIdx,                              -1, "Target output layer set index" )
-#endif
   ;
   opts.addOptions()
   ( "MaxSublayers",                                   m_maxSublayers,                               7, "Max number of Sublayers")
@@ -1439,11 +1623,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ( "OlsOutputLayer%d",                               m_olsOutputLayerStr, string(""), MAX_VPS_LAYERS, "Output layer index of i-th OLS")
   ( "NumPTLsInVPS",                                   m_numPtlsInVps,                               1, "Number of profile_tier_level structures in VPS" )
   ( "AvoidIntraInDepLayers",                          m_avoidIntraInDepLayer,                    true, "Replaces I pictures in dependent layers with B pictures" )
-#if JVET_R0193
   ( "MaxTidILRefPicsPlusOneLayerId%d",                m_maxTidILRefPicsPlus1Str, string(""), MAX_VPS_LAYERS, "Maximum temporal ID for inter-layer reference pictures plus 1 of i-th layer, 0 for IRAP only")
-#else
-  ( "MaxTidILRefPicsPlus1",                           m_cfgVPSParameters.m_maxTidILRefPicsPlus1,   -1, "Maximum temporal ID for inter-layer reference pictures plus 1, 0 for IRAP only" )
-#endif
     ;
 
   opts.addOptions()
@@ -1482,6 +1662,20 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 
   m_resChangeInClvsEnabled = m_scalingRatioHor != 1.0 || m_scalingRatioVer != 1.0;
   m_resChangeInClvsEnabled = m_resChangeInClvsEnabled && m_rprEnabledFlag;
+  
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  if( m_constrainedRaslEncoding )   // both dependent switches simply follow m_constrainedRaslEncoding
+  {
+    m_craAPSreset            = true;
+    m_rprRASLtoolSwitch      = true;
+  }
+  else   // unconditionally cleared here, so any value these two members held before is overwritten
+  {
+    m_craAPSreset            = false;
+    m_rprRASLtoolSwitch      = false;
+  }
+#endif
+  
   if( m_fractionOfFrames != 1.0 )
   {
     m_framesToBeEncoded = int( m_framesToBeEncoded * m_fractionOfFrames );
@@ -1502,6 +1696,77 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     m_iIntraPeriod = -1;
   }
 
+#if GDR_ENABLED
+  if ( m_gdrEnabled )   // GDR overrides several encoder settings and the first GOP entry below
+  {
+    m_iDecodingRefreshType = 3;   // NOTE(review): presumably the refresh type used for GDR - confirm against the option docs
+    m_intraQPOffset = 0;
+    m_iGOPSize = 1;   // only m_GOPList[0] is configured in this block
+
+    int8_t sliceType = m_GOPList[0].m_sliceType;   // read once; decides below whether list 1 is filled in
+
+    m_GOPList[0].m_POC = 1;    
+    m_GOPList[0].m_QPOffset = 0;
+    m_GOPList[0].m_QPOffsetModelOffset = 0;
+    m_GOPList[0].m_QPOffsetModelScale = 0;
+    m_GOPList[0].m_CbQPoffset = 0;
+    m_GOPList[0].m_CrQPoffset = 0;
+    m_GOPList[0].m_QPFactor = 1.0;
+    m_GOPList[0].m_tcOffsetDiv2 = 0;
+    m_GOPList[0].m_betaOffsetDiv2 = 0;
+    m_GOPList[0].m_CbTcOffsetDiv2 = 0;
+    m_GOPList[0].m_CbBetaOffsetDiv2 = 0;
+    m_GOPList[0].m_CrTcOffsetDiv2 = 0;
+    m_GOPList[0].m_CrBetaOffsetDiv2 = 0;
+    m_GOPList[0].m_temporalId = 0;
+
+    m_GOPList[0].m_numRefPicsActive0 = 4;   // list 0: four reference pictures with deltas 1..4
+    m_GOPList[0].m_numRefPics0 = 4;
+    m_GOPList[0].m_deltaRefPics0[0] = 1;
+    m_GOPList[0].m_deltaRefPics0[1] = 2;
+    m_GOPList[0].m_deltaRefPics0[2] = 3;
+    m_GOPList[0].m_deltaRefPics0[3] = 4;
+
+    if (sliceType == 'B')   // list 1 gets the same four deltas, but only for 'B' entries
+    {
+      m_GOPList[0].m_numRefPicsActive1 = 4;
+      m_GOPList[0].m_numRefPics1 = 4;
+      m_GOPList[0].m_deltaRefPics1[0] = 1;
+      m_GOPList[0].m_deltaRefPics1[1] = 2;
+      m_GOPList[0].m_deltaRefPics1[2] = 3;
+      m_GOPList[0].m_deltaRefPics1[3] = 4;
+    }
+
+    m_BIO  = false;   // BIO, DMVR and SMVD are switched off whenever GDR is enabled
+    m_DMVR = false;
+    m_SMVD = false;
+
+    if (m_gdrPeriod < 0)   // negative value: default to twice the frame rate
+    {
+      m_gdrPeriod = m_iFrameRate * 2;
+    }
+
+    if (m_gdrInterval < 0)   // negative value: default to the frame rate
+    {
+      m_gdrInterval = m_iFrameRate;
+    }
+
+    if (m_gdrPocStart < 0)   // negative value: default to the (possibly just derived) GDR period
+    {
+      m_gdrPocStart = m_gdrPeriod;
+    }
+
+    if (m_iIntraPeriod == -1)   // intra period is only derived when it is still -1
+    {
+      m_iFrameRate = (m_iFrameRate == 0) ? 30 : m_iFrameRate;   // avoids modulo by zero below; note the defaults above were already derived from the old value
+      if (m_gdrPocStart % m_iFrameRate != 0)
+        m_iIntraPeriod = -1;   // start POC is not a multiple of the frame rate: leave the intra period at -1
+      else
+        m_iIntraPeriod = m_gdrPeriod;
+    }
+  }
+#endif
+
   m_bpDeltasGOPStructure = false;
   if(m_iGOPSize == 16)
   {
@@ -1609,8 +1874,8 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
    * Set any derived parameters
    */
 #if EXTENSION_360_VIDEO
-  m_inputFileWidth = m_iSourceWidth;
-  m_inputFileHeight = m_iSourceHeight;
+  m_inputFileWidth = m_sourceWidth;
+  m_inputFileHeight = m_sourceHeight;
   m_ext360.setMaxCUInfo(m_uiCTUSize, 1 << MIN_CU_LOG2);
 #endif
 
@@ -1630,9 +1895,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   if(m_isField)
   {
     //Frame height
-    m_iSourceHeightOrg = m_iSourceHeight;
+    m_iSourceHeightOrg = m_sourceHeight;
     //Field height
-    m_iSourceHeight = m_iSourceHeight >> 1;
+    m_sourceHeight = m_sourceHeight >> 1;
     //number of fields to encode
     m_framesToBeEncoded *= 2;
   }
@@ -1672,8 +1937,8 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
         m_subPicId[i]                   = cfg_subPicId.values[i];
       }
     }
-    uint32_t tmpWidthVal = (m_iSourceWidth + m_uiCTUSize - 1) / m_uiCTUSize;
-    uint32_t tmpHeightVal = (m_iSourceHeight + m_uiCTUSize - 1) / m_uiCTUSize;
+    uint32_t tmpWidthVal = (m_sourceWidth + m_uiCTUSize - 1) / m_uiCTUSize;
+    uint32_t tmpHeightVal = (m_sourceHeight + m_uiCTUSize - 1) / m_uiCTUSize;
     if (!m_subPicSameSizeFlag)
     {
       for (int i = 0; i < m_numSubPics; i++)
@@ -1711,10 +1976,10 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 
   if (m_virtualBoundariesPresentFlag)
   {
-    if (m_iSourceWidth <= 8)
+    if (m_sourceWidth <= 8)
       CHECK(m_numVerVirtualBoundaries != 0, "The number of vertical virtual boundaries shall be 0 when the picture width is less than or equal to 8");
 
-    if (m_iSourceHeight <= 8)
+    if (m_sourceHeight <= 8)
       CHECK(m_numHorVirtualBoundaries != 0, "The number of horizontal virtual boundaries shall be 0 when the picture height is less than or equal to 8");
   }
 
@@ -1897,6 +2162,26 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     case ExtendedProfileName::MULTILAYER_MAIN_10_444_STILL_PICTURE:
       m_profile = Profile::MULTILAYER_MAIN_10_444_STILL_PICTURE;
       break;
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    case ExtendedProfileName::MAIN_12:
+      m_profile = Profile::MAIN_12; break;
+    case ExtendedProfileName::MAIN_12_444:
+      m_profile = Profile::MAIN_12_444; break;
+    case ExtendedProfileName::MAIN_16_444:
+      m_profile = Profile::MAIN_16_444; break;
+    case ExtendedProfileName::MAIN_12_INTRA:
+      m_profile = Profile::MAIN_12_INTRA; break;
+    case ExtendedProfileName::MAIN_12_444_INTRA:
+      m_profile = Profile::MAIN_12_444_INTRA; break;
+    case ExtendedProfileName::MAIN_16_444_INTRA:
+      m_profile = Profile::MAIN_16_444_INTRA; break;
+    case ExtendedProfileName::MAIN_12_STILL_PICTURE:
+      m_profile = Profile::MAIN_12_STILL_PICTURE; break;
+    case ExtendedProfileName::MAIN_12_444_STILL_PICTURE:
+      m_profile = Profile::MAIN_12_444_STILL_PICTURE; break;
+    case ExtendedProfileName::MAIN_16_444_STILL_PICTURE:
+      m_profile = Profile::MAIN_16_444_STILL_PICTURE; break;
+#endif
     default: EXIT("Unable to determine profile from configured settings"); break;
     }
   }
@@ -1923,57 +2208,85 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     CHECK(m_bitDepthConstraint < 8 || m_bitDepthConstraint>16, "MaxBitDepthConstraint setting must be in the range 8 to 16 (inclusive)");
   }
 
-
   m_inputColourSpaceConvert = stringToInputColourSpaceConvert(inputColourSpaceConvert, true);
   m_rgbFormat = (m_inputColourSpaceConvert == IPCOLOURSPACE_RGBtoGBR && m_chromaFormatIDC == CHROMA_444) ? true : false;
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  if (m_profile == Profile::MAIN_12 || m_profile == Profile::MAIN_12_INTRA || m_profile == Profile::MAIN_12_STILL_PICTURE ||
+      m_profile == Profile::MAIN_12_444 || m_profile == Profile::MAIN_12_444_INTRA || m_profile == Profile::MAIN_12_444_STILL_PICTURE ||
+      m_profile == Profile::MAIN_16_444 || m_profile == Profile::MAIN_16_444_INTRA || m_profile == Profile::MAIN_16_444_STILL_PICTURE)
+  {
+    m_gciPresentFlag = true;
+    if (m_profile == Profile::MAIN_12 || m_profile == Profile::MAIN_12_444 || m_profile == Profile::MAIN_16_444)
+    {
+      CHECK(m_generalLowerBitRateConstraintFlag == 0, "GeneralLowerBitRateConstraintFlag setting must be 1 for non-Intra/Still Picture operation range extension profiles.")
+    }
+  }
+  if (m_profile == Profile::MAIN_12_INTRA || m_profile == Profile::MAIN_12_444_INTRA || m_profile == Profile::MAIN_16_444_INTRA)
+  {
+    CHECK(m_iIntraPeriod != 1, "IntraPeriod setting must be 1 for Intra profiles")
+  }
+  if (m_profile == Profile::MULTILAYER_MAIN_10_STILL_PICTURE || m_profile == Profile::MAIN_10_STILL_PICTURE ||
+      m_profile == Profile::MAIN_12_STILL_PICTURE || m_profile == Profile::MAIN_12_444_STILL_PICTURE || m_profile == Profile::MAIN_16_444_STILL_PICTURE)
+  {
+    CHECK(m_framesToBeEncoded != 1, "FramesToBeEncoded setting must be 1 for Still Picture profiles")
+  }
+#endif
 
   // Picture width and height must be multiples of 8 and minCuSize
   const int minResolutionMultiple = std::max(8, 1 << m_log2MinCuSize);
-  CHECK(((m_iSourceWidth% minResolutionMultiple) || (m_iSourceHeight % minResolutionMultiple)) && m_conformanceWindowMode != 1, "Picture width or height is not a multiple of 8 or minCuSize, please use ConformanceMode 1!");
+
   switch (m_conformanceWindowMode)
   {
   case 0:
     {
       // no conformance or padding
       m_confWinLeft = m_confWinRight = m_confWinTop = m_confWinBottom = 0;
-      m_aiPad[1] = m_aiPad[0] = 0;
+      m_sourcePadding[1] = m_sourcePadding[0] = 0;
       break;
     }
   case 1:
     {
       // automatic padding to minimum CU size
-      if (m_iSourceWidth % minResolutionMultiple)
+      if (m_sourceWidth % minResolutionMultiple)
       {
-        m_aiPad[0] = m_confWinRight  = ((m_iSourceWidth / minResolutionMultiple) + 1) * minResolutionMultiple - m_iSourceWidth;
-        m_iSourceWidth  += m_confWinRight;
+        m_sourcePadding[0] = m_confWinRight  = ((m_sourceWidth / minResolutionMultiple) + 1) * minResolutionMultiple - m_sourceWidth;
+        m_sourceWidth  += m_confWinRight;
       }
-      if (m_iSourceHeight % minResolutionMultiple)
+      if (m_sourceHeight % minResolutionMultiple)
       {
-        m_aiPad[1] = m_confWinBottom = ((m_iSourceHeight / minResolutionMultiple) + 1) * minResolutionMultiple - m_iSourceHeight;
-        m_iSourceHeight += m_confWinBottom;
+        m_sourcePadding[1] = m_confWinBottom = ((m_sourceHeight / minResolutionMultiple) + 1) * minResolutionMultiple - m_sourceHeight;
+        m_sourceHeight += m_confWinBottom;
         if ( m_isField )
         {
           m_iSourceHeightOrg += m_confWinBottom << 1;
-          m_aiPad[1] = m_confWinBottom << 1;
+          m_sourcePadding[1] = m_confWinBottom << 1;
         }
       }
-      if (m_aiPad[0] % SPS::getWinUnitX(m_chromaFormatIDC) != 0)
+      if (m_sourcePadding[0] % SPS::getWinUnitX(m_chromaFormatIDC) != 0)
       {
         EXIT( "Error: picture width is not an integer multiple of the specified chroma subsampling");
       }
-      if (m_aiPad[1] % SPS::getWinUnitY(m_chromaFormatIDC) != 0)
+      if (m_sourcePadding[1] % SPS::getWinUnitY(m_chromaFormatIDC) != 0)
       {
         EXIT( "Error: picture height is not an integer multiple of the specified chroma subsampling");
       }
+      if (m_sourcePadding[0])
+      {
+        msg( INFO, "Info: Conformance window automatically enabled. Adding %i luma pel horizontally\n", m_sourcePadding[0]);
+      }
+      if (m_sourcePadding[1])
+      {
+        msg( INFO, "Info: Conformance window automatically enabled. Adding %i luma pel vertically\n", m_sourcePadding[1]);
+      }
       break;
     }
   case 2:
     {
       //padding
-      m_iSourceWidth  += m_aiPad[0];
-      m_iSourceHeight += m_aiPad[1];
-      m_confWinRight  = m_aiPad[0];
-      m_confWinBottom = m_aiPad[1];
+      m_sourceWidth  += m_sourcePadding[0];
+      m_sourceHeight += m_sourcePadding[1];
+      m_confWinRight  = m_sourcePadding[0];
+      m_confWinBottom = m_sourcePadding[1];
       break;
     }
   case 3:
@@ -1983,24 +2296,25 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
       {
         msg( ERROR, "Warning: Conformance window enabled, but all conformance window parameters set to zero\n");
       }
-      if ((m_aiPad[1] != 0) || (m_aiPad[0]!=0))
+      if ((m_sourcePadding[1] != 0) || (m_sourcePadding[0]!=0))
       {
         msg( ERROR, "Warning: Conformance window enabled, padding parameters will be ignored\n");
       }
-      m_aiPad[1] = m_aiPad[0] = 0;
+      m_sourcePadding[1] = m_sourcePadding[0] = 0;
       break;
     }
   }
+  CHECK(((m_sourceWidth% minResolutionMultiple) || (m_sourceHeight % minResolutionMultiple)), "Picture width or height (after padding) is not a multiple of 8 or minCuSize, please use ConformanceWindowMode=1 for automatic adjustment or ConformanceWindowMode=2 to specify padding manually!!");
 
   if( m_conformanceWindowMode > 0 && m_subPicInfoPresentFlag )
   {
     for(int i = 0; i < m_numSubPics; i++)
     {
-      CHECK( (m_subPicCtuTopLeftX[i] * m_uiCTUSize) >= (m_iSourceWidth - m_confWinRight * SPS::getWinUnitX(m_chromaFormatIDC)),
+      CHECK( (m_subPicCtuTopLeftX[i] * m_uiCTUSize) >= (m_sourceWidth - m_confWinRight * SPS::getWinUnitX(m_chromaFormatIDC)),
           "No subpicture can be located completely outside of the conformance cropping window");
       CHECK( ((m_subPicCtuTopLeftX[i] + m_subPicWidth[i]) * m_uiCTUSize) <= (m_confWinLeft * SPS::getWinUnitX(m_chromaFormatIDC)),
 	  "No subpicture can be located completely outside of the conformance cropping window" );
-      CHECK( (m_subPicCtuTopLeftY[i] * m_uiCTUSize) >= (m_iSourceHeight  - m_confWinBottom * SPS::getWinUnitY(m_chromaFormatIDC)),
+      CHECK( (m_subPicCtuTopLeftY[i] * m_uiCTUSize) >= (m_sourceHeight  - m_confWinBottom * SPS::getWinUnitY(m_chromaFormatIDC)),
           "No subpicture can be located completely outside of the conformance cropping window");
       CHECK( ((m_subPicCtuTopLeftY[i] + m_subPicHeight[i]) * m_uiCTUSize) <= (m_confWinTop * SPS::getWinUnitY(m_chromaFormatIDC)),
           "No subpicture can be located completely outside of the conformance cropping window");
@@ -2158,6 +2472,22 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     }
   }
 
+  /* Local chroma QP offsets configuration */
+  CHECK(m_cuChromaQpOffsetSubdiv < 0, "MaxCuChromaQpOffsetSubdiv shall be >= 0");
+  CHECK(cfg_crQpOffsetList.values.size() != cfg_cbQpOffsetList.values.size(), "Chroma QP offset lists shall be the same size");
+  CHECK(cfg_cbCrQpOffsetList.values.size() != cfg_cbQpOffsetList.values.size() && cfg_cbCrQpOffsetList.values.size() > 0, "Chroma QP offset list for joint CbCr shall be either the same size as Cb and Cr or empty");
+  if (m_cuChromaQpOffsetSubdiv > 0 && !cfg_cbQpOffsetList.values.size())
+  {
+    msg(WARNING, "MaxCuChromaQpOffsetSubdiv has no effect when chroma QP offset lists are empty\n");
+  }
+  m_cuChromaQpOffsetList.resize(cfg_cbQpOffsetList.values.size());
+  for (int i=0; i < cfg_cbQpOffsetList.values.size(); i++)
+  {
+    m_cuChromaQpOffsetList[i].u.comp.CbOffset = cfg_cbQpOffsetList.values[i];
+    m_cuChromaQpOffsetList[i].u.comp.CrOffset = cfg_crQpOffsetList.values[i];
+    m_cuChromaQpOffsetList[i].u.comp.JointCbCrOffset = cfg_cbCrQpOffsetList.values.size() ? cfg_cbCrQpOffsetList.values[i] : 0;
+  }
+
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
   if ( m_LadfEnabed )
   {
@@ -2193,7 +2523,20 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   }
 #endif
 
+#if GDR_ENABLED
+  if (m_gdrEnabled)
+  {
+    m_virtualBoundariesEnabledFlag = 1;
+    m_virtualBoundariesPresentFlag = 0;
+  }
+  else
+  {
+    m_virtualBoundariesEnabledFlag = 0;
+  }
+#else
   m_virtualBoundariesEnabledFlag = 0;
+#endif
+
   if( m_numVerVirtualBoundaries > 0 || m_numHorVirtualBoundaries > 0 )
     m_virtualBoundariesEnabledFlag = 1;
 
@@ -2214,7 +2557,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
       }
       for (unsigned i = 0; i < m_numVerVirtualBoundaries; i++)
       {
-        CHECK( m_virtualBoundariesPosX[i] == 0 || m_virtualBoundariesPosX[i] >= m_iSourceWidth, "The vertical virtual boundary must be within the picture" );
+        CHECK( m_virtualBoundariesPosX[i] == 0 || m_virtualBoundariesPosX[i] >= m_sourceWidth, "The vertical virtual boundary must be within the picture" );
         CHECK( m_virtualBoundariesPosX[i] % 8, "The vertical virtual boundary must be a multiple of 8 luma samples" );
         if (i > 0)
         {
@@ -2228,7 +2571,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
       }
       for (unsigned i = 0; i < m_numHorVirtualBoundaries; i++)
       {
-        CHECK( m_virtualBoundariesPosY[i] == 0 || m_virtualBoundariesPosY[i] >= m_iSourceHeight, "The horizontal virtual boundary must be within the picture" );
+        CHECK( m_virtualBoundariesPosY[i] == 0 || m_virtualBoundariesPosY[i] >= m_sourceHeight, "The horizontal virtual boundary must be within the picture" );
         CHECK( m_virtualBoundariesPosY[i] % 8, "The horizontal virtual boundary must be a multiple of 8 luma samples" );
         if (i > 0)
         {
@@ -2275,6 +2618,50 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
       m_masteringDisplay.whitePoint[idx] = uint16_t((cfg_DisplayWhitePointCode.values.size() > idx) ? cfg_DisplayWhitePointCode.values[idx] : 0);
     }
   }
+  if (m_ctiSEIEnabled)   // validate the CTI SEI options and copy the configured LUTs into m_ctiSEILut[0..2]
+  {
+    CHECK(!m_ctiSEICrossComponentFlag && m_ctiSEICrossComponentInferred, "CTI CrossComponentFlag is 0, but CTI CrossComponentInferred is 1 (must be 0 for CrossComponentFlag 0)");
+    CHECK(!m_ctiSEICrossComponentFlag && !m_ctiSEICrossComponentInferred && !m_ctiSEINumberChromaLut, "For CTI CrossComponentFlag = 0, CTI NumberChromaLut needs to be specified (1 or 2) ");
+    CHECK(m_ctiSEICrossComponentFlag && !m_ctiSEICrossComponentInferred && !m_ctiSEINumberChromaLut, "For CTI CrossComponentFlag = 1 and CrossComponentInferred = 0, CTI NumberChromaLut needs to be specified (1 or 2) ");
+    // LUT0 is mandatory whenever the CTI SEI is enabled.
+    CHECK(cfg_SEICTILut0.values.empty(), "SEI CTI (SEICTIEnabled) but no LUT0 specified");
+    m_ctiSEILut[0].presentFlag = true;
+    m_ctiSEILut[0].numLutValues = (int)cfg_SEICTILut0.values.size();
+    m_ctiSEILut[0].lutValues = cfg_SEICTILut0.values;
+    // LUT1 (and LUT2) are read when cross-component mode is off, or on without inference.
+    if (!m_ctiSEICrossComponentFlag || (m_ctiSEICrossComponentFlag && !m_ctiSEICrossComponentInferred))
+    {
+      CHECK(cfg_SEICTILut1.values.empty(), "SEI CTI LUT1 not specified");
+      m_ctiSEILut[1].presentFlag = true;
+      m_ctiSEILut[1].numLutValues = (int)cfg_SEICTILut1.values.size();
+      m_ctiSEILut[1].lutValues = cfg_SEICTILut1.values;
+      // LUT2 is either a copy of LUT1 (one chroma LUT) or its own configured list (two chroma LUTs).
+      if (m_ctiSEINumberChromaLut == 1)
+      { // Cb lut the same as Cr lut
+        m_ctiSEILut[2].presentFlag = true;
+        m_ctiSEILut[2].numLutValues = m_ctiSEILut[1].numLutValues;
+        m_ctiSEILut[2].lutValues = m_ctiSEILut[1].lutValues;
+      }
+      else if (m_ctiSEINumberChromaLut == 2)
+      { // read from cfg
+        CHECK(cfg_SEICTILut2.values.empty(), "SEI CTI LUT2 not specified");
+        m_ctiSEILut[2].presentFlag = true;
+        m_ctiSEILut[2].numLutValues = (int)cfg_SEICTILut2.values.size();
+        m_ctiSEILut[2].lutValues = cfg_SEICTILut2.values;
+      }
+      else   // any count other than 1 or 2 is rejected; the range test must use '||' ('&&' can never be true)
+      {
+        CHECK(m_ctiSEINumberChromaLut < 1 || m_ctiSEINumberChromaLut > 2, "Number of chroma LUTs is missing or out of range!");
+      }
+    }
+    // For every LUT: (size - 1), when positive, must be a power of two and must not exceed MAX_CTI_LUT_SIZE.
+    for (int idx = 0; idx < MAX_NUM_COMPONENT; idx++)
+    {
+      int n = m_ctiSEILut[idx].numLutValues - 1;
+      CHECK(n > 0 && (n & (n - 1)) != 0, "Size of LUT minus 1 should be power of 2!");
+      CHECK(n > MAX_CTI_LUT_SIZE, "LUT size minus 1 is larger than MAX_CTI_LUT_SIZE (64)!");
+    }
+  }
   if ( m_omniViewportSEIEnabled && !m_omniViewportSEICancelFlag )
   {
     CHECK (!( m_omniViewportSEICntMinus1 >= 0 && m_omniViewportSEICntMinus1 < 16 ), "SEIOmniViewportCntMinus1 must be in the range of 0 to 16");
@@ -2383,9 +2770,94 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
       }
     }
   }
+  if ( m_sdiSEIEnabled )
+  {
+    if (m_sdiSEIMultiviewInfoFlag || m_sdiSEIAuxiliaryInfoFlag)
+    {
+      m_sdiSEILayerId.resize(m_sdiSEIMaxLayersMinus1 + 1);
+      m_sdiSEIViewIdVal.resize(m_sdiSEIMaxLayersMinus1 + 1);
+      m_sdiSEIAuxId.resize(m_sdiSEIMaxLayersMinus1 + 1);
+      m_sdiSEINumAssociatedPrimaryLayersMinus1.resize(m_sdiSEIMaxLayersMinus1 + 1);
+      for (int i = 0; i <= m_sdiSEIMaxLayersMinus1; i++)
+      {
+        m_sdiSEILayerId[i] = cfg_sdiSEILayerId.values[i];
+        if (m_sdiSEIMultiviewInfoFlag)
+        {
+          m_sdiSEIViewIdVal[i] = cfg_sdiSEIViewIdVal.values[i];
+        }
+        if (m_sdiSEIAuxiliaryInfoFlag)
+        {
+          m_sdiSEIAuxId[i] = cfg_sdiSEIAuxId.values[i];
+          if (m_sdiSEIAuxId[i] > 0)
+          {
+            m_sdiSEINumAssociatedPrimaryLayersMinus1[i] = cfg_sdiSEINumAssociatedPrimaryLayersMinus1.values[i];
+          }
+        }
+      }
+    }
+  }
+  if ( m_maiSEIEnabled )
+  {
+    if (m_maiSEIIntrinsicParamFlag)
+    {
+      int numViews = m_maiSEIIntrinsicParamsEqualFlag ? 1 : m_maiSEINumViewsMinus1 + 1;
+      m_maiSEISignFocalLengthX       .resize( numViews );
+      m_maiSEIExponentFocalLengthX   .resize( numViews );
+      m_maiSEIMantissaFocalLengthX   .resize( numViews );
+      m_maiSEISignFocalLengthY       .resize( numViews );
+      m_maiSEIExponentFocalLengthY   .resize( numViews );
+      m_maiSEIMantissaFocalLengthY   .resize( numViews );
+      m_maiSEISignPrincipalPointX    .resize( numViews );
+      m_maiSEIExponentPrincipalPointX.resize( numViews );
+      m_maiSEIMantissaPrincipalPointX.resize( numViews );
+      m_maiSEISignPrincipalPointY    .resize( numViews );
+      m_maiSEIExponentPrincipalPointY.resize( numViews );
+      m_maiSEIMantissaPrincipalPointY.resize( numViews );
+      m_maiSEISignSkewFactor         .resize( numViews );
+      m_maiSEIExponentSkewFactor     .resize( numViews );
+      m_maiSEIMantissaSkewFactor     .resize( numViews );
+      for( int i = 0; i  <=  ( m_maiSEIIntrinsicParamsEqualFlag ? 0 : m_maiSEINumViewsMinus1 ); i++ )
+      {
+        m_maiSEISignFocalLengthX       [i] = cfg_maiSEISignFocalLengthX.values[i];
+        m_maiSEIExponentFocalLengthX   [i] = cfg_maiSEIExponentFocalLengthX.values[i];
+        m_maiSEIMantissaFocalLengthX   [i] = cfg_maiSEIMantissaFocalLengthX.values[i];
+        m_maiSEISignFocalLengthY       [i] = cfg_maiSEISignFocalLengthY.values[i];
+        m_maiSEIExponentFocalLengthY   [i] = cfg_maiSEIExponentFocalLengthY.values[i];
+        m_maiSEIMantissaFocalLengthY   [i] = cfg_maiSEIMantissaFocalLengthY.values[i];
+        m_maiSEISignPrincipalPointX    [i] = cfg_maiSEISignPrincipalPointX.values[i];
+        m_maiSEIExponentPrincipalPointX[i] = cfg_maiSEIExponentPrincipalPointX.values[i];
+        m_maiSEIMantissaPrincipalPointX[i] = cfg_maiSEIMantissaPrincipalPointX.values[i];
+        m_maiSEISignPrincipalPointY    [i] = cfg_maiSEISignPrincipalPointY.values[i];
+        m_maiSEIExponentPrincipalPointY[i] = cfg_maiSEIExponentPrincipalPointY.values[i];
+        m_maiSEIMantissaPrincipalPointY[i] = cfg_maiSEIMantissaPrincipalPointY.values[i];
+        m_maiSEISignSkewFactor         [i] = cfg_maiSEISignSkewFactor.values[i];
+        m_maiSEIExponentSkewFactor     [i] = cfg_maiSEIExponentSkewFactor.values[i];
+        m_maiSEIMantissaSkewFactor     [i] = cfg_maiSEIMantissaSkewFactor.values[i];
+      }
+    }
+  }
+#if JVET_W0078_MVP_SEI 
+  if (m_mvpSEIEnabled)
+  {
+    int numViews = m_mvpSEINumViewsMinus1 + 1;
+    m_mvpSEIViewPosition.resize(numViews);
+    for (int i = 0; i <= m_mvpSEINumViewsMinus1; i++)
+    {
+      m_mvpSEIViewPosition[i] = cfg_mvpSEIViewPosition.values[i];
+    }
+  }
+#endif
+  if ( m_driSEIEnabled )
+  {
+    m_driSEINonlinearModel.resize(m_driSEINonlinearNumMinus1+1);
+    for(int i=0; i<(m_driSEINonlinearNumMinus1+1); i++)
+    {
+      m_driSEINonlinearModel[i]   = cfg_driSEINonlinearModel.values.size() > i ? cfg_driSEINonlinearModel.values[i] : 0;
+    }
+  }
   m_reshapeCW.binCW.resize(3);
   m_reshapeCW.rspFps = m_iFrameRate;
-  m_reshapeCW.rspPicSize = m_iSourceWidth*m_iSourceHeight;
+  m_reshapeCW.rspPicSize = m_sourceWidth*m_sourceHeight;
   m_reshapeCW.rspFpsToIp = std::max(16, 16 * (int)(round((double)m_iFrameRate /16.0)));
   m_reshapeCW.rspBaseQP = m_iQP;
   m_reshapeCW.updateCtrl = m_updateCtrl;
@@ -2411,22 +2883,22 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 
 #if ENABLE_QPA_SUB_CTU
  #if QP_SWITCHING_FOR_PARALLEL
-  if ((m_iQP < 38) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && (m_iSourceWidth <= 2048) && (m_iSourceHeight <= 1280)
+  if ((m_iQP < 38) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && (m_sourceWidth <= 2048) && (m_sourceHeight <= 1280)
  #else
-  if (((int)m_fQP < 38) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && (m_iSourceWidth <= 2048) && (m_iSourceHeight <= 1280)
+  if (((int)m_fQP < 38) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && (m_sourceWidth <= 2048) && (m_sourceHeight <= 1280)
  #endif
  #if WCG_EXT && ER_CHROMA_QP_WCG_PPS
       && (!m_wcgChromaQpControl.enabled)
  #endif
-      && ((1 << (m_log2MaxTbSize + 1)) == m_uiCTUSize) && (m_iSourceWidth > 512 || m_iSourceHeight > 320))
+      && ((1 << (m_log2MaxTbSize + 1)) == m_uiCTUSize) && (m_sourceWidth > 512 || m_sourceHeight > 320))
   {
     m_cuQpDeltaSubdiv = 2;
   }
 #else
  #if QP_SWITCHING_FOR_PARALLEL
-  if( ( m_iQP < 38 ) && ( m_iGOPSize > 4 ) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && ( m_iSourceHeight <= 1280 ) && ( m_iSourceWidth <= 2048 ) )
+  if( ( m_iQP < 38 ) && ( m_iGOPSize > 4 ) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && ( m_sourceHeight <= 1280 ) && ( m_sourceWidth <= 2048 ) )
  #else
-  if( ( ( int ) m_fQP < 38 ) && ( m_iGOPSize > 4 ) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && ( m_iSourceHeight <= 1280 ) && ( m_iSourceWidth <= 2048 ) )
+  if( ( ( int ) m_fQP < 38 ) && ( m_iGOPSize > 4 ) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && ( m_sourceHeight <= 1280 ) && ( m_sourceWidth <= 2048 ) )
  #endif
   {
     msg( WARNING, "*************************************************************************\n" );
@@ -2463,7 +2935,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     {
       firstSliceLossless = true;
     }
-    if (firstSliceLossless) // if first slice is lossless 
+    if (firstSliceLossless) // if first slice is lossless
     m_iQP = LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP - ( ( m_internalBitDepth[CHANNEL_TYPE_LUMA] - 8 ) * 6 );
   }
 
@@ -2510,6 +2982,17 @@ int EncAppCfg::xAutoDetermineProfile()
         m_profile = m_maxLayers > 1 ? Profile::MULTILAYER_MAIN_10 : Profile::MAIN_10;
       }
     }
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    else if (maxBitDepth <= 12)
+    { // precedence: still picture (level 15.5, single frame), then all-intra (intra period 1), then the general profile
+      m_profile = (m_level == Level::LEVEL15_5 && m_framesToBeEncoded == 1) ? Profile::MAIN_12_STILL_PICTURE : (m_iIntraPeriod == 1) ? Profile::MAIN_12_INTRA : Profile::MAIN_12;
+    }
+    else if (maxBitDepth <= 16)
+    {
+      // There are no 16-bit 4:2:0 profiles in VVC, so the 16-bit 4:4:4 profiles are used instead.
+      m_profile = (m_level == Level::LEVEL15_5 && m_framesToBeEncoded == 1) ? Profile::MAIN_16_444_STILL_PICTURE : (m_iIntraPeriod == 1) ? Profile::MAIN_16_444_INTRA : Profile::MAIN_16_444;
+    }
+#endif
     break;
 
   case ChromaFormat::CHROMA_422:
@@ -2526,11 +3009,26 @@ int EncAppCfg::xAutoDetermineProfile()
         m_profile = m_maxLayers > 1 ? Profile::MULTILAYER_MAIN_10_444 : Profile::MAIN_10_444;
       }
     }
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    else if (maxBitDepth <= 12)
+    { // precedence: still picture (level 15.5, single frame), then all-intra (intra period 1), then the general profile
+      m_profile = (m_level == Level::LEVEL15_5 && m_framesToBeEncoded == 1) ? Profile::MAIN_12_444_STILL_PICTURE : (m_iIntraPeriod == 1) ? Profile::MAIN_12_444_INTRA : Profile::MAIN_12_444;
+    }
+    else if (maxBitDepth <= 16)
+    { // same precedence as the 12-bit branch, applied to the 16-bit 4:4:4 profiles
+      m_profile = (m_level == Level::LEVEL15_5 && m_framesToBeEncoded == 1) ? Profile::MAIN_16_444_STILL_PICTURE : (m_iIntraPeriod == 1) ? Profile::MAIN_16_444_INTRA : Profile::MAIN_16_444;
+    }
+#endif
     break;
 
   default: return 1;
   }
-
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    if (m_profile == Profile::MAIN_12 || m_profile == Profile::MAIN_12_444 || m_profile == Profile::MAIN_16_444)
+    {
+      m_generalLowerBitRateConstraintFlag = 1; // GeneralLowerBitRateConstraintFlag must be 1 for the range extension profiles that are neither Intra nor Still Picture.
+    }
+#endif
   return 0;
 }
 
@@ -2572,20 +3070,10 @@ bool EncAppCfg::xCheckParameter()
   {
     const int minCUSize = 1 << m_log2MinCuSize;
     xConfirmPara(m_wrapAroundOffset <= m_uiCTUSize + minCUSize, "Wrap-around offset must be greater than CtbSizeY + MinCbSize");
-    xConfirmPara(m_wrapAroundOffset > m_iSourceWidth, "Wrap-around offset must not be greater than the source picture width");
+    xConfirmPara(m_wrapAroundOffset > m_sourceWidth, "Wrap-around offset must not be greater than the source picture width");
     xConfirmPara( m_wrapAroundOffset % minCUSize != 0, "Wrap-around offset must be an integer multiple of the specified minimum CU size" );
   }
 
-#if ENABLE_SPLIT_PARALLELISM
-  xConfirmPara( m_numSplitThreads < 1, "Number of used threads cannot be smaller than 1" );
-  xConfirmPara( m_numSplitThreads > PARL_SPLIT_MAX_NUM_THREADS, "Number of used threads cannot be higher than the number of actual jobs" );
-#else
-  xConfirmPara( m_numSplitThreads != 1, "ENABLE_SPLIT_PARALLELISM is disabled, numSplitThreads has to be 1" );
-#endif
-
-  xConfirmPara( m_numWppThreads != 1, "ENABLE_WPP_PARALLELISM is disabled, numWppThreads has to be 1" );
-  xConfirmPara( m_ensureWppBitEqual, "ENABLE_WPP_PARALLELISM is disabled, cannot ensure being WPP bit-equal" );
-
 
 #if SHARP_LUMA_DELTA_QP && ENABLE_QPA
   xConfirmPara( m_bUsePerceptQPA && m_lumaLevelToDeltaQPMapping.mode >= 2, "QPA and SharpDeltaQP mode 2 cannot be used together" );
@@ -2610,12 +3098,31 @@ bool EncAppCfg::xCheckParameter()
     xConfirmPara(m_log2MaxTransformSkipBlockSize>=6, "Transform Skip Log2 Max Size must be less or equal to 5 for given profile.");
     xConfirmPara(m_transformSkipRotationEnabledFlag==true, "UseResidualRotation must not be enabled for given profile.");
     xConfirmPara(m_transformSkipContextEnabledFlag==true, "UseSingleSignificanceMapContext must not be enabled for given profile.");
+#if !JVET_W2005_RANGE_EXTENSION_PROFILES
+    xConfirmPara(m_rrcRiceExtensionEnableFlag == true, "Extension of the Golomb-Rice parameter derivation for RRC must not be enabled for given profile.");
     xConfirmPara(m_persistentRiceAdaptationEnabledFlag==true, "GolombRiceParameterAdaption must not be enabled for given profile.");
     xConfirmPara(m_extendedPrecisionProcessingFlag==true, "UseExtendedPrecision must not be enabled for given profile.");
+    xConfirmPara(m_tsrcRicePresentFlag == true, "TSRCRicePresent must not be enabled for given profile.");
+#if JVET_W0046_RLSCP
+    xConfirmPara(m_reverseLastSigCoeffEnabledFlag == true, "ReverseLastSigCoeff must not be enabled for given profile.");
+#endif
+#endif
     xConfirmPara(m_highPrecisionOffsetsEnabledFlag==true, "UseHighPrecisionPredictionWeighting must not be enabled for given profile.");
     xConfirmPara(m_enableIntraReferenceSmoothing==false, "EnableIntraReferenceSmoothing must be enabled for given profile.");
     xConfirmPara(m_cabacBypassAlignmentEnabledFlag, "AlignCABACBeforeBypass cannot be enabled for given profile.");
   }
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  if (m_profile != Profile::NONE && m_profile != Profile::MAIN_12_444 && m_profile != Profile::MAIN_16_444 && m_profile != Profile::MAIN_12_444_INTRA && m_profile != Profile::MAIN_16_444_INTRA && m_profile != Profile::MAIN_12_444_STILL_PICTURE && m_profile != Profile::MAIN_16_444_STILL_PICTURE)
+  { // a profile is set and it is none of the 12/16-bit 4:4:4 range extension profiles: the tools below must stay off
+    xConfirmPara(m_rrcRiceExtensionEnableFlag == true, "Extension of the Golomb-Rice parameter derivation for RRC must not be enabled for given profile.");
+    xConfirmPara(m_persistentRiceAdaptationEnabledFlag==true, "GolombRiceParameterAdaption must not be enabled for given profile.");
+    xConfirmPara(m_extendedPrecisionProcessingFlag==true, "UseExtendedPrecision must not be enabled for given profile.");
+    xConfirmPara(m_tsrcRicePresentFlag == true, "TSRCRicePresent must not be enabled for given profile.");
+#if JVET_W0046_RLSCP
+    xConfirmPara(m_reverseLastSigCoeffEnabledFlag == true, "ReverseLastSigCoeff must not be enabled for given profile.");
+#endif
+  }
+#endif
 
 
   // check range of parameters
@@ -2650,6 +3157,15 @@ bool EncAppCfg::xCheckParameter()
   xConfirmPara( (m_MSBExtendedBitDepth[CHANNEL_TYPE_LUMA  ] < m_inputBitDepth[CHANNEL_TYPE_LUMA  ]), "MSB-extended bit depth for luma channel (--MSBExtendedBitDepth) must be greater than or equal to input bit depth for luma channel (--InputBitDepth)" );
   xConfirmPara( (m_MSBExtendedBitDepth[CHANNEL_TYPE_CHROMA] < m_inputBitDepth[CHANNEL_TYPE_CHROMA]), "MSB-extended bit depth for chroma channel (--MSBExtendedBitDepthC) must be greater than or equal to input bit depth for chroma channel (--InputBitDepthC)" );
 
+#if JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS
+  // True when any of the four RExt tools named in the message below is switched on.
+  const bool anyRExtToolEnabled = m_extendedPrecisionProcessingFlag || m_rrcRiceExtensionEnableFlag
+                               || m_persistentRiceAdaptationEnabledFlag || m_tsrcRicePresentFlag;
+  if (m_internalBitDepth[CHANNEL_TYPE_LUMA] <= 10)
+  {
+    xConfirmPara(anyRExtToolEnabled, "RExt tools (Extended Precision Processing, RRC Rice Extension, Persistent Rice Adaptation and TSRC Rice Extension) must be disabled for BitDepth is less than or equal to 10 (the value of sps_range_extension_flag shall be 0 when BitDepth is less than or equal to 10.)");
+  }
+#endif
 
   xConfirmPara( m_chromaFormatIDC >= NUM_CHROMA_FORMAT,                                     "ChromaFormatIDC must be either 400, 420, 422 or 444" );
   std::string sTempIPCSC="InputColourSpaceConvert must be empty, "+getListOfColourSpaceConverts(true);
@@ -2663,6 +3179,7 @@ bool EncAppCfg::xCheckParameter()
   xConfirmPara( m_iGOPSize > 1 &&  m_iGOPSize % 2,                                          "GOP Size must be a multiple of 2, if GOP Size is greater than 1" );
   xConfirmPara( (m_iIntraPeriod > 0 && m_iIntraPeriod < m_iGOPSize) || m_iIntraPeriod == 0, "Intra period must be more than GOP size, or -1 , not 0" );
   xConfirmPara( m_drapPeriod < 0,                                                           "DRAP period must be greater or equal to 0" );
+  xConfirmPara( m_edrapPeriod < 0,                                                          "EDRAP period must be greater or equal to 0" );
   xConfirmPara( m_iDecodingRefreshType < 0 || m_iDecodingRefreshType > 3,                   "Decoding Refresh Type must be comprised between 0 and 3 included" );
 
   if (m_isField)
@@ -2696,16 +3213,16 @@ bool EncAppCfg::xCheckParameter()
 
   xConfirmPara( m_iQP < -6 * (m_internalBitDepth[CHANNEL_TYPE_LUMA] - 8) || m_iQP > MAX_QP, "QP exceeds supported range (-QpBDOffsety to 63)" );
 #if W0038_DB_OPT
-  xConfirmPara( m_deblockingFilterMetric!=0 && (m_bLoopFilterDisable || m_loopFilterOffsetInPPS), "If DeblockingFilterMetric is non-zero then both LoopFilterDisable and LoopFilterOffsetInPPS must be 0");
+  xConfirmPara( m_deblockingFilterMetric!=0 && (m_deblockingFilterDisable || m_deblockingFilterOffsetInPPS), "If DeblockingFilterMetric is non-zero then both LoopFilterDisable and LoopFilterOffsetInPPS must be 0");
 #else
   xConfirmPara( m_DeblockingFilterMetric && (m_bLoopFilterDisable || m_loopFilterOffsetInPPS), "If DeblockingFilterMetric is true then both LoopFilterDisable and LoopFilterOffsetInPPS must be 0");
 #endif
-  xConfirmPara( m_loopFilterBetaOffsetDiv2 < -12 || m_loopFilterBetaOffsetDiv2 > 12,          "Loop Filter Beta Offset div. 2 exceeds supported range (-12 to 12" );
-  xConfirmPara( m_loopFilterTcOffsetDiv2 < -12 || m_loopFilterTcOffsetDiv2 > 12,              "Loop Filter Tc Offset div. 2 exceeds supported range (-12 to 12)" );
-  xConfirmPara( m_loopFilterCbBetaOffsetDiv2 < -12 || m_loopFilterCbBetaOffsetDiv2 > 12,      "Loop Filter Beta Offset div. 2 exceeds supported range (-12 to 12" );
-  xConfirmPara( m_loopFilterCbTcOffsetDiv2 < -12 || m_loopFilterCbTcOffsetDiv2 > 12,          "Loop Filter Tc Offset div. 2 exceeds supported range (-12 to 12)" );
-  xConfirmPara( m_loopFilterCrBetaOffsetDiv2 < -12 || m_loopFilterCrBetaOffsetDiv2 > 12,      "Loop Filter Beta Offset div. 2 exceeds supported range (-12 to 12" );
-  xConfirmPara( m_loopFilterCrTcOffsetDiv2 < -12 || m_loopFilterCrTcOffsetDiv2 > 12,          "Loop Filter Tc Offset div. 2 exceeds supported range (-12 to 12)" );
+  xConfirmPara( m_deblockingFilterBetaOffsetDiv2 < -12 || m_deblockingFilterBetaOffsetDiv2 > 12,          "Loop Filter Beta Offset div. 2 exceeds supported range (-12 to 12)" );
+  xConfirmPara( m_deblockingFilterTcOffsetDiv2 < -12 || m_deblockingFilterTcOffsetDiv2 > 12,              "Loop Filter Tc Offset div. 2 exceeds supported range (-12 to 12)" );
+  xConfirmPara( m_deblockingFilterCbBetaOffsetDiv2 < -12 || m_deblockingFilterCbBetaOffsetDiv2 > 12,      "Loop Filter Cb Beta Offset div. 2 exceeds supported range (-12 to 12)" );
+  xConfirmPara( m_deblockingFilterCbTcOffsetDiv2 < -12 || m_deblockingFilterCbTcOffsetDiv2 > 12,          "Loop Filter Cb Tc Offset div. 2 exceeds supported range (-12 to 12)" );
+  xConfirmPara( m_deblockingFilterCrBetaOffsetDiv2 < -12 || m_deblockingFilterCrBetaOffsetDiv2 > 12,      "Loop Filter Cr Beta Offset div. 2 exceeds supported range (-12 to 12)" );
+  xConfirmPara( m_deblockingFilterCrTcOffsetDiv2 < -12 || m_deblockingFilterCrTcOffsetDiv2 > 12,          "Loop Filter Cr Tc Offset div. 2 exceeds supported range (-12 to 12)" );
   xConfirmPara( m_iSearchRange < 0 ,                                                        "Search Range must be more than 0" );
   xConfirmPara( m_bipredSearchRange < 0 ,                                                   "Bi-prediction refinement search range must be more than 0" );
   xConfirmPara( m_minSearchWindow < 0,                                                      "Minimum motion search window size for the adaptive window ME must be greater than or equal to 0" );
@@ -2752,6 +3269,10 @@ bool EncAppCfg::xCheckParameter()
     if (m_updateCtrl > 0 && m_adpOption > 2) { m_adpOption -= 2; }
   }
 
+  if (m_ctiSEIEnabled)
+  {
+    xConfirmPara(m_ctiSEINumberChromaLut < 0 || m_ctiSEINumberChromaLut > 2, "CTI number of chroma LUTs is out of range");
+  }
   xConfirmPara( m_cbQpOffset < -12,   "Min. Chroma Cb QP Offset is -12" );
   xConfirmPara( m_cbQpOffset >  12,   "Max. Chroma Cb QP Offset is  12" );
   xConfirmPara( m_crQpOffset < -12,   "Min. Chroma Cr QP Offset is -12" );
@@ -2767,18 +3288,30 @@ bool EncAppCfg::xCheckParameter()
     msg( WARNING, "****************************************************************************\n");
     m_dualTree = false;
   }
-#if JVET_T0064
   if (m_alf)
   {
-    xConfirmPara(m_alfStrength < 0.0, "ALFStrength is less than 0. Valid range is 0.0 <= ALFStrength <= 1.0" );
-    xConfirmPara(m_alfStrength > 1.0, "ALFStrength is greater than 1. Valid range is 0.0 <= ALFStrength <= 1.0" );
+    xConfirmPara(m_alfStrengthLuma < 0.0, "ALFStrengthLuma is less than 0. Valid range is 0.0 <= ALFStrengthLuma <= 1.0");
+    xConfirmPara(m_alfStrengthLuma > 1.0, "ALFStrengthLuma is greater than 1. Valid range is 0.0 <= ALFStrengthLuma <= 1.0");
   }
   if (m_ccalf)
   {
     xConfirmPara(m_ccalfStrength < 0.0, "CCALFStrength is less than 0. Valid range is 0.0 <= CCALFStrength <= 1.0");
     xConfirmPara(m_ccalfStrength > 1.0, "CCALFStrength is greater than 1. Valid range is 0.0 <= CCALFStrength <= 1.0");
-  }  
-#endif
+  }
+  if (m_alf)
+  {
+    xConfirmPara(m_alfStrengthChroma < 0.0, "ALFStrengthChroma is less than 0. Valid range is 0.0 <= ALFStrengthChroma <= 1.0");
+    xConfirmPara(m_alfStrengthChroma > 1.0, "ALFStrengthChroma is greater than 1. Valid range is 0.0 <= ALFStrengthChroma <= 1.0");
+    xConfirmPara(m_alfStrengthTargetLuma < 0.0, "ALFStrengthTargetLuma is less than 0. Valid range is 0.0 <= ALFStrengthTargetLuma <= 1.0");
+    xConfirmPara(m_alfStrengthTargetLuma > 1.0, "ALFStrengthTargetLuma is greater than 1. Valid range is 0.0 <= ALFStrengthTargetLuma <= 1.0");
+    xConfirmPara(m_alfStrengthTargetChroma < 0.0, "ALFStrengthTargetChroma is less than 0. Valid range is 0.0 <= ALFStrengthTargetChroma <= 1.0");
+    xConfirmPara(m_alfStrengthTargetChroma > 1.0, "ALFStrengthTargetChroma is greater than 1. Valid range is 0.0 <= ALFStrengthTargetChroma <= 1.0");
+  }
+  if (m_ccalf)
+  {
+    xConfirmPara(m_ccalfStrengthTarget < 0.0, "CCALFStrengthTarget is less than 0. Valid range is 0.0 <= CCALFStrengthTarget <= 1.0");
+    xConfirmPara(m_ccalfStrengthTarget > 1.0, "CCALFStrengthTarget is greater than 1. Valid range is 0.0 <= CCALFStrengthTarget <= 1.0");
+  }
   if (m_ccalf && (m_chromaFormatIDC == CHROMA_400))
   {
     msg( WARNING, "****************************************************************************\n");
@@ -2816,7 +3349,7 @@ bool EncAppCfg::xCheckParameter()
   xConfirmPara( m_uiMaxTT[2] > 64,                                                          "Maximum TT size for chroma block in I slice should be smaller than or equal to 64");
   xConfirmPara( m_uiMinQT[0] < minCuSize,                                                   "Min Luma QT size in I slices should be larger than or equal to minCuSize");
   xConfirmPara( m_uiMinQT[1] < minCuSize,                                                   "Min Luma QT size in non-I slices should be larger than or equal to minCuSize");
-  xConfirmPara((m_iSourceWidth % minCuSize ) || (m_iSourceHeight % minCuSize),              "Picture width or height is not a multiple of minCuSize");
+  xConfirmPara((m_sourceWidth % minCuSize ) || (m_sourceHeight % minCuSize),              "Picture width or height is not a multiple of minCuSize");
   const int minDiff = (int)floorLog2(m_uiMinQT[2]) - std::max(MIN_CU_LOG2, (int)m_log2MinCuSize - (int)getChannelTypeScaleX(CHANNEL_TYPE_CHROMA, m_chromaFormatIDC));
   xConfirmPara( minDiff < 0 ,                                                               "Min Chroma QT size in I slices is smaller than Min Luma CU size even considering color format");
   xConfirmPara( (m_uiMinQT[2] << (int)getChannelTypeScaleX(CHANNEL_TYPE_CHROMA, m_chromaFormatIDC)) > std::min(64, (int)m_uiCTUSize),
@@ -2840,8 +3373,8 @@ bool EncAppCfg::xCheckParameter()
   xConfirmPara( m_uiMaxTT[2] < (m_uiMinQT[2] << (int)getChannelTypeScaleX(CHANNEL_TYPE_CHROMA, m_chromaFormatIDC)),
                                                                                             "Maximum TT size for chroma block in I slice should be larger than minimum QT size");
   xConfirmPara( m_uiMaxTT[2] > m_uiCTUSize,                                                 "Maximum TT size for chroma block in I slice should be smaller than or equal to CTUSize");
-  xConfirmPara( (m_iSourceWidth  % (std::max(8u, m_log2MinCuSize))) != 0,                   "Resulting coded frame width must be a multiple of Max(8, the minimum CU size)");
-  xConfirmPara( (m_iSourceHeight % (std::max(8u, m_log2MinCuSize))) != 0,                   "Resulting coded frame height must be a multiple of Max(8, the minimum CU size)");
+  xConfirmPara( (m_sourceWidth  % (std::max(8u, 1u << m_log2MinCuSize))) != 0,             "Resulting coded frame width must be a multiple of Max(8, the minimum CU size)"); // compare against the CU size itself, not its log2
+  xConfirmPara( (m_sourceHeight % (std::max(8u, 1u << m_log2MinCuSize))) != 0,             "Resulting coded frame height must be a multiple of Max(8, the minimum CU size)");
   if (m_uiMaxMTTHierarchyDepthI == 0)
   {
     xConfirmPara(m_uiMaxBT[0] != m_uiMinQT[0], "MaxBTLumaISlice shall be equal to MinQTLumaISlice when MaxMTTHierarchyDepthISliceL is 0.");
@@ -2886,6 +3419,10 @@ bool EncAppCfg::xCheckParameter()
     xConfirmPara(!m_useTransformSkip, "BDPCM cannot be used when transform skip is disabled.");
   }
 
+  if (m_tsrcRicePresentFlag)
+  {
+    xConfirmPara(!m_useTransformSkip, "TSRCRicePresent cannot be enabled when transform skip is disabled.");
+  }
 
   if (!m_alf)
   {
@@ -2893,11 +3430,11 @@ bool EncAppCfg::xCheckParameter()
   }
 
 
-  xConfirmPara( m_iSourceWidth  % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Picture width must be an integer multiple of the specified chroma subsampling");
-  xConfirmPara( m_iSourceHeight % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Picture height must be an integer multiple of the specified chroma subsampling");
+  xConfirmPara( m_sourceWidth  % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Picture width must be an integer multiple of the specified chroma subsampling");
+  xConfirmPara( m_sourceHeight % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Picture height must be an integer multiple of the specified chroma subsampling");
 
-  xConfirmPara( m_aiPad[0] % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Horizontal padding must be an integer multiple of the specified chroma subsampling");
-  xConfirmPara( m_aiPad[1] % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Vertical padding must be an integer multiple of the specified chroma subsampling");
+  xConfirmPara( m_sourcePadding[0] % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Horizontal padding must be an integer multiple of the specified chroma subsampling");
+  xConfirmPara( m_sourcePadding[1] % SPS::getWinUnitY(m_chromaFormatIDC) != 0, "Vertical padding must be an integer multiple of the specified chroma subsampling");
 
   xConfirmPara( m_confWinLeft   % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Left conformance window offset must be an integer multiple of the specified chroma subsampling");
   xConfirmPara( m_confWinRight  % SPS::getWinUnitX(m_chromaFormatIDC) != 0, "Right conformance window offset must be an integer multiple of the specified chroma subsampling");
@@ -2975,16 +3512,16 @@ bool EncAppCfg::xCheckParameter()
     }
   }
 
-  if ( (m_iIntraPeriod != 1) && !m_loopFilterOffsetInPPS && (!m_bLoopFilterDisable) )
+  if ( (m_iIntraPeriod != 1) && !m_deblockingFilterOffsetInPPS && (!m_deblockingFilterDisable) )
   {
     for(int i=0; i<m_iGOPSize; i++)
     {
-      xConfirmPara( (m_GOPList[i].m_betaOffsetDiv2 + m_loopFilterBetaOffsetDiv2) < -12 || (m_GOPList[i].m_betaOffsetDiv2 + m_loopFilterBetaOffsetDiv2) > 12, "Loop Filter Beta Offset div. 2 for one of the GOP entries exceeds supported range (-12 to 12)" );
-      xConfirmPara( (m_GOPList[i].m_tcOffsetDiv2 + m_loopFilterTcOffsetDiv2) < -12 || (m_GOPList[i].m_tcOffsetDiv2 + m_loopFilterTcOffsetDiv2) > 12, "Loop Filter Tc Offset div. 2 for one of the GOP entries exceeds supported range (-12 to 12)" );
-      xConfirmPara( (m_GOPList[i].m_CbBetaOffsetDiv2 + m_loopFilterCbBetaOffsetDiv2) < -12 || (m_GOPList[i].m_CbBetaOffsetDiv2 + m_loopFilterCbBetaOffsetDiv2) > 12, "Loop Filter Beta Offset div. 2 for one of the GOP entries exceeds supported range (-12 to 12)" );
-      xConfirmPara( (m_GOPList[i].m_CbTcOffsetDiv2 + m_loopFilterCbTcOffsetDiv2) < -12 || (m_GOPList[i].m_CbTcOffsetDiv2 + m_loopFilterCbTcOffsetDiv2) > 12, "Loop Filter Tc Offset div. 2 for one of the GOP entries exceeds supported range (-12 to 12)" );
-      xConfirmPara( (m_GOPList[i].m_CrBetaOffsetDiv2 + m_loopFilterCrBetaOffsetDiv2) < -12 || (m_GOPList[i].m_CrBetaOffsetDiv2 + m_loopFilterCrBetaOffsetDiv2) > 12, "Loop Filter Beta Offset div. 2 for one of the GOP entries exceeds supported range (-12 to 12)" );
-      xConfirmPara( (m_GOPList[i].m_CrTcOffsetDiv2 + m_loopFilterCrTcOffsetDiv2) < -12 || (m_GOPList[i].m_CrTcOffsetDiv2 + m_loopFilterCrTcOffsetDiv2) > 12, "Loop Filter Tc Offset div. 2 for one of the GOP entries exceeds supported range (-12 to 12)" );
+      xConfirmPara( (m_GOPList[i].m_betaOffsetDiv2 + m_deblockingFilterBetaOffsetDiv2) < -12 || (m_GOPList[i].m_betaOffsetDiv2 + m_deblockingFilterBetaOffsetDiv2) > 12, "Loop Filter Beta Offset div. 2 for one of the GOP entries exceeds supported range (-12 to 12)" );
+      xConfirmPara( (m_GOPList[i].m_tcOffsetDiv2 + m_deblockingFilterTcOffsetDiv2) < -12 || (m_GOPList[i].m_tcOffsetDiv2 + m_deblockingFilterTcOffsetDiv2) > 12, "Loop Filter Tc Offset div. 2 for one of the GOP entries exceeds supported range (-12 to 12)" );
+      xConfirmPara( (m_GOPList[i].m_CbBetaOffsetDiv2 + m_deblockingFilterCbBetaOffsetDiv2) < -12 || (m_GOPList[i].m_CbBetaOffsetDiv2 + m_deblockingFilterCbBetaOffsetDiv2) > 12, "Loop Filter Beta Offset div. 2 for one of the GOP entries exceeds supported range (-12 to 12)" );
+      xConfirmPara( (m_GOPList[i].m_CbTcOffsetDiv2 + m_deblockingFilterCbTcOffsetDiv2) < -12 || (m_GOPList[i].m_CbTcOffsetDiv2 + m_deblockingFilterCbTcOffsetDiv2) > 12, "Loop Filter Tc Offset div. 2 for one of the GOP entries exceeds supported range (-12 to 12)" );
+      xConfirmPara( (m_GOPList[i].m_CrBetaOffsetDiv2 + m_deblockingFilterCrBetaOffsetDiv2) < -12 || (m_GOPList[i].m_CrBetaOffsetDiv2 + m_deblockingFilterCrBetaOffsetDiv2) > 12, "Loop Filter Beta Offset div. 2 for one of the GOP entries exceeds supported range (-12 to 12)" );
+      xConfirmPara( (m_GOPList[i].m_CrTcOffsetDiv2 + m_deblockingFilterCrTcOffsetDiv2) < -12 || (m_GOPList[i].m_CrTcOffsetDiv2 + m_deblockingFilterCrTcOffsetDiv2) > 12, "Loop Filter Tc Offset div. 2 for one of the GOP entries exceeds supported range (-12 to 12)" );
     }
   }
 
@@ -3335,8 +3872,8 @@ bool EncAppCfg::xCheckParameter()
     uint32_t colIdx, rowIdx;
     uint32_t remSize;
 
-    pps.setPicWidthInLumaSamples( m_iSourceWidth );
-    pps.setPicHeightInLumaSamples( m_iSourceHeight );
+    pps.setPicWidthInLumaSamples( m_sourceWidth );
+    pps.setPicHeightInLumaSamples( m_sourceHeight );
     pps.setLog2CtuSize( floorLog2(m_uiCTUSize) );
 
     // set default tile column if not provided
@@ -3645,6 +4182,11 @@ bool EncAppCfg::xCheckParameter()
     xConfirmPara(m_framePackingSEIType < 3 || m_framePackingSEIType > 5 , "SEIFramePackingType must be in rage 3 to 5");
   }
 
+  if (m_doSEIEnabled)
+  { // the transform type is only validated when the display orientation SEI is enabled
+    xConfirmPara(m_doSEITransformType < 0 || m_doSEITransformType > 7, "SEIDisplayOrientationTransformType must be in range 0 to 7");
+  }
+
   if( m_erpSEIEnabled && !m_erpSEICancelFlag )
   {
     xConfirmPara( m_erpSEIGuardBandType < 0 || m_erpSEIGuardBandType > 8, "SEIEquirectangularprojectionGuardBandType must be in the range of 0 to 7");
@@ -3812,8 +4354,8 @@ void EncAppCfg::xPrintParameter()
   msg( DETAILS, "Input          File                    : %s\n", m_inputFileName.c_str() );
   msg( DETAILS, "Bitstream      File                    : %s\n", m_bitstreamFileName.c_str() );
   msg( DETAILS, "Reconstruction File                    : %s\n", m_reconFileName.c_str() );
-  msg( DETAILS, "Real     Format                        : %dx%d %gHz\n", m_iSourceWidth - m_confWinLeft - m_confWinRight, m_iSourceHeight - m_confWinTop - m_confWinBottom, (double)m_iFrameRate / m_temporalSubsampleRatio );
-  msg( DETAILS, "Internal Format                        : %dx%d %gHz\n", m_iSourceWidth, m_iSourceHeight, (double)m_iFrameRate / m_temporalSubsampleRatio );
+  msg( DETAILS, "Real     Format                        : %dx%d %gHz\n", m_sourceWidth - m_confWinLeft - m_confWinRight, m_sourceHeight - m_confWinTop - m_confWinBottom, (double)m_iFrameRate / m_temporalSubsampleRatio );
+  msg( DETAILS, "Internal Format                        : %dx%d %gHz\n", m_sourceWidth, m_sourceHeight, (double)m_iFrameRate / m_temporalSubsampleRatio );
   msg( DETAILS, "Sequence PSNR output                   : %s\n", ( m_printMSEBasedSequencePSNR ? "Linear average, MSE-based" : "Linear average only" ) );
   msg( DETAILS, "Hexadecimal PSNR output                : %s\n", ( m_printHexPsnr ? "Enabled" : "Disabled" ) );
   msg( DETAILS, "Sequence MSE output                    : %s\n", ( m_printSequenceMSE ? "Enabled" : "Disabled" ) );
@@ -3835,6 +4377,9 @@ void EncAppCfg::xPrintParameter()
   {
     msg( DETAILS, "Profile                                : %s\n", profileToString(m_profile) );
   }
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  msg( DETAILS,"GeneralLowerBitRateConstraintFlag      : %d\n", m_generalLowerBitRateConstraintFlag );
+#endif
   msg(DETAILS, "CTU size / min CU size                 : %d / %d \n", m_uiMaxCUWidth, 1 << m_log2MinCuSize);
 
   msg(DETAILS, "subpicture info present flag           : %s\n", m_subPicInfoPresentFlag ? "Enabled" : "Disabled");
@@ -3877,6 +4422,7 @@ void EncAppCfg::xPrintParameter()
   msg( DETAILS, "Intra period                           : %d\n", m_iIntraPeriod );
   msg( DETAILS, "Decoding refresh type                  : %d\n", m_iDecodingRefreshType );
   msg( DETAILS, "DRAP period                            : %d\n", m_drapPeriod );
+  msg( DETAILS, "EDRAP period                           : %d\n", m_edrapPeriod );
 #if QP_SWITCHING_FOR_PARALLEL
   if (m_qpIncrementAtSourceFrame.bPresent)
   {
@@ -3899,11 +4445,30 @@ void EncAppCfg::xPrintParameter()
   msg( DETAILS, "MSB-extended bit depth                 : (Y:%d, C:%d)\n", m_MSBExtendedBitDepth[CHANNEL_TYPE_LUMA], m_MSBExtendedBitDepth[CHANNEL_TYPE_CHROMA] );
   msg( DETAILS, "Internal bit depth                     : (Y:%d, C:%d)\n", m_internalBitDepth[CHANNEL_TYPE_LUMA], m_internalBitDepth[CHANNEL_TYPE_CHROMA] );
   msg( DETAILS, "Intra reference smoothing              : %s\n", (m_enableIntraReferenceSmoothing           ? "Enabled" : "Disabled") );
-  msg( DETAILS, "cu_chroma_qp_offset_subdiv             : %d\n", m_cuChromaQpOffsetSubdiv);
+  if (m_cuChromaQpOffsetList.size() > 0)
+  { // print every (Cb, Cr, joint CbCr) triple on one line, comma separated, closed by ")\n"
+    msg( DETAILS, "Chroma QP offset list                  : (" );
+    for (size_t i = 0; i < m_cuChromaQpOffsetList.size(); i++) // size_t index: no signed/unsigned comparison against size()
+    {
+      msg( DETAILS, "%d %d %d%s", m_cuChromaQpOffsetList[i].u.comp.CbOffset, m_cuChromaQpOffsetList[i].u.comp.CrOffset, m_cuChromaQpOffsetList[i].u.comp.JointCbCrOffset,
+        (i + 1 < m_cuChromaQpOffsetList.size() ? ", " : ")\n") );
+    }
+    msg( DETAILS, "cu_chroma_qp_offset_subdiv             : %d\n", m_cuChromaQpOffsetSubdiv);
+    msg( DETAILS, "cu_chroma_qp_offset_enabled_flag       : %s\n", (m_cuChromaQpOffsetEnabled ? "Enabled" : "Disabled") );
+  }
+  else
+  {
+    msg( DETAILS, "Chroma QP offset list                  : Disabled\n" );
+  }
   msg( DETAILS, "extended_precision_processing_flag     : %s\n", (m_extendedPrecisionProcessingFlag         ? "Enabled" : "Disabled") );
+  msg( DETAILS, "TSRC_Rice_present_flag                 : %s\n", (m_tsrcRicePresentFlag                     ? "Enabled" : "Disabled") );
+#if JVET_W0046_RLSCP
+  msg( DETAILS, "reverse_last_sig_coeff_enabled_flag    : %s\n", (m_reverseLastSigCoeffEnabledFlag          ? "Enabled" : "Disabled") );
+#endif
   msg( DETAILS, "transform_skip_rotation_enabled_flag   : %s\n", (m_transformSkipRotationEnabledFlag        ? "Enabled" : "Disabled") );
   msg( DETAILS, "transform_skip_context_enabled_flag    : %s\n", (m_transformSkipContextEnabledFlag         ? "Enabled" : "Disabled") );
   msg( DETAILS, "high_precision_offsets_enabled_flag    : %s\n", (m_highPrecisionOffsetsEnabledFlag         ? "Enabled" : "Disabled") );
+  msg( DETAILS, "rrc_rice_extension_flag                : %s\n", (m_rrcRiceExtensionEnableFlag                 ? "Enabled" : "Disabled") );
   msg( DETAILS, "persistent_rice_adaptation_enabled_flag: %s\n", (m_persistentRiceAdaptationEnabledFlag     ? "Enabled" : "Disabled") );
   msg( DETAILS, "cabac_bypass_alignment_enabled_flag    : %s\n", (m_cabacBypassAlignmentEnabledFlag         ? "Enabled" : "Disabled") );
 
@@ -3937,6 +4502,17 @@ void EncAppCfg::xPrintParameter()
 #endif
   }
 
+#if GDR_ENABLED
+  msg(DETAILS, "GDREnabled                             : %d\n", m_gdrEnabled);
+
+  if (m_gdrEnabled)
+  {
+    msg(DETAILS, "GDR Start                              : %d\n", m_gdrPocStart);
+    msg(DETAILS, "GDR Interval                           : %d\n", m_gdrInterval);
+    msg(DETAILS, "GDR Period                             : %d\n", m_gdrPeriod);
+  }
+#endif
+
   msg( DETAILS, "Max Num Merge Candidates               : %d\n", m_maxNumMergeCand );
   msg( DETAILS, "Max Num Affine Merge Candidates        : %d\n", m_maxNumAffineMergeCand );
   msg( DETAILS, "Max Num Geo Merge Candidates           : %d\n", m_maxNumGeoCand );
@@ -3975,7 +4551,7 @@ void EncAppCfg::xPrintParameter()
   msg( VERBOSE, "WPP:%d ", (int)m_useWeightedPred);
   msg( VERBOSE, "WPB:%d ", (int)m_useWeightedBiPred);
   msg( VERBOSE, "PME:%d ", m_log2ParallelMergeLevel);
-  const int iWaveFrontSubstreams = m_entropyCodingSyncEnabledFlag ? (m_iSourceHeight + m_uiMaxCUHeight - 1) / m_uiMaxCUHeight : 1;
+  const int iWaveFrontSubstreams = m_entropyCodingSyncEnabledFlag ? (m_sourceHeight + m_uiMaxCUHeight - 1) / m_uiMaxCUHeight : 1;
   msg( VERBOSE, " WaveFrontSynchro:%d WaveFrontSubstreams:%d", m_entropyCodingSyncEnabledFlag?1:0, iWaveFrontSubstreams);
   msg( VERBOSE, " ScalingList:%d ", m_useScalingListId );
   msg( VERBOSE, "TMVPMode:%d ", m_TMVPModeId );
@@ -4078,14 +4654,6 @@ void EncAppCfg::xPrintParameter()
   if( m_MIP ) msg(VERBOSE, "FastMIP:%d ", m_useFastMIP);
   msg( VERBOSE, "FastLocalDualTree:%d ", m_fastLocalDualTreeMode );
 
-  msg( VERBOSE, "NumSplitThreads:%d ", m_numSplitThreads );
-  if( m_numSplitThreads > 1 )
-  {
-    msg( VERBOSE, "ForceSingleSplitThread:%d ", m_forceSplitSequential );
-  }
-  msg( VERBOSE, "NumWppThreads:%d+%d ", m_numWppThreads, m_numWppExtraLines );
-  msg( VERBOSE, "EnsureWppBitEqual:%d ", m_ensureWppBitEqual );
-
   if (m_resChangeInClvsEnabled)
   {
     msg( VERBOSE, "RPR:(%1.2lfx, %1.2lfx)|%d ", m_scalingRatioHor, m_scalingRatioVer, m_switchPocPeriod );
@@ -4095,10 +4663,18 @@ void EncAppCfg::xPrintParameter()
     msg( VERBOSE, "RPR:%d ", 0 );
   }
   msg(VERBOSE, "TemporalFilter:%d ", m_gopBasedTemporalFilterEnabled);
+  msg(VERBOSE, "SEI CTI:%d ", m_ctiSEIEnabled);
 #if EXTENSION_360_VIDEO
   m_ext360.outputConfigurationSummary();
 #endif
 
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  if( m_constrainedRaslEncoding )
+  {
+    msg(VERBOSE, "\n\nWarning: with SEIConstrainedRASL enabled, LMChroma estimation is skipped in RASL frames" );
+  }
+#endif
+
   msg( VERBOSE, "\n\n");
 
   msg( NOTICE, "\n");
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index 47b2ce6827ae091755cd3bd7a3a841af919de156..eb145e69ca3d7b965fa701159057f769a20aa8f0 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -95,8 +95,8 @@ protected:
   int       m_iFrameRate;                                     ///< source frame-rates (Hz)
   uint32_t      m_FrameSkip;                                      ///< number of skipped frames from the beginning
   uint32_t      m_temporalSubsampleRatio;                         ///< temporal subsample ratio, 2 means code every two frames
-  int       m_iSourceWidth;                                   ///< source width in pixel
-  int       m_iSourceHeight;                                  ///< source height in pixel (when interlaced = field height)
+  int       m_sourceWidth;                                   ///< source width in pixel
+  int       m_sourceHeight;                                  ///< source height in pixel (when interlaced = field height)
 #if EXTENSION_360_VIDEO
   int       m_inputFileWidth;                                 ///< width of image in input file  (this is equivalent to sourceWidth,  if sourceWidth  is not subsequently altered due to padding)
   int       m_inputFileHeight;                                ///< height of image in input file (this is equivalent to sourceHeight, if sourceHeight is not subsequently altered due to padding)
@@ -105,16 +105,16 @@ protected:
 
   bool      m_isField;                                        ///< enable field coding
   bool      m_isTopFieldFirst;
-  bool      m_bEfficientFieldIRAPEnabled;                     ///< enable an efficient field IRAP structure.
-  bool      m_bHarmonizeGopFirstFieldCoupleEnabled;
+  bool      m_efficientFieldIRAPEnabled;   ///< enable an efficient field IRAP structure.
+  bool      m_harmonizeGopFirstFieldCoupleEnabled;
 
   int       m_conformanceWindowMode;
   int       m_confWinLeft;
   int       m_confWinRight;
   int       m_confWinTop;
   int       m_confWinBottom;
+  int       m_sourcePadding[2];                                       ///< number of padded pixels for width and height
   int       m_framesToBeEncoded;                              ///< number of encoded frames
-  int       m_aiPad[2];                                       ///< number of padded pixels for width and height
   bool      m_AccessUnitDelimiter;                            ///< add Access Unit Delimiter NAL units
   bool      m_enablePictureHeaderInSliceHeader;               ///< Enable Picture Header in Slice Header
   InputColourSpaceConversion m_inputColourSpaceConvert;       ///< colour space conversion to apply to input video
@@ -127,6 +127,7 @@ protected:
   bool      m_printFrameMSE;
   bool      m_printSequenceMSE;
   bool      m_printMSSSIM;
+  bool      m_printWPSNR;
   bool      m_cabacZeroWordPaddingEnabled;
   bool      m_bClipInputVideoToRec709Range;
   bool      m_bClipOutputVideoToRec709Range;
@@ -188,6 +189,9 @@ protected:
   bool      m_noCraConstraintFlag;
   bool      m_noGdrConstraintFlag;
   bool      m_noApsConstraintFlag;
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  bool      m_generalLowerBitRateConstraintFlag;
+#endif
 
   // profile/level
   Profile::Name m_profile;
@@ -215,9 +219,17 @@ protected:
   bool          m_noSubpicInfoConstraintFlag;
   // coding structure
   int       m_iIntraPeriod;                                   ///< period of I-slice (random access period)
+#if GDR_ENABLED 
+  bool      m_gdrEnabled;
+  int       m_gdrPocStart;
+  int       m_gdrPeriod;
+  int       m_gdrInterval;  
+  bool      m_gdrNoHash;  
+#endif
   int       m_iDecodingRefreshType;                           ///< random access type
   int       m_iGOPSize;                                       ///< GOP size of hierarchical structure
   int       m_drapPeriod;                                     ///< period of dependent RAP pictures
+  int       m_edrapPeriod;                                    ///< period of extended dependent RAP pictures
   bool      m_rewriteParamSets;                              ///< Flag to enable rewriting of parameter sets at random access points
   RPLEntry  m_RPLList0[MAX_GOP];                               ///< the RPL entries from the config file
   RPLEntry  m_RPLList1[MAX_GOP];                               ///< the RPL entries from the config file
@@ -232,6 +244,7 @@ protected:
   uint32_t      m_log2MaxTransformSkipBlockSize;                  ///< transform-skip maximum size (minimum of 2)
   bool      m_transformSkipRotationEnabledFlag;               ///< control flag for transform-skip/transquant-bypass residual rotation
   bool      m_transformSkipContextEnabledFlag;                ///< control flag for transform-skip/transquant-bypass single significance map context
+  bool      m_rrcRiceExtensionEnableFlag;                        ///< control flag for enabling extension of the Golomb-Rice parameter derivation for RRC
   bool      m_persistentRiceAdaptationEnabledFlag;            ///< control flag for Golomb-Rice parameter adaptation over each slice
   bool      m_cabacBypassAlignmentEnabledFlag;
   bool      m_ISP;
@@ -253,9 +266,11 @@ protected:
   std::string m_dQPFileName;                                  ///< QP offset for each slice (initialized from external file)
   int*      m_aidQP;                                          ///< array of slice QP values
   int       m_iMaxDeltaQP;                                    ///< max. |delta QP|
-  uint32_t      m_uiDeltaQpRD;                                    ///< dQP range for multi-pass slice QP optimization
+  uint32_t  m_uiDeltaQpRD;                                    ///< dQP range for multi-pass slice QP optimization
   int       m_cuQpDeltaSubdiv;                                ///< Maximum subdiv for CU luma Qp adjustment (0:default)
   int       m_cuChromaQpOffsetSubdiv;                         ///< If negative, then do not apply chroma qp offsets.
+  std::vector<ChromaQpAdj> m_cuChromaQpOffsetList;            ///< Local chroma QP offsets list (to be signalled in PPS)
+  bool      m_cuChromaQpOffsetEnabled;                        ///< Enable local chroma QP offsets (slice level flag)
   bool      m_bFastDeltaQP;                                   ///< Fast Delta QP (false:default)
 
   int       m_cbQpOffset;                                     ///< Chroma Cb QP Offset (0:default)
@@ -275,6 +290,23 @@ protected:
   LumaLevelToDeltaQPMapping m_lumaLevelToDeltaQPMapping;      ///< mapping from luma level to Delta QP.
 #endif
   SEIMasteringDisplay m_masteringDisplay;
+  bool      m_smoothQPReductionEnable;
+#if JVET_W0043
+  double    m_smoothQPReductionThresholdIntra;
+  double    m_smoothQPReductionModelScaleIntra;
+  double    m_smoothQPReductionModelOffsetIntra;
+  int       m_smoothQPReductionLimitIntra;
+  double    m_smoothQPReductionThresholdInter;
+  double    m_smoothQPReductionModelScaleInter;
+  double    m_smoothQPReductionModelOffsetInter;
+  int       m_smoothQPReductionLimitInter;
+#else
+  double    m_smoothQPReductionThreshold;
+  double    m_smoothQPReductionModelScale;
+  double    m_smoothQPReductionModelOffset;
+  int       m_smoothQPReductionLimit;
+#endif
+  int       m_smoothQPReductionPeriodicity;
 
   bool      m_bUseAdaptiveQP;                                 ///< Flag for enabling QP adaptation based on a psycho-visual model
   int       m_iQPAdaptationRange;                             ///< dQP range by QP adaptation
@@ -393,13 +425,6 @@ protected:
   bool      m_useFastMIP;
   int       m_fastLocalDualTreeMode;
 
-
-  int       m_numSplitThreads;
-  bool      m_forceSplitSequential;
-  int       m_numWppThreads;
-  int       m_numWppExtraLines;
-  bool      m_ensureWppBitEqual;
-
   int       m_log2MaxTbSize;
   // coding tools (bit-depth)
   int       m_inputBitDepth   [MAX_NUM_CHANNEL_TYPE];         ///< bit-depth of input file
@@ -407,6 +432,10 @@ protected:
   int       m_MSBExtendedBitDepth[MAX_NUM_CHANNEL_TYPE];      ///< bit-depth of input samples after MSB extension
   int       m_internalBitDepth[MAX_NUM_CHANNEL_TYPE];         ///< bit-depth codec operates at (input/output files will be converted)
   bool      m_extendedPrecisionProcessingFlag;
+  bool      m_tsrcRicePresentFlag;
+#if JVET_W0046_RLSCP
+  bool      m_reverseLastSigCoeffEnabledFlag;
+#endif
   bool      m_highPrecisionOffsetsEnabledFlag;
 
   //coding tools (chroma format)
@@ -415,6 +444,9 @@ protected:
 
   // coding tool (SAO)
   bool      m_bUseSAO;
+#if JVET_W0129_ENABLE_ALF_TRUEORG
+  bool      m_saoTrueOrg;
+#endif
   bool      m_bTestSAODisableAtPictureLevel;
   double    m_saoEncodingRate;                                ///< When >0 SAO early picture termination is enabled for luma and chroma
   double    m_saoEncodingRateChroma;                          ///< The SAO early picture termination rate to use for chroma (when m_SaoEncodingRate is >0). If <=0, use results for luma.
@@ -422,14 +454,14 @@ protected:
   bool      m_saoCtuBoundary;                                 ///< SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas
   bool      m_saoGreedyMergeEnc;                              ///< SAO greedy merge encoding algorithm
   // coding tools (loop filter)
-  bool      m_bLoopFilterDisable;                             ///< flag for using deblocking filter
-  bool      m_loopFilterOffsetInPPS;                         ///< offset for deblocking filter in 0 = slice header, 1 = PPS
-  int       m_loopFilterBetaOffsetDiv2;                     ///< beta offset for deblocking filter
-  int       m_loopFilterTcOffsetDiv2;                       ///< tc offset for deblocking filter
-  int       m_loopFilterCbBetaOffsetDiv2;                     ///< beta offset for Cb deblocking filter
-  int       m_loopFilterCbTcOffsetDiv2;                       ///< tc offset for Cb deblocking filter
-  int       m_loopFilterCrBetaOffsetDiv2;                     ///< beta offset for Cr deblocking filter
-  int       m_loopFilterCrTcOffsetDiv2;                       ///< tc offset for Cr deblocking filter
+  bool      m_deblockingFilterDisable;                        ///< flag for using deblocking filter
+  bool      m_deblockingFilterOffsetInPPS;                    ///< offset for deblocking filter in 0 = slice header, 1 = PPS
+  int       m_deblockingFilterBetaOffsetDiv2;                 ///< beta offset for deblocking filter
+  int       m_deblockingFilterTcOffsetDiv2;                   ///< tc offset for deblocking filter
+  int       m_deblockingFilterCbBetaOffsetDiv2;               ///< beta offset for Cb deblocking filter
+  int       m_deblockingFilterCbTcOffsetDiv2;                 ///< tc offset for Cb deblocking filter
+  int       m_deblockingFilterCrBetaOffsetDiv2;               ///< beta offset for Cr deblocking filter
+  int       m_deblockingFilterCrTcOffsetDiv2;                 ///< tc offset for Cr deblocking filter
 #if W0038_DB_OPT
   int       m_deblockingFilterMetric;                         ///< blockiness metric in encoder
 #else
@@ -496,11 +528,16 @@ protected:
   int       m_framePackingSEIId;
   int       m_framePackingSEIQuincunx;
   int       m_framePackingSEIInterpretation;
+  bool      m_doSEIEnabled;
+  bool      m_doSEICancelFlag;
+  bool      m_doSEIPersistenceFlag;
+  int       m_doSEITransformType;
   bool      m_parameterSetsInclusionIndicationSEIEnabled;
   int       m_selfContainedClvsFlag;
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
   int       m_preferredTransferCharacteristics;
 #endif
+
   // film grain characterstics sei
   bool      m_fgcSEIEnabled;
   bool      m_fgcSEICancelFlag;
@@ -519,6 +556,19 @@ protected:
   uint32_t  m_aveSEIAmbientIlluminance;
   uint32_t  m_aveSEIAmbientLightX;
   uint32_t  m_aveSEIAmbientLightY;
+  // colour transform information sei
+  bool      m_ctiSEIEnabled;
+  uint32_t  m_ctiSEIId;
+  bool      m_ctiSEISignalInfoFlag;
+  bool      m_ctiSEIFullRangeFlag;
+  uint32_t  m_ctiSEIPrimaries;
+  uint32_t  m_ctiSEITransferFunction;
+  uint32_t  m_ctiSEIMatrixCoefs;
+  bool      m_ctiSEICrossComponentFlag;
+  bool      m_ctiSEICrossComponentInferred;
+  uint32_t  m_ctiSEINumberChromaLut;
+  int       m_ctiSEIChromaOffset;
+  LutModel  m_ctiSEILut[MAX_NUM_COMPONENT];
   // content colour volume sei
   bool      m_ccvSEIEnabled;
   bool      m_ccvSEICancelFlag;
@@ -532,6 +582,72 @@ protected:
   double    m_ccvSEIMinLuminanceValue;
   double    m_ccvSEIMaxLuminanceValue;
   double    m_ccvSEIAvgLuminanceValue;
+  // scalability dimension information sei
+  bool              m_sdiSEIEnabled;
+  int               m_sdiSEIMaxLayersMinus1;
+  bool              m_sdiSEIMultiviewInfoFlag;
+  bool              m_sdiSEIAuxiliaryInfoFlag;
+  int               m_sdiSEIViewIdLenMinus1;
+  std::vector<uint32_t>  m_sdiSEILayerId;
+  std::vector<uint32_t>  m_sdiSEIViewIdVal;
+  std::vector<uint32_t>  m_sdiSEIAuxId;
+  std::vector<uint32_t>  m_sdiSEINumAssociatedPrimaryLayersMinus1;
+  // multiview acquisition information sei
+  bool              m_maiSEIEnabled;
+  bool              m_maiSEIIntrinsicParamFlag;
+  bool              m_maiSEIExtrinsicParamFlag;
+  int               m_maiSEINumViewsMinus1;
+  bool              m_maiSEIIntrinsicParamsEqualFlag;
+  int               m_maiSEIPrecFocalLength;
+  int               m_maiSEIPrecPrincipalPoint;
+  int               m_maiSEIPrecSkewFactor;
+  std::vector<bool> m_maiSEISignFocalLengthX;
+  std::vector<uint32_t>  m_maiSEIExponentFocalLengthX;
+  std::vector<uint32_t>  m_maiSEIMantissaFocalLengthX;
+  std::vector<bool>      m_maiSEISignFocalLengthY;
+  std::vector<uint32_t>  m_maiSEIExponentFocalLengthY;
+  std::vector<uint32_t>  m_maiSEIMantissaFocalLengthY;
+  std::vector<bool>      m_maiSEISignPrincipalPointX;
+  std::vector<uint32_t>  m_maiSEIExponentPrincipalPointX;
+  std::vector<uint32_t>  m_maiSEIMantissaPrincipalPointX;
+  std::vector<bool>      m_maiSEISignPrincipalPointY;
+  std::vector<uint32_t>  m_maiSEIExponentPrincipalPointY;
+  std::vector<uint32_t>  m_maiSEIMantissaPrincipalPointY;
+  std::vector<bool>      m_maiSEISignSkewFactor;
+  std::vector<uint32_t>  m_maiSEIExponentSkewFactor;
+  std::vector<uint32_t>  m_maiSEIMantissaSkewFactor;
+  int               m_maiSEIPrecRotationParam;
+  int               m_maiSEIPrecTranslationParam;
+#if JVET_W0078_MVP_SEI 
+  // multiview view position sei
+  bool              m_mvpSEIEnabled;
+  int               m_mvpSEINumViewsMinus1;
+  std::vector<uint32_t>  m_mvpSEIViewPosition;
+#endif
+  // alpha channel information sei
+  bool      m_aciSEIEnabled;
+  bool      m_aciSEICancelFlag;
+  int       m_aciSEIUseIdc;
+  int       m_aciSEIBitDepthMinus8;
+  int       m_aciSEITransparentValue;
+  int       m_aciSEIOpaqueValue;
+  bool      m_aciSEIIncrFlag;
+  bool      m_aciSEIClipFlag;
+  bool      m_aciSEIClipTypeFlag;
+  // depth representation information sei
+  bool      m_driSEIEnabled;
+  bool      m_driSEIZNearFlag;
+  bool      m_driSEIZFarFlag;
+  bool      m_driSEIDMinFlag;
+  bool      m_driSEIDMaxFlag;
+  double    m_driSEIZNear;
+  double    m_driSEIZFar;
+  double    m_driSEIDMin;
+  double    m_driSEIDMax;
+  int       m_driSEIDepthRepresentationType;
+  int       m_driSEIDisparityRefViewId;
+  int       m_driSEINonlinearNumMinus1;
+  std::vector<uint32_t> m_driSEINonlinearModel;
 
   bool      m_erpSEIEnabled;
   bool      m_erpSEICancelFlag;
@@ -558,6 +674,7 @@ protected:
   std::vector<int>      m_omniViewportSEITiltCentre;
   std::vector<uint32_t> m_omniViewportSEIHorRange;
   std::vector<uint32_t> m_omniViewportSEIVerRange;
+  std::string           m_arSEIFileRoot;  // Annotated region SEI - initialized from external file
   bool                  m_rwpSEIEnabled;
   bool                  m_rwpSEIRwpCancelFlag;
   bool                  m_rwpSEIRwpPersistenceFlag;
@@ -602,6 +719,10 @@ protected:
 
   CfgSEISubpictureLevel m_cfgSubpictureLevelInfoSEI;
 
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  bool                  m_constrainedRaslEncoding;
+#endif
+
   bool                  m_sampleAspectRatioInfoSEIEnabled;
   bool                  m_sariCancelFlag;
   bool                  m_sariPersistenceFlag;
@@ -694,11 +815,18 @@ protected:
   bool        m_forceDecodeBitstream1;
 
   bool        m_alf;                                          ///< Adaptive Loop Filter
-#if JVET_T0064
-  double      m_alfStrength;
+#if JVET_W0129_ENABLE_ALF_TRUEORG
+  bool        m_alfTrueOrg;
+#else
+  bool        m_alfSaoTrueOrg;
+#endif
+  double      m_alfStrengthLuma;
   bool        m_alfAllowPredefinedFilters;
   double      m_ccalfStrength;
-#endif
+  double      m_alfStrengthChroma;
+  double      m_alfStrengthTargetLuma;
+  double      m_alfStrengthTargetChroma;
+  double      m_ccalfStrengthTarget;
   bool        m_ccalf;
   int         m_ccalfQpThreshold;
 
@@ -709,6 +837,10 @@ protected:
   double      m_fractionOfFrames;                             ///< encode a fraction of the frames as specified in FramesToBeEncoded
   int         m_switchPocPeriod;
   int         m_upscaledOutput;                               ////< Output upscaled (2), decoded cropped but in full resolution buffer (1) or decoded cropped (0, default) picture for RPR.
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  bool        m_craAPSreset;
+  bool        m_rprRASLtoolSwitch;
+#endif
   bool        m_avoidIntraInDepLayer;
 
   bool                  m_gopBasedTemporalFilterEnabled;               ///< GOP-based Temporal Filter enable/disable
@@ -717,10 +849,8 @@ protected:
 
   int         m_maxLayers;
   int         m_targetOlsIdx;
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   bool        m_OPIEnabled;                                     ///< enable Operating Point Information (OPI)
   int         m_maxTemporalLayer;
-#endif
   int         m_layerId[MAX_VPS_LAYERS];
   int         m_layerIdx;
   int         m_maxSublayers;
@@ -734,9 +864,7 @@ protected:
   int         m_olsModeIdc;
   int         m_numOutputLayerSets;
   std::string m_olsOutputLayerStr[MAX_VPS_LAYERS];
-#if JVET_R0193
   std::string m_maxTidILRefPicsPlus1Str[MAX_VPS_LAYERS];
-#endif
 
   int         m_numPtlsInVps;
 
diff --git a/source/App/EncoderApp/encmain.cpp b/source/App/EncoderApp/encmain.cpp
index 0dfefebecaeb29ce3ebcea6a9e55c2bbe2672ab9..74ef03119f035e85af957965f86a302403a5602a 100644
--- a/source/App/EncoderApp/encmain.cpp
+++ b/source/App/EncoderApp/encmain.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -101,16 +101,6 @@ int main(int argc, char* argv[])
 #endif
 #if ENABLE_TRACING
   fprintf( stdout, "[ENABLE_TRACING] " );
-#endif
-#if ENABLE_SPLIT_PARALLELISM
-  fprintf( stdout, "[SPLIT_PARALLEL (%d jobs)]", PARL_SPLIT_MAX_NUM_JOBS );
-#endif
-#if ENABLE_SPLIT_PARALLELISM
-  const char* waitPolicy = getenv( "OMP_WAIT_POLICY" );
-  const char* maxThLim   = getenv( "OMP_THREAD_LIMIT" );
-  fprintf( stdout, waitPolicy ? "[OMP: WAIT_POLICY=%s," : "[OMP: WAIT_POLICY=,", waitPolicy );
-  fprintf( stdout, maxThLim   ? "THREAD_LIMIT=%s" : "THREAD_LIMIT=", maxThLim );
-  fprintf( stdout, "]" );
 #endif
   fprintf( stdout, "\n" );
 
diff --git a/source/App/Parcat/CMakeLists.txt b/source/App/Parcat/CMakeLists.txt
index 12edc317ad223c8274d0234332fe9b2edf109db7..5b96050066c475c450f6dba881191c6db6185ce7 100644
--- a/source/App/Parcat/CMakeLists.txt
+++ b/source/App/Parcat/CMakeLists.txt
@@ -10,7 +10,7 @@ file( GLOB INC_FILES "*.h" )
 # add executable
 add_executable( ${EXE_NAME} ${SRC_FILES} ${INC_FILES} )
 
-target_link_libraries( ${EXE_NAME} CommonLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS} )
+target_link_libraries( ${EXE_NAME} CommonLib DecoderLib Utilities ${ADDITIONAL_LIBS} )
 
 # include the output directory, where the svnrevision.h file is generated
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
diff --git a/source/App/Parcat/parcat.cpp b/source/App/Parcat/parcat.cpp
index ad869476ffab48867057371049d9229ee37f98c3..9f7db1dde31666b365839b823c1e07e472bbc2a9 100644
--- a/source/App/Parcat/parcat.cpp
+++ b/source/App/Parcat/parcat.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -65,7 +65,7 @@ void ParcatHLSyntaxReader::parsePictureHeaderUpToPoc ( ParameterSetManager *para
   uint32_t  uiCode;
   PPS* pps = NULL;
   SPS* sps = NULL;
-  
+
   uint32_t uiTmp;
   READ_FLAG(uiTmp, "ph_gdr_or_irap_pic_flag");
   READ_FLAG(uiCode, "ph_non_ref_pic_flag");
@@ -212,8 +212,9 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
   int sz = (int) v.size();
   int nal_start, nal_end;
   int off = 0;
-  int cnt = 0;
-  bool idr_found = false;
+  int cnt[MAX_VPS_LAYERS] = { 0 };
+  bool idr_found[MAX_VPS_LAYERS] = { false };
+  bool is_pre_sei_before_idr = true;
 
   std::vector<uint8_t> out;
   out.reserve(v.size());
@@ -268,22 +269,23 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
       HLSReader.parsePPS( pps );
       parameterSetManager.storePPS( pps, inp_nalu.getBitstream().getFifo() );
     }
+    int nalu_layerId = nalu[0] & 0x3F;
 
+    if (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP)
+    {
+      is_pre_sei_before_idr = false;
+    }
     if(nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP)
     {
       poc = 0;
       new_poc = *poc_base + poc;
       if (first_idr_slice_after_ph_nal)
       {
-        cnt--;
+        cnt[nalu_layerId]--;
       }
       first_idr_slice_after_ph_nal = false;
     }
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
     if(inp_nalu.m_nalUnitType == NAL_UNIT_PH || (nalu_type < NAL_UNIT_CODED_SLICE_IDR_W_RADL) || (nalu_type > NAL_UNIT_CODED_SLICE_IDR_N_LP && nalu_type <= NAL_UNIT_RESERVED_IRAP_VCL_11) )
-#else
-    if(inp_nalu.m_nalUnitType == NAL_UNIT_PH || (nalu_type < NAL_UNIT_CODED_SLICE_IDR_W_RADL) || (nalu_type > NAL_UNIT_CODED_SLICE_IDR_N_LP && nalu_type <= NAL_UNIT_RESERVED_IRAP_VCL_12) )
-#endif
     {
       parcatHLSReader.setBitstream( &inp_nalu.getBitstream() );
       if (inp_nalu.m_nalUnitType == NAL_UNIT_PH)
@@ -323,7 +325,7 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
 #if ENABLE_TRACING
         std::cout << "Changed poc " << poc << " to " << new_poc << std::endl;
 #endif
-        ++cnt;
+        ++cnt[nalu_layerId];
         change_poc = false;
       }
     }
@@ -331,16 +333,12 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
     if(idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP))
     {
       skip_next_sei = true;
-      idr_found = true;
+      idr_found[nalu_layerId] = true;
     }
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
-    if ((idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP)) 
-      || ((idx > 1 && !idr_found) && (nalu_type == NAL_UNIT_OPI || nalu_type == NAL_UNIT_DCI || nalu_type == NAL_UNIT_VPS || nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS || nalu_type == NAL_UNIT_PREFIX_APS || nalu_type == NAL_UNIT_SUFFIX_APS || nalu_type == NAL_UNIT_PH || nalu_type == NAL_UNIT_ACCESS_UNIT_DELIMITER)) 
-      || (nalu_type == NAL_UNIT_SUFFIX_SEI && skip_next_sei))
-#else
-    if ((idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP)) || ((idx > 1 && !idr_found) && (nalu_type == NAL_UNIT_DCI || nalu_type == NAL_UNIT_VPS || nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS || nalu_type == NAL_UNIT_PREFIX_APS || nalu_type == NAL_UNIT_SUFFIX_APS || nalu_type == NAL_UNIT_PH || nalu_type == NAL_UNIT_ACCESS_UNIT_DELIMITER))
-      || (nalu_type == NAL_UNIT_SUFFIX_SEI && skip_next_sei))
-#endif
+    if ((idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP))
+      || ((idx > 1 && !idr_found[nalu_layerId]) && (nalu_type == NAL_UNIT_OPI || nalu_type == NAL_UNIT_DCI || nalu_type == NAL_UNIT_VPS || nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS || nalu_type == NAL_UNIT_PREFIX_APS || nalu_type == NAL_UNIT_SUFFIX_APS || nalu_type == NAL_UNIT_PH || nalu_type == NAL_UNIT_ACCESS_UNIT_DELIMITER))
+      || (nalu_type == NAL_UNIT_SUFFIX_SEI && skip_next_sei)
+      || (idx > 1 && nalu_type == NAL_UNIT_PREFIX_SEI && is_pre_sei_before_idr))
     {
     }
     else
@@ -359,7 +357,7 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
     sz -= nal_end;
   }
 
-  *poc_base += cnt;
+  *poc_base += *std::max_element(std::begin(cnt), std::end(cnt));
   return out;
 }
 
diff --git a/source/App/SEIRemovalApp/CMakeLists.txt b/source/App/SEIRemovalApp/CMakeLists.txt
index cb783adfff2cbc83d841ea84e9b79372f7698efa..ef4fe3634e8ad74aa7a1a8c9ff80f030380394fd 100644
--- a/source/App/SEIRemovalApp/CMakeLists.txt
+++ b/source/App/SEIRemovalApp/CMakeLists.txt
@@ -33,32 +33,12 @@ if( SET_ENABLE_TRACING )
   endif()
 endif()
 
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-else()
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
-
 if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC )
   set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ )
   target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 )
 endif()
 
-target_link_libraries( ${EXE_NAME} CommonLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS} )
+target_link_libraries( ${EXE_NAME} CommonLib DecoderLib Utilities ${ADDITIONAL_LIBS} )
 
 # lldb custom data formatters
 if( XCODE )
diff --git a/source/App/SEIRemovalApp/SEIRemovalApp.cpp b/source/App/SEIRemovalApp/SEIRemovalApp.cpp
index e10c99da882d7df9985db68d04239c880f4d251a..c12dee6a99c8954ffe162f6d94934835b2190b39 100644
--- a/source/App/SEIRemovalApp/SEIRemovalApp.cpp
+++ b/source/App/SEIRemovalApp/SEIRemovalApp.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/SEIRemovalApp/SEIRemovalApp.h b/source/App/SEIRemovalApp/SEIRemovalApp.h
index 32bb23ac26d974025866f8df9eb28d359d40e65c..ec03a9a3624b623eba92fcdc490fa275e0c0ea06 100644
--- a/source/App/SEIRemovalApp/SEIRemovalApp.h
+++ b/source/App/SEIRemovalApp/SEIRemovalApp.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/SEIRemovalApp/SEIRemovalAppCfg.cpp b/source/App/SEIRemovalApp/SEIRemovalAppCfg.cpp
index f4ab6e1f0c46ff63065465c81edacb81d144bf38..20310d657455d4e3fb6d4a642dc19a16de2156c9 100644
--- a/source/App/SEIRemovalApp/SEIRemovalAppCfg.cpp
+++ b/source/App/SEIRemovalApp/SEIRemovalAppCfg.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/SEIRemovalApp/SEIRemovalAppCfg.h b/source/App/SEIRemovalApp/SEIRemovalAppCfg.h
index 593ba838316762c62ac1bae26d23d0e6cff21d12..5edd3561b8cbee8c7b9117593216f67d4a5daee6 100644
--- a/source/App/SEIRemovalApp/SEIRemovalAppCfg.h
+++ b/source/App/SEIRemovalApp/SEIRemovalAppCfg.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/SEIRemovalApp/seiremovalmain.cpp b/source/App/SEIRemovalApp/seiremovalmain.cpp
index 5fba16f98c0820dc7d1273f8adcff2ddbe9b9432..309bb72c37a2472c957a0b2c9d375ef85388f1e2 100644
--- a/source/App/SEIRemovalApp/seiremovalmain.cpp
+++ b/source/App/SEIRemovalApp/seiremovalmain.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/StreamMergeApp/CMakeLists.txt b/source/App/StreamMergeApp/CMakeLists.txt
index 77c53ece6b5e192580b86a7f3b7dac9e4c0b6290..117652574ccdcc67d184bed3b46e2a8d409bc604 100644
--- a/source/App/StreamMergeApp/CMakeLists.txt
+++ b/source/App/StreamMergeApp/CMakeLists.txt
@@ -33,32 +33,12 @@ if( SET_ENABLE_TRACING )
   endif()
 endif()
 
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-else()
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
-
 if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC )
   set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ )
   target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 )
 endif()
 
-target_link_libraries( ${EXE_NAME} CommonLib EncoderLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS} )
+target_link_libraries( ${EXE_NAME} CommonLib EncoderLib DecoderLib Utilities ${ADDITIONAL_LIBS} )
 
 # lldb custom data formatters
 if( XCODE )
diff --git a/source/App/StreamMergeApp/StreamMergeApp.cpp b/source/App/StreamMergeApp/StreamMergeApp.cpp
index b9943e8ec3ee89f0de0ca8856ec57e93c3ca0b18..2f813eff7e9c6cf67e2ad9660f09d0321ec9c23f 100644
--- a/source/App/StreamMergeApp/StreamMergeApp.cpp
+++ b/source/App/StreamMergeApp/StreamMergeApp.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -41,8 +41,8 @@
 #include <fcntl.h>
 
 #include "StreamMergeApp.h"
-#include "DecoderLib/AnnexBread.h"
-#include "DecoderLib/NALread.h"
+#include "AnnexBwrite.h"
+#include "NALwrite.h"
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
 #include "CommonLib/CodingStatistics.h"
 #endif
@@ -63,26 +63,6 @@ StreamMergeApp::StreamMergeApp()
 // Public member functions
 // ====================================================================================================================
 
-/**
- - create internal class
- - initialize internal class
- - until the end of the bitstream, call decoding function in StreamMergeApp class
- - delete allocated buffers
- - destroy internal class
- - returns the number of mismatching pictures
- */
-
-void read2(InputNALUnit& nalu)
-{
-  InputBitstream& bs = nalu.getBitstream();
-
-  nalu.m_forbiddenZeroBit   = bs.read(1);                 // forbidden zero bit
-  nalu.m_nuhReservedZeroBit = bs.read(1);                 // nuh_reserved_zero_bit
-  nalu.m_nuhLayerId         = bs.read(6);                 // nuh_layer_id
-  nalu.m_nalUnitType        = (NalUnitType) bs.read(5);   // nal_unit_type
-  nalu.m_temporalId         = bs.read(3) - 1;             // nuh_temporal_id_plus1
-}
-
 static void
 _byteStreamNALUnit(
   SingleLayerStream& bs,
@@ -224,121 +204,440 @@ byteStreamNALUnit(
   return eof;
 }
 
-void StreamMergeApp::writeNewVPS(ostream& out, int nLayerId, int nTemporalId)
+/**
+ - lookahead through next NAL units to determine if current NAL unit is the first NAL unit in a new picture
+ */
+bool StreamMergeApp::isNewPicture(std::ifstream *bitstreamFile, InputByteStream *bytestream, bool firstSliceInPicture)
 {
-  //write NALU header
-  OutputBitstream bsNALUHeader;
-  static const uint8_t start_code_prefix[] = { 0,0,0,1 };
+  bool ret      = false;
+  bool finished = false;
 
-  int forbiddenZero = 0;
-  bsNALUHeader.write(forbiddenZero, 1);   // forbidden_zero_bit
-  int nuhReservedZeroBit = 0;
-  bsNALUHeader.write(nuhReservedZeroBit, 1);   // nuh_reserved_zero_bit
-  bsNALUHeader.write(nLayerId, 6);             // nuh_layer_id
-  bsNALUHeader.write(NAL_UNIT_VPS, 5);         // nal_unit_type
-  bsNALUHeader.write(nTemporalId + 1, 3);      // nuh_temporal_id_plus1
-
-  out.write(reinterpret_cast<const char*>(start_code_prefix), 4);
-  out.write(reinterpret_cast<const char*>(bsNALUHeader.getByteStream()), bsNALUHeader.getByteStreamLength());
+  // cannot be a new picture if there haven't been any slices yet
+  if (firstSliceInPicture)
+  {
+    return false;
+  }
 
-  //write VPS
-  OutputBitstream bsVPS;
-  HLSWriter       m_HLSWriter;
+  // save stream position for backup
+  std::streampos location = bitstreamFile->tellg();
 
-  m_HLSWriter.setBitstream(&bsVPS);
-  m_HLSWriter.codeVPS(&vps);
+  // look ahead until picture start location is determined
+  while (!finished && !!(*bitstreamFile))
+  {
+    AnnexBStats  stats = AnnexBStats();
+    InputNALUnit nalu;
+    byteStreamNALUnit(*bytestream, nalu.getBitstream().getFifo(), stats);
+    if (nalu.getBitstream().getFifo().empty())
+    {
+      msg(ERROR, "Warning: Attempt to decode an empty NAL unit\n");
+    }
+    else
+    {
+      // get next NAL unit type
+      read(nalu);
+      switch (nalu.m_nalUnitType)
+      {
+      // NUT that indicate the start of a new picture
+      case NAL_UNIT_ACCESS_UNIT_DELIMITER:
+      case NAL_UNIT_OPI:
+      case NAL_UNIT_DCI:
+      case NAL_UNIT_VPS:
+      case NAL_UNIT_SPS:
+      case NAL_UNIT_PPS:
+      case NAL_UNIT_PH:
+        ret      = true;
+        finished = true;
+        break;
+
+      // VCL NUT - whether this starts a new picture is decided by checkPictureHeaderInSliceHeaderFlag()
+      case NAL_UNIT_CODED_SLICE_TRAIL:
+      case NAL_UNIT_CODED_SLICE_STSA:
+      case NAL_UNIT_CODED_SLICE_RASL:
+      case NAL_UNIT_CODED_SLICE_RADL:
+      case NAL_UNIT_RESERVED_VCL_4:
+      case NAL_UNIT_RESERVED_VCL_5:
+      case NAL_UNIT_RESERVED_VCL_6:
+      case NAL_UNIT_CODED_SLICE_IDR_W_RADL:
+      case NAL_UNIT_CODED_SLICE_IDR_N_LP:
+      case NAL_UNIT_CODED_SLICE_CRA:
+      case NAL_UNIT_CODED_SLICE_GDR:
+      case NAL_UNIT_RESERVED_IRAP_VCL_11:
+        ret      = checkPictureHeaderInSliceHeaderFlag(nalu);
+        finished = true;
+        break;
+
+      // NUT that are not the start of a new picture
+      case NAL_UNIT_EOS:
+      case NAL_UNIT_EOB:
+      case NAL_UNIT_SUFFIX_APS:
+      case NAL_UNIT_SUFFIX_SEI:
+      case NAL_UNIT_FD:
+        ret      = false;
+        finished = true;
+        break;
+
+      // NUT that might indicate the start of a new picture - keep looking
+      case NAL_UNIT_PREFIX_APS:
+      case NAL_UNIT_PREFIX_SEI:
+      case NAL_UNIT_RESERVED_NVCL_26:
+      case NAL_UNIT_RESERVED_NVCL_27:
+      case NAL_UNIT_UNSPECIFIED_28:
+      case NAL_UNIT_UNSPECIFIED_29:
+      case NAL_UNIT_UNSPECIFIED_30:
+      case NAL_UNIT_UNSPECIFIED_31:
+      default:
+        break;
+      }
+    }
+  }
 
-  out.write(reinterpret_cast<const char*>(bsVPS.getByteStream()), bsVPS.getByteStreamLength());
+  // restore previous stream location - minus 3 due to the need for the annexB parser to read three extra bytes
+  bitstreamFile->clear();
+  bitstreamFile->seekg(location - std::streamoff(3));
+  bytestream->reset();
 
-  return;
+  // return TRUE if next NAL unit is the start of a new picture
+  return ret;
 }
 
-uint32_t StreamMergeApp::mergeStreams()
+/**
+- lookahead through next NAL units to determine if current NAL unit is the first NAL unit in a new access unit
+*/
+bool StreamMergeApp::isNewAccessUnit(bool newPicture, std::ifstream *bitstreamFile, class InputByteStream *bytestream)
 {
-  ifstream bitstreamFileIn[MAX_VPS_LAYERS];
-  ofstream bitstreamFileOut(m_bitstreamFileNameOut.c_str(), ifstream::out | ifstream::binary);
-  int nNumValidStr = m_numInputStreams;
+  bool ret      = false;
+  bool finished = false;
 
-  for (int i = 0; i < m_numInputStreams; i++)
+  // can only be the start of an AU if this is the start of a new picture
+  if (newPicture == false)
   {
-    bitstreamFileIn[i].open(m_bitstreamFileNameIn[i].c_str(), ifstream::in | ifstream::binary);
+    return false;
+  }
 
-    if (!bitstreamFileIn[i])
+  // save stream position for backup
+  std::streampos location = bitstreamFile->tellg();
+
+  // look ahead until access unit start location is determined
+  while (!finished && !!(*bitstreamFile))
+  {
+    AnnexBStats  stats = AnnexBStats();
+    InputNALUnit nalu;
+    byteStreamNALUnit(*bytestream, nalu.getBitstream().getFifo(), stats);
+    if (nalu.getBitstream().getFifo().empty())
     {
-      EXIT("failed to open bitstream file " << m_bitstreamFileNameIn[i].c_str() << " for reading");
+      msg(ERROR, "Warning: Attempt to decode an empty NAL unit\n");
+    }
+    else
+    {
+      // get next NAL unit type
+      read(nalu);
+      switch (nalu.m_nalUnitType)
+      {
+      // AUD always indicates the start of a new access unit
+      case NAL_UNIT_ACCESS_UNIT_DELIMITER:
+        ret      = true;
+        finished = true;
+        break;
+
+      // slice types - check layer ID and POC
+      case NAL_UNIT_CODED_SLICE_TRAIL:
+      case NAL_UNIT_CODED_SLICE_STSA:
+      case NAL_UNIT_CODED_SLICE_RASL:
+      case NAL_UNIT_CODED_SLICE_RADL:
+      case NAL_UNIT_CODED_SLICE_IDR_W_RADL:
+      case NAL_UNIT_CODED_SLICE_IDR_N_LP:
+      case NAL_UNIT_CODED_SLICE_CRA:
+      case NAL_UNIT_CODED_SLICE_GDR:
+        ret      = true; // isSliceNaluFirstInAU(newPicture, nalu); // TODO: according to DecLib::isSliceNaluFirstInAU(), true if layerID==prevLayerID, otherwise true if POC!=prevPOC.
+        finished = true;
+        break;
+
+      // NUT that are not the start of a new access unit
+      case NAL_UNIT_EOS:
+      case NAL_UNIT_EOB:
+      case NAL_UNIT_SUFFIX_APS:
+      case NAL_UNIT_SUFFIX_SEI:
+      case NAL_UNIT_FD:
+        ret      = false;
+        finished = true;
+        break;
+
+      // all other NUT - keep looking to find first VCL
+      default: break;
+      }
     }
-
-    bitstreamFileIn[i].clear();
-    bitstreamFileIn[i].seekg(0, ios::beg);
   }
 
-  SingleLayerStream bytestream[MAX_VPS_LAYERS];
+  // restore previous stream location
+  bitstreamFile->clear();
+  bitstreamFile->seekg(location);
+  bytestream->reset();
 
-  for (int i = 0; i < m_numInputStreams; i++)
-    bytestream[i].init(bitstreamFileIn[i]);
+  // return TRUE if next NAL unit is the start of a new access unit
+  return ret;
+}
 
-  //set VPS which will be replicated for all layers but with differnt nul_layer_id
-  vps.setMaxLayers(m_numInputStreams);
-  vps.setVPSExtensionFlag(false);
+void StreamMergeApp::inputNaluHeaderToOutputNalu(InputNALUnit& inNalu, OutputNALUnit& outNalu) {
+  outNalu.m_forbiddenZeroBit   = inNalu.m_forbiddenZeroBit;
+  outNalu.m_nalUnitType        = inNalu.m_nalUnitType;
+  outNalu.m_nuhLayerId         = inNalu.m_nuhLayerId;
+  outNalu.m_nuhReservedZeroBit = inNalu.m_nuhReservedZeroBit;
+  outNalu.m_temporalId         = inNalu.m_temporalId;
+}
+
+bool StreamMergeApp::preInjectNalu(MergeLayer &layer, InputNALUnit &inNalu, OutputNALUnit &outNalu)
+{
+  HLSyntaxReader hlsReader;
+  HLSWriter      hlsWriter;
+  hlsReader.setBitstream(&inNalu.getBitstream());
+  hlsWriter.setBitstream(&outNalu.m_Bitstream);
 
-  //Loop all input bitstreams to interleave their NALUs
-  while (nNumValidStr)
+  switch (inNalu.m_nalUnitType)
   {
-    //loop over all input streams
+  case NAL_UNIT_SPS:
+  {
+    VPS *vps = new VPS();
+    if (vpsId == -1)
+    {
+      vpsId = ++idIncrement;
+    }
+    vps->setVPSId(vpsId);
     for (int i = 0; i < m_numInputStreams; i++)
     {
-      uint8_t layerId = i < 63 ? i : i + 1;
+      vps->setLayerId(i, i);   // Because we use layer IDs that are layer indices.
+    }
+    vps->setMaxLayers(m_numInputStreams);
+    vector<ProfileTierLevel> ptls;
+    ptls.push_back(ProfileTierLevel());
+    vps->setProfileTierLevel(ptls);
+    layer.vpsIdMapping[0] = vps->getVPSId();
+    layer.psManager.storeVPS(vps, hlsReader.getBitstream()->getFifo());
+    hlsWriter.codeVPS(vps);
+    outNalu.m_nalUnitType = NAL_UNIT_VPS;
+    msg(INFO, " layer %i, nalu type VPS%i injected\n", layer.id, vps->getVPSId());
+    return true;
+  }
+  default:
+    break;
+  }
+  return false;
+}
+
+/**
+  - Parse and re-encode the NAL unit with remapped IDs if it is a parameter set or picture header, otherwise copy its payload unchanged
+ */
+void StreamMergeApp::decodeAndRewriteNalu(MergeLayer &layer, InputNALUnit &inNalu, OutputNALUnit &outNalu)
+{
+  HLSyntaxReader hlsReader;
+  HLSWriter      hlsWriter;
+  hlsReader.setBitstream(&inNalu.getBitstream());
+  hlsWriter.setBitstream(&outNalu.m_Bitstream);
+
+  msg(INFO, " layer %i, nalu type ", layer.id);
+  switch (inNalu.m_nalUnitType)
+  {
+  case NAL_UNIT_SPS:
+  {
+    SPS *oldSps = new SPS();
+    SPS *newSps = new SPS();
+    hlsReader.parseSPS(oldSps);
+    inNalu.getBitstream().resetToStart();
+    uint32_t uiCode;
+    inNalu.getBitstream().read(16, uiCode);
+    hlsReader.parseSPS(newSps);
+    // Set new values.
+    newSps->setSPSId(++idIncrement);
+    newSps->setVPSId(layer.vpsIdMapping.at(oldSps->getVPSId()));
+    newSps->setLayerId(layer.id);
+    // Store values for later reference.
+    layer.spsIdMapping.insert({ oldSps->getSPSId(), newSps->getSPSId() });
+    layer.oldIDsPsManager.storeSPS(oldSps, hlsReader.getBitstream()->getFifo());
+    layer.psManager.storeSPS(newSps, hlsReader.getBitstream()->getFifo());
+    hlsWriter.codeSPS(newSps);
+    msg(INFO, "SPS%i", newSps->getSPSId());
+    break;
+  }
+  case NAL_UNIT_PPS:
+  {
+    PPS *oldPps = new PPS();
+    PPS *newPps = new PPS();
+    hlsReader.parsePPS(oldPps);
+    inNalu.getBitstream().resetToStart();
+    uint32_t uiCode;
+    inNalu.getBitstream().read(16, uiCode);
+    hlsReader.parsePPS(newPps);
+    // Set new values.
+    newPps->setPPSId(++idIncrement);
+    newPps->setSPSId(layer.spsIdMapping.at(oldPps->getSPSId()));
+    newPps->setLayerId(layer.id);
+    // Store values for later reference.
+    layer.ppsIdMapping.insert({ oldPps->getPPSId(), newPps->getPPSId() });
+    layer.oldIDsPsManager.storePPS(oldPps, hlsReader.getBitstream()->getFifo());
+    layer.psManager.storePPS(newPps, hlsReader.getBitstream()->getFifo());
+    hlsWriter.codePPS(newPps);
+    msg(INFO, "PPS%i", newPps->getPPSId());
+    break;
+  }
+  case NAL_UNIT_PREFIX_APS:
+  case NAL_UNIT_SUFFIX_APS:
+  {
+    APS *aps = new APS();
+    hlsReader.parseAPS(aps);
+    layer.apsIdMapping.insert({ aps->getAPSId(), ++idIncrement });
+    aps->setLayerId(layer.id);
+    aps->setAPSId(idIncrement);
+    layer.psManager.storeAPS(aps, hlsReader.getBitstream()->getFifo());
+    hlsWriter.codeAPS(aps);
+    msg(INFO, "APS%s%i", inNalu.m_nalUnitType == NAL_UNIT_PREFIX_APS ? "p" : "s", aps->getAPSId());
+    break;
+  }
+  case NAL_UNIT_PH:
+  {
+    PicHeader ph = PicHeader();
+    hlsReader.parsePictureHeader(&ph, &layer.oldIDsPsManager, true);
+    Slice slice = Slice();
+    slice.setPPS(layer.psManager.getPPS(layer.ppsIdMapping.at(ph.getPPSId())));
+    slice.setSPS(layer.psManager.getSPS(layer.spsIdMapping.at(ph.getSPSId())));
+    slice.setPOC(ph.getPocLsb());
+    ph.setPPSId(layer.ppsIdMapping.at(ph.getPPSId()));
+    hlsWriter.codePictureHeader(&ph, true, &slice);
+    msg(INFO, "PH");
+    break;
+  }
+  default:
+  {
+    if (inNalu.isVcl())
+    {
+      msg(INFO, "VCL");
+    }
+    else if (inNalu.isSei())
+    {
+      msg(INFO, "SEI");
+    }
+    else
+    {
+      msg(INFO, "NNN");   // Any other NAL unit that is not handled above
+    }
+    msg(INFO, " with index %i", inNalu.m_nalUnitType);
+    // Copy payload from input nalu to output nalu. Code copied from SubpicMergeApp::copyInputNaluToOutputNalu().
+    vector<uint8_t> &inFifo  = inNalu.getBitstream().getFifo();
+    vector<uint8_t> &outFifo = outNalu.m_Bitstream.getFIFO();
+    outFifo                  = vector<uint8_t>(inFifo.begin() + 2, inFifo.end());
+    break;
+  }
+  }
+  msg(INFO, "\n");
+}
 
-      if (!bitstreamFileIn[i])
-        continue;
+uint32_t StreamMergeApp::mergeStreams()
+{
+  ofstream outputStream(m_bitstreamFileNameOut, ifstream::out | ifstream::binary);
 
-      AnnexBStats stats = AnnexBStats();
+  vector<MergeLayer> *layers = new vector<MergeLayer>;
+  layers->resize(m_numInputStreams);
 
-      InputNALUnit nalu;
+  // Prepare merge layers.
+  for (int i = 0; i < layers->size(); i++)
+  {
+    MergeLayer &layer = layers->at(i);
+    layer.id          = i;
 
-      byteStreamNALUnit(bytestream[i], bitstreamFileIn[i], nalu.getBitstream().getFifo(), stats);
+    // Open input file.
+    layer.fp = new ifstream();
+    layer.fp->open(m_bitstreamFileNameIn[i], ifstream::in | ifstream::binary);
+    if (!layer.fp->is_open())
+    {
+      EXIT("failed to open bitstream file " << m_bitstreamFileNameIn[i] << " for reading");
+    }
+    layer.fp->clear();
+    layer.fp->seekg(0, ios::beg);
+
+    // Prep other values.
+    layer.bs = new InputByteStream(*(layer.fp));
+
+    VPS vps;
+    vps.setMaxLayers((uint32_t) layers->size());
+    vps.setLayerId(layer.id, layer.id);   // Layer ID is rewritten here.
+    layer.vpsIdMapping.insert({ vps.getVPSId(), 0 });
+    vps.setVPSId(0);
+    layer.psManager.storeVPS(&vps, std::vector<uint8_t>()); // Create VPS with default values (VTM slice header parser needs this)
+  }
 
-      // call actual decoding function
-      if (nalu.getBitstream().getFifo().empty())
-      {
-        /* this can happen if the following occur:
-         *  - empty input file
-         *  - two back-to-back start_code_prefixes
-         *  - start_code_prefix immediately followed by EOF
-         */
-        std::cerr << "Warning: Attempt to decode an empty NAL unit" << std::endl;
-      }
-      else
-      {
-        read2(nalu);
+  // Loop over layers until every one is entirely read.
+  uint32_t layersStillToRead = (uint32_t) layers->size();
+  while (layersStillToRead > 0)
+  {
+    // Loop over every layer.
+    for (auto &layer: *layers)
+    {
+      if (layer.doneReading) continue;
+
+      //vector<OutputNALUnit> outNalus; // collection of nalus of this interleave part.
+      AccessUnit outAccessUnit;
+      // Read until EOF or until the next NAL unit starts a new access unit.
+      bool eoi = false; // end of interleave part.
+      while (!eoi) {
+        AnnexBStats  stats;
+        InputNALUnit inNalu;
+        inNalu.m_nalUnitType = NAL_UNIT_INVALID;
+
+        // Find next nalu in stream.
+        bool eof = byteStreamNALUnit(*layer.bs, inNalu.getBitstream().getFifo(), stats);
+
+        // End of file reached.
+        if (eof) {
+          eoi = true;
+          layersStillToRead--;
+          layer.doneReading = true;
+        }
 
-        if (nalu.m_nalUnitType == NAL_UNIT_VPS)
+        if (inNalu.getBitstream().getFifo().empty())
         {
-          writeNewVPS(bitstreamFileOut, layerId, nalu.m_temporalId);
-          printf("Write new VPS for stream %d\n", i);
-
+          msg(ERROR, "Warning: Attempt to decode an empty NAL unit\n");
           continue;
         }
 
-        int iNumZeros = stats.m_numLeadingZero8BitsBytes + stats.m_numZeroByteBytes + stats.m_numStartCodePrefixBytes - 1;
-        char ch = 0;
-        for (int i = 0; i < iNumZeros; i++) { bitstreamFileOut.write(&ch, 1); }
-        ch = 1; bitstreamFileOut.write(&ch, 1);
 
-        //update the nul_layer_id
-        uint8_t *p = (uint8_t*)nalu.getBitstream().getFifo().data();
-        p[1] = ((layerId + 1) << 1) & 0xff;
+        read(inNalu);   // Convert nalu payload to RBSP and parse nalu header
+
+        // NALU to optionally inject before the main output NALU.
+        OutputNALUnit injectedOutNalu((NalUnitType) inNalu.m_nalUnitType);
+        inputNaluHeaderToOutputNalu(inNalu, injectedOutNalu);
+        injectedOutNalu.m_nuhLayerId = layer.id;
+        if (preInjectNalu(layer, inNalu, injectedOutNalu))
+        {
+          outAccessUnit.push_back(new NALUnitEBSP(injectedOutNalu));
+        }
 
-        bitstreamFileOut.write((const char*)p, nalu.getBitstream().getFifo().size());
+        // Change input NALU to output NALU.
+        OutputNALUnit outNalu((NalUnitType) inNalu.m_nalUnitType);
+        inputNaluHeaderToOutputNalu(inNalu, outNalu);
+        outNalu.m_nuhLayerId = layer.id;
+        decodeAndRewriteNalu(layer, inNalu, outNalu);
+        outAccessUnit.push_back(new NALUnitEBSP(outNalu));
 
-        printf("Merge NALU type %d from stream %d\n", nalu.m_nalUnitType, i);
-      }
+        if (inNalu.isVcl())
+        {
+          layer.firstSliceInPicture = false;
+        }
 
-      if (!bitstreamFileIn[i])
-        nNumValidStr--;
+        try
+        {
+          bool bIsNewPicture = isNewPicture(layer.fp, layer.bs, layer.firstSliceInPicture);
+          if (isNewAccessUnit(bIsNewPicture, layer.fp, layer.bs))
+          {
+            layer.firstSliceInPicture = bIsNewPicture;
+            eoi                       = true;
+          }
+        }
+        catch (std::ios_base::failure&)
+        {
+          eoi = true;
+        }
+      }
+      writeAnnexBAccessUnit(outputStream, outAccessUnit);
     }
   }
-
   return 0;
 }
 
diff --git a/source/App/StreamMergeApp/StreamMergeApp.h b/source/App/StreamMergeApp/StreamMergeApp.h
index b4dc15ae8673800ec65c470ed6a948e56ad0fd84..1982ecc8eb378fda17480f8d68219aefdd79fb24 100644
--- a/source/App/StreamMergeApp/StreamMergeApp.h
+++ b/source/App/StreamMergeApp/StreamMergeApp.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -45,30 +45,71 @@
 #include <stdio.h>
 #include <fstream>
 #include <iostream>
-#include "CommonLib/CommonDef.h"
-#include "VLCWriter.h"
+#include "CommonDef.h" 
+#include "NALread.h"
 #include "CABACWriter.h"
 #include "AnnexBread.h"
+#include "VLCReader.h"
+#include "VLCWriter.h"
 #include "StreamMergeAppCfg.h"
 
 using namespace std;
 
+
+
+struct MergeLayer;
+class SingleLayerStream;
+typedef map<uint32_t, uint32_t> OldToNewIdMapping;
+
 // ====================================================================================================================
 // Class definition
 // ====================================================================================================================
 
-/// decoder application class
+/// stream merger application class
 class StreamMergeApp : public StreamMergeAppCfg
 {
 
 public:
   StreamMergeApp();
-  virtual ~StreamMergeApp         ()  {}
+  virtual ~StreamMergeApp() {}
 
   VPS vps;
 
-  uint32_t  mergeStreams            (); ///< main stream merging function
-  void      writeNewVPS             (ostream& out, int nNumLayers, int nTemporalId);
+  uint32_t mergeStreams();   ///< main stream merging function
+
+private:
+  bool isNewPicture(std::ifstream *bitstreamFile, InputByteStream *bytestream, bool firstSliceInPicture);
+  bool isNewAccessUnit(bool newPicture, std::ifstream *bitstreamFile, InputByteStream *bytestream);
+  void inputNaluHeaderToOutputNalu(InputNALUnit &inNalu, OutputNALUnit &outNalu);
+  bool preInjectNalu(MergeLayer &layer, InputNALUnit &inNalu, OutputNALUnit &outNalu);
+  void decodeAndRewriteNalu(MergeLayer &layer, InputNALUnit &inNalu, OutputNALUnit &outNalu);
+
+  int vpsId = -1;
+  int idIncrement = 0;
+};
+
+
+
+
+struct MergeLayer
+{
+  int id;
+
+  ifstream *                 fp;
+  InputByteStream *          bs;
+  bool                       firstSliceInPicture = true;
+  bool                       doneReading = false;
+  vector<AnnexBStats>        stats;
+  ParameterSetManager        oldIDsPsManager;
+  ParameterSetManager        psManager;
+  vector<int>                vpsIds;
+  vector<int>                spsIds;
+  vector<int>                ppsIds;
+
+  OldToNewIdMapping vpsIdMapping;
+  OldToNewIdMapping spsIdMapping;
+  OldToNewIdMapping ppsIdMapping;
+  OldToNewIdMapping apsIdMapping;
 };
 
 
diff --git a/source/App/StreamMergeApp/StreamMergeAppCfg.cpp b/source/App/StreamMergeApp/StreamMergeAppCfg.cpp
index 88432c360fd9a5e9304961c2872fae8a841119f3..310fab49bba04771684138e423dda17227b4dd32 100644
--- a/source/App/StreamMergeApp/StreamMergeAppCfg.cpp
+++ b/source/App/StreamMergeApp/StreamMergeAppCfg.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/StreamMergeApp/StreamMergeAppCfg.h b/source/App/StreamMergeApp/StreamMergeAppCfg.h
index 6ef3e791ce8f0fbe708759cfd5026f99c5bf4c34..e6248c0c55e60f5a6e26da0bc00646cdaab1771e 100644
--- a/source/App/StreamMergeApp/StreamMergeAppCfg.h
+++ b/source/App/StreamMergeApp/StreamMergeAppCfg.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/StreamMergeApp/StreamMergeMain.cpp b/source/App/StreamMergeApp/StreamMergeMain.cpp
index 7404d8751ae8f398ca4559d9afab2eccca11fe31..ad7c0473595b98074fb4f32a2cfdaa3f61934184 100644
--- a/source/App/StreamMergeApp/StreamMergeMain.cpp
+++ b/source/App/StreamMergeApp/StreamMergeMain.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/App/SubpicMergeApp/CMakeLists.txt b/source/App/SubpicMergeApp/CMakeLists.txt
index dd8fac45fd1f432db633da32662435936ece5309..64690566dae51d9c7cf5ec7b19ffd3d06b1f23ed 100644
--- a/source/App/SubpicMergeApp/CMakeLists.txt
+++ b/source/App/SubpicMergeApp/CMakeLists.txt
@@ -33,32 +33,12 @@ if( SET_ENABLE_TRACING )
   endif()
 endif()
 
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-else()
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
-
 if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC )
   set( ADDITIONAL_LIBS ${ADDITIONAL_LIBS} -static -static-libgcc -static-libstdc++ )
   target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 )
 endif()
 
-target_link_libraries( ${EXE_NAME} CommonLib EncoderLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS} )
+target_link_libraries( ${EXE_NAME} CommonLib EncoderLib DecoderLib Utilities ${ADDITIONAL_LIBS} )
 
 # lldb custom data formatters
 if( XCODE )
diff --git a/source/App/SubpicMergeApp/SubpicMergeApp.cpp b/source/App/SubpicMergeApp/SubpicMergeApp.cpp
index 3dbda5f4a9b1989a6a010f9d50eb35cbe6ada421..c5903447ac12ecd673bfc383837d3e6ac40d0129 100644
--- a/source/App/SubpicMergeApp/SubpicMergeApp.cpp
+++ b/source/App/SubpicMergeApp/SubpicMergeApp.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -52,6 +52,9 @@
 #include "NALwrite.h"
 #include "AnnexBwrite.h"
 #include "SubpicMergeApp.h"
+#include "SEIread.h"
+#include "SEIEncoder.h"
+#include "SEIwrite.h"
 
 
  //! \ingroup SubpicMergeApp
@@ -82,6 +85,7 @@ struct Subpicture {
   PicHeader                            picHeader;
   std::vector<Slice>                   slices;
   std::vector<OutputBitstream>         sliceData;
+  SEI                                  *decodedPictureHashSei;
 };
 
 
@@ -166,9 +170,7 @@ bool SubpicMergeApp::isNewPicture(std::ifstream *bitstreamFile, InputByteStream
 
         // NUT that indicate the start of a new picture
         case NAL_UNIT_ACCESS_UNIT_DELIMITER:
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
         case NAL_UNIT_OPI:
-#endif
         case NAL_UNIT_DCI:
         case NAL_UNIT_VPS:
         case NAL_UNIT_SPS:
@@ -177,7 +179,7 @@ bool SubpicMergeApp::isNewPicture(std::ifstream *bitstreamFile, InputByteStream
           ret = true;
           finished = true;
           break;
-        
+
         // NUT that are not the start of a new picture
         case NAL_UNIT_CODED_SLICE_TRAIL:
         case NAL_UNIT_CODED_SLICE_STSA:
@@ -191,9 +193,6 @@ bool SubpicMergeApp::isNewPicture(std::ifstream *bitstreamFile, InputByteStream
         case NAL_UNIT_CODED_SLICE_CRA:
         case NAL_UNIT_CODED_SLICE_GDR:
         case NAL_UNIT_RESERVED_IRAP_VCL_11:
-#if !JVET_S0163_ON_TARGETOLS_SUBLAYERS
-        case NAL_UNIT_RESERVED_IRAP_VCL_12:
-#endif
           ret = checkPictureHeaderInSliceHeaderFlag(nalu);
           finished = true;
           break;
@@ -207,7 +206,7 @@ bool SubpicMergeApp::isNewPicture(std::ifstream *bitstreamFile, InputByteStream
           ret = false;
           finished = true;
           break;
-        
+
         // NUT that might indicate the start of a new picture - keep looking
         case NAL_UNIT_PREFIX_APS:
         case NAL_UNIT_PREFIX_SEI:
@@ -222,7 +221,7 @@ bool SubpicMergeApp::isNewPicture(std::ifstream *bitstreamFile, InputByteStream
       }
     }
   }
-  
+
   // restore previous stream location - minus 3 due to the need for the annexB parser to read three extra bytes
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
   bitstreamFile->clear();
@@ -302,6 +301,27 @@ void SubpicMergeApp::parseAPS(HLSyntaxReader &hlsReader, ParameterSetManager &ps
   msg( INFO, "  APS%i", apsId);
 }
 
+/**
+  - Parse SEI message
+*/
+void SubpicMergeApp::parseSEI(SEIReader &seiReader, InputNALUnit &nalu, const VPS *vps, const SPS *sps, SEI *&decodePictureHashSei)
+{
+  SEIMessages seis;
+  HRD hrd;
+
+  seiReader.parseSEImessage(seiReader.getBitstream(), seis, nalu.m_nalUnitType, nalu.m_nuhLayerId, nalu.m_temporalId, vps, sps, hrd, 0);
+
+  decodePictureHashSei = nullptr;
+  for (auto& s : seis)
+  {
+    if (s->payloadType() == SEI::DECODED_PICTURE_HASH)
+    {
+      decodePictureHashSei = s;
+      break;
+    }
+  }
+}
+
 /**
   - Parse picture header
 */
@@ -339,10 +359,12 @@ void SubpicMergeApp::parseSliceHeader(HLSyntaxReader &hlsReader, InputNALUnit &n
 /**
   - Decode NAL unit if it is parameter set or picture header, or decode slice header of VLC NAL unit
  */
-void SubpicMergeApp::decodeNalu(Subpicture &subpic, InputNALUnit &nalu)
+void SubpicMergeApp::decodeNalu(Subpicture &subpic, InputNALUnit &nalu, SEI *&decodePictureHashSei)
 {
   HLSyntaxReader hlsReader;
+  SEIReader seiReader;
   hlsReader.setBitstream(&nalu.getBitstream());
+  seiReader.setBitstream(&nalu.getBitstream());
   int apsId;
   int apsType;
 
@@ -366,7 +388,18 @@ void SubpicMergeApp::decodeNalu(Subpicture &subpic, InputNALUnit &nalu)
     break;
   case NAL_UNIT_PH:
     parsePictureHeader(hlsReader, subpic.picHeader, subpic.psManager);
-  break;
+    break;
+  case NAL_UNIT_SUFFIX_SEI:
+    parseSEI(seiReader, nalu, subpic.slices.front().getVPS(), subpic.slices.front().getSPS(), decodePictureHashSei);
+    if (decodePictureHashSei != nullptr)
+    {
+      msg( INFO, "  hash SEI");
+    }
+    else
+    {
+      msg( INFO, "  suffix SEI");
+    }
+    break;
   default:
     if (nalu.isVcl())
     {
@@ -376,11 +409,11 @@ void SubpicMergeApp::decodeNalu(Subpicture &subpic, InputNALUnit &nalu)
     }
     else if (nalu.isSei())
     {
-      msg( INFO, "  SEI");
+      msg( INFO, "  prefix SEI");
     }
     else
     {
-      msg( INFO, "  NNN");  // Any other NAL unit that is not handled above
+      msg( INFO, "  ignored NALU");  // Any other NAL unit that is not handled above
     }
     break;
   }
@@ -403,6 +436,7 @@ void SubpicMergeApp::parseSubpic(Subpicture &subpic, bool &morePictures)
   subpic.slices.clear();
   subpic.sliceData.clear();
   subpic.firstSliceInPicture = true;
+  subpic.decodedPictureHashSei = nullptr;
 
   bool eof = false;
 
@@ -431,7 +465,7 @@ void SubpicMergeApp::parseSubpic(Subpicture &subpic, bool &morePictures)
     }
 
     read(nalu);  // Convert nalu payload to RBSP and parse nalu header
-    decodeNalu(subpic, nalu);
+    decodeNalu(subpic, nalu, subpic.decodedPictureHashSei);
 
     if (nalu.isVcl())
     {
@@ -448,7 +482,7 @@ void SubpicMergeApp::generateMergedStreamVPSes(std::vector<VPS*> &vpsList)
 {
   for (auto vpsId : m_subpics->at(0).vpsIds)
   {
-    // Create new SPS based on the SPS from the first subpicture 
+    // Create new VPS based on the VPS from the first subpicture
     vpsList.push_back(new VPS(*m_subpics->at(0).psManager.getVPS(vpsId)));
     VPS &vps = *vpsList.back();
 
@@ -493,7 +527,7 @@ void SubpicMergeApp::generateMergedStreamSPSes(std::vector<SPS*> &spsList)
 
   for (auto spsId : m_subpics->at(0).spsIds)
   {
-    // Create new SPS based on the SPS from the first subpicture 
+    // Create new SPS based on the SPS from the first subpicture
     spsList.push_back(new SPS(*m_subpics->at(0).psManager.getSPS(spsId)));
     SPS &sps = *spsList.back();
 
@@ -535,25 +569,59 @@ void SubpicMergeApp::getTileDimensions(std::vector<int> &tileWidths, std::vector
   std::vector<int> tileX;
   std::vector<int> tileY;
 
+  // Add subpicture boundaries as tile boundaries
+  for (auto &subpic : *m_subpics)
+  {
+    bool addTileXForCurrentSubpic = true;
+    bool addTileYForCurrentSubpic = true;
+
+    // Check if the current subpic boundary needs to be added as a tile boundary
+    for (auto &subpicScan : *m_subpics)
+    {
+      if (subpic.topLeftCornerX >= subpicScan.topLeftCornerX && (subpic.topLeftCornerX + subpic.width) <= (subpicScan.topLeftCornerX + subpicScan.width) && subpic.width < subpicScan.width)
+      {
+        addTileXForCurrentSubpic = false;
+      }
+      if (subpic.topLeftCornerY >= subpicScan.topLeftCornerY && (subpic.topLeftCornerY + subpic.height) <= (subpicScan.topLeftCornerY + subpicScan.height) && subpic.height < subpicScan.height)
+      {
+        addTileYForCurrentSubpic = false;
+      }
+    }
+
+    if (addTileXForCurrentSubpic)
+    {
+      tileX.push_back(subpic.topLeftCornerX);
+    }
+    if (addTileYForCurrentSubpic)
+    {
+      tileY.push_back(subpic.topLeftCornerY);
+    }
+  }
+
+  // Add tile boundaries from tiles within subpictures
   for (auto &subpic : *m_subpics)
   {
-    tileX.push_back(subpic.topLeftCornerX);
-    tileY.push_back(subpic.topLeftCornerY);
     const PPS &pps = *subpic.slices[0].getPPS();
     if (!pps.getNoPicPartitionFlag())
     {
-      int x = subpic.topLeftCornerX;
-      for (int i = 0; i < pps.getNumTileColumns(); i++)
+      if (pps.getNumTileColumns() > 1)
       {
-        x += pps.getTileColumnWidth(i) * pps.getCtuSize();
-        tileX.push_back(x);
+        int x = subpic.topLeftCornerX;
+        for (int i = 0; i < pps.getNumTileColumns(); i++)
+        {
+          x += pps.getTileColumnWidth(i) * pps.getCtuSize();
+          tileX.push_back(x);
+        }
       }
 
-      int y = subpic.topLeftCornerY;
-      for (int i = 0; i < pps.getNumTileRows(); i++)
+      if (pps.getNumTileRows() > 1)
       {
-        y += pps.getTileRowHeight(i) * pps.getCtuSize();
-        tileY.push_back(y);
+        int y = subpic.topLeftCornerY;
+        for (int i = 0; i < pps.getNumTileRows(); i++)
+        {
+          y += pps.getTileRowHeight(i) * pps.getCtuSize();
+          tileY.push_back(y);
+        }
       }
     }
   }
@@ -601,7 +669,7 @@ void SubpicMergeApp::generateMergedStreamPPSes(ParameterSetManager &psManager, s
 
   for (auto ppsId : m_subpics->at(0).ppsIds)
   {
-    // Create new PPS based on the PPS from the first subpicture 
+    // Create new PPS based on the PPS from the first subpicture
     ppsList.push_back(new PPS(*m_subpics->at(0).psManager.getPPS(ppsId)));
     PPS &pps = *ppsList.back();
     SPS &sps = *psManager.getSPS(pps.getSPSId());
@@ -642,52 +710,82 @@ void SubpicMergeApp::generateMergedStreamPPSes(ParameterSetManager &psManager, s
     pps.setTileIdxDeltaPresentFlag(true);
     pps.initRectSlices( );
 
-    unsigned int numTileColsInPic = pps.getNumTileColumns();
-
-    unsigned int sliceIdx = 0;
+    pps.setSingleSlicePerSubPicFlag(true);
     for (auto &subpic : *m_subpics)
     {
-      unsigned int tileIdxY = 0;
-      for (unsigned int tileY = 0; tileY != subpic.topLeftCornerY && tileIdxY < tileHeights.size(); tileIdxY++)
+      const PPS &subpicPPS = *subpic.slices[0].getPPS();
+      if (subpicPPS.getNumSlicesInPic() > 1)
       {
-        tileY += tileHeights[tileIdxY];
+        pps.setSingleSlicePerSubPicFlag(false);
+        break;
       }
-      CHECK(tileIdxY == tileHeights.size(), "Could not find subpicture to tile border match");
+    }
+
+    if (!pps.getSingleSlicePerSubPicFlag())
+    {
+      unsigned int numTileColsInPic = pps.getNumTileColumns();
 
-      unsigned int tileIdxX = 0;
-      for (unsigned int tileX = 0; tileX != subpic.topLeftCornerX && tileIdxX < tileWidths.size(); tileIdxX++)
+      unsigned int sliceIdx = 0;
+      for (auto& subpic : *m_subpics)
       {
-        tileX += tileWidths[tileIdxX];
-      }
-      CHECK(tileIdxX == tileWidths.size(), "Could not find subpicture to tile border match")
+        unsigned int tileIdxY = 0;
+        for (unsigned int tileY = 0; tileIdxY < tileHeights.size(); tileIdxY++)
+        {
+          if (tileY == subpic.topLeftCornerY || (tileY + tileHeights[tileIdxY]) == (subpic.topLeftCornerY + subpic.height) ||
+              (tileY < subpic.topLeftCornerY && (tileY + tileHeights[tileIdxY]) >(subpic.topLeftCornerY + subpic.height)))
+          {
+            break;
+          }
+          tileY += tileHeights[tileIdxY];
+        }
+        CHECK(tileIdxY == tileHeights.size(), "Could not find subpicture to tile mapping");
 
-      const PPS &subpicPPS = *subpic.slices[0].getPPS();
+        unsigned int tileIdxX = 0;
+        for (unsigned int tileX = 0; tileIdxX < tileWidths.size(); tileIdxX++)
+        {
+          if (tileX == subpic.topLeftCornerX || (tileX + tileWidths[tileIdxX]) == (subpic.topLeftCornerX + subpic.width) ||
+              (tileX < subpic.topLeftCornerX && (tileX + tileWidths[tileIdxX]) >(subpic.topLeftCornerX + subpic.width)))
+          {
+            break;
+          }
+          tileX += tileWidths[tileIdxX];
+        }
+        CHECK(tileIdxX == tileWidths.size(), "Could not find subpicture to tile mapping")
 
-      if (subpicPPS.getNumSlicesInPic() == 1)
-      {
-        pps.setSliceWidthInTiles(sliceIdx, subpicPPS.getNumTileColumns());
-        pps.setSliceHeightInTiles(sliceIdx, subpicPPS.getNumTileRows());
-        pps.setNumSlicesInTile(sliceIdx, 1);
-        unsigned int sliceTileIdx = tileIdxY * numTileColsInPic + tileIdxX;
-        pps.setSliceTileIdx(sliceIdx, sliceTileIdx);
-        pps.setSliceHeightInCtu(sliceIdx, subpicPPS.getPicHeightInCtu());
-
-        sliceIdx++;
-      }
-      else
-      {
-        for (int subpicSliceIdx = 0; subpicSliceIdx < subpicPPS.getNumSlicesInPic(); subpicSliceIdx++, sliceIdx++)
+        const PPS& subpicPPS = *subpic.slices[0].getPPS();
+
+        if (subpicPPS.getNumSlicesInPic() == 1)
         {
-          pps.setSliceWidthInTiles(sliceIdx, subpicPPS.getSliceWidthInTiles(subpicSliceIdx));
-          pps.setSliceHeightInTiles(sliceIdx, subpicPPS.getSliceHeightInTiles(subpicSliceIdx));
-          pps.setNumSlicesInTile(sliceIdx, subpicPPS.getNumSlicesInTile(subpicSliceIdx));
-          unsigned int sliceTileIdxSubpic = subpicPPS.getSliceTileIdx(subpicSliceIdx);
-          unsigned int sliceTileIdx = (sliceTileIdxSubpic / subpicPPS.getNumTileColumns() + tileIdxY) * numTileColsInPic + tileIdxX + (sliceTileIdxSubpic % subpicPPS.getNumTileColumns());
+          pps.setSliceWidthInTiles(sliceIdx, subpicPPS.getNumTileColumns());
+          pps.setSliceHeightInTiles(sliceIdx, subpicPPS.getNumTileRows());
+          pps.setNumSlicesInTile(sliceIdx, 1);
+          unsigned int sliceTileIdx = tileIdxY * numTileColsInPic + tileIdxX;
           pps.setSliceTileIdx(sliceIdx, sliceTileIdx);
-          pps.setSliceHeightInCtu(sliceIdx, subpicPPS.getSliceHeightInCtu(subpicSliceIdx));
+          pps.setSliceHeightInCtu(sliceIdx, subpicPPS.getPicHeightInCtu());
+
+          sliceIdx++;
+        }
+        else
+        {
+          for (int subpicSliceIdx = 0; subpicSliceIdx < subpicPPS.getNumSlicesInPic(); subpicSliceIdx++, sliceIdx++)
+          {
+            pps.setSliceWidthInTiles(sliceIdx, subpicPPS.getSliceWidthInTiles(subpicSliceIdx));
+            pps.setSliceHeightInTiles(sliceIdx, subpicPPS.getSliceHeightInTiles(subpicSliceIdx));
+            pps.setNumSlicesInTile(sliceIdx, subpicPPS.getNumSlicesInTile(subpicSliceIdx));
+            unsigned int sliceTileIdxSubpic = subpicPPS.getSliceTileIdx(subpicSliceIdx);
+            unsigned int sliceTileIdx = (sliceTileIdxSubpic / subpicPPS.getNumTileColumns() + tileIdxY) * numTileColsInPic + tileIdxX + (sliceTileIdxSubpic % subpicPPS.getNumTileColumns());
+            pps.setSliceTileIdx(sliceIdx, sliceTileIdx);
+            pps.setSliceHeightInCtu(sliceIdx, subpicPPS.getSliceHeightInCtu(subpicSliceIdx));
+          }
         }
       }
     }
+    else
+    {
+      pps.setTileIdxDeltaPresentFlag(false);
+    }
+
+    pps.initRectSliceMap(&sps);
 
     pps.setLoopFilterAcrossTilesEnabledFlag(false);
     pps.setLoopFilterAcrossSlicesEnabledFlag(false);
@@ -708,8 +806,8 @@ void SubpicMergeApp::updateSliceHeadersForMergedStream(ParameterSetManager &psMa
       // Update slice headers to use new SPSes and PPSes
       int ppsId = slice.getPPS()->getPPSId();
       int spsId = slice.getSPS()->getSPSId();
-      CHECK(!psManager.getSPS(spsId), "Invaldi SPS");
-      CHECK(!psManager.getSPS(ppsId), "Invaldi PPS");
+      CHECK(!psManager.getSPS(spsId), "Invalid SPS");
+      CHECK(!psManager.getPPS(ppsId), "Invalid PPS");  // validate the PPS id against the PPS table (it was looked up in the SPS table before)
       slice.setSPS(psManager.getSPS(spsId));
       slice.setPPS(psManager.getPPS(ppsId));
 
@@ -801,7 +899,7 @@ Subpicture &SubpicMergeApp::selectSubpicForPicHeader(bool isMixedNaluPic)
   Subpicture *subpicToReturn = NULL;
   bool IRAPFound = false;
 
-  // Find first non-IRAP subpicture 
+  // Find first non-IRAP subpicture
   for (auto &subpic : *m_subpics)
   {
     if (subpic.slices[0].isIRAP())
@@ -906,9 +1004,6 @@ void SubpicMergeApp::generateMergedPic(ParameterSetManager &psManager, bool mixe
     }
   }
 
-  // Don't copy SEI NAL units - many of them would be incorrect for merged stream
-  //copyNalUnitsToAccessUnit(accessUnit, subpic.nalus, (int)NAL_UNIT_PREFIX_SEI);
-
   updateSliceHeadersForMergedStream(psManager);
 
   // Code merged stream prefix APS NAL units
@@ -967,8 +1062,36 @@ void SubpicMergeApp::generateMergedPic(ParameterSetManager &psManager, bool mixe
     }
   }
 
-  // Don't copy SEIs - many of them would be incorrect for merged stream
-  // copyNalUnitsToAccessUnit(accessUnit, subpic.nalus, (int)NAL_UNIT_SUFFIX_SEI);
+  // Code Decoded picture hash SEI messages within Scalable nesting SEI messages
+  uint32_t layerId = m_subpics->at(0).slices[0].getNalUnitLayerId();
+  uint32_t temporalId = m_subpics->at(0).slices[0].getTLayer();
+  int subpicId = 0;
+  for (auto& subpic : *m_subpics)
+  {
+    if (subpic.decodedPictureHashSei != nullptr)
+    {
+      SEIEncoder seiEncoder;
+      SEIWriter seiWriter;
+      SEIMessages seiMessages;
+      SEIMessages nestedSEI;
+      HRD hrd;
+      nestedSEI.push_back(subpic.decodedPictureHashSei);
+      const std::vector<uint16_t> subPicIds = { (uint16_t)subpicId };
+      std::vector<int> targetOLS;
+      std::vector<int> targetLayers = { (int)subpic.nalus[0].m_nuhLayerId };
+      SEIScalableNesting *nestingSEI = new SEIScalableNesting();
+      seiEncoder.init(0, 0, 0);
+      const uint16_t maxSubpicIdInPic =
+        subPicIds.size() == 0 ? 0 : *std::max_element(subPicIds.begin(), subPicIds.end());
+      seiEncoder.initSEIScalableNesting(nestingSEI, nestedSEI, targetOLS, targetLayers, subPicIds, maxSubpicIdInPic);
+      OutputNALUnit nalu( NAL_UNIT_SUFFIX_SEI, layerId, temporalId );
+      seiMessages.push_back(nestingSEI);
+      seiWriter.writeSEImessages(nalu.m_Bitstream, seiMessages, hrd, false, temporalId);
+      accessUnit.push_back(new NALUnitEBSP(nalu));
+    }
+    subpicId++;
+  }
+
   copyNalUnitsToAccessUnit(accessUnit, subpic0.nalus, (int)NAL_UNIT_EOS);
   copyNalUnitsToAccessUnit(accessUnit, subpic0.nalus, (int)NAL_UNIT_EOB);
 
diff --git a/source/App/SubpicMergeApp/SubpicMergeApp.h b/source/App/SubpicMergeApp/SubpicMergeApp.h
index f293993d01dbc8d72be871e59b9a18277313822b..2a5285e3a88c1fbb208c6fd31879c56ff88a5cef 100644
--- a/source/App/SubpicMergeApp/SubpicMergeApp.h
+++ b/source/App/SubpicMergeApp/SubpicMergeApp.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -56,6 +56,8 @@ struct SubpicParams {
 struct Subpicture;
 class InputByteStream;
 class HLSyntaxReader;
+class SEIReader;
+class SEI;
 class DCI;
 class ParameterSetManager;
 class PicHeader;
@@ -92,9 +94,10 @@ private:
   int parseSPS(HLSyntaxReader &hlsReader, ParameterSetManager &psManager);
   int parsePPS(HLSyntaxReader &hlsReader, ParameterSetManager &psManager);
   void parseAPS(HLSyntaxReader &hlsReader, ParameterSetManager &psManager, int &apsId, int &apsType);
+  void parseSEI(SEIReader& seiReader, InputNALUnit &nalu, const VPS *vps, const SPS *sps, SEI *&decodePictureHashSei);
   void parsePictureHeader(HLSyntaxReader &hlsReader, PicHeader &picHeader, ParameterSetManager &psManager);
   void parseSliceHeader(HLSyntaxReader &hlsReader, InputNALUnit &nalu, Slice &slice, PicHeader &picHeader, OutputBitstream &sliceData, ParameterSetManager &psManager, int prevTid0Poc);
-  void decodeNalu(Subpicture &subpic, InputNALUnit &nalu);
+  void decodeNalu(Subpicture &subpic, InputNALUnit &nalu, SEI *&decodePictureHashSei);
   void parseSubpic(Subpicture &subpic, bool &morePictures);
   void generateMergedStreamVPSes(std::vector<VPS*> &vpsList);
   int computeSubPicIdLen(int numSubpics);
diff --git a/source/App/SubpicMergeApp/SubpicMergeMain.cpp b/source/App/SubpicMergeApp/SubpicMergeMain.cpp
index 6b37d09c80d626673058a9cf957dd184ffb4c4ec..51f4587b3648467dc4ce57c146d5e95dbc0ccea2 100644
--- a/source/App/SubpicMergeApp/SubpicMergeMain.cpp
+++ b/source/App/SubpicMergeApp/SubpicMergeMain.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonAnalyserLib/CMakeLists.txt b/source/Lib/CommonAnalyserLib/CMakeLists.txt
index e915720f26f5944a1787f05472417cfea5f83d7d..40471bb3e5fd2eb804e5c7f033e68ad4fd179179 100644
--- a/source/Lib/CommonAnalyserLib/CMakeLists.txt
+++ b/source/Lib/CommonAnalyserLib/CMakeLists.txt
@@ -64,31 +64,9 @@ if( SET_ENABLE_TRACING )
     target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_TRACING=0 )
   endif()
 endif()
-
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-  target_include_directories( ${LIB_NAME} PUBLIC ${OpenMP_CXX_INCLUDE_DIRS} )
-  target_link_libraries( ${LIB_NAME} ${OpenMP_CXX_LIBRARIES} )
-else()
-  target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
   
 target_include_directories( ${LIB_NAME} PUBLIC ../CommonLib/. ../CommonLib/.. ../CommonLib/x86 ../libmd5 )
-target_link_libraries( ${LIB_NAME} Threads::Threads )
+target_link_libraries( ${LIB_NAME} )
 
 # set needed compile definitions
 set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE41 )
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
index 91b0490bd400d704bedeab8ca3c74178aea977b0..61105f0c0e8307a7c6ae1b1195fbd83c3acaac6f 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -435,7 +435,7 @@ void AdaptiveLoopFilter::ALFProcess(CodingStructure& cs)
       const CodingUnit *cu = cs.getCU( Position(xPos, yPos), CHANNEL_TYPE_LUMA );
 
       // skip this CTU if ALF is disabled
-      if (!cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Y) && !cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) && !cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr))
+      if (!cu->slice->getAlfEnabledFlag(COMPONENT_Y) && !cu->slice->getAlfEnabledFlag(COMPONENT_Cb) && !cu->slice->getAlfEnabledFlag(COMPONENT_Cr))
       {
         ctuIdx++;
         continue;
@@ -445,7 +445,7 @@ void AdaptiveLoopFilter::ALFProcess(CodingStructure& cs)
       if(ctuIdx == 0 || lastSliceIdx != cu->slice->getSliceID() || alfCtuFilterIndex==nullptr)
       {
         cs.slice = cu->slice;
-        reconstructCoeffAPSs(cs, true, cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) || cu->slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr), false);
+        reconstructCoeffAPSs(cs, true, cu->slice->getAlfEnabledFlag(COMPONENT_Cb) || cu->slice->getAlfEnabledFlag(COMPONENT_Cr), false);
         alfCtuFilterIndex = cu->slice->getPic()->getAlfCtbFilterIndex();
         m_ccAlfFilterParam = cu->slice->m_ccAlfFilterParam;
       }
@@ -627,9 +627,9 @@ void AdaptiveLoopFilter::reconstructCoeffAPSs(CodingStructure& cs, bool luma, bo
   APS* curAPS;
   if (luma)
   {
-    for (int i = 0; i < cs.slice->getTileGroupNumAps(); i++)
+    for (int i = 0; i < cs.slice->getNumAlfApsIdsLuma(); i++)
     {
-      int apsIdx = cs.slice->getTileGroupApsIdLuma()[i];
+      int apsIdx = cs.slice->getAlfApsIdsLuma()[i];
       curAPS = aps[apsIdx];
       CHECK(curAPS == NULL, "invalid APS");
       alfParamTmp = curAPS->getAlfAPSParam();
@@ -642,7 +642,7 @@ void AdaptiveLoopFilter::reconstructCoeffAPSs(CodingStructure& cs, bool luma, bo
   //chroma
   if (chroma)
   {
-    int apsIdxChroma = cs.slice->getTileGroupApsIdChroma();
+    int apsIdxChroma = cs.slice->getAlfApsIdChroma();
     curAPS = aps[apsIdxChroma];
     m_alfParamChroma = &curAPS->getAlfAPSParam();
     alfParamTmp = *m_alfParamChroma;
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.h b/source/Lib/CommonLib/AdaptiveLoopFilter.h
index 4df4485d9dbd6a5226b2c14477f53d6eadd6a30d..aa8284b44f5f8767b4e3653887329f782b84cae7 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.h
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/AffineGradientSearch.cpp b/source/Lib/CommonLib/AffineGradientSearch.cpp
index 90d939ac738f2a25b125f1952e300b6e4fa5f857..0764936ebf1a416f54d24883444f3ffc6e02a405 100644
--- a/source/Lib/CommonLib/AffineGradientSearch.cpp
+++ b/source/Lib/CommonLib/AffineGradientSearch.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/AffineGradientSearch.h b/source/Lib/CommonLib/AffineGradientSearch.h
index 380db32074491e625fce3d06ec4e02bd18e0ab9f..cc04bff904dcab100e44947c96438856ece9e941 100644
--- a/source/Lib/CommonLib/AffineGradientSearch.h
+++ b/source/Lib/CommonLib/AffineGradientSearch.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/AlfParameters.h b/source/Lib/CommonLib/AlfParameters.h
index 3bf050d84d5bcf662bfdec38902e423de8662adc..5cec8edf5c76795dfcf9e740699c82e1ffa514a7 100644
--- a/source/Lib/CommonLib/AlfParameters.h
+++ b/source/Lib/CommonLib/AlfParameters.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -112,7 +112,7 @@ struct AlfFilterShape
     else
     {
       filterType = ALF_NUM_OF_FILTER_TYPES;
-      CHECK( 0, "Wrong ALF filter shape" );
+      THROW("Wrong ALF filter shape");
     }
   }
 
diff --git a/source/Lib/CommonLib/BitStream.cpp b/source/Lib/CommonLib/BitStream.cpp
index 58a3360b3e5f9b22af1651219ae8f8a83d7654e3..f89692d5a6cc8cacfb8c893255516f6bbfef0ecc 100644
--- a/source/Lib/CommonLib/BitStream.cpp
+++ b/source/Lib/CommonLib/BitStream.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/BitStream.h b/source/Lib/CommonLib/BitStream.h
index bce5feadcb227b56dbcf2c757ddba14279f6ca91..5688c689c4e73d424ca394bf5b2641c6d4bcd81c 100644
--- a/source/Lib/CommonLib/BitStream.h
+++ b/source/Lib/CommonLib/BitStream.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp
index b691f54e418e4ca15cce115389caf677a5c4655b..a72afc49e190ebe3d25b2dafd341cb53f0cf273a 100644
--- a/source/Lib/CommonLib/Buffer.cpp
+++ b/source/Lib/CommonLib/Buffer.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -249,6 +249,40 @@ void removeHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStri
 #undef REM_HF_OP
 #undef REM_HF_OP_CLIP
 }
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+void removeWeightHighFreq_HBD(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, int shift, int bcwWeight)   // in place, per sample: dst = (dst*weight0 - src*weight1 + 2^15) >> 16; NOTE(review): 'shift' is not referenced directly in this body
+{
+  Intermediate_Int normalizer = ((1 << 16) + (bcwWeight > 0 ? (bcwWeight >> 1) : -(bcwWeight >> 1))) / bcwWeight;   // 16-bit fixed-point reciprocal of bcwWeight with a half-weight offset added before dividing; bcwWeight must be non-zero
+  Intermediate_Int weight0 = normalizer << g_BcwLog2WeightBase;   // multiplier applied to dst
+  Intermediate_Int weight1 = (g_BcwWeightBase - bcwWeight)*normalizer;   // multiplier applied to src
+#define REM_HF_INC  \
+  src += srcStride; \
+  dst += dstStride; \
+
+#define REM_HF_OP( ADDR )      dst[ADDR] =             (Pel)((dst[ADDR]*weight0 - src[ADDR]*weight1 + (1<<15))>>16)
+
+  SIZE_AWARE_PER_EL_OP(REM_HF_OP, REM_HF_INC);   // applies REM_HF_OP to the block and REM_HF_INC to step rows (macro defined elsewhere)
+
+#undef REM_HF_INC
+#undef REM_HF_OP
+#undef REM_HF_OP_CLIP   // NOTE(review): REM_HF_OP_CLIP is never defined in this function, so this #undef has no effect here
+}
+
+void removeHighFreq_HBD(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height)   // in place, per sample: dst = 2*dst - src (no clipping)
+{
+#define REM_HF_INC  \
+  src += srcStride; \
+  dst += dstStride; \
+
+#define REM_HF_OP( ADDR )      dst[ADDR] =             2 * dst[ADDR] - src[ADDR]
+
+  SIZE_AWARE_PER_EL_OP(REM_HF_OP, REM_HF_INC);   // applies REM_HF_OP to the block and REM_HF_INC to step rows (macro defined elsewhere)
+
+#undef REM_HF_INC
+#undef REM_HF_OP
+#undef REM_HF_OP_CLIP   // NOTE(review): REM_HF_OP_CLIP is never defined in this function, so this #undef has no effect here
+}
+#endif
 #endif
 
 template<typename T>
@@ -299,10 +333,17 @@ PelBufferOps::PelBufferOps()
   copyBuffer = copyBufferCore;
   padding = paddingCore;
 #if ENABLE_SIMD_OPT_BCW
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+  removeWeightHighFreq8 = removeWeightHighFreq_HBD;
+  removeWeightHighFreq4 = removeWeightHighFreq_HBD;
+  removeHighFreq8 = removeHighFreq_HBD;
+  removeHighFreq4 = removeHighFreq_HBD;
+#else
   removeWeightHighFreq8 = removeWeightHighFreq;
   removeWeightHighFreq4 = removeWeightHighFreq;
   removeHighFreq8 = removeHighFreq;
   removeHighFreq4 = removeHighFreq;
+#endif
 #endif
 
   profGradFilter = gradFilterCore <false>;
@@ -381,15 +422,15 @@ void AreaBuf<Pel>::rspSignal(std::vector<Pel>& pLUT)
 {
   Pel* dst = buf;
   Pel* src = buf;
-    for (unsigned y = 0; y < height; y++)
+  for (unsigned y = 0; y < height; y++)
+  {
+    for (unsigned x = 0; x < width; x++)
     {
-      for (unsigned x = 0; x < width; x++)
-      {
-        dst[x] = pLUT[src[x]];
-      }
-      dst += stride;
-      src += stride;
+      dst[x] = pLUT[src[x]];
     }
+    dst += stride;
+    src += stride;
+  }
 }
 
 template<>
@@ -443,6 +484,61 @@ void AreaBuf<Pel>::scaleSignal(const int scale, const bool dir, const ClpRng& cl
   }
 }
 
+template<>
+void AreaBuf<Pel>::applyLumaCTI(std::vector<Pel>& pLUTY)   // remaps every sample of this buffer in place through the lookup table pLUTY
+{
+  Pel* dst = buf;
+  Pel* src = buf;   // same storage as dst: the mapping is applied in place
+  for (unsigned y = 0; y < height; y++)
+  {
+    for (unsigned x = 0; x < width; x++)
+    {
+      dst[x] = pLUTY[src[x]];   // the sample value is the table index (unchecked), so pLUTY must cover every value present in the buffer
+    }
+    dst += stride;   // step both pointers to the next row
+    src += stride;
+  }
+}
+
+template<>
+void AreaBuf<Pel>::applyChromaCTI(Pel* bufY, int strideY, std::vector<Pel>& pLUTC, int bitDepth, ChromaFormat chrFormat, bool fwdMap)   // scales this chroma buffer in place around mid-level using a luma-indexed table; fwdMap divides by the scale, otherwise multiplies
+{
+  int range = 1 << bitDepth;   // number of sample values for this bit depth
+  int offset = range / 2;   // mid-level that the chroma samples are scaled around
+  int sx = 1 << getComponentScaleX(COMPONENT_Cb, chrFormat);   // horizontal step used to index bufY for each chroma sample
+  int sy = 1 << getComponentScaleY(COMPONENT_Cb, chrFormat);   // vertical step used to index bufY for each chroma sample
+
+  Pel* dst = buf;
+  Pel* src = buf;   // same storage as dst: the mapping is applied in place
+  if (fwdMap)
+  {
+    for (unsigned y = 0; y < height; y++)
+    {
+      for (unsigned x = 0; x < width; x++)
+      {
+        int pelY = bufY[sy * y * strideY + sx * x];   // luma sample at the position matching this chroma sample
+        double scale = (double)pLUTC[pelY] / (double)(1 << CSCALE_FP_PREC);   // table entry is fixed point with CSCALE_FP_PREC fractional bits
+        dst[x] = (Pel)Clip3(0.0, (double)(range - 1), offset + (double)(src[x] - offset) / scale + .5);   // clip in double BEFORE narrowing so an out-of-range result cannot overflow Pel
+      }
+      dst += stride;
+      src += stride;
+    }
+  }
+  else
+  {
+    for (unsigned y = 0; y < height; y++)
+    {
+      for (unsigned x = 0; x < width; x++)
+      {
+        int pelY = bufY[sy * y * strideY + sx * x];   // luma sample at the position matching this chroma sample
+        int scal = pLUTC[pelY];   // fixed-point scale, used directly in integer arithmetic
+        dst[x] = Clip3(0, range - 1, ((offset << CSCALE_FP_PREC) + (src[x] - offset) * scal + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC);   // rounded fixed-point multiply, clipped to the valid sample range
+      }
+      dst += stride;
+      src += stride;
+    }
+  }
+}
 template<>
 void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng)
 {
@@ -831,57 +927,57 @@ void UnitBuf<Pel>::colorSpaceConvert(const UnitBuf<Pel> &other, const bool forwa
   CHECK(other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cb].stride || other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cr].stride, "unequal stride for 444 content");
   CHECK(bufs[COMPONENT_Y].width != other.bufs[COMPONENT_Y].width || bufs[COMPONENT_Y].height != other.bufs[COMPONENT_Y].height, "unequal block size")
 
-    if (forward)
+  if (forward)
+  {
+    for (int y = 0; y < height; y++)
     {
-      for (int y = 0; y < height; y++)
+      for (int x = 0; x < width; x++)
       {
-        for (int x = 0; x < width; x++)
-        {
-          r = pOrg2[x];
-          g = pOrg0[x];
-          b = pOrg1[x];
-
-          co = r - b;
-          int t = b + (co >> 1);
-          cg = g - t;
-          pDst0[x] = t + (cg >> 1);
-          pDst1[x] = cg;
-          pDst2[x] = co;
-        }
-        pOrg0 += strideOrg;
-        pOrg1 += strideOrg;
-        pOrg2 += strideOrg;
-        pDst0 += strideDst;
-        pDst1 += strideDst;
-        pDst2 += strideDst;
+        r = pOrg2[x];
+        g = pOrg0[x];
+        b = pOrg1[x];
+
+        co       = r - b;
+        int t    = b + (co >> 1);
+        cg       = g - t;
+        pDst0[x] = t + (cg >> 1);
+        pDst1[x] = cg;
+        pDst2[x] = co;
       }
+      pOrg0 += strideOrg;
+      pOrg1 += strideOrg;
+      pOrg2 += strideOrg;
+      pDst0 += strideDst;
+      pDst1 += strideDst;
+      pDst2 += strideDst;
     }
-    else
+  }
+  else
+  {
+    for (int y = 0; y < height; y++)
     {
-      for (int y = 0; y < height; y++)
+      for (int x = 0; x < width; x++)
       {
-        for (int x = 0; x < width; x++)
-        {
-          y0 = pOrg0[x];
-          cg = pOrg1[x];
-          co = pOrg2[x];
-
-          y0 = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, y0);
-          cg = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, cg);
-          co = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, co);
-
-          int t = y0 - (cg >> 1);
-          pDst0[x] = cg + t;
-          pDst1[x] = t - (co >> 1);
-          pDst2[x] = co + pDst1[x];
-        }
-
-        pOrg0 += strideOrg;
-        pOrg1 += strideOrg;
-        pOrg2 += strideOrg;
-        pDst0 += strideDst;
-        pDst1 += strideDst;
-        pDst2 += strideDst;
+        y0 = pOrg0[x];
+        cg = pOrg1[x];
+        co = pOrg2[x];
+
+        y0 = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, y0);
+        cg = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, cg);
+        co = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, co);
+
+        int t    = y0 - (cg >> 1);
+        pDst0[x] = cg + t;
+        pDst1[x] = t - (co >> 1);
+        pDst2[x] = co + pDst1[x];
       }
+
+      pOrg0 += strideOrg;
+      pOrg1 += strideOrg;
+      pOrg2 += strideOrg;
+      pDst0 += strideDst;
+      pDst1 += strideDst;
+      pDst2 += strideDst;
     }
+  }
 }
diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h
index 4f79060760bd2c1dca8327c0775806d57a055cdb..de50d3175a9e35a326c7aaaf8081ad536f7f669e 100644
--- a/source/Lib/CommonLib/Buffer.h
+++ b/source/Lib/CommonLib/Buffer.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -137,6 +137,8 @@ struct AreaBuf : public Size
 
   void rspSignal            ( std::vector<Pel>& pLUT );
   void scaleSignal          ( const int scale, const bool dir , const ClpRng& clpRng);
+  void applyLumaCTI(std::vector<Pel>& pLUTY);
+  void applyChromaCTI(Pel* bufY, int strideY, std::vector<Pel>& pLUTUV, int bitDepth, ChromaFormat chrFormat, bool fwdMap);
   T    computeAvg           ( ) const;
 
         T& at( const int &x, const int &y )          { return buf[y * stride + x]; }
@@ -430,11 +432,17 @@ void AreaBuf<T>::removeWeightHighFreq(const AreaBuf<T>& other, const bool bClip,
   if(!bClip)
   {
     if(!(width & 7))
+    {
       g_pelBufOP.removeWeightHighFreq8(dst, dstStride, src, srcStride, width, height, 16, bcwWeight);
+    }
     else if(!(width & 3))
+    {
       g_pelBufOP.removeWeightHighFreq4(dst, dstStride, src, srcStride, width, height, 16, bcwWeight);
+    }
     else
-      CHECK(true, "Not supported");
+    {
+      THROW("Not supported");
+    }
   }
   else
   {
@@ -479,11 +487,17 @@ void AreaBuf<T>::removeHighFreq( const AreaBuf<T>& other, const bool bClip, cons
   if (!bClip)
   {
     if(!(width & 7))
+    {
       g_pelBufOP.removeHighFreq8(dst, dstStride, src, srcStride, width, height);
+    }
     else if (!(width & 3))
+    {
       g_pelBufOP.removeHighFreq4(dst, dstStride, src, srcStride, width, height);
+    }
     else
-      CHECK(true, "Not supported");
+    {
+      THROW("Not supported");
+    }
   }
   else
   {
diff --git a/source/Lib/CommonLib/CMakeLists.txt b/source/Lib/CommonLib/CMakeLists.txt
index 6ae75c82bc979827f168cb64cef7252649220cfc..368545998da6ae99a825c95bc9348575f86fef94 100644
--- a/source/Lib/CommonLib/CMakeLists.txt
+++ b/source/Lib/CommonLib/CMakeLists.txt
@@ -62,31 +62,9 @@ if( SET_ENABLE_TRACING )
     target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_TRACING=0 )
   endif()
 endif()
-
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-  target_include_directories( ${LIB_NAME} PUBLIC ${OpenMP_CXX_INCLUDE_DIRS} )
-  target_link_libraries( ${LIB_NAME} ${OpenMP_CXX_LIBRARIES} )
-else()
-  target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
   
 target_include_directories( ${LIB_NAME} PUBLIC . .. ./x86 ../libmd5 )
-target_link_libraries( ${LIB_NAME} Threads::Threads )
+target_link_libraries( ${LIB_NAME} )
 
 # set needed compile definitions
 set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE41 )
diff --git a/source/Lib/CommonLib/CacheModel.cpp b/source/Lib/CommonLib/CacheModel.cpp
index 1f6d122247681d8ba5ff31f2226a132c67b0e29f..2bd7b26bd8a0a818dc54d2564fab16ac686289e9 100644
--- a/source/Lib/CommonLib/CacheModel.cpp
+++ b/source/Lib/CommonLib/CacheModel.cpp
@@ -449,10 +449,11 @@ void CacheModel::cacheAccess( const Pel *addr, const std::string& fileName, cons
   // check cache hit in each way
   for ( way = 0 ; way < m_numWay ; way++ )
   {
-      if ( xIsCacheHit( pos + way, cacheAddr ) ) {
-        hit = true;
-        break;
-      }
+    if (xIsCacheHit(pos + way, cacheAddr))
+    {
+      hit = true;
+      break;
+    }
   }
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE_PRINT_ACCESS_INFO
   if ( m_frameCount == JVET_J0090_MEMORY_BANDWITH_MEASURE_PRINT_FRAME )
diff --git a/source/Lib/CommonLib/CacheModel.h b/source/Lib/CommonLib/CacheModel.h
index 1150c24e0262c4a7be046b6f648de21c92d3867b..ed71d9daac0fe95d86ed751bb8c04c23afdf4c8f 100644
--- a/source/Lib/CommonLib/CacheModel.h
+++ b/source/Lib/CommonLib/CacheModel.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/ChromaFormat.cpp b/source/Lib/CommonLib/ChromaFormat.cpp
index 9a56d89e33ad88e808a2ab60ba34b7845e2bad69..beec8c4dc62cfefb92a81587663a58b2c53e7d7c 100644
--- a/source/Lib/CommonLib/ChromaFormat.cpp
+++ b/source/Lib/CommonLib/ChromaFormat.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/ChromaFormat.h b/source/Lib/CommonLib/ChromaFormat.h
index 14bc517bd07346aef7bbae95b5fb89884c9d9c8a..141351f5ae96a9c6ceff6b4acc2b19ebabeb8b31 100644
--- a/source/Lib/CommonLib/ChromaFormat.h
+++ b/source/Lib/CommonLib/ChromaFormat.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/CodingStatistics.h b/source/Lib/CommonLib/CodingStatistics.h
index 2cbb789a9654d94dba69a959efbf3376ba2b726f..42cd4d7ee9d5a94f7e933bb020ec0b5f7c9c393e 100644
--- a/source/Lib/CommonLib/CodingStatistics.h
+++ b/source/Lib/CommonLib/CodingStatistics.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp
index b655d445fa71aad3cf8b31e8fa2b46fc4b85f616..e9e084dba14ba1c60f42c2e41a77abb833b38adf 100644
--- a/source/Lib/CommonLib/CodingStructure.cpp
+++ b/source/Lib/CommonLib/CodingStructure.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -93,6 +93,10 @@ CodingStructure::CodingStructure(CUCache& cuCache, PUCache& puCache, TUCache& tu
   }
 
   m_motionBuf     = nullptr;
+#if GDR_ENABLED
+  picHeader = nullptr;
+#endif
+
   features.resize( NUM_ENC_FEATURES );
   treeType = TREE_D;
   modeType = MODE_TYPE_ALL;
@@ -144,6 +148,676 @@ void CodingStructure::releaseIntermediateData()
   clearCUs();
 }
 
+#if GDR_ENABLED
+// Reports whether this structure's luma area fully contains the column band
+// [begX, endX). An empty band (begX == endX) is never contained.
+bool CodingStructure::containRefresh(int begX, int endX) const
+{
+  if (begX == endX)
+  {
+    return false;
+  }
+
+  // The band starts at this structure's top luma row; its height is the smaller
+  // of the PPS picture height and this structure's luma height.
+  const Area band(begX, area.ly(), endX - begX,
+                  std::min(slice->getPPS()->getPicHeightInLumaSamples(), area.lheight()));
+  const Area lumaArea = area.Y();
+
+  return lumaArea.contains(band);
+}
+
+// Reports whether the column band [begX, endX), spanning this structure's luma
+// rows, overlaps the luma area according to Area::overlap(). An empty band
+// (begX == endX) never overlaps.
+bool CodingStructure::overlapRefresh(int begX, int endX) const
+{
+  if (begX == endX)
+  {
+    return false;
+  }
+
+  // Build the band with the same top row and height as this structure.
+  const Area band(begX, area.ly(), endX - begX, area.lheight());
+  const Area lumaArea = area.Y();
+
+  const bool overlaps = lumaArea.overlap(band);
+  return overlaps;
+}
+
+// Convenience overload over this structure's own luma columns.
+// NOTE(review): that band equals the luma area itself, which Area::overlap()
+// treats as containment, so this appears to always return false - confirm.
+bool CodingStructure::overlapRefresh() const
+{
+  const int firstCol = area.lx();
+  const int numCols  = area.lwidth();
+  return overlapRefresh(firstCol, firstCol + numCols);
+}
+
+// Reports whether this structure's luma area lies entirely inside the column
+// band [begX, endX), where the band shares this structure's top row and luma
+// height. An empty band (begX == endX) contains nothing.
+bool CodingStructure::withinRefresh(int begX, int endX) const
+{
+  if (begX == endX)
+  {
+    return false;
+  }
+
+  const Area band(begX, area.ly(), endX - begX, area.lheight());
+  const Area lumaArea = area.Y();
+
+  // Containment is tested from the band's point of view.
+  const bool inside = band.contains(lumaArea);
+  return inside;
+}
+
+// Reports whether the column band [begX, endX) overlaps (per Area::overlap())
+// any of the three vertical partitions of this structure's luma area: a left
+// quarter, a middle half and a right quarter of the width.
+bool CodingStructure::refreshCrossTTV(int begX, int endX) const
+{
+  const int originX = area.lx();
+  const int originY = area.ly();
+  const int width   = area.lwidth();
+  const int height  = area.lheight();
+
+  const int quarter = width >> 2;
+  const int half    = width >> 1;
+
+  const Area band(begX, originY, endX - begX, height);
+
+  // Partition widths are quarter / half / quarter, laid out left to right.
+  const Area leftPart(originX, originY, quarter, height);
+  const Area middlePart(originX + quarter, originY, half, height);
+  const Area rightPart(originX + quarter + half, originY, quarter, height);
+
+  const bool hitsLeft   = leftPart.overlap(band);
+  const bool hitsMiddle = middlePart.overlap(band);
+  const bool hitsRight  = rightPart.overlap(band);
+
+  return hitsLeft || hitsMiddle || hitsRight;
+}
+
+// Reports whether the column band [begX, endX) overlaps (per Area::overlap())
+// either of the two vertical partitions of this structure's luma area, each
+// (width >> 1) columns wide.
+bool CodingStructure::refreshCrossBTV(int begX, int endX) const
+{
+  const int originX = area.lx();
+  const int originY = area.ly();
+  const int width   = area.lwidth();
+  const int height  = area.lheight();
+
+  const int half = width >> 1;
+
+  const Area band(begX, originY, endX - begX, height);
+
+  // Left and right halves, side by side.
+  const Area leftPart(originX, originY, half, height);
+  const Area rightPart(originX + half, originY, half, height);
+
+  const bool hitsLeft  = leftPart.overlap(band);
+  const bool hitsRight = rightPart.overlap(band);
+
+  // Any single hit is enough.
+  return hitsLeft || hitsRight;
+}
+
+// Reports whether the top-left and top-right luma samples of this structure
+// disagree on isClean(), i.e. exactly one of the two corners is clean.
+bool CodingStructure::overlapDirty() const
+{
+  const Position leftCorner  = area.Y().topLeft();
+  const Position rightCorner = area.Y().topRight();
+
+  const bool leftClean  = isClean(leftCorner, CHANNEL_TYPE_LUMA);
+  const bool rightClean = isClean(rightCorner, CHANNEL_TYPE_LUMA);
+
+  // A mismatch means only one side of the structure is clean, so the
+  // structure spans both a clean and a non-clean column.
+  const bool mixed = (leftClean != rightClean);
+  return mixed;
+}
+
+// Reports whether any of the three vertical partitions of this structure's
+// luma area (left quarter, middle half, right quarter) is not clean.
+bool CodingStructure::dirtyCrossTTV() const
+{
+  const int originX = area.lx();
+  const int originY = area.ly();
+  const int width   = area.lwidth();
+  const int height  = area.lheight();
+
+  const int quarter = width >> 2;
+  const int half    = width >> 1;
+
+  const Area leftPart(originX, originY, quarter, height);
+  const Area middlePart(originX + quarter, originY, half, height);
+  const Area rightPart(originX + quarter + half, originY, quarter, height);
+
+  const bool leftClean   = isClean(leftPart, CHANNEL_TYPE_LUMA);
+  const bool middleClean = isClean(middlePart, CHANNEL_TYPE_LUMA);
+  const bool rightClean  = isClean(rightPart, CHANNEL_TYPE_LUMA);
+
+  // Dirty as soon as one partition fails the clean test.
+  const bool anyDirty = !leftClean || !middleClean || !rightClean;
+  return anyDirty;
+}
+
+// Reports whether either vertical half of this structure's luma area, each
+// (width >> 1) columns wide, is not clean.
+bool CodingStructure::dirtyCrossBTV() const
+{
+  const int originX = area.lx();
+  const int originY = area.ly();
+  const int width   = area.lwidth();
+  const int height  = area.lheight();
+
+  const int half = width >> 1;
+
+  const Area leftPart(originX, originY, half, height);
+  const Area rightPart(originX + half, originY, half, height);
+
+  const bool leftClean  = isClean(leftPart, CHANNEL_TYPE_LUMA);
+  const bool rightClean = isClean(rightPart, CHANNEL_TYPE_LUMA);
+
+  // Dirty as soon as one half fails the clean test.
+  const bool anyDirty = !leftClean || !rightClean;
+
+  return anyDirty;
+}
+#endif
+
+
+
+#if GDR_ENABLED
+bool CodingStructure::isClean(const Position &IntPos, Mv FracMv) const
+{
+  /*
+    Tests IntPos displaced by FracMv against the current picture's header.
+    No current picture -> false. Header not in a GDR interval -> true.
+    In a GDR interval -> true iff the displaced column (plus a 4/5-pel
+    luma/chroma margin for fractional MVs) is left of virtual boundary 0.
+  */
+  const Picture* const curPic = slice->getPic();
+
+  if (!curPic)
+  {
+    return false;
+  }
+  // NOTE(review): cs and picHeader are dereferenced unchecked here, unlike the refPic overloads.
+  PicHeader     *curPh = curPic->cs->picHeader;
+  bool isCurGdrPicture = curPh->getInGdrInterval();
+
+  if (isCurGdrPicture)
+  {
+    const int lumaPixelAway = 4;
+    const int chromaPixelAway = 5;
+
+    const int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
+    const int iMvLumaFrac = (1 << iMvShift);
+    const int iMvChromaFrac = (iMvLumaFrac << 1);
+
+    const bool isIntLumaMv = (FracMv.getHor() % iMvLumaFrac) == 0;
+    const bool isIntChromaMv = (FracMv.getHor() % iMvChromaFrac) == 0;
+
+    const int scaledEndX = curPh->getVirtualBoundariesPosX(0) << iMvShift;
+
+    // Position snapped to the luma/chroma MV grid, plus the horizontal MV, plus the margin when the MV is fractional.
+    const Position OrigFracPos = Position(IntPos.x << iMvShift, IntPos.y << iMvShift);
+    const int lastLumaPos = ((OrigFracPos.x / iMvLumaFrac)   * iMvLumaFrac) + FracMv.getHor() + (isIntLumaMv ? 0 : (lumaPixelAway << iMvShift));
+    const int lastChromaPos = ((OrigFracPos.x / iMvChromaFrac) * iMvChromaFrac) + FracMv.getHor() + (isIntChromaMv ? 0 : (chromaPixelAway << iMvShift));
+
+    const int lastPelPos = std::max(lastLumaPos, lastChromaPos);
+
+    if (lastPelPos < scaledEndX)
+    {
+      return true;
+    }
+    else
+    {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool CodingStructure::isClean(const Position &IntPos, Mv FracMv, const Picture* const refPic) const
+{
+  /*
+    Tests IntPos displaced by FracMv against an explicit reference picture.
+    Missing picture, coding structure or header -> false.
+    Reference in a GDR interval -> true iff the displaced column (plus margin)
+    is left of virtual boundary 0; else true iff slice has no vertical boundary.
+  */
+  if (!refPic)
+  {
+    return false;
+  }
+
+  if (!refPic->cs)
+  {
+    return false;
+  }
+
+  PicHeader *refPh = refPic->cs->picHeader;
+  if (!refPh)
+  {
+    return false;
+  }
+
+  bool isRefGdrPicture = refPh->getInGdrInterval();
+
+  if (isRefGdrPicture)
+  {
+    const int lumaPixelAway = 4;
+    const int chromaPixelAway = 5;
+
+    const int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
+    const int iMvLumaFrac = (1 << iMvShift);
+    const int iMvChromaFrac = (iMvLumaFrac << 1);
+
+    const bool isIntLumaMv = (FracMv.getHor() % iMvLumaFrac) == 0;
+    const bool isIntChromaMv = (FracMv.getHor() % iMvChromaFrac) == 0;
+
+    const int  scaledEndX = refPh->getVirtualBoundariesPosX(0) << iMvShift;
+
+    const Position OrigFracPos = Position((IntPos.x) << iMvShift, IntPos.y << iMvShift);
+    const int lastLumaPos = ((OrigFracPos.x / iMvLumaFrac)   * iMvLumaFrac) + FracMv.getHor() + (isIntLumaMv ? 0 : (lumaPixelAway << iMvShift));
+    const int lastChromaPos = ((OrigFracPos.x / iMvChromaFrac) * iMvChromaFrac) + FracMv.getHor() + (isIntChromaMv ? 0 : (chromaPixelAway << iMvShift));
+
+    const int lastPelPos = std::max(lastLumaPos, lastChromaPos);
+
+    if (lastPelPos < scaledEndX)
+    {
+      return true;
+    }
+    else
+    {
+      return false;
+    }
+  }
+  else
+  {
+    // refPic is not in a GDR interval: clean only if the slice's picture header has no vertical virtual boundary
+    bool isCurGdrPicture = (slice->getPicHeader()->getNumVerVirtualBoundaries() > 0);
+
+    if (isCurGdrPicture)
+    {
+      return false;
+    }
+    else
+    {
+      return true;
+    }
+  }
+}
+
+
+bool CodingStructure::isClean(const Position &IntPos, Mv FracMv, RefPicList e, int refIdx, int isProf) const
+{
+  /*
+    Tests IntPos displaced by FracMv against reference picture (e, refIdx).
+    refIdx < 0, refIdx >= getNumRefIdx(e), or missing picture/cs/header -> false.
+    Reference in a GDR interval -> true iff the displaced column (plus margin)
+    is left of virtual boundary 0; else true iff slice has no vertical boundary.
+  */
+  if (refIdx < 0)
+  {
+    return false;
+  }
+
+  const Picture* const refPic = slice->getRefPic(e, refIdx);
+  const bool isExceedNumRef = (refIdx < slice->getNumRefIdx(e)) ? false : true;
+
+  if (!refPic || isExceedNumRef)
+  {
+    return false;
+  }
+
+  if (!refPic->cs)
+  {
+    return false;
+  }
+
+  PicHeader *refPh = refPic->cs->picHeader;
+
+  if (!refPh)
+  {
+    return false;
+  }
+
+  bool isRefGdrPicture = refPh->getInGdrInterval();
+
+  if (isRefGdrPicture)
+  {
+    const int lumaPixelAway   = 4 + (isProf << 0);   // 4, or 5 when isProf == 1
+    const int chromaPixelAway = 4 + (isProf << 1);   // 4, or 6 when isProf == 1; NOTE(review): sibling overloads use 5 - confirm
+
+    const int iMvShift      = MV_FRACTIONAL_BITS_INTERNAL;
+    const int iMvLumaFrac   = (1 << iMvShift);
+    const int iMvChromaFrac = (iMvLumaFrac << 1);
+
+    const bool isIntLumaMv      = (FracMv.getHor() % iMvLumaFrac  ) == 0;
+    const bool isIntChromaMv    = isProf ? false : (FracMv.getHor() % iMvChromaFrac) == 0;   // a non-zero isProf always applies the chroma margin
+
+    const int  scaledEndX      = refPh->getVirtualBoundariesPosX(0) << iMvShift;
+
+    // Position snapped to the luma/chroma MV grid, plus the horizontal MV, plus the margin when it applies.
+    const Position OrigFracPos  = Position((IntPos.x) << iMvShift, IntPos.y << iMvShift);
+    const int lastLumaPos     = ((OrigFracPos.x / iMvLumaFrac)   * iMvLumaFrac)   + FracMv.getHor() + (isIntLumaMv   ? 0 : (lumaPixelAway   << iMvShift));
+    const int lastChromaPos   = ((OrigFracPos.x / iMvChromaFrac) * iMvChromaFrac) + FracMv.getHor() + (isIntChromaMv ? 0 : (chromaPixelAway << iMvShift)) ;
+
+    const int lastPelPos    = std::max(lastLumaPos, lastChromaPos);
+
+    if (lastPelPos < scaledEndX)
+    {
+      return true;
+    }
+    else
+    {
+      return false;
+    }
+  }
+  else
+  {
+    // refPic is not in a GDR interval: clean only if the slice's picture header has no vertical virtual boundary
+    bool isCurGdrPicture = (slice->getPicHeader()->getNumVerVirtualBoundaries() > 0);
+
+    if (isCurGdrPicture)
+    {
+      return false;
+    }
+    else
+    {
+      return true;
+    }
+  }
+}
+
+bool CodingStructure::isClean(const Position &IntPos, Mv FracMv, RefPicList e, int refIdx, bool ibc) const
+{
+  /*
+    refIdx == MAX_NUM_REF: use the current picture and this structure's header;
+    otherwise use reference (e, refIdx). Missing picture/cs/header -> false.
+    Reference in a GDR interval -> true iff the MV-displaced column is left of
+    virtual boundary 0; else true iff the slice has no vertical boundary.
+  */
+  if (refIdx < 0) return false;
+  // NOTE(review): 'ibc' is never read in this body - presumably it only selects this overload.
+  Picture*   refPic;
+  PicHeader *refPh;
+
+  if (refIdx == MAX_NUM_REF)
+  {
+    refPic = slice->getPic();
+  }
+  else
+  {
+    refPic = slice->getRefPic(e, refIdx);
+  }
+
+  if (!refPic)
+  {
+    return false;
+  }
+
+  if (refIdx == MAX_NUM_REF)
+  {
+    refPh = picHeader;
+  }
+  else
+  {
+    if (!refPic->cs)   // without a coding structure there is no picture header to read
+    {
+      return false;
+    }
+
+    refPh = refPic->cs->picHeader;
+  }
+
+  if (!refPh)
+  {
+    return false;
+  }
+
+  bool isRefGdrPicture = refPh->getInGdrInterval();
+
+  if (isRefGdrPicture)
+  {
+    const int lumaPixelAway = 4;
+    const int chromaPixelAway = 5;
+
+    const int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
+    const int iMvLumaFrac = (1 << iMvShift);
+    const int iMvChromaFrac = (iMvLumaFrac << 1);
+
+    const bool isIntLumaMv = (FracMv.getHor() % iMvLumaFrac) == 0;
+    const bool isIntChromaMv = (FracMv.getHor() % iMvChromaFrac) == 0;
+
+    const int  scaledEndX = refPh->getVirtualBoundariesPosX(0) << iMvShift;
+
+    const Position OrigFracPos = Position((IntPos.x) << iMvShift, IntPos.y << iMvShift);
+    const int lastLumaPos = ((OrigFracPos.x / iMvLumaFrac)   * iMvLumaFrac) + FracMv.getHor() + (isIntLumaMv ? 0 : (lumaPixelAway << iMvShift));
+    const int lastChromaPos = ((OrigFracPos.x / iMvChromaFrac) * iMvChromaFrac) + FracMv.getHor() + (isIntChromaMv ? 0 : (chromaPixelAway << iMvShift));
+
+    const int lastPelPos = std::max(lastLumaPos, lastChromaPos);
+
+    if (lastPelPos < scaledEndX)
+    {
+      return true;
+    }
+    else
+    {
+      return false;
+    }
+  }
+  else
+  {
+    // refPic is not in a GDR interval: clean only if the slice's picture header has no vertical virtual boundary
+    bool isCurGdrPicture = (slice->getPicHeader()->getNumVerVirtualBoundaries() > 0);
+
+    if (isCurGdrPicture)
+    {
+      return false;
+    }
+    else
+    {
+      return true;
+    }
+  }
+}
+
+
+bool CodingStructure::isClean(const Position &IntPos, RefPicList e, int refIdx) const
+{
+  /*
+    Cleanliness of an integer position with respect to reference (e, refIdx).
+    Negative refIdx or missing picture/cs/header -> false.
+    Reference in a GDR interval -> true iff IntPos.x is left of virtual
+    boundary 0; else true iff the slice has no vertical virtual boundary.
+  */
+
+  // Reject a negative index before it is used to look up the reference list.
+  if (refIdx < 0)
+  {
+    return false;
+  }
+
+  const Picture* const refPic = slice->getRefPic(e, refIdx);
+
+  // Same guards as the Mv-based overloads: no picture/cs/header -> not clean.
+  if (!refPic || !refPic->cs)
+  {
+    return false;
+  }
+
+  PicHeader *refPh = refPic->cs->picHeader;
+
+  if (!refPh)
+  {
+    return false;
+  }
+
+  bool isRefGdrPicture = refPh->getInGdrInterval();
+
+  if (isRefGdrPicture)
+  {
+    // Clean region is everything strictly left of virtual boundary 0.
+    return IntPos.x < refPh->getVirtualBoundariesPosX(0);
+  }
+
+  // refPic is not in a GDR interval: clean only when the slice's picture
+  // header carries no vertical virtual boundary.
+  const bool isCurGdrPicture = (slice->getPicHeader()->getNumVerVirtualBoundaries() > 0);
+
+  return !isCurGdrPicture;
+}
+
+// Cleanliness of an integer position with respect to an explicit reference
+// picture.
+//   - missing picture, coding structure or picture header -> false
+//   - reference in a GDR interval -> true iff IntPos.x is left of virtual boundary 0
+//   - otherwise -> true iff the slice has no vertical virtual boundary
+bool CodingStructure::isClean(const Position &IntPos, const Picture* const refPic) const
+{
+  // Same guards as the Mv-based refPic overload: nothing to inspect -> not clean.
+  if (!refPic || !refPic->cs)
+  {
+    return false;
+  }
+
+  PicHeader *refPh = refPic->cs->picHeader;
+
+  if (!refPh)
+  {
+    return false;
+  }
+
+  bool isRefGdrPicture = refPh->getInGdrInterval();
+
+  if (isRefGdrPicture)
+  {
+    // Clean region is everything strictly left of virtual boundary 0.
+    return IntPos.x < refPh->getVirtualBoundariesPosX(0);
+  }
+  else
+  {
+    // refPic is not in a GDR interval: clean only when the slice's picture
+    // header carries no vertical virtual boundary.
+    bool isCurGdrPicture = (slice->getPicHeader()->getNumVerVirtualBoundaries() > 0);
+
+    return !isCurGdrPicture;
+  }
+}
+
+bool CodingStructure::isClean(const int Intx, const int Inty, const ChannelType effChType) const
+{
+  /*
+    Column-only test against this structure's picture header (Inty is unused).
+      - header not in a GDR interval -> true
+      - otherwise -> true iff Intx is left of virtual boundary 0 once that
+        boundary has been right-shifted by the numeric value of effChType
+  */
+  PicHeader *header = picHeader;
+
+  const bool inGdrInterval = header->getInGdrInterval();
+
+  if (!inGdrInterval)
+  {
+    return true;
+  }
+
+  // NOTE(review): the shift presumably converts the luma boundary to chroma
+  // columns, which assumes 2:1 horizontal subsampling - confirm for 4:4:4.
+  int boundaryX = header->getVirtualBoundariesPosX(0);
+
+  boundaryX     = boundaryX >> effChType;
+
+  const bool leftOfBoundary = (Intx < boundaryX);
+
+  return leftOfBoundary;
+}
+
+// Position overload: forwards the x/y of IntPos to the (x, y, channel) test.
+bool CodingStructure::isClean(const Position &IntPos, const ChannelType effChType) const
+{
+  const bool positionIsClean = isClean(IntPos.x, IntPos.y, effChType);
+  return positionIsClean;
+}
+
+// Area overload: the area counts as clean only when all four of its corner
+// positions pass the per-position test for the given channel type.
+bool CodingStructure::isClean(const Area &area, const ChannelType effChType) const
+{
+  // Every corner is evaluated up front (no short-circuit between corners).
+  const bool cleanTL = isClean(area.topLeft(),     effChType);
+  const bool cleanTR = isClean(area.topRight(),    effChType);
+  const bool cleanBL = isClean(area.bottomLeft(),  effChType);
+  const bool cleanBR = isClean(area.bottomRight(), effChType);
+
+  const bool allCornersClean = cleanTL && cleanTR && cleanBL && cleanBR;
+
+  return allCornersClean;
+}
+
+// Whole-structure overload: applies the area test to this structure's luma area.
+bool CodingStructure::isClean(const ChannelType effChType) const
+{
+  const bool lumaAreaIsClean = isClean(area.Y(), effChType);
+  return lumaAreaIsClean;
+}
+
+// For an affine PU, walks every entry of the PU's motion buffer and tests the
+// top-right sample of the matching 4x4 luma sub-block against each reference
+// list whose refIdx is >= 0, with isProf set to 1. Returns false on the first
+// sub-block that is not clean. Non-affine PUs are reported as clean without
+// inspection. The 'mv' argument is not read.
+bool CodingStructure::isSubPuClean(PredictionUnit &pu, const Mv *mv) const
+{
+  MotionBuf motion = pu.getMotionBuf();
+
+  if (!pu.cu->affine)
+  {
+    return true;
+  }
+
+  const Position origin = pu.Y().pos();
+  const Size     subBlock(4, 4);
+  const int      isProf = 1;
+
+  for (int row = 0; row < motion.height; row++)
+  {
+    for (int col = 0; col < motion.width; col++)
+    {
+      MotionInfo info = motion.at(col, row);
+
+      // Each motion-buffer entry is mapped to a 4x4 luma block, hence the shift by 2.
+      const Position corner   = Position{origin.x + (col << 2), origin.y + (row << 2)};
+      const Position cornerTR = Area(corner, subBlock).topRight();
+
+      // List 0 motion present and not clean?
+      if (info.refIdx[0] >= 0 && !isClean(cornerTR, info.mv[0], REF_PIC_LIST_0, info.refIdx[0], isProf))
+      {
+        return false;
+      }
+
+      // List 1 motion present and not clean?
+      if (info.refIdx[1] >= 0 && !isClean(cornerTR, info.mv[1], REF_PIC_LIST_1, info.refIdx[1], isProf))
+      {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+#endif
+
+
 bool CodingStructure::isDecomp( const Position &pos, const ChannelType effChType )
 {
   if( area.blocks[effChType].contains( pos ) )
@@ -549,10 +1223,6 @@ CodingUnit& CodingStructure::addCU( const UnitArea &unit, const ChannelType chTy
   if( prevCU )
   {
     prevCU->next = cu;
-#if ENABLE_SPLIT_PARALLELISM
-
-    CHECK( prevCU->cacheId != cu->cacheId, "Inconsintent cacheId between previous and current CU" );
-#endif
   }
 
   cus.push_back( cu );
@@ -593,21 +1263,12 @@ PredictionUnit& CodingStructure::addPU( const UnitArea &unit, const ChannelType
   pu->cs     = this;
   pu->cu     = m_isTuEnc ? cus[0] : getCU( unit.blocks[chType].pos(), chType );
   pu->chType = chType;
-#if ENABLE_SPLIT_PARALLELISM
-
-  CHECK( pu->cacheId != pu->cu->cacheId, "Inconsintent cacheId between the PU and assigned CU" );
-  CHECK( pu->cu->firstPU != nullptr, "Without an RQT the firstPU should be null" );
-#endif
 
   PredictionUnit *prevPU = m_numPUs > 0 ? pus.back() : nullptr;
 
   if( prevPU && prevPU->cu == pu->cu )
   {
     prevPU->next = pu;
-#if ENABLE_SPLIT_PARALLELISM
-
-    CHECK( prevPU->cacheId != pu->cacheId, "Inconsintent cacheId between previous and current PU" );
-#endif
   }
 
   pus.push_back( pu );
@@ -654,14 +1315,6 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c
   tu->cs     = this;
   tu->cu     = m_isTuEnc ? cus[0] : getCU( unit.blocks[chType].pos(), chType );
   tu->chType = chType;
-#if ENABLE_SPLIT_PARALLELISM
-
-  if( tu->cu )
-  {
-    CHECK(tu->cacheId != tu->cu->cacheId, "Inconsintent cacheId between the TU and assigned CU");
-  }
-#endif
-
 
   TransformUnit *prevTU = m_numTUs > 0 ? tus.back() : nullptr;
 
@@ -669,10 +1322,6 @@ TransformUnit& CodingStructure::addTU( const UnitArea &unit, const ChannelType c
   {
     prevTU->next = tu;
     tu->prev     = prevTU;
-#if ENABLE_SPLIT_PARALLELISM
-
-    CHECK( prevTU->cacheId != tu->cacheId, "Inconsintent cacheId between previous and current TU" );
-#endif
   }
 
   tus.push_back( tu );
@@ -893,6 +1542,7 @@ void CodingStructure::create(const ChromaFormat &_chromaFormat, const Area& _are
     return;
   }
 
+
   m_reco.create( area );
   m_pred.create( area );
   m_resi.create( area );
@@ -908,6 +1558,7 @@ void CodingStructure::create(const UnitArea& _unit, const bool isTopLayer, const
     return;
   }
 
+
   m_reco.create( area );
   m_pred.create( area );
   m_resi.create( area );
@@ -942,7 +1593,10 @@ void CodingStructure::createInternals(const UnitArea& _unit, const bool isTopLay
     m_offsets[i] = 0;
   }
 
-  if( !isTopLayer ) createCoeffs(isPLTused);
+  if (!isTopLayer)
+  {
+    createCoeffs(isPLTused);
+  }
 
   unsigned _lumaAreaScaled = g_miScaling.scale( area.lumaSize() ).area();
   m_motionBuf       = new MotionInfo[_lumaAreaScaled];
@@ -1050,6 +1704,7 @@ void CodingStructure::setPrevPLT(PLTBuf predictor)
     memcpy(prevPLT.curPLT[comp], predictor.curPLT[comp], MAXPLTPREDSIZE * sizeof(Pel));
   }
 }
+
 void CodingStructure::storePrevPLT(PLTBuf& predictor)
 {
   for (int comp = 0; comp < MAX_NUM_CHANNEL_TYPE; comp++)
@@ -1179,6 +1834,7 @@ void CodingStructure::initSubStructure( CodingStructure& subStruct, const Channe
   subStruct.vps       = vps;
   subStruct.pps       = pps;
   subStruct.picHeader = picHeader;
+
   memcpy(subStruct.alfApss, alfApss, sizeof(alfApss));
 
   subStruct.lmcsAps = lmcsAps;
@@ -1240,15 +1896,36 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C
   if( parent )
   {
     // copy data to picture
-    if( cpyPred )    getPredBuf   ( clippedArea ).copyFrom( subPredBuf );
-    if( cpyResi )    getResiBuf   ( clippedArea ).copyFrom( subResiBuf );
-    if( cpyReco )    getRecoBuf   ( clippedArea ).copyFrom( subRecoBuf );
-    if( cpyOrgResi ) getOrgResiBuf( clippedArea ).copyFrom( subStruct.getOrgResiBuf( clippedArea ) );
+    if (cpyPred)
+    {
+      getPredBuf(clippedArea).copyFrom(subPredBuf);
+    }
+    if (cpyResi)
+    {
+      getResiBuf(clippedArea).copyFrom(subResiBuf);
+    }
+    if (cpyReco)
+    {
+      getRecoBuf(clippedArea).copyFrom(subRecoBuf);
+    }
+    if (cpyOrgResi)
+    {
+      getOrgResiBuf(clippedArea).copyFrom(subStruct.getOrgResiBuf(clippedArea));
+    }
   }
 
-  if( cpyPred ) picture->getPredBuf( clippedArea ).copyFrom( subPredBuf );
-  if( cpyResi ) picture->getResiBuf( clippedArea ).copyFrom( subResiBuf );
-  if( cpyReco ) picture->getRecoBuf( clippedArea ).copyFrom( subRecoBuf );
+  if (cpyPred)
+  {
+    picture->getPredBuf(clippedArea).copyFrom(subPredBuf);
+  }
+  if (cpyResi)
+  {
+    picture->getResiBuf(clippedArea).copyFrom(subResiBuf);
+  }
+  if (cpyReco)
+  {
+    picture->getRecoBuf(clippedArea).copyFrom(subRecoBuf);
+  }
 
   if (!subStruct.m_isTuEnc && ((!slice->isIntra() || slice->getSPS()->getIBCFlag()) && chType != CHANNEL_TYPE_CHROMA))
   {
@@ -1586,6 +2263,8 @@ const CPelUnitBuf CodingStructure::getOrgBuf(const UnitArea &unit)     const { r
 const CPelBuf     CodingStructure::getOrgBuf(const ComponentID &compID)const { return picture->getBuf(area.blocks[compID], PIC_ORIGINAL); }
        PelUnitBuf CodingStructure::getOrgBuf()                               { return picture->getBuf(area, PIC_ORIGINAL); }
 const CPelUnitBuf CodingStructure::getOrgBuf()                         const { return picture->getBuf(area, PIC_ORIGINAL); }
+       PelUnitBuf CodingStructure::getTrueOrgBuf()                           { return picture->getBuf(area, PIC_TRUE_ORIGINAL); }
+const CPelUnitBuf CodingStructure::getTrueOrgBuf()                     const { return picture->getBuf(area, PIC_TRUE_ORIGINAL); }
 
 PelBuf CodingStructure::getBuf( const CompArea &blk, const PictureType &type )
 {
diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h
index b5ae7ac6307e1d2e863fd19948c909e77ae9e5a2..9fcfe29b20e89efbe2c5c423588368be915e6f30 100644
--- a/source/Lib/CommonLib/CodingStructure.h
+++ b/source/Lib/CommonLib/CodingStructure.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -104,6 +104,34 @@ public:
   void destroy();
   void releaseIntermediateData();
 
+#if GDR_ENABLED
+  bool containRefresh(int begX, int endX) const;
+  bool overlapRefresh() const;
+  bool overlapRefresh(int begX, int endX) const;
+  bool withinRefresh(int begX, int endX) const;
+
+  bool refreshCrossTTV(int begX, int endX) const;
+  bool refreshCrossBTV(int begX, int endX) const;
+
+  bool overlapDirty() const;
+  bool dirtyCrossTTV() const;  
+  bool dirtyCrossBTV() const;
+#endif
+
+#if GDR_ENABLED
+  bool isClean(const ChannelType effChType) const;
+  bool isClean(const Position &IntPos, RefPicList e, int refIdx) const;
+  bool isClean(const Position &IntPos, const Picture* const ref_pic) const;
+  bool isClean(const Position &IntPos, Mv FracMv) const;  
+  bool isClean(const Position &IntPos, Mv FracMv, const Picture* const refPic) const;
+  bool isClean(const Position &IntPos, Mv FracMv, RefPicList e, int refIdx, int isProf=0) const;
+  bool isClean(const Position &IntPos, Mv FracMv, RefPicList e, int refIdx, bool ibc) const;
+  bool isClean(const Position &IntPos, const ChannelType effChType) const;  
+  bool isClean(const int x, const int y, const ChannelType effChType) const;  
+  bool isClean(const Area &area, const ChannelType effChType) const;
+  
+  bool isSubPuClean(PredictionUnit &pu, const Mv *mv) const;
+#endif
   void rebindPicBufs();
 
   void createCoeffs(const bool isPLTused);
@@ -293,7 +321,8 @@ public:
   const CPelBuf       getOrgBuf(const ComponentID &compID) const;
          PelUnitBuf   getOrgBuf();
   const CPelUnitBuf   getOrgBuf() const;
-
+         PelUnitBuf   getTrueOrgBuf();
+  const CPelUnitBuf   getTrueOrgBuf() const;
 
   // pred buffer
          PelBuf       getPredBuf(const ComponentID &compID)       { return m_pred.get(compID); }
diff --git a/source/Lib/CommonLib/Common.h b/source/Lib/CommonLib/Common.h
index 9d30b8393ea28a4b619d2c3b9d3d2d256b33bcd3..6313b984af0ee80fd6cd5a5ab9acf8a32340c38a 100644
--- a/source/Lib/CommonLib/Common.h
+++ b/source/Lib/CommonLib/Common.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -97,6 +97,33 @@ struct Area : public Position, public Size
   bool contains(const Position &_pos)       const { return (_pos.x >= x) && (_pos.x < (x + width)) && (_pos.y >= y) && (_pos.y < (y + height)); }
   bool contains(const Area &_area)          const { return contains(_area.pos()) && contains(_area.bottomRight()); }
 
+#if GDR_ENABLED  
+  // Partial-overlap test. Returns false when either area fully contains the
+  // other; otherwise returns true iff between one and three corners of _area
+  // lie inside this area. Intersections in which no corner of _area falls
+  // inside this area are therefore reported as false.
+  bool overlap(const Area &_area) const
+  {
+    const Area self = Area(pos(), size());
+
+    if (contains(_area) || _area.contains(self))
+    {
+      return false;
+    }
+
+    // Count how many corners of _area fall inside this area.
+    int insideCorners = 0;
+    insideCorners += contains(_area.topLeft())     ? 1 : 0;
+    insideCorners += contains(_area.topRight())    ? 1 : 0;
+    insideCorners += contains(_area.bottomLeft())  ? 1 : 0;
+    insideCorners += contains(_area.bottomRight()) ? 1 : 0;
+
+    // Four inside corners would normally imply containment, which was
+    // excluded above; the upper bound is kept as a safeguard.
+    return (0 < insideCorners) && (insideCorners < 4);
+  }
+#endif
+
   bool operator!=(const Area &other)        const { return (Size::operator!=(other)) || (Position::operator!=(other)); }
   bool operator==(const Area &other)        const { return (Size::operator==(other)) && (Position::operator==(other)); }
 };
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index edcdceedeeb36aac9ab55ad7432062c3402e75f8..ffbca486194a9703ae47c34d682fb999289a6d33 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -163,19 +163,12 @@ static const int MAX_TB_LOG2_SIZEY = 6;
 static const int MIN_TB_SIZEY = 1 << MIN_TB_LOG2_SIZEY;
 static const int MAX_TB_SIZEY = 1 << MAX_TB_LOG2_SIZEY;
 
-static const int MAX_NUM_PICS_IN_SOP =                           1024;
-
-static const int MAX_NESTING_NUM_OPS =                           1024;
 static const int MAX_NESTING_NUM_LAYER =                           64;
 
-static const int MAX_VPS_NUM_HRD_PARAMETERS =                       1;
 static const int MAX_VPS_LAYERS =                                  64;
 static const int MAX_VPS_SUBLAYERS =                                7;
-static const int MAX_NUM_REF_LAYERS =                               7;
 static const int MAX_NUM_OLSS =                                   256;
 static const int MAX_VPS_OLS_MODE_IDC =                             2;
-static const int MAXIMUM_INTRA_FILTERED_WIDTH =                    16;
-static const int MAXIMUM_INTRA_FILTERED_HEIGHT =                   16;
 
 static const int MIP_MAX_WIDTH =                                   MAX_TB_SIZEY;
 static const int MIP_MAX_HEIGHT =                                  MAX_TB_SIZEY;
@@ -185,7 +178,6 @@ static const int MAX_NUM_ALF_CLASSES         =                     25;
 static const int MAX_NUM_ALF_LUMA_COEFF      =                     13;
 static const int MAX_NUM_ALF_CHROMA_COEFF    =                      7;
 static const int MAX_ALF_FILTER_LENGTH       =                      7;
-static const int MAX_NUM_ALF_COEFF           =                     MAX_ALF_FILTER_LENGTH * MAX_ALF_FILTER_LENGTH / 2 + 1;
 static const int MAX_ALF_PADDING_SIZE        =                      4;
 #define MAX_NUM_CC_ALF_FILTERS                                      4
 static constexpr int MAX_NUM_CC_ALF_CHROMA_COEFF    =               8;
@@ -195,8 +187,6 @@ static constexpr int CCALF_BITS_PER_COEFF_LEVEL     =               3;
 static const int ALF_FIXED_FILTER_NUM        =                     64;
 static const int ALF_CTB_MAX_NUM_APS         =                      8;
 static const int NUM_FIXED_FILTER_SETS       =                     16;
-static const int NUM_TOTAL_FILTER_SETS       =                     NUM_FIXED_FILTER_SETS + ALF_CTB_MAX_NUM_APS;
-
 
 static const int MAX_BDOF_APPLICATION_REGION =                     16;
 
@@ -209,23 +199,16 @@ static const int CU_DQP_EG_k =                                      0; ///< expg
 static const int SBH_THRESHOLD =                                    4; ///< value of the fixed SBH controlling threshold
 
 static const int MAX_NUM_VPS =                                     16;
-static const int MAX_NUM_DPS =                                     16;
 static const int MAX_NUM_SPS =                                     16;
 static const int MAX_NUM_PPS =                                     64;
 static const int MAX_NUM_APS =                                     32;  //Currently APS ID has 5 bits
 static const int NUM_APS_TYPE_LEN =                                 3;  //Currently APS Type has 3 bits
 static const int MAX_NUM_APS_TYPE =                                 8;  //Currently APS Type has 3 bits so the max type is 8
 
-#if JVET_T0065_LEVEL_6_3
 static constexpr int MAX_TILE_COLS = 30;   // Maximum number of tile columns
 static constexpr int MAX_TILES     = 990;  // Maximum number of tiles
 static constexpr int MAX_SLICES    = 1000; // Maximum number of slices per picture
 
-#else
-static const int MAX_TILE_COLS =                                   20;  ///< Maximum number of tile columns
-static const int MAX_TILES =                                      440;  ///< Maximum number of tiles
-static const int MAX_SLICES =                                     600;  ///< Maximum number of slices per picture
-#endif
 static const int MLS_GRP_NUM =                                   1024; ///< Max number of coefficient groups, max(16, 256)
 
 static const int MLS_CG_SIZE =                                      4; ///< Coefficient group size of 4x4; = MLS_CG_LOG2_WIDTH + MLS_CG_LOG2_HEIGHT
@@ -277,9 +260,6 @@ static const int LFNST_LAST_SIG_CHROMA =                            1;
 
 static const int NUM_LFNST_NUM_PER_SET =                            3;
 
-static const int LOG2_MAX_NUM_COLUMNS_MINUS1 =                      7;
-static const int LOG2_MAX_NUM_ROWS_MINUS1 =                         7;
-
 static const int CABAC_INIT_PRESENT_FLAG =                          1;
 
 static const int MV_FRACTIONAL_BITS_INTERNAL                      = 4;
@@ -302,8 +282,7 @@ static const int MAX_NUM_QP_VALUES =    MAX_QP + 1 - MIN_QP_VALUE_FOR_16_BIT; //
 // Cost mode support
 static const int LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP =      0; ///< QP to use for lossless coding.
 static const int LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME =4; ///< QP' to use for mixed_lossy_lossless coding.
-
-static const int RExt__GOLOMB_RICE_ADAPTATION_STATISTICS_SETS =     4;
+static const int RExt__GOLOMB_RICE_ADAPTATION_STATISTICS_SETS = MAX_NUM_COMPONENT;
 
 static const int RExt__PREDICTION_WEIGHTING_ANALYSIS_DC_PRECISION = 0; ///< Additional fixed bit precision used during encoder-side weighting prediction analysis. Currently only used when high_precision_prediction_weighting_flag is set, for backwards compatibility reasons.
 
@@ -471,8 +450,6 @@ static const int PIC_CODE_CW_BINS =                              16;
 static const int LMCS_SEG_NUM =                                  32;
 static const int FP_PREC =                                       11;
 static const int CSCALE_FP_PREC =                                11;
-static const int  NEIG_NUM_LOG  =                                 6;
-static const int  NEIG_NUM =                      1 << NEIG_NUM_LOG;
 static const int LOG2_PALETTE_CG_SIZE =                           4;
 static const int RUN_IDX_THRE =                                   4;
 static const int MAX_CU_BLKSIZE_PLT =                            64;
@@ -493,17 +470,19 @@ static const int SCALE_RATIO_BITS =                              14;
 static const int MAX_SCALING_RATIO =                              2;  // max downsampling ratio for RPR
 static const std::pair<int, int> SCALE_1X = std::pair<int, int>( 1 << SCALE_RATIO_BITS, 1 << SCALE_RATIO_BITS );  // scale ratio 1x
 static const int DELTA_QP_ACT[4] =                  { -5, 1, 3, 1 };
-
+static const int MAX_TSRC_RICE =                                  8;  ///<Maximum supported TSRC Rice parameter
+static const int MIN_TSRC_RICE =                                  1;  ///<Minimum supported TSRC Rice parameter
+static const int MAX_CTI_LUT_SIZE =                              64;  ///<Maximum colour transform LUT size for CTI SEI
 // ====================================================================================================================
 // Macro functions
 // ====================================================================================================================
 
 struct ClpRng
 {
-  int min;
-  int max;
-  int bd;
-  int n;
+  int min {0};
+  int max {0};
+  int bd  {0};
+  int n   {0};
 };
 
 struct ClpRngs
@@ -702,16 +681,6 @@ static inline int ceilLog2(uint32_t x)
 #define _UNIT_AREA_AT(_a,_x,_y,_w,_h)
 #endif
 
-#if ENABLE_SPLIT_PARALLELISM
-#include <omp.h>
-
-#define PARL_PARAM(DEF) , DEF
-#define PARL_PARAM0(DEF) DEF
-#else
-#define PARL_PARAM(DEF)
-#define PARL_PARAM0(DEF)
-#endif
-
 static const uint32_t CCALF_CANDS_COEFF_NR = 8;
 static const int CCALF_SMALL_TAB[CCALF_CANDS_COEFF_NR] = { 0, 1, 2, 4, 8, 16, 32, 64 };
 
diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp
index 3e1a44b0e3058766467612c1606a894f5c47640f..c7d90841cc0470a28ca48226da0936bed2533560 100644
--- a/source/Lib/CommonLib/ContextModelling.cpp
+++ b/source/Lib/CommonLib/ContextModelling.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -40,58 +40,64 @@
 #include "CodingStructure.h"
 #include "Picture.h"
 
-
-CoeffCodingContext::CoeffCodingContext( const TransformUnit& tu, ComponentID component, bool signHide, bool bdpcm )
-  : m_compID                    (component)
-  , m_chType                    (toChannelType(m_compID))
-  , m_width                     (tu.block(m_compID).width)
-  , m_height                    (tu.block(m_compID).height)
-  , m_log2CGWidth               ( g_log2SbbSize[ floorLog2(m_width) ][ floorLog2(m_height) ][0] )
-  , m_log2CGHeight              ( g_log2SbbSize[ floorLog2(m_width) ][ floorLog2(m_height) ][1] )
-  , m_log2CGSize                (m_log2CGWidth + m_log2CGHeight)
+CoeffCodingContext::CoeffCodingContext(const TransformUnit &tu, ComponentID component, bool signHide, bool bdpcm)
+  : m_compID(component)
+  , m_chType(toChannelType(m_compID))
+  , m_width(tu.block(m_compID).width)
+  , m_height(tu.block(m_compID).height)
+  , m_log2CGWidth(g_log2SbbSize[floorLog2(m_width)][floorLog2(m_height)][0])
+  , m_log2CGHeight(g_log2SbbSize[floorLog2(m_width)][floorLog2(m_height)][1])
+  , m_log2CGSize(m_log2CGWidth + m_log2CGHeight)
   , m_widthInGroups(std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, m_width) >> m_log2CGWidth)
   , m_heightInGroups(std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, m_height) >> m_log2CGHeight)
-  , m_log2BlockWidth            ((unsigned)floorLog2(m_width))
-  , m_log2BlockHeight           ((unsigned)floorLog2(m_height))
-  , m_maxNumCoeff               (m_width * m_height)
-  , m_signHiding                (signHide)
-  , m_extendedPrecision         (tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
-  , m_maxLog2TrDynamicRange     (tu.cs->sps->getMaxLog2TrDynamicRange(m_chType))
-  , m_scanType                  (SCAN_DIAG)
-  , m_scan                      (g_scanOrder     [SCAN_GROUPED_4x4][m_scanType][gp_sizeIdxInfo->idxFrom(m_width        )][gp_sizeIdxInfo->idxFrom(m_height        )])
-  , m_scanCG                    (g_scanOrder     [SCAN_UNGROUPED  ][m_scanType][gp_sizeIdxInfo->idxFrom(m_widthInGroups)][gp_sizeIdxInfo->idxFrom(m_heightInGroups)])
-  , m_CtxSetLastX               (Ctx::LastX[m_chType])
-  , m_CtxSetLastY               (Ctx::LastY[m_chType])
-  , m_maxLastPosX(g_uiGroupIdx[std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, m_width) - 1])
-  , m_maxLastPosY(g_uiGroupIdx[std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, m_height) - 1])
-  , m_lastOffsetX               (0)
-  , m_lastOffsetY               (0)
-  , m_lastShiftX                (0)
-  , m_lastShiftY                (0)
-  , m_TrafoBypass               (tu.cs->sps->getSpsRangeExtension().getTransformSkipContextEnabledFlag() && (tu.mtsIdx[m_compID] == MTS_SKIP))
-  , m_minCoeff                  (-(1 << tu.cs->sps->getMaxLog2TrDynamicRange(m_chType)))
-  , m_maxCoeff                  ((1 << tu.cs->sps->getMaxLog2TrDynamicRange(m_chType)) - 1)
-  , m_scanPosLast               (-1)
-  , m_subSetId                  (-1)
-  , m_subSetPos                 (-1)
-  , m_subSetPosX                (-1)
-  , m_subSetPosY                (-1)
-  , m_minSubPos                 (-1)
-  , m_maxSubPos                 (-1)
-  , m_sigGroupCtxId             (-1)
-  , m_tmplCpSum1                (-1)
-  , m_tmplCpDiag                (-1)
-  , m_sigFlagCtxSet             { Ctx::SigFlag[m_chType], Ctx::SigFlag[m_chType+2], Ctx::SigFlag[m_chType+4] }
-  , m_parFlagCtxSet             ( Ctx::ParFlag[m_chType] )
-  , m_gtxFlagCtxSet             { Ctx::GtxFlag[m_chType], Ctx::GtxFlag[m_chType+2] }
-  , m_sigGroupCtxIdTS           (-1)
-  , m_tsSigFlagCtxSet           ( Ctx::TsSigFlag )
-  , m_tsParFlagCtxSet           ( Ctx::TsParFlag )
-  , m_tsGtxFlagCtxSet           ( Ctx::TsGtxFlag )
-  , m_tsLrg1FlagCtxSet          (Ctx::TsLrg1Flag)
-  , m_tsSignFlagCtxSet          (Ctx::TsResidualSign)
-  , m_sigCoeffGroupFlag         ()
-  , m_bdpcm                     (bdpcm)
+  , m_log2BlockWidth((unsigned) floorLog2(m_width))
+  , m_log2BlockHeight((unsigned) floorLog2(m_height))
+  , m_maxNumCoeff(m_width * m_height)
+  , m_signHiding(signHide)
+#if JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS
+  , m_extendedPrecision(tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
+#else
+  , m_extendedPrecision(tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag() && tu.cs->sps->getBitDepth( m_chType ) > 10)
+#endif
+  , m_maxLog2TrDynamicRange(tu.cs->sps->getMaxLog2TrDynamicRange(m_chType))
+  , m_scanType(SCAN_DIAG)
+  , m_scan(
+      g_scanOrder[SCAN_GROUPED_4x4][m_scanType][gp_sizeIdxInfo->idxFrom(m_width)][gp_sizeIdxInfo->idxFrom(m_height)])
+  , m_scanCG(g_scanOrder[SCAN_UNGROUPED][m_scanType][gp_sizeIdxInfo->idxFrom(m_widthInGroups)]
+                        [gp_sizeIdxInfo->idxFrom(m_heightInGroups)])
+  , m_CtxSetLastX(Ctx::LastX[m_chType])
+  , m_CtxSetLastY(Ctx::LastY[m_chType])
+  , m_maxLastPosX(g_groupIdx[std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, m_width) - 1])
+  , m_maxLastPosY(g_groupIdx[std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, m_height) - 1])
+  , m_lastOffsetX(0)
+  , m_lastOffsetY(0)
+  , m_lastShiftX(0)
+  , m_lastShiftY(0)
+  , m_TrafoBypass(tu.cs->sps->getSpsRangeExtension().getTransformSkipContextEnabledFlag()
+                  && (tu.mtsIdx[m_compID] == MTS_SKIP))
+  , m_minCoeff(-(1 << tu.cs->sps->getMaxLog2TrDynamicRange(m_chType)))
+  , m_maxCoeff((1 << tu.cs->sps->getMaxLog2TrDynamicRange(m_chType)) - 1)
+  , m_scanPosLast(-1)
+  , m_subSetId(-1)
+  , m_subSetPos(-1)
+  , m_subSetPosX(-1)
+  , m_subSetPosY(-1)
+  , m_minSubPos(-1)
+  , m_maxSubPos(-1)
+  , m_sigGroupCtxId(-1)
+  , m_tmplCpSum1(-1)
+  , m_tmplCpDiag(-1)
+  , m_sigFlagCtxSet{ Ctx::SigFlag[m_chType], Ctx::SigFlag[m_chType + 2], Ctx::SigFlag[m_chType + 4] }
+  , m_parFlagCtxSet(Ctx::ParFlag[m_chType])
+  , m_gtxFlagCtxSet{ Ctx::GtxFlag[m_chType], Ctx::GtxFlag[m_chType + 2] }
+  , m_sigGroupCtxIdTS(-1)
+  , m_tsSigFlagCtxSet(Ctx::TsSigFlag)
+  , m_tsParFlagCtxSet(Ctx::TsParFlag)
+  , m_tsGtxFlagCtxSet(Ctx::TsGtxFlag)
+  , m_tsLrg1FlagCtxSet(Ctx::TsLrg1Flag)
+  , m_tsSignFlagCtxSet(Ctx::TsResidualSign)
+  , m_sigCoeffGroupFlag()
+  , m_bdpcm(bdpcm)
 {
   // LOGTODO
   unsigned log2sizeX = m_log2BlockWidth;
@@ -109,6 +115,19 @@ CoeffCodingContext::CoeffCodingContext( const TransformUnit& tu, ComponentID com
     const_cast<int&>(m_lastShiftX)  = (log2sizeX + 1) >> 2;
     const_cast<int&>(m_lastShiftY)  = (log2sizeY + 1) >> 2;
   }
+
+  m_cctxBaseLevel = 4; // default value for RRC rice derivation in VVCv1, is updated for extended RRC rice derivation
+  m_histValue = 0;  // default value for RRC rice derivation in VVCv1, is updated for history-based extension of RRC rice derivation
+  m_updateHist = 0;  // default value for RRC rice derivation (history update is disabled), is updated for history-based extension of RRC rice derivation
+  // Bind deriveRiceRRC once, based on the SPS range-extension flag, to either the extended or the regular derivation.
+  if (tu.cs->sps->getSpsRangeExtension().getRrcRiceExtensionEnableFlag())
+  {
+    deriveRiceRRC = &CoeffCodingContext::deriveRiceExt;
+  }
+  else
+  {
+    deriveRiceRRC = &CoeffCodingContext::deriveRice;
+  }
 }
 
 void CoeffCodingContext::initSubblock( int SubsetId, bool sigGroupFlag )
@@ -305,6 +324,7 @@ unsigned DeriveCtx::CtxAffineFlag( const CodingUnit& cu )
 
   return ctxId;
 }
+
 unsigned DeriveCtx::CtxSkipFlag( const CodingUnit& cu )
 {
   const CodingStructure *cs = cu.cs;
@@ -321,8 +341,6 @@ unsigned DeriveCtx::CtxSkipFlag( const CodingUnit& cu )
   return ctxId;
 }
 
-
-
 unsigned DeriveCtx::CtxPredModeFlag( const CodingUnit& cu )
 {
   const CodingUnit *cuLeft  = cu.cs->getCURestricted(cu.lumaPos().offset(-1, 0), cu, CH_L);
@@ -366,6 +384,25 @@ void MergeCtx::setMergeInfo( PredictionUnit& pu, int candIdx )
   pu.mvpIdx [REF_PIC_LIST_1] = NOT_VALID;
   pu.mvpNum [REF_PIC_LIST_0] = NOT_VALID;
   pu.mvpNum [REF_PIC_LIST_1] = NOT_VALID;
+#if GDR_ENABLED
+  CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+
+  if (isEncodeGdrClean)
+  {
+    Mv mv0 = pu.mv[REF_PIC_LIST_0];
+    Mv mv1 = pu.mv[REF_PIC_LIST_1];
+
+    int refIdx0 = pu.refIdx[REF_PIC_LIST_0];
+    int refIdx1 = pu.refIdx[REF_PIC_LIST_1];
+
+    pu.mvSolid[REF_PIC_LIST_0] = mvSolid[(candIdx << 1) + 0];
+    pu.mvSolid[REF_PIC_LIST_1] = mvSolid[(candIdx << 1) + 1];
+    pu.mvValid[REF_PIC_LIST_0] = cs.isClean(pu.Y().topRight(), mv0, REF_PIC_LIST_0, refIdx0);
+    pu.mvValid[REF_PIC_LIST_1] = cs.isClean(pu.Y().topRight(), mv1, REF_PIC_LIST_1, refIdx1);
+  }
+#endif
+
   if (CU::isIBC(*pu.cu))
   {
     pu.bv = pu.mv[REF_PIC_LIST_0];
@@ -377,6 +414,7 @@ void MergeCtx::setMergeInfo( PredictionUnit& pu, int candIdx )
   PU::restrictBiPredMergeCandsOne(pu);
   pu.mmvdEncOptMode = 0;
 }
+
 void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
 {
   const Slice &slice = *pu.cs->slice;
@@ -389,6 +427,11 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
   int fPosPosition = 0;
   Mv tempMv[2];
 
+#if GDR_ENABLED
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
+
   tempIdx = candIdx;
   fPosGroup = tempIdx / (MMVD_BASE_MV_NUM * MMVD_MAX_REFINE_NUM);
   tempIdx = tempIdx - fPosGroup * (MMVD_BASE_MV_NUM * MMVD_MAX_REFINE_NUM);
@@ -447,7 +490,9 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
         }
       }
       else
-      tempMv[0] = tempMv[1].scaleMv(scale);
+      {
+        tempMv[0] = tempMv[1].scaleMv(scale);
+      }
     }
     else
     {
@@ -466,7 +511,9 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
         }
       }
       else
-      tempMv[1] = tempMv[0].scaleMv(scale);
+      {
+        tempMv[1] = tempMv[0].scaleMv(scale);
+      }
     }
 
     pu.interDir = 3;
@@ -474,6 +521,25 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
     pu.refIdx[REF_PIC_LIST_0] = refList0;
     pu.mv[REF_PIC_LIST_1] = mmvdBaseMv[fPosBaseIdx][1].mv + tempMv[1];
     pu.refIdx[REF_PIC_LIST_1] = refList1;
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      Mv mv0 = pu.mv[REF_PIC_LIST_0];
+      Mv mv1 = pu.mv[REF_PIC_LIST_1];
+
+      int refIdx0 = pu.refIdx[REF_PIC_LIST_0];
+      int refIdx1 = pu.refIdx[REF_PIC_LIST_1];
+
+      mmvdValid[fPosBaseIdx][0] = cs.isClean(pu.Y().topRight(), mv0, REF_PIC_LIST_0, refIdx0);
+      mmvdValid[fPosBaseIdx][1] = cs.isClean(pu.Y().topRight(), mv1, REF_PIC_LIST_1, refIdx1);
+
+      pu.mvSolid[REF_PIC_LIST_0] = mmvdSolid[fPosBaseIdx][0];
+      pu.mvSolid[REF_PIC_LIST_1] = mmvdSolid[fPosBaseIdx][1];
+
+      pu.mvValid[REF_PIC_LIST_0] = mmvdValid[fPosBaseIdx][0];
+      pu.mvValid[REF_PIC_LIST_1] = mmvdValid[fPosBaseIdx][1];
+    }
+#endif
   }
   else if (refList0 != -1)
   {
@@ -498,6 +564,26 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
     pu.refIdx[REF_PIC_LIST_0] = refList0;
     pu.mv[REF_PIC_LIST_1] = Mv(0, 0);
     pu.refIdx[REF_PIC_LIST_1] = -1;
+
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      Mv mv0 = pu.mv[REF_PIC_LIST_0];
+      // List 1 was just reset (zero motion vector, reference index -1), so only list 0 is inspected here.
+
+      int refIdx0 = pu.refIdx[REF_PIC_LIST_0];
+      // The list-1 solid/valid flags below are therefore set to true unconditionally.
+
+      pu.mvSolid[REF_PIC_LIST_0] = mmvdSolid[fPosBaseIdx][0];
+      pu.mvSolid[REF_PIC_LIST_1] = true;
+
+      mmvdValid[fPosBaseIdx][0] = cs.isClean(pu.Y().topRight(), mv0, REF_PIC_LIST_0, refIdx0);
+      mmvdValid[fPosBaseIdx][1] = true;
+
+      pu.mvValid[REF_PIC_LIST_0] = mmvdValid[fPosBaseIdx][0];
+      pu.mvValid[REF_PIC_LIST_1] = true;
+    }
+#endif
   }
   else if (refList1 != -1)
   {
@@ -522,6 +608,25 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
     pu.refIdx[REF_PIC_LIST_0] = -1;
     pu.mv[REF_PIC_LIST_1] = mmvdBaseMv[fPosBaseIdx][1].mv + tempMv[1];
     pu.refIdx[REF_PIC_LIST_1] = refList1;
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      // List 0 is unused in this branch (its reference index was just set to -1), so only list 1 is inspected here.
+      Mv mv1 = pu.mv[REF_PIC_LIST_1];
+
+      // The list-0 solid/valid flags below are therefore set to true unconditionally.
+      int refIdx1 = pu.refIdx[REF_PIC_LIST_1];
+
+      mmvdValid[fPosBaseIdx][0] = true;
+      mmvdValid[fPosBaseIdx][1] = cs.isClean(pu.Y().topRight(), mv1, REF_PIC_LIST_1, refIdx1);
+
+      pu.mvSolid[REF_PIC_LIST_0] = true;
+      pu.mvSolid[REF_PIC_LIST_1] = mmvdSolid[fPosBaseIdx][1];
+
+      pu.mvValid[REF_PIC_LIST_0] = true;
+      pu.mvValid[REF_PIC_LIST_1] = mmvdValid[fPosBaseIdx][1];
+    }
+#endif
   }
 
   pu.mmvdMergeFlag = true;
@@ -548,7 +653,6 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
     }
   }
 
-
   PU::restrictBiPredMergeCandsOne(pu);
 }
 
@@ -573,7 +677,7 @@ unsigned DeriveCtx::CtxPltCopyFlag( const unsigned prevRunType, const unsigned d
   uint8_t *ucCtxLut = (prevRunType == PLT_RUN_INDEX) ? g_paletteRunLeftLut : g_paletteRunTopLut;
   if ( dist <= RUN_IDX_THRE )
   {
-     return ucCtxLut[dist];
+    return ucCtxLut[dist];
   }
   else
   {
diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h
index e3006c3e503723e4974ea1ebadace39de13dc6eb..494efd08c1ea03ce9a0d56e048b12c80bd76c571 100644
--- a/source/Lib/CommonLib/ContextModelling.h
+++ b/source/Lib/CommonLib/ContextModelling.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -38,7 +38,7 @@
 #ifndef __CONTEXTMODELLING__
 #define __CONTEXTMODELLING__
 
-
+#include "Rom.h"
 #include "CommonDef.h"
 #include "Contexts.h"
 #include "Slice.h"
@@ -192,6 +192,125 @@ public:
     return unsigned(std::max<TCoeff>(std::min<TCoeff>(sum - 5 * baseLevel, 31), 0));
   }
 
+  void updateRiceStat(unsigned &riceStat, TCoeff rem, int remainderFlag)
+  {
+    // Replaces riceStat by the halved sum of its previous value and floorLog2(rem).
+    // A non-zero remainderFlag adds an extra 2 to that sum before the halving.
+    // riceStat stays the left-most operand, so the sum is evaluated in unsigned
+    // arithmetic exactly as before.
+    const int      magnitude = floorLog2((uint32_t)rem);
+    const unsigned extra     = remainderFlag ? 2u : 0u;
+
+    riceStat = (riceStat + magnitude + extra) >> 1;
+  }
+  
+  unsigned templateAbsCompare(TCoeff sum)
+  {
+    // Classifies sum against the thresholds g_riceT[0..3] and returns the
+    // g_riceShift entry of the selected range:
+    //   - the first threshold that sum stays strictly below picks ranges 0..3;
+    //   - a sum that is not below any of the four thresholds picks range 4.
+    // Thresholds are probed in index order and probing stops at the first hit,
+    // so no threshold beyond the matching one is read.
+    const int lastRange = 4;
+
+    // Start from the fallback range and replace it by the first matching one.
+    int selected = lastRange;
+
+    for (int candidate = 0; candidate < lastRange; candidate++)
+    {
+      if (sum < g_riceT[candidate])
+      {
+        selected = candidate;
+        break;
+      }
+    }
+
+    // g_riceShift is indexed by the selected range (0..4).
+    // The stored shift is handed back as unsigned.
+    return g_riceShift[selected];
+  }
+
+  unsigned templateAbsSumExt(int scanPos, const TCoeff* coeff, int baseLevel)
+  { // Derives a Rice parameter from up to five neighbouring coefficient magnitudes; m_histValue stands in for neighbours outside the block.
+    unsigned riceParam;
+    const uint32_t  posY = m_scan[scanPos].y;
+    const uint32_t  posX = m_scan[scanPos].x;
+    const TCoeff*   data = coeff + posX + posY * m_width; // current position; consecutive rows are m_width entries apart
+    TCoeff          sum = 0;
+    if (posX < m_width - 1) // the right neighbour lies inside the block
+    {
+      sum += abs(data[1]); // right neighbour
+      if (posX < m_width - 2)
+      {
+        sum += abs(data[2]); // second neighbour to the right
+      }
+      else
+      {
+        sum += m_histValue;
+      }
+      // below-right neighbour, only read when the row below exists
+      if (posY < m_height - 1)
+      {
+        sum += abs(data[m_width + 1]);
+      }
+      else
+      {
+        sum += m_histValue;
+      }
+    }
+    else
+    {
+      sum += 2 * m_histValue; // rightmost column: exactly two substitutes are added in this branch
+    }
+    if (posY < m_height - 1) // the neighbour below lies inside the block
+    {
+      sum += abs(data[m_width]); // neighbour directly below
+      if (posY < m_height - 2)
+      {
+        sum += abs(data[m_width << 1]); // second neighbour below
+      }
+      else
+      {
+        sum += m_histValue;
+      }
+    }
+    else
+    {
+      sum += m_histValue; // bottom row: a single substitute is added in this branch
+    }
+    // Pick a shift from the template sum and scale the sum down by it before the table lookup.
+    int currentShift = templateAbsCompare(sum);
+    sum = sum >> currentShift;
+    if (baseLevel == 0)
+    {
+      riceParam = unsigned(std::min<TCoeff>(sum, 31)); // table index is capped at 31
+    }
+    else
+    {
+      riceParam = unsigned(std::max<TCoeff>(std::min<TCoeff>(sum - baseLevel, 31), 0)); // index clamped to [0, 31] after removing baseLevel
+    }
+    // The shift applied above is added back on top of the table entry.
+    riceParam = g_goRiceParsCoeff[riceParam] + currentShift;
+
+    return riceParam;
+  }
+
+  unsigned (CoeffCodingContext::*deriveRiceRRC)(int scanPos, const TCoeff* coeff, int baseLevel);
+
+  unsigned deriveRice(int scanPos, const TCoeff* coeff, int baseLevel)
+  {
+    // Regular derivation: the template sum is used as an index into g_goRiceParsCoeff.
+    const unsigned tableIdx = templateAbsSum(scanPos, coeff, baseLevel);
+    return g_goRiceParsCoeff[tableIdx];
+  }
+  
+  unsigned deriveRiceExt(int scanPos, const TCoeff* coeff, int baseLevel)
+  {
+    // Extended derivation: templateAbsSumExt already returns the final Rice parameter.
+    return templateAbsSumExt(scanPos, coeff, baseLevel);
+  }
+
   unsigned sigCtxIdAbsTS( int scanPos, const TCoeff* coeff )
   {
     const uint32_t  posY   = m_scan[scanPos].y;
@@ -308,7 +427,9 @@ public:
   {
 
     if (absCoeff == 0)
+    {
       return 0;
+    }
     int pred1, absBelow = abs(belowPixel), absRight = abs(rightPixel);
 
     int absCoeffMod = int(absCoeff);
@@ -334,7 +455,9 @@ public:
   {
 
     if (absCoeff == 0)
+    {
       return 0;
+    }
 
     int pred1, absBelow = abs(belowPixel), absRight = abs(rightPixel);
     pred1 = std::max(absBelow, absRight);
@@ -359,6 +482,13 @@ public:
 
   int                       regBinLimit;
 
+  // Accessors for the Rice derivation state (m_cctxBaseLevel, m_histValue, m_updateHist); getters do not modify the object.
+  unsigned getBaseLevel() const                              { return m_cctxBaseLevel; }
+  void     setBaseLevel(int value)                           { m_cctxBaseLevel = value; }
+  TCoeff   getHistValue() const                              { return m_histValue; }
+  void     setHistValue(TCoeff value)                        { m_histValue = value; }
+  bool     getUpdateHist() const                             { return m_updateHist; }
+  void     setUpdateHist(bool value)                         { m_updateHist = value; }
 private:
   // constant
   const ComponentID         m_compID;
@@ -413,6 +543,9 @@ private:
   int                       m_remainingContextBins;
   std::bitset<MLS_GRP_NUM>  m_sigCoeffGroupFlag;
   const bool                m_bdpcm;
+  int                       m_cctxBaseLevel; 
+  TCoeff                    m_histValue;    
+  bool                      m_updateHist;
 };
 
 
@@ -457,6 +590,13 @@ public:
   ~MergeCtx() {}
 public:
   MvField       mvFieldNeighbours [ MRG_MAX_NUM_CANDS << 1 ]; // double length for mv of both lists
+#if GDR_ENABLED 
+  // note : check if source of mv and mv itself is valid
+  bool          mvSolid           [MRG_MAX_NUM_CANDS << 1];  
+  bool          mvValid           [MRG_MAX_NUM_CANDS << 1];
+  Position      mvPos             [MRG_MAX_NUM_CANDS << 1];
+  MvpType       mvType            [MRG_MAX_NUM_CANDS << 1];
+#endif
   uint8_t       BcwIdx            [ MRG_MAX_NUM_CANDS      ];
   unsigned char interDirNeighbours[ MRG_MAX_NUM_CANDS      ];
   MergeType     mrgTypeNeighbours [ MRG_MAX_NUM_CANDS      ];
@@ -466,6 +606,10 @@ public:
   MotionBuf     subPuMvpMiBuf;
   MotionBuf     subPuMvpExtMiBuf;
   MvField mmvdBaseMv[MMVD_BASE_MV_NUM][2];
+#if GDR_ENABLED   
+  bool          mmvdSolid[MMVD_BASE_MV_NUM][2];
+  bool          mmvdValid[MMVD_BASE_MV_NUM][2];
+#endif
   void setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx);
   bool          mmvdUseAltHpelIf  [ MMVD_BASE_MV_NUM ];
   bool          useAltHpelIf      [ MRG_MAX_NUM_CANDS ];
@@ -479,6 +623,10 @@ public:
   ~AffineMergeCtx() {}
 public:
   MvField       mvFieldNeighbours[AFFINE_MRG_MAX_NUM_CANDS << 1][3]; // double length for mv of both lists
+#if GDR_ENABLED
+  bool          mvSolid[AFFINE_MRG_MAX_NUM_CANDS << 1][3];   
+  bool          mvValid[AFFINE_MRG_MAX_NUM_CANDS << 1][3];
+#endif
   unsigned char interDirNeighbours[AFFINE_MRG_MAX_NUM_CANDS];
   EAffineModel  affineType[AFFINE_MRG_MAX_NUM_CANDS];
   uint8_t       BcwIdx[AFFINE_MRG_MAX_NUM_CANDS];
diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp
index f3acf16b6f1533f43ba17c42405b123d83bb21d2..d5d4d47a1212531890f1397e1fd98b3bbb9cd111 100644
--- a/source/Lib/CommonLib/Contexts.cpp
+++ b/source/Lib/CommonLib/Contexts.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -131,9 +131,6 @@ void BinProbModel_Std::init( int qp, int initId )
   m_state[1]   = p1 & MASK_1;
 }
 
-
-
-
 CtxSet::CtxSet( std::initializer_list<CtxSet> ctxSets )
 {
   uint16_t  minOffset = std::numeric_limits<uint16_t>::max();
@@ -147,10 +144,6 @@ CtxSet::CtxSet( std::initializer_list<CtxSet> ctxSets )
   Size    = maxOffset - minOffset;
 }
 
-
-
-
-
 const std::vector<uint8_t>& ContextSetCfg::getInitTable( unsigned initId )
 {
   CHECK( initId >= (unsigned)sm_InitTables.size(),
@@ -158,7 +151,6 @@ const std::vector<uint8_t>& ContextSetCfg::getInitTable( unsigned initId )
   return sm_InitTables[initId];
 }
 
-
 CtxSet ContextSetCfg::addCtxSet( std::initializer_list<std::initializer_list<uint8_t>> initSet2d )
 {
   const std::size_t startIdx  = sm_InitTables[0].size();
@@ -958,16 +950,8 @@ void CtxStore<BinProbModel>::savePStates( std::vector<uint16_t>& probStates ) co
   }
 }
 
-
-
-
-
 template class CtxStore<BinProbModel_Std>;
 
-
-
-
-
 Ctx::Ctx()                                  : m_BPMType( BPM_Undefined )                        {}
 Ctx::Ctx( const BinProbModel_Std*   dummy ) : m_BPMType( BPM_Std   ), m_CtxStore_Std  ( true )  {}
 
diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h
index 5a94a2d954f200f295edae5bc683c9e0dc5fbc9e..d733b3106538afd5f94c000614903e419599f502 100644
--- a/source/Lib/CommonLib/Contexts.h
+++ b/source/Lib/CommonLib/Contexts.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -382,6 +382,30 @@ public:
     }
   }
 
+#if JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS
+  void riceStatReset(int bitDepth, bool persistentRiceAdaptationEnabledFlag)
+#else
+  void riceStatReset(int bitDepth)
+#endif
+  {
+    // Every Golomb-Rice adaptation statistic is reset to the same bit-depth dependent
+    // value, so that value is derived (and validated) once instead of once per entry.
+    unsigned initialStat = 0;
+#if JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS
+    if (persistentRiceAdaptationEnabledFlag)
+    {
+      CHECK(bitDepth <= 10,"BitDepth shall be larger than 10.");
+      initialStat = 2 * floorLog2(bitDepth - 10);
+    }
+#else
+    initialStat = (bitDepth > 10) ? 2 * floorLog2(bitDepth - 10) : 0;
+#endif
+    for (std::size_t k = 0; k < RExt__GOLOMB_RICE_ADAPTATION_STATISTICS_SETS; k++)
+    {
+      m_GRAdaptStats[k] = initialStat;
+    }
+  }
+
   void  loadPStates( const std::vector<uint16_t>& probStates )
   {
     switch( m_BPMType )
@@ -416,6 +440,9 @@ public:
   const unsigned&     getGRAdaptStats ( unsigned      id )      const { return m_GRAdaptStats[id]; }
   unsigned&           getGRAdaptStats ( unsigned      id )            { return m_GRAdaptStats[id]; }
 
+  // Accessors for m_baseLevel; the getter returns by value, so a const qualifier on the return type would be ignored.
+  unsigned            getBaseLevel()                          const { return m_baseLevel; }
+  void                setBaseLevel(int value)                         { m_baseLevel = value; }
 public:
   unsigned            getBPMType      ()                        const { return m_BPMType; }
   const Ctx&          getCtx          ()                        const { return *this; }
@@ -438,12 +465,8 @@ private:
   CtxStore<BinProbModel_Std>    m_CtxStore_Std;
 protected:
   unsigned                      m_GRAdaptStats[RExt__GOLOMB_RICE_ADAPTATION_STATISTICS_SETS];
-#if ENABLE_SPLIT_PARALLELISM
+  int m_baseLevel;
 
-public:
-  int64_t cacheId;
-  bool    cacheUsed;
-#endif
 };
 
 
diff --git a/source/Lib/CommonLib/LoopFilter.cpp b/source/Lib/CommonLib/DeblockingFilter.cpp
similarity index 94%
rename from source/Lib/CommonLib/LoopFilter.cpp
rename to source/Lib/CommonLib/DeblockingFilter.cpp
index 46b268f23debc893343fa0414919fda1eb2ae877..1e8c9c555ee076fd34a858768d4ed653410b1074 100644
--- a/source/Lib/CommonLib/LoopFilter.cpp
+++ b/source/Lib/CommonLib/DeblockingFilter.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,11 +31,11 @@
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/** \file     LoopFilter.cpp
+/** \file     DeblockingFilter.cpp
     \brief    deblocking filter
 */
 
-#include "LoopFilter.h"
+#include "DeblockingFilter.h"
 #include "Slice.h"
 #include "Mv.h"
 #include "Unit.h"
@@ -58,13 +58,13 @@
 // Tables
 // ====================================================================================================================
 
-const uint16_t LoopFilter::sm_tcTable[MAX_QP + 1 + DEFAULT_INTRA_TC_OFFSET] = {
+const uint16_t DeblockingFilter::sm_tcTable[MAX_QP + 1 + DEFAULT_INTRA_TC_OFFSET] = {
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   3,   4,   4,   4,
   4,  5,  5,  5,  5,  7,  7,  8,  9,  10,  10,  11,  13,  14,  15,  17,  19,  21,  24,  25,  29,  33,
   36, 41, 45, 51, 57, 64, 71, 80, 89, 100, 112, 125, 141, 157, 177, 198, 222, 250, 280, 314, 352, 395
 };
 
-const uint8_t LoopFilter::sm_betaTable[MAX_QP + 1] = { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+const uint8_t DeblockingFilter::sm_betaTable[MAX_QP + 1] = { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
                                                        6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 24,
                                                        26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56,
                                                        58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88 };
@@ -92,18 +92,18 @@ static bool isAvailableAbove( const CodingUnit& cu, const CodingUnit& cu2, const
 // Constructor / destructor / create / destroy
 // ====================================================================================================================
 
-LoopFilter::LoopFilter()
+DeblockingFilter::DeblockingFilter()
 {
 }
 
-LoopFilter::~LoopFilter()
+DeblockingFilter::~DeblockingFilter()
 {
 }
 
 // ====================================================================================================================
 // Public member functions
 // ====================================================================================================================
-void LoopFilter::create( const unsigned uiMaxCUDepth )
+void DeblockingFilter::create( const unsigned uiMaxCUDepth )
 {
   destroy();
   const unsigned numPartitions = 1 << ( uiMaxCUDepth << 1 );
@@ -115,14 +115,14 @@ void LoopFilter::create( const unsigned uiMaxCUDepth )
   m_enc = false;
 }
 
-void LoopFilter::initEncPicYuvBuffer(ChromaFormat chromaFormat, const Size &size, const unsigned maxCUSize)
+void DeblockingFilter::initEncPicYuvBuffer(ChromaFormat chromaFormat, const Size &size, const unsigned maxCUSize)
 {
   const Area a = Area(Position(), size);
   m_encPicYuvBuffer.destroy();
   m_encPicYuvBuffer.create(chromaFormat, a, maxCUSize, 7);
 }
 
-void LoopFilter::destroy()
+void DeblockingFilter::destroy()
 {
   for( int edgeDir = 0; edgeDir < NUM_EDGE_DIR; edgeDir++ )
   {
@@ -137,7 +137,7 @@ void LoopFilter::destroy()
  .
  \param  pcPic   picture class (Pic) pointer
  */
-void LoopFilter::loopFilterPic( CodingStructure& cs
+void DeblockingFilter::deblockingFilterPic( CodingStructure& cs
                                 )
 {
   const PreCalcValues& pcv = *cs.pcv;
@@ -238,11 +238,11 @@ void LoopFilter::loopFilterPic( CodingStructure& cs
   DTRACE_PIC_COMP(D_REC_CB_CHROMA_LF, cs, cs.getRecoBuf(), COMPONENT_Cb);
   DTRACE_PIC_COMP(D_REC_CB_CHROMA_LF, cs, cs.getRecoBuf(), COMPONENT_Cr);
 
-  DTRACE    ( g_trace_ctx, D_CRC, "LoopFilter" );
+  DTRACE    ( g_trace_ctx, D_CRC, "DeblockingFilter" );
   DTRACE_CRC( g_trace_ctx, D_CRC, cs, cs.getRecoBuf() );
 }
 
-void LoopFilter::resetFilterLengths()
+void DeblockingFilter::resetFilterLengths()
 {
   memset(m_aapucBS[EDGE_VER].data(), 0, m_aapucBS[EDGE_VER].byte_size());
   memset(m_aapbEdgeFilter[EDGE_VER].data(), false, m_aapbEdgeFilter[EDGE_VER].byte_size());
@@ -263,7 +263,7 @@ void LoopFilter::resetFilterLengths()
  \param cu               the CU to be deblocked
  \param edgeDir          the direction of the edge in block boundary (horizontal/vertical), which is added newly
 */
-void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
+void DeblockingFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
 {
   const PreCalcValues& pcv = *cu.cs->pcv;
   const Area area          = cu.Y().valid() ? cu.Y() : Area( recalcPosition( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].pos() ), recalcSize( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].size() ) );
@@ -275,7 +275,7 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
 
   bool isCuCrossedByVirtualBoundaries = isCrossedByVirtualBoundaries( area.x, area.y, area.width, area.height, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, cu.cs->picHeader );
 
-  xSetLoopfilterParam( cu );
+  xSetDeblockingFilterParam( cu );
   static_vector<int, 2*MAX_CU_SIZE> edgeIdx;
   edgeIdx.clear();
 
@@ -445,7 +445,7 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
   }
 }
 
-inline bool LoopFilter::isCrossedByVirtualBoundaries(const int xPos, const int yPos, const int width, const int height, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], const PicHeader* picHeader )
+inline bool DeblockingFilter::isCrossedByVirtualBoundaries(const int xPos, const int yPos, const int width, const int height, int& numHorVirBndry, int& numVerVirBndry, int horVirBndryPos[], int verVirBndryPos[], const PicHeader* picHeader )
 {
   numHorVirBndry = 0; numVerVirBndry = 0;
   if( picHeader->getVirtualBoundariesPresentFlag() )
@@ -468,7 +468,7 @@ inline bool LoopFilter::isCrossedByVirtualBoundaries(const int xPos, const int y
   return numHorVirBndry > 0 || numVerVirBndry > 0;
 }
 
-inline void LoopFilter::xDeriveEdgefilterParam( const int xPos, const int yPos, const int numVerVirBndry, const int numHorVirBndry, const int verVirBndryPos[], const int horVirBndryPos[], bool &verEdgeFilter, bool &horEdgeFilter )
+inline void DeblockingFilter::xDeriveEdgefilterParam( const int xPos, const int yPos, const int numVerVirBndry, const int numHorVirBndry, const int verVirBndryPos[], const int horVirBndryPos[], bool &verEdgeFilter, bool &horEdgeFilter )
 {
   for (int i = 0; i < numVerVirBndry; i++)
   {
@@ -489,7 +489,7 @@ inline void LoopFilter::xDeriveEdgefilterParam( const int xPos, const int yPos,
   }
 }
 
-void LoopFilter::xSetMaxFilterLengthPQFromTransformSizes(const DeblockEdgeDir edgeDir, const CodingUnit &cu,
+void DeblockingFilter::xSetMaxFilterLengthPQFromTransformSizes(const DeblockEdgeDir edgeDir, const CodingUnit &cu,
                                                          const TransformUnit &currTU, const int firstComponent)
 {
   const TransformUnit& tuQ = currTU;
@@ -541,7 +541,7 @@ void LoopFilter::xSetMaxFilterLengthPQFromTransformSizes(const DeblockEdgeDir ed
   }
   if ( edgeDir == EDGE_VER )
   {
-    for ( int cIdx = 0; cIdx < ::getNumberValidComponents(tuQ.chromaFormat); cIdx++ ) // per component
+    for ( int cIdx = firstComponent; cIdx < ::getNumberValidComponents(tuQ.chromaFormat); cIdx++ ) // per component
     {
       const ComponentID comp = ComponentID(cIdx);
       const ChannelType ch   = toChannelType(comp);
@@ -586,7 +586,7 @@ void LoopFilter::xSetMaxFilterLengthPQFromTransformSizes(const DeblockEdgeDir ed
   }
 }
 
-void LoopFilter::xSetMaxFilterLengthPQForCodingSubBlocks( const DeblockEdgeDir edgeDir, const CodingUnit& cu, const PredictionUnit& currPU, const bool& mvSubBlocks, const int& subBlockSize, const Area& areaPu )
+void DeblockingFilter::xSetMaxFilterLengthPQForCodingSubBlocks( const DeblockEdgeDir edgeDir, const CodingUnit& cu, const PredictionUnit& currPU, const bool& mvSubBlocks, const int& subBlockSize, const Area& areaPu )
 {
   if ( mvSubBlocks && currPU.Y().valid() )
   {
@@ -663,7 +663,7 @@ void LoopFilter::xSetMaxFilterLengthPQForCodingSubBlocks( const DeblockEdgeDir e
   }
 }
 
-void LoopFilter::xSetEdgefilterMultiple( const CodingUnit&    cu,
+void DeblockingFilter::xSetEdgefilterMultiple( const CodingUnit&    cu,
                                          const DeblockEdgeDir edgeDir,
                                          const Area&          area,
                                          const bool           bValue,
@@ -693,7 +693,7 @@ void LoopFilter::xSetEdgefilterMultiple( const CodingUnit&    cu,
   }
 }
 
-void LoopFilter::xSetLoopfilterParam( const CodingUnit& cu )
+void DeblockingFilter::xSetDeblockingFilterParam( const CodingUnit& cu )
 {
   const Slice& slice = *cu.slice;
   const PPS&   pps   = *cu.cs->pps;
@@ -714,7 +714,7 @@ void LoopFilter::xSetLoopfilterParam( const CodingUnit& cu )
     !( pps.getSubPicFromCU(cu).getloopFilterAcrossEnabledFlag() && pps.getSubPicFromCU(*cu.cs->getCU(pos.offset(0, -1), cu.chType)).getloopFilterAcrossEnabledFlag()));
 }
 
-unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const Position& localPos, const ChannelType chType ) const
+unsigned DeblockingFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const Position& localPos, const ChannelType chType ) const
 {
   // The boundary strength that is output by the function xGetBoundaryStrengthSingle is a multi component boundary strength that contains boundary strength for luma (bits 0 to 1), cb (bits 2 to 3) and cr (bits 4 to 5).
 
@@ -903,7 +903,7 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De
 }
 
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
-void LoopFilter::deriveLADFShift( const Pel* src, const int stride, int& shift, const DeblockEdgeDir edgeDir, const SPS sps )
+void DeblockingFilter::deriveLADFShift( const Pel* src, const int stride, int& shift, const DeblockEdgeDir edgeDir, const SPS sps )
 {
   uint32_t lumaLevel = 0;
   shift = sps.getLadfQpOffset(0);
@@ -932,7 +932,7 @@ void LoopFilter::deriveLADFShift( const Pel* src, const int stride, int& shift,
 }
 #endif
 
-void LoopFilter::xEdgeFilterLuma( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge )
+void DeblockingFilter::xEdgeFilterLuma( const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge )
 {
   const CompArea&  lumaArea = cu.block(COMPONENT_Y);
   const PreCalcValues& pcv = *cu.cs->pcv;
@@ -1180,7 +1180,7 @@ void LoopFilter::xEdgeFilterLuma( const CodingUnit& cu, const DeblockEdgeDir edg
 }
 
 
-void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge)
+void DeblockingFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir edgeDir, const int iEdge)
 {
   const Position lumaPos   = cu.Y().valid() ? cu.Y().pos() : recalcPosition( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].pos() );
   const Size     lumaSize  = cu.Y().valid() ? cu.Y().size() : recalcSize( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].size() );
@@ -1406,7 +1406,7 @@ void LoopFilter::xEdgeFilterChroma(const CodingUnit& cu, const DeblockEdgeDir ed
  \param bFilterSecondQ  decision weak filter/no filter for partQ
  \param bitDepthLuma    luma bit depth
 */
-inline void LoopFilter::xBilinearFilter(Pel* srcP, Pel* srcQ, int offset, int refMiddle, int refP, int refQ, int numberPSide, int numberQSide, const int* dbCoeffsP, const int* dbCoeffsQ, int tc) const
+inline void DeblockingFilter::xBilinearFilter(Pel* srcP, Pel* srcQ, int offset, int refMiddle, int refP, int refQ, int numberPSide, int numberQSide, const int* dbCoeffsP, const int* dbCoeffsQ, int tc) const
 {
   const char tc7[7] = { 6, 5, 4, 3, 2, 1, 1 };
   const char tc3[3] = { 6, 4, 2 };
@@ -1430,7 +1430,7 @@ inline void LoopFilter::xBilinearFilter(Pel* srcP, Pel* srcQ, int offset, int re
   }
 }
 
-inline void LoopFilter::xFilteringPandQ(Pel* src, int offset, int numberPSide, int numberQSide, int tc) const
+inline void DeblockingFilter::xFilteringPandQ(Pel* src, int offset, int numberPSide, int numberQSide, int tc) const
 {
   CHECK(numberPSide <= 3 && numberQSide <= 3, "Short filtering in long filtering function");
   Pel* srcP = src-offset;
@@ -1504,7 +1504,7 @@ inline void LoopFilter::xFilteringPandQ(Pel* src, int offset, int numberPSide, i
   xBilinearFilter(srcP,srcQ,offset,refMiddle,refP,refQ,numberPSide,numberQSide,dbCoeffsP,dbCoeffsQ,tc);
 }
 
-inline void LoopFilter::xPelFilterLuma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const int iThrCut, const bool bFilterSecondP, const bool bFilterSecondQ, const ClpRng& clpRng, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ) const
+inline void DeblockingFilter::xPelFilterLuma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const int iThrCut, const bool bFilterSecondP, const bool bFilterSecondQ, const ClpRng& clpRng, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ) const
 {
   int delta;
 
@@ -1604,7 +1604,7 @@ inline void LoopFilter::xPelFilterLuma(Pel* piSrc, const int iOffset, const int
  \param bPartQNoFilter  indicator to disable filtering on partQ
  \param bitDepthChroma  chroma bit depth
  */
-inline void LoopFilter::xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary, const bool isChromaHorCTBBoundary) const
+inline void DeblockingFilter::xPelFilterChroma(Pel* piSrc, const int iOffset, const int tc, const bool sw, const bool bPartPNoFilter, const bool bPartQNoFilter, const ClpRng& clpRng, const bool largeBoundary, const bool isChromaHorCTBBoundary) const
 {
   int delta;
 
@@ -1673,7 +1673,7 @@ inline void LoopFilter::xPelFilterChroma(Pel* piSrc, const int iOffset, const in
  \param tc              tc value
  \param piSrc           pointer to picture data
  */
-inline bool LoopFilter::xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ, bool isChromaHorCTBBoundary) const
+inline bool DeblockingFilter::xUseStrongFiltering(Pel* piSrc, const int iOffset, const int d, const int beta, const int tc, bool sidePisLarge, bool sideQisLarge, int maxFilterLengthP, int maxFilterLengthQ, bool isChromaHorCTBBoundary) const
 {
   const Pel m4 = piSrc[ 0          ];
   const Pel m3 = piSrc[-iOffset    ];
@@ -1732,7 +1732,7 @@ inline bool LoopFilter::xUseStrongFiltering(Pel* piSrc, const int iOffset, const
   }
 }
 
-inline int LoopFilter::xCalcDP(Pel* piSrc, const int iOffset, const bool isChromaHorCTBBoundary) const
+inline int DeblockingFilter::xCalcDP(Pel* piSrc, const int iOffset, const bool isChromaHorCTBBoundary) const
 {
   if (isChromaHorCTBBoundary)
   {
@@ -1744,12 +1744,12 @@ inline int LoopFilter::xCalcDP(Pel* piSrc, const int iOffset, const bool isChrom
   }
 }
 
-inline int LoopFilter::xCalcDQ( Pel* piSrc, const int iOffset ) const
+inline int DeblockingFilter::xCalcDQ( Pel* piSrc, const int iOffset ) const
 {
   return abs( piSrc[0] - 2 * piSrc[iOffset] + piSrc[iOffset * 2] );
 }
 
-inline unsigned LoopFilter::BsSet(unsigned val, const ComponentID compIdx) const { return (val << (compIdx << 1)); }
-inline unsigned LoopFilter::BsGet(unsigned val, const ComponentID compIdx) const { return ((val >> (compIdx << 1)) & 3); }
+inline unsigned DeblockingFilter::BsSet(unsigned val, const ComponentID compIdx) const { return (val << (compIdx << 1)); }
+inline unsigned DeblockingFilter::BsGet(unsigned val, const ComponentID compIdx) const { return ((val >> (compIdx << 1)) & 3); }
 
 //! \}
diff --git a/source/Lib/CommonLib/LoopFilter.h b/source/Lib/CommonLib/DeblockingFilter.h
similarity index 95%
rename from source/Lib/CommonLib/LoopFilter.h
rename to source/Lib/CommonLib/DeblockingFilter.h
index 566254023b08945c70b2fe6f243fd6006f45606e..9deb304c1bec1227e6c59374a634d424c6b9a1f7 100644
--- a/source/Lib/CommonLib/LoopFilter.h
+++ b/source/Lib/CommonLib/DeblockingFilter.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,12 +31,12 @@
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/** \file     LoopFilter.h
+/** \file     DeblockingFilter.h
     \brief    deblocking filter (header)
 */
 
-#ifndef __LOOPFILTER__
-#define __LOOPFILTER__
+#ifndef __DEBLOCKINGFILTER__
+#define __DEBLOCKINGFILTER__
 
 #include "CommonDef.h"
 #include "Unit.h"
@@ -52,7 +52,7 @@
 // ====================================================================================================================
 
 /// deblocking filter class
-class LoopFilter
+class DeblockingFilter
 {
 private:
   static_vector<char, MAX_NUM_PARTS_IN_CTU> m_aapucBS       [NUM_EDGE_DIR];         ///< Bs for [Ver/Hor][Y/U/V][Blk_Idx]
@@ -68,7 +68,7 @@ private:
 private:
 
   // set / get functions
-  void xSetLoopfilterParam        ( const CodingUnit& cu );
+  void xSetDeblockingFilterParam        ( const CodingUnit& cu );
 
   // filtering functions
   unsigned
@@ -107,8 +107,8 @@ private:
 
 public:
 
-  LoopFilter();
-  ~LoopFilter();
+  DeblockingFilter();
+  ~DeblockingFilter();
 
   /// CU-level deblocking function
   void xDeblockCU(CodingUnit& cu, const DeblockEdgeDir edgeDir);
@@ -120,8 +120,7 @@ public:
   void  destroy                   ();
 
   /// picture-level deblocking filter
-  void loopFilterPic              ( CodingStructure& cs
-                                    );
+  void deblockingFilterPic        ( CodingStructure& cs );
 
   static int getBeta              ( const int qp )
   {
diff --git a/source/Lib/CommonLib/DepQuant.cpp b/source/Lib/CommonLib/DepQuant.cpp
index 85c07184d0a1b77c1052b385fdbbe37fde6f12be..d7c0175b9d265a6b1a5caabe1a7f1e45338c157c 100644
--- a/source/Lib/CommonLib/DepQuant.cpp
+++ b/source/Lib/CommonLib/DepQuant.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -38,7 +38,7 @@
 
 #include <bitset>
 
-
+#include "ContextModelling.h"
 
 
 
@@ -386,9 +386,13 @@ namespace DQIntern
     scanInfo.eosbb      = ( scanInfo.insidePos == 0 );
     scanInfo.spt        = SCAN_ISCSBB;
     if(  scanInfo.insidePos == m_sbbMask && scanIdx > scanInfo.sbbSize && scanIdx < m_numCoeff - 1 )
+    {
       scanInfo.spt      = SCAN_SOCSBB;
+    }
     else if( scanInfo.eosbb && scanIdx > 0 && scanIdx < m_numCoeff - m_sbbSize )
+    {
       scanInfo.spt      = SCAN_EOCSBB;
+    }
     scanInfo.posX = m_scanId2BlkPos[scanIdx].x;
     scanInfo.posY = m_scanId2BlkPos[scanIdx].y;
     if( scanIdx )
@@ -433,10 +437,20 @@ namespace DQIntern
       return m_sigFracBits[std::max(((int) stateId) - 1, 0)];
     }
     inline const CoeffFracBits *gtxFracBits(unsigned stateId) const { return m_gtxFracBits; }
+#if JVET_W0046_RLSCP
+    inline int32_t lastOffset(unsigned scanIdx, int effWidth, int effHeight, bool reverseLast) const
+    {
+      // Sum of the m_lastBitsX / m_lastBitsY entries for scanIdx; reverseLast mirrors x and y as (eff - 1 - coord).
+      return reverseLast
+               ? m_lastBitsX[effWidth - 1 - m_scanId2Pos[scanIdx].x] + m_lastBitsY[effHeight - 1 - m_scanId2Pos[scanIdx].y]
+               : m_lastBitsX[m_scanId2Pos[scanIdx].x] + m_lastBitsY[m_scanId2Pos[scanIdx].y];
+    }
+#else
     inline int32_t              lastOffset(unsigned scanIdx) const
     {
       return m_lastBitsX[m_scanId2Pos[scanIdx].x] + m_lastBitsY[m_scanId2Pos[scanIdx].y];
     }
+#endif
 
   private:
     void  xSetLastCoeffOffset ( const FracBitsAccess& fracBitsAccess, const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID );
@@ -528,7 +542,7 @@ namespace DQIntern
       const unsigned      lastShift   = ( compID == COMPONENT_Y ? (log2Size+1)>>2 : Clip3<unsigned>(0,2,size>>3) );
       const unsigned      lastOffset  = ( compID == COMPONENT_Y ? ( prefixCtx[log2Size] ) : 0 );
       uint32_t            sumFBits    = 0;
-      unsigned            maxCtxId    = g_uiGroupIdx[std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, size) - 1];
+      unsigned            maxCtxId    = g_groupIdx[std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, size) - 1];
       for( unsigned ctxId = 0; ctxId < maxCtxId; ctxId++ )
       {
         const BinFracBits bits  = fracBitsAccess.getFracBitsArray( ctxSetLast( lastOffset + ( ctxId >> lastShift ) ) );
@@ -538,7 +552,7 @@ namespace DQIntern
       ctxBits  [ maxCtxId ]     = sumFBits + ( maxCtxId>3 ? ((maxCtxId-2)>>1)<<SCALE_BITS : 0 ) + bitOffset;
       for (unsigned pos = 0; pos < std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, size); pos++)
       {
-        lastBits[ pos ]         = ctxBits[ g_uiGroupIdx[ pos ] ];
+        lastBits[pos] = ctxBits[g_groupIdx[pos]];
       }
     }
   }
@@ -756,7 +770,9 @@ namespace DQIntern
       if( level )
       {
         if (enableScalingLists)
+        {
           invQScale = piDequantCoef[rasterPos];//scalingfactor*levelScale
+        }
         if (shift < 0 && (enableScalingLists || scanIdx == lastScanIdx))
         {
           invQScale <<= -shift;
@@ -843,13 +859,38 @@ namespace DQIntern
     uint8_t                     m_memory[ 8 * ( MAX_TB_SIZEY * MAX_TB_SIZEY + MLS_GRP_NUM ) ];
   };
 
+#if JVET_V0106_DEP_QUANT_ENC_OPT
+#define RICEMAX 64          // second dimension (entries per row) of g_goRiceBits in this branch
+#define RICE_ORDER_MAX 16   // first dimension (number of rows) of g_goRiceBits in this branch
+  const int32_t g_goRiceBits[RICE_ORDER_MAX][RICEMAX] =
+#else
 #define RICEMAX 32
   const int32_t g_goRiceBits[4][RICEMAX] =
+#endif
   {
+#if JVET_V0106_DEP_QUANT_ENC_OPT
+    { 32768, 65536, 98304, 131072, 163840, 196608, 262144, 262144, 327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288 },
+    { 65536, 65536, 98304, 98304, 131072, 131072, 163840, 163840, 196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520 },
+    { 98304, 98304, 98304, 98304, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752 },
+    { 131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448 },
+    { 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144 },
+    { 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376 },
+    { 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376 },
+    { 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144, 262144 },
+    { 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912, 294912 },
+    { 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680 },
+    { 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448 },
+    { 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216 },
+    { 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984 },
+    { 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752 },
+    { 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520, 491520 },
+    { 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288, 524288 },
+#else
     { 32768,  65536,  98304, 131072, 163840, 196608, 262144, 262144, 327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752},
     { 65536,  65536,  98304,  98304, 131072, 131072, 163840, 163840, 196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984},
     { 98304,  98304,  98304,  98304, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680},
     {131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376}
+#endif
   };
 
   class State
@@ -859,7 +900,7 @@ namespace DQIntern
     State( const RateEstimator& rateEst, CommonCtx& commonCtx, const int stateId );
 
     template<uint8_t numIPos>
-    inline void updateState(const ScanInfo &scanInfo, const State *prevStates, const Decision &decision);
+    inline void updateState(const ScanInfo &scanInfo, const State *prevStates, const Decision &decision, const int baseLevel, const bool extRiceFlag);
     inline void updateStateEOS(const ScanInfo &scanInfo, const State *prevStates, const State *skipStates,
                                const Decision &decision);
 
@@ -880,70 +921,82 @@ namespace DQIntern
       int64_t         rdCostA   = m_rdCost + pqDataA.deltaDist;
       int64_t         rdCostB   = m_rdCost + pqDataB.deltaDist;
       int64_t         rdCostZ   = m_rdCost;
-        if( m_remRegBins >= 4 )
+      if (m_remRegBins >= 4)
+      {
+        if (pqDataA.absLevel < 4)
         {
-          if( pqDataA.absLevel < 4 )
-            rdCostA += m_coeffFracBits.bits[ pqDataA.absLevel ];
-          else
-          {
-            const TCoeff value = ( pqDataA.absLevel - 4 ) >> 1;
-            rdCostA += m_coeffFracBits.bits[ pqDataA.absLevel - ( value << 1 ) ] + goRiceTab[ value < RICEMAX ? value : RICEMAX - 1 ];
-          }
-          if( pqDataB.absLevel < 4 )
-            rdCostB += m_coeffFracBits.bits[ pqDataB.absLevel ];
-          else
-          {
-            const TCoeff value = ( pqDataB.absLevel - 4 ) >> 1;
-            rdCostB += m_coeffFracBits.bits[ pqDataB.absLevel - ( value << 1 ) ] + goRiceTab[ value < RICEMAX ? value : RICEMAX - 1 ];
-          }
-          if( spt == SCAN_ISCSBB )
-          {
-            rdCostA += m_sigFracBits.intBits[ 1 ];
-            rdCostB += m_sigFracBits.intBits[ 1 ];
-            rdCostZ += m_sigFracBits.intBits[ 0 ];
-          }
-          else if( spt == SCAN_SOCSBB )
-          {
-            rdCostA += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 1 ];
-            rdCostB += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 1 ];
-            rdCostZ += m_sbbFracBits.intBits[ 1 ] + m_sigFracBits.intBits[ 0 ];
-          }
-          else if( m_numSigSbb )
-          {
-            rdCostA += m_sigFracBits.intBits[ 1 ];
-            rdCostB += m_sigFracBits.intBits[ 1 ];
-            rdCostZ += m_sigFracBits.intBits[ 0 ];
-          }
-          else
-          {
-            rdCostZ = decisionA.rdCost;
-          }
+          rdCostA += m_coeffFracBits.bits[pqDataA.absLevel];
+        }
+        else
+        {
+          const TCoeff value = (pqDataA.absLevel - 4) >> 1;
+          rdCostA +=
+            m_coeffFracBits.bits[pqDataA.absLevel - (value << 1)] + goRiceTab[value < RICEMAX ? value : RICEMAX - 1];
+        }
+        if (pqDataB.absLevel < 4)
+        {
+          rdCostB += m_coeffFracBits.bits[pqDataB.absLevel];
         }
         else
         {
-          rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[ pqDataA.absLevel <= m_goRiceZero ? pqDataA.absLevel - 1 : ( pqDataA.absLevel < RICEMAX ? pqDataA.absLevel : RICEMAX - 1 ) ];
-          rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[ pqDataB.absLevel <= m_goRiceZero ? pqDataB.absLevel - 1 : ( pqDataB.absLevel < RICEMAX ? pqDataB.absLevel : RICEMAX - 1 ) ];
-          rdCostZ += goRiceTab[ m_goRiceZero ];
+          const TCoeff value = (pqDataB.absLevel - 4) >> 1;
+          rdCostB +=
+            m_coeffFracBits.bits[pqDataB.absLevel - (value << 1)] + goRiceTab[value < RICEMAX ? value : RICEMAX - 1];
         }
-        if( rdCostA < decisionA.rdCost )
+        if (spt == SCAN_ISCSBB)
         {
-          decisionA.rdCost = rdCostA;
-          decisionA.absLevel = pqDataA.absLevel;
-          decisionA.prevId = m_stateId;
+          rdCostA += m_sigFracBits.intBits[1];
+          rdCostB += m_sigFracBits.intBits[1];
+          rdCostZ += m_sigFracBits.intBits[0];
         }
-        if( rdCostZ < decisionA.rdCost )
+        else if (spt == SCAN_SOCSBB)
         {
-          decisionA.rdCost = rdCostZ;
-          decisionA.absLevel = 0;
-          decisionA.prevId = m_stateId;
+          rdCostA += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[1];
+          rdCostB += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[1];
+          rdCostZ += m_sbbFracBits.intBits[1] + m_sigFracBits.intBits[0];
         }
-        if( rdCostB < decisionB.rdCost )
+        else if (m_numSigSbb)
         {
-          decisionB.rdCost = rdCostB;
-          decisionB.absLevel = pqDataB.absLevel;
-          decisionB.prevId = m_stateId;
+          rdCostA += m_sigFracBits.intBits[1];
+          rdCostB += m_sigFracBits.intBits[1];
+          rdCostZ += m_sigFracBits.intBits[0];
         }
+        else
+        {
+          rdCostZ = decisionA.rdCost;
+        }
+      }
+      else
+      {
+        rdCostA +=
+          (1 << SCALE_BITS)
+          + goRiceTab[pqDataA.absLevel <= m_goRiceZero ? pqDataA.absLevel - 1
+                                                       : (pqDataA.absLevel < RICEMAX ? pqDataA.absLevel : RICEMAX - 1)];
+        rdCostB +=
+          (1 << SCALE_BITS)
+          + goRiceTab[pqDataB.absLevel <= m_goRiceZero ? pqDataB.absLevel - 1
+                                                       : (pqDataB.absLevel < RICEMAX ? pqDataB.absLevel : RICEMAX - 1)];
+        rdCostZ += goRiceTab[m_goRiceZero];
       }
+      if (rdCostA < decisionA.rdCost)
+      {
+        decisionA.rdCost   = rdCostA;
+        decisionA.absLevel = pqDataA.absLevel;
+        decisionA.prevId   = m_stateId;
+      }
+      if (rdCostZ < decisionA.rdCost)
+      {
+        decisionA.rdCost   = rdCostZ;
+        decisionA.absLevel = 0;
+        decisionA.prevId   = m_stateId;
+      }
+      if (rdCostB < decisionB.rdCost)
+      {
+        decisionB.rdCost   = rdCostB;
+        decisionB.absLevel = pqDataB.absLevel;
+        decisionB.prevId   = m_stateId;
+      }
+    }
 
     inline void checkRdCostStart(int32_t lastOffset, const PQData &pqData, Decision &decision) const
     {
@@ -1004,6 +1057,31 @@ namespace DQIntern
     unsigned                  effHeight;
   };
 
+  // Picks the g_riceShift entry for a template absolute sum.
+  //
+  // The thresholds g_riceT[0..3] are tested in ascending index order; the
+  // index of the first threshold that 'sum' is below selects the entry, and
+  // index 4 is used when 'sum' is below none of them.
+  //
+  // State::updateState() uses the returned value as a right shift on sumAbs
+  // and then adds it to the looked-up Rice parameter.
+  unsigned templateAbsCompare(TCoeff sum)
+  {
+    // Number of thresholds examined, i.e. g_riceT[0] .. g_riceT[3].
+    const int numThresholds = 4;
+
+    int rangeIdx = 0;
+    for (; rangeIdx < numThresholds; rangeIdx++)
+    {
+      if (sum < g_riceT[rangeIdx])
+      {
+        break;
+      }
+    }
+
+    // rangeIdx equals numThresholds here when no threshold exceeded 'sum'.
+    return g_riceShift[rangeIdx];
+  }
 
   State::State( const RateEstimator& rateEst, CommonCtx& commonCtx, const int stateId )
     : m_sbbFracBits     { { 0, 0 } }
@@ -1015,7 +1093,7 @@ namespace DQIntern
   }
 
   template<uint8_t numIPos>
-  inline void State::updateState(const ScanInfo &scanInfo, const State *prevStates, const Decision &decision)
+  inline void State::updateState(const ScanInfo &scanInfo, const State *prevStates, const Decision &decision, const int baseLevel, const bool extRiceFlag)
   {
     m_rdCost = decision.rdCost;
     if( decision.prevId > -2 )
@@ -1120,8 +1198,19 @@ namespace DQIntern
           UPDATE(4);
         }
 #undef UPDATE
-        int sumAll = std::max(std::min(31, (int)sumAbs - 4 * 5), 0);
-        m_goRicePar = g_auiGoRiceParsCoeff[sumAll];
+        if (extRiceFlag)
+        {
+          unsigned currentShift = templateAbsCompare(sumAbs);
+          sumAbs = sumAbs >> currentShift;
+          int sumAll = std::max(std::min(31, (int)sumAbs - (int)baseLevel), 0);
+          m_goRicePar = g_goRiceParsCoeff[sumAll];
+          m_goRicePar += currentShift;
+        }
+        else
+        {
+          int sumAll = std::max(std::min(31, (int)sumAbs - 4 * 5), 0);
+          m_goRicePar = g_goRiceParsCoeff[sumAll];
+        }
       }
       else
       {
@@ -1158,9 +1247,20 @@ namespace DQIntern
           UPDATE(4);
         }
 #undef UPDATE
-        sumAbs = std::min<TCoeff>(31, sumAbs);
-        m_goRicePar = g_auiGoRiceParsCoeff[sumAbs];
-        m_goRiceZero = g_auiGoRicePosCoeff0(m_stateId, m_goRicePar);
+        if (extRiceFlag)
+        {
+          unsigned currentShift = templateAbsCompare(sumAbs);
+          sumAbs = sumAbs >> currentShift;
+          sumAbs = std::min<TCoeff>(31, sumAbs);
+          m_goRicePar = g_goRiceParsCoeff[sumAbs];
+          m_goRicePar += currentShift;
+        }
+        else
+        {
+          sumAbs = std::min<TCoeff>(31, sumAbs);
+          m_goRicePar = g_goRiceParsCoeff[sumAbs];
+        }
+        m_goRiceZero = g_goRicePosCoeff0(m_stateId, m_goRicePar);
       }
     }
   }
@@ -1290,8 +1390,15 @@ namespace DQIntern
     void    quant   ( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum, bool enableScalingLists, int* quantCoeff );
     void    dequant ( const TransformUnit& tu, CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP, bool enableScalingLists, int* quantCoeff );
 
+    int m_baseLevel;
+    bool m_extRiceRRCFlag;
+
   private:
+#if JVET_W0046_RLSCP
+    void    xDecideAndUpdate  ( const TCoeff absCoeff, const ScanInfo &scanInfo, bool zeroOut, TCoeff quantCoeff, int effWidth, int effHeight, bool reverseLast );
+#else
     void    xDecideAndUpdate  ( const TCoeff absCoeff, const ScanInfo& scanInfo, bool zeroOut, TCoeff quantCoeff);
+#endif
     void    xDecide           ( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions, bool zeroOut, TCoeff quantCoeff );
 
   private:
@@ -1354,23 +1461,31 @@ namespace DQIntern
     m_prevStates[3].checkRdCosts( spt, pqData[3], pqData[1], decisions[3], decisions[1]);
     if( spt==SCAN_EOCSBB )
     {
-        m_skipStates[0].checkRdCostSkipSbb( decisions[0] );
-        m_skipStates[1].checkRdCostSkipSbb( decisions[1] );
-        m_skipStates[2].checkRdCostSkipSbb( decisions[2] );
-        m_skipStates[3].checkRdCostSkipSbb( decisions[3] );
+      m_skipStates[0].checkRdCostSkipSbb(decisions[0]);
+      m_skipStates[1].checkRdCostSkipSbb(decisions[1]);
+      m_skipStates[2].checkRdCostSkipSbb(decisions[2]);
+      m_skipStates[3].checkRdCostSkipSbb(decisions[3]);
     }
 
     m_startState.checkRdCostStart( lastOffset, pqData[0], decisions[0] );
     m_startState.checkRdCostStart( lastOffset, pqData[2], decisions[2] );
   }
 
+#if JVET_W0046_RLSCP
+  void DepQuant::xDecideAndUpdate( const TCoeff absCoeff, const ScanInfo &scanInfo, bool zeroOut, TCoeff quantCoeff, int effWidth, int effHeight, bool reverseLast )
+#else
   void DepQuant::xDecideAndUpdate( const TCoeff absCoeff, const ScanInfo& scanInfo, bool zeroOut, TCoeff quantCoeff )
+#endif
   {
     Decision* decisions = m_trellis[ scanInfo.scanIdx ];
 
     std::swap( m_prevStates, m_currStates );
 
+#if JVET_W0046_RLSCP
+    xDecide( scanInfo.spt, absCoeff, lastOffset(scanInfo.scanIdx, effWidth, effHeight, reverseLast), decisions, zeroOut, quantCoeff );
+#else
     xDecide( scanInfo.spt, absCoeff, lastOffset(scanInfo.scanIdx), decisions, zeroOut, quantCoeff );
+#endif
 
     if( scanInfo.scanIdx )
     {
@@ -1388,40 +1503,40 @@ namespace DQIntern
         switch( scanInfo.nextNbInfoSbb.num )
         {
         case 0:
-          m_currStates[0].updateState<0>( scanInfo, m_prevStates, decisions[0] );
-          m_currStates[1].updateState<0>( scanInfo, m_prevStates, decisions[1] );
-          m_currStates[2].updateState<0>( scanInfo, m_prevStates, decisions[2] );
-          m_currStates[3].updateState<0>( scanInfo, m_prevStates, decisions[3] );
+          m_currStates[0].updateState<0>(scanInfo, m_prevStates, decisions[0], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[1].updateState<0>(scanInfo, m_prevStates, decisions[1], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[2].updateState<0>(scanInfo, m_prevStates, decisions[2], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[3].updateState<0>(scanInfo, m_prevStates, decisions[3], m_baseLevel, m_extRiceRRCFlag);
           break;
         case 1:
-          m_currStates[0].updateState<1>( scanInfo, m_prevStates, decisions[0] );
-          m_currStates[1].updateState<1>( scanInfo, m_prevStates, decisions[1] );
-          m_currStates[2].updateState<1>( scanInfo, m_prevStates, decisions[2] );
-          m_currStates[3].updateState<1>( scanInfo, m_prevStates, decisions[3] );
+          m_currStates[0].updateState<1>(scanInfo, m_prevStates, decisions[0], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[1].updateState<1>(scanInfo, m_prevStates, decisions[1], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[2].updateState<1>(scanInfo, m_prevStates, decisions[2], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[3].updateState<1>(scanInfo, m_prevStates, decisions[3], m_baseLevel, m_extRiceRRCFlag);
           break;
         case 2:
-          m_currStates[0].updateState<2>( scanInfo, m_prevStates, decisions[0] );
-          m_currStates[1].updateState<2>( scanInfo, m_prevStates, decisions[1] );
-          m_currStates[2].updateState<2>( scanInfo, m_prevStates, decisions[2] );
-          m_currStates[3].updateState<2>( scanInfo, m_prevStates, decisions[3] );
+          m_currStates[0].updateState<2>(scanInfo, m_prevStates, decisions[0], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[1].updateState<2>(scanInfo, m_prevStates, decisions[1], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[2].updateState<2>(scanInfo, m_prevStates, decisions[2], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[3].updateState<2>(scanInfo, m_prevStates, decisions[3], m_baseLevel, m_extRiceRRCFlag);
           break;
         case 3:
-          m_currStates[0].updateState<3>( scanInfo, m_prevStates, decisions[0] );
-          m_currStates[1].updateState<3>( scanInfo, m_prevStates, decisions[1] );
-          m_currStates[2].updateState<3>( scanInfo, m_prevStates, decisions[2] );
-          m_currStates[3].updateState<3>( scanInfo, m_prevStates, decisions[3] );
+          m_currStates[0].updateState<3>(scanInfo, m_prevStates, decisions[0], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[1].updateState<3>(scanInfo, m_prevStates, decisions[1], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[2].updateState<3>(scanInfo, m_prevStates, decisions[2], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[3].updateState<3>(scanInfo, m_prevStates, decisions[3], m_baseLevel, m_extRiceRRCFlag);
           break;
         case 4:
-          m_currStates[0].updateState<4>( scanInfo, m_prevStates, decisions[0] );
-          m_currStates[1].updateState<4>( scanInfo, m_prevStates, decisions[1] );
-          m_currStates[2].updateState<4>( scanInfo, m_prevStates, decisions[2] );
-          m_currStates[3].updateState<4>( scanInfo, m_prevStates, decisions[3] );
+          m_currStates[0].updateState<4>(scanInfo, m_prevStates, decisions[0], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[1].updateState<4>(scanInfo, m_prevStates, decisions[1], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[2].updateState<4>(scanInfo, m_prevStates, decisions[2], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[3].updateState<4>(scanInfo, m_prevStates, decisions[3], m_baseLevel, m_extRiceRRCFlag);
           break;
         default:
-          m_currStates[0].updateState<5>( scanInfo, m_prevStates, decisions[0] );
-          m_currStates[1].updateState<5>( scanInfo, m_prevStates, decisions[1] );
-          m_currStates[2].updateState<5>( scanInfo, m_prevStates, decisions[2] );
-          m_currStates[3].updateState<5>( scanInfo, m_prevStates, decisions[3] );
+          m_currStates[0].updateState<5>(scanInfo, m_prevStates, decisions[0], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[1].updateState<5>(scanInfo, m_prevStates, decisions[1], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[2].updateState<5>(scanInfo, m_prevStates, decisions[2], m_baseLevel, m_extRiceRRCFlag);
+          m_currStates[3].updateState<5>(scanInfo, m_prevStates, decisions[3], m_baseLevel, m_extRiceRRCFlag);
         }
       }
 
@@ -1440,6 +1555,8 @@ namespace DQIntern
     //===== reset / pre-init =====
     const TUParameters& tuPars  = *g_Rom.getTUPars( tu.blocks[compID], compID );
     m_quant.initQuantBlock    ( tu, compID, cQP, lambda );
+    m_baseLevel = ctx.getBaseLevel();
+    m_extRiceRRCFlag = tu.cs->sps->getSpsRangeExtension().getRrcRiceExtensionEnableFlag();
     TCoeff*       qCoeff      = tu.getCoeffs( compID ).buf;
     const TCoeff* tCoeff      = srcCoeff.buf;
     const int     numCoeff    = tu.blocks[compID].area();
@@ -1478,7 +1595,9 @@ namespace DQIntern
     {
       if (zeroOutforThres && (tuPars.m_scanId2BlkPos[firstTestPos].x >= ((tuPars.m_width == 32 && zeroOut) ? 16 : 32)
                            || tuPars.m_scanId2BlkPos[firstTestPos].y >= ((tuPars.m_height == 32 && zeroOut) ? 16 : 32)))
+      {
         continue;
+      }
       TCoeff thresTmp = (enableScalingLists) ? TCoeff(thres / (4 * quantCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx]))
                                              : TCoeff(thres / (4 * defaultQuantisationCoefficient));
 
@@ -1519,10 +1638,20 @@ namespace DQIntern
       if (enableScalingLists)
       {
         m_quant.initQuantBlock(tu, compID, cQP, lambda, quantCoeff[scanInfo.rasterPos]);
+#if JVET_W0046_RLSCP
+        xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos]), scanInfo, (zeroOut && (scanInfo.posX >= effWidth || scanInfo.posY >= effHeight)), quantCoeff[scanInfo.rasterPos], effectWidth, effectHeight, tu.cu->slice->getReverseLastSigCoeffFlag() );
+#else
         xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos]), scanInfo, (zeroOut && (scanInfo.posX >= effWidth || scanInfo.posY >= effHeight)), quantCoeff[scanInfo.rasterPos] );
+#endif
       }
       else
+      {
+#if JVET_W0046_RLSCP
+        xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos]), scanInfo, (zeroOut && (scanInfo.posX >= effWidth || scanInfo.posY >= effHeight)), defaultQuantisationCoefficient, effectWidth, effectHeight, tu.cu->slice->getReverseLastSigCoeffFlag() );
+#else
         xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos]), scanInfo, (zeroOut && (scanInfo.posX >= effWidth || scanInfo.posY >= effHeight)), defaultQuantisationCoefficient );
+#endif
+      }
     }
 
     //===== find best path =====
diff --git a/source/Lib/CommonLib/DepQuant.h b/source/Lib/CommonLib/DepQuant.h
index eb2685a2e02adfb9277abb74d81e36b0a77f0fed..12168a6037131fa2d67f34106103ede45c7866dc 100644
--- a/source/Lib/CommonLib/DepQuant.h
+++ b/source/Lib/CommonLib/DepQuant.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/HRD.cpp b/source/Lib/CommonLib/HRD.cpp
index fcd707c063c624e97f4a0630f0855e3c6b9ce49b..b350e913bb4060f3cdd389eb0d7e12bd4d3cdfa5 100644
--- a/source/Lib/CommonLib/HRD.cpp
+++ b/source/Lib/CommonLib/HRD.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/HRD.h b/source/Lib/CommonLib/HRD.h
index 07305789f6ead75a032403cc35059a79c72934b1..2762958eb8e56e6377b8e86b90931db3172867ca 100644
--- a/source/Lib/CommonLib/HRD.h
+++ b/source/Lib/CommonLib/HRD.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/Hash.cpp b/source/Lib/CommonLib/Hash.cpp
index a6e7844e9a001c3d2de86a4234b13c86e2e70944..67bcdc8856c57e7ed14cd4aaf1b269e804261fba 100644
--- a/source/Lib/CommonLib/Hash.cpp
+++ b/source/Lib/CommonLib/Hash.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -124,6 +124,7 @@ TComHash::~TComHash()
     m_lookupTable = NULL;
   }
 }
+
 void TComHash::create(int picWidth, int picHeight)
 {
   if (m_lookupTable)
@@ -502,6 +503,7 @@ bool TComHash::isBlock2x2ColSameValue(unsigned char* p, bool includeAllComponent
 
   return true;
 }
+
 bool TComHash::isHorizontalPerfectLuma(const Pel* srcPel, int stride, int width, int height)
 {
   for (int i = 0; i < height; i++)
@@ -532,6 +534,7 @@ bool TComHash::isVerticalPerfectLuma(const Pel* srcPel, int stride, int width, i
   }
   return true;
 }
+
 bool TComHash::getBlockHashValue(const PelUnitBuf &curPicBuf, int width, int height, int xStart, int yStart, const BitDepths bitDepths, uint32_t& hashValue1, uint32_t& hashValue2)
 {
   int addValue = m_blockSizeToIndex[width][height];
diff --git a/source/Lib/CommonLib/Hash.h b/source/Lib/CommonLib/Hash.h
index 2a47b0ffb60fd3c46044d73439588bbe73e35c3f..06686200ae00754c758c149b6d7d03efcb6756ae 100644
--- a/source/Lib/CommonLib/Hash.h
+++ b/source/Lib/CommonLib/Hash.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/IbcHashMap.cpp b/source/Lib/CommonLib/IbcHashMap.cpp
index 3b0b2d2f12038f79e7bdcf1166d9d0c5a5d03bce..7c4cb01b6eb9f30eb1b903e277284543611e0a5c 100644
--- a/source/Lib/CommonLib/IbcHashMap.cpp
+++ b/source/Lib/CommonLib/IbcHashMap.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/IbcHashMap.h b/source/Lib/CommonLib/IbcHashMap.h
index bd90e10fdb3ffa8f68a6363e75f89aa379b4c024..f0ddbae6c894bc911adf9f75f3525516016f7940 100644
--- a/source/Lib/CommonLib/IbcHashMap.h
+++ b/source/Lib/CommonLib/IbcHashMap.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index 5cfd46a0f85e181c1ba9c757072b804186e2dbfa..34adec56d9fcc87f15a086e5e91a9bbbac2dd1b8 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -698,13 +698,22 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
   }
   else
   {
-    int xFrac = mv.hor & ((1 << shiftHor) - 1);
-    int yFrac = mv.ver & ((1 << shiftVer) - 1);
+    int xFrac, yFrac;
     if (isIBC)
     {
       xFrac = yFrac = 0;
       JVET_J0090_SET_CACHE_ENABLE(false);
     }
+    else if (isLuma(compID))
+    {
+      xFrac = mv.hor & 15;
+      yFrac = mv.ver & 15;
+    }
+    else
+    {
+      xFrac = (mv.hor << (1 - ::getComponentScaleX(compID, chFmt))) & 31;
+      yFrac = (mv.ver << (1 - ::getComponentScaleY(compID, chFmt))) & 31;
+    }
 
     PelBuf & dstBuf = dstPic.bufs[compID];
     unsigned width  = dstBuf.width;
@@ -752,12 +761,12 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
     if (yFrac == 0)
     {
       m_if.filterHor(compID, (Pel *) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight,
-                     xFrac, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
+                     xFrac, rndRes, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
     }
     else if (xFrac == 0)
     {
       m_if.filterVer(compID, (Pel *) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight,
-                     yFrac, true, rndRes, chFmt, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
+                     yFrac, true, rndRes, clpRng, bilinearMC, bilinearMC, useAltHpelIf);
     }
     else
     {
@@ -774,12 +783,12 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
         vFilterSize = NTAPS_BILINEAR;
       }
       m_if.filterHor(compID, (Pel *) refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf,
-                     tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng,
-                     bilinearMC, bilinearMC, useAltHpelIf);
+                     tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, clpRng, bilinearMC,
+                     bilinearMC, useAltHpelIf);
       JVET_J0090_SET_CACHE_ENABLE(false);
       m_if.filterVer(compID, (Pel *) tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf,
-                     dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng, bilinearMC,
-                     bilinearMC, useAltHpelIf);
+                     dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, clpRng, bilinearMC, bilinearMC,
+                     useAltHpelIf);
     }
     JVET_J0090_SET_CACHE_ENABLE(
       (srcPadStride == 0)
@@ -869,7 +878,11 @@ bool InterPrediction::isSubblockVectorSpreadOverLimit( int a, int b, int c, int
   return false;
 }
 
+#if GDR_ENABLED
+bool InterPrediction::xPredAffineBlk(const ComponentID &compID, const PredictionUnit &pu, const Picture *refPic, const Mv *_mv, PelUnitBuf &dstPic, const bool &bi, const ClpRng &clpRng, bool genChromaMv, const std::pair<int, int> scalingRatio)
+#else
 void InterPrediction::xPredAffineBlk(const ComponentID &compID, const PredictionUnit &pu, const Picture *refPic, const Mv *_mv, PelUnitBuf &dstPic, const bool &bi, const ClpRng &clpRng, bool genChromaMv, const std::pair<int, int> scalingRatio)
+#endif
 {
 
   JVET_J0090_SET_REF_PICTURE( refPic, compID );
@@ -880,6 +893,13 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction
   Mv mvLT =_mv[0];
   Mv mvRT =_mv[1];
   Mv mvLB =_mv[2];
+#if GDR_ENABLED
+  bool allOk = true;
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  const int pux = pu.lx();
+  const int puy = pu.ly();
+#endif
 
   // get affine sub-block width and height
   const int width  = pu.Y().width;
@@ -1104,6 +1124,16 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction
             iMvScaleTmpVer = tmpMv.getVer();
           }
         }
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          Position subPuPos = Position(pux + w + blockWidth, puy + h + blockHeight);
+          Mv subPuMv = Mv(iMvScaleTmpHor, iMvScaleTmpVer);
+          bool puClean = cs.isClean(subPuPos, subPuMv, refPic);
+
+          allOk = allOk && puClean;
+        }
+#endif
       }
       else
       {
@@ -1124,6 +1154,17 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction
         }
         iMvScaleTmpHor = curMv.hor;
         iMvScaleTmpVer = curMv.ver;
+
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          Position subPuPos = Position(pux + (w + blockWidth) * 2, puy + (h + blockHeight) * 2);
+          Mv subPuMv = Mv(iMvScaleTmpHor, iMvScaleTmpVer);
+          bool puClean = cs.isClean(subPuPos, subPuMv, refPic);
+
+          allOk = allOk && puClean;
+        }
+#endif
       }
 
       if( xPredInterBlkRPR( scalingRatio, *pu.cs->pps, CompArea( compID, chFmt, pu.blocks[compID].offset( w, h ), Size( blockWidth, blockHeight ) ), refPic, Mv( iMvScaleTmpHor, iMvScaleTmpVer ), dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, bi, wrapRef, clpRng, 2 ) )
@@ -1135,25 +1176,19 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction
         // get the MV in high precision
         int xFrac, yFrac, xInt, yInt;
 
-        if (!iScaleX)
+        if (isLuma(compID))
         {
           xInt  = iMvScaleTmpHor >> 4;
           xFrac = iMvScaleTmpHor & 15;
-        }
-        else
-        {
-          xInt  = iMvScaleTmpHor >> 5;
-          xFrac = iMvScaleTmpHor & 31;
-        }
-        if (!iScaleY)
-        {
           yInt  = iMvScaleTmpVer >> 4;
           yFrac = iMvScaleTmpVer & 15;
         }
         else
         {
-          yInt  = iMvScaleTmpVer >> 5;
-          yFrac = iMvScaleTmpVer & 31;
+          xInt  = (iMvScaleTmpHor << (1 - iScaleX)) >> 5;
+          xFrac = (iMvScaleTmpHor << (1 - iScaleX)) & 31;
+          yInt  = (iMvScaleTmpVer << (1 - iScaleY)) >> 5;
+          yFrac = (iMvScaleTmpVer << (1 - iScaleY)) & 31;
         }
 
         const CPelBuf refBuf = refPic->getRecoBuf(
@@ -1176,19 +1211,19 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction
 
         if (yFrac == 0)
         {
-          m_if.filterHor(compID, (Pel *) ref, refStride, dst, dstStride, bw, bh, xFrac, isLast, chFmt, clpRng);
+          m_if.filterHor(compID, (Pel *) ref, refStride, dst, dstStride, bw, bh, xFrac, isLast, clpRng);
         }
         else if (xFrac == 0)
         {
-          m_if.filterVer(compID, (Pel *) ref, refStride, dst, dstStride, bw, bh, yFrac, true, isLast, chFmt, clpRng);
+          m_if.filterVer(compID, (Pel *) ref, refStride, dst, dstStride, bw, bh, yFrac, true, isLast, clpRng);
         }
         else
         {
           m_if.filterHor(compID, (Pel *) ref - ((vFilterSize >> 1) - 1) * refStride, refStride, tmpBuf.buf,
-                         tmpBuf.stride, bw, bh + vFilterSize - 1, xFrac, false, chFmt, clpRng);
+                         tmpBuf.stride, bw, bh + vFilterSize - 1, xFrac, false, clpRng);
           JVET_J0090_SET_CACHE_ENABLE(false);
           m_if.filterVer(compID, tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dst, dstStride,
-                         bw, bh, yFrac, false, isLast, chFmt, clpRng);
+                         bw, bh, yFrac, false, isLast, clpRng);
           JVET_J0090_SET_CACHE_ENABLE(true);
         }
         if (enablePROF)
@@ -1234,6 +1269,9 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction
       }
     }
   }
+#if GDR_ENABLED
+  return allOk;
+#endif
 }
 
 void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths)
@@ -2314,8 +2352,8 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio,
   const ComponentID compID = blk.compID;
   const bool          rndRes = !bi;
 
-  int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleX( compID, chFmt );
-  int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + ::getComponentScaleY( compID, chFmt );
+  int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + (isLuma(compID) ? 0 : 1);
+  int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + (isLuma(compID) ? 0 : 1);
 
   int width = blk.width;
   int height = blk.height;
@@ -2410,8 +2448,7 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio,
 
     int addX = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getHorCollocatedChromaFlag() ) * 8 * ( scalingRatio.first - SCALE_1X.first );
     int addY = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getVerCollocatedChromaFlag() ) * 8 * ( scalingRatio.second - SCALE_1X.second );
-    
-#if FIX_SUBPICS_W_RPR
+
     int boundLeft   = 0;
     int boundRight  = refPicWidth >> ::getComponentScaleX( compID, chFmt );
     int boundTop    = 0;
@@ -2427,7 +2464,6 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio,
         boundBottom = curSubPic.getSubPicBottom() >> ::getComponentScaleY( compID, chFmt );
       }
     }
-#endif
 
     x0Int = ( ( posX << ( 4 + ::getComponentScaleX( compID, chFmt ) ) ) + mv.getHor() ) * (int64_t)scalingRatio.first + addX;
     x0Int = SIGN( x0Int ) * ( ( llabs( x0Int ) + ( (long long)1 << ( 7 + ::getComponentScaleX( compID, chFmt ) ) ) ) >> ( 8 + ::getComponentScaleX( compID, chFmt ) ) ) + ( ( refPic->getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) << ( ( posShift - ::getComponentScaleX( compID, chFmt ) ) ) );
@@ -2439,18 +2475,10 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio,
     int vFilterSize = isLuma( compID ) ? NTAPS_LUMA : NTAPS_CHROMA;
 
     int yInt0 = ( (int32_t)y0Int + offY ) >> posShift;
-#if FIX_SUBPICS_W_RPR
     yInt0 = std::min( std::max( boundTop - (NTAPS_LUMA / 2), yInt0 ), boundBottom + (NTAPS_LUMA / 2) );
-#else
-    yInt0 = std::min( std::max( -(NTAPS_LUMA / 2), yInt0 ), ( refPicHeight >> ::getComponentScaleY( compID, chFmt ) ) + (NTAPS_LUMA / 2) );
-#endif
 
     int xInt0 = ( (int32_t)x0Int + offX ) >> posShift;
-#if FIX_SUBPICS_W_RPR
     xInt0 = std::min( std::max( boundLeft - (NTAPS_LUMA / 2), xInt0 ), boundRight + (NTAPS_LUMA / 2) );
-#else
-    xInt0 = std::min( std::max( -(NTAPS_LUMA / 2), xInt0 ), ( refPicWidth >> ::getComponentScaleX( compID, chFmt ) ) + (NTAPS_LUMA / 2) );
-#endif
 
     int refHeight = ((((int32_t)y0Int + (height-1) * stepY) + offY ) >> posShift) - ((((int32_t)y0Int + 0 * stepY) + offY ) >> posShift) + 1;
     refHeight = std::max<int>( 1, refHeight );
@@ -2465,11 +2493,7 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio,
     {
       int posX = (int32_t)x0Int + col * stepX;
       xInt = ( posX + offX ) >> posShift;
-#if FIX_SUBPICS_W_RPR
       xInt = std::min( std::max( boundLeft - (NTAPS_LUMA / 2), xInt ), boundRight + (NTAPS_LUMA / 2) );
-#else
-      xInt = std::min( std::max( -(NTAPS_LUMA / 2), xInt ), ( refPicWidth >> ::getComponentScaleX( compID, chFmt ) ) + (NTAPS_LUMA / 2) );
-#endif
       int xFrac = ( ( posX + offX ) >> ( posShift - shiftHor ) ) & ( ( 1 << shiftHor ) - 1 );
 
       CHECK( xInt0 > xInt, "Wrong horizontal starting point" );
@@ -2478,18 +2502,16 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio,
       refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, Size( 1, refHeight ) ), wrapRef );
       Pel* tempBuf = buffer + col;
 
-      m_if.filterHor( compID, (Pel*)refBuf.buf - ( ( vFilterSize >> 1 ) - 1 ) * refBuf.stride, refBuf.stride, tempBuf, tmpStride, 1, refHeight + vFilterSize - 1 + extSize, xFrac, false, chFmt, clpRng, xFilter, false, useAltHpelIf && scalingRatio.first == 1 << SCALE_RATIO_BITS );
+      m_if.filterHor(compID, (Pel *) refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tempBuf,
+                     tmpStride, 1, refHeight + vFilterSize - 1 + extSize, xFrac, false, clpRng, xFilter, false,
+                     useAltHpelIf && scalingRatio.first == 1 << SCALE_RATIO_BITS);
     }
 
     for( row = 0; row < height; row++ )
     {
       int posY = (int32_t)y0Int + row * stepY;
       yInt = ( posY + offY ) >> posShift;
-#if FIX_SUBPICS_W_RPR
       yInt = std::min( std::max( boundTop - (NTAPS_LUMA / 2), yInt ), boundBottom + (NTAPS_LUMA / 2) );
-#else
-      yInt = std::min( std::max( -(NTAPS_LUMA / 2), yInt ), ( refPicHeight >> ::getComponentScaleY( compID, chFmt ) ) + (NTAPS_LUMA / 2) );
-#endif
       int yFrac = ( ( posY + offY ) >> ( posShift - shiftVer ) ) & ( ( 1 << shiftVer ) - 1 );
 
       CHECK( yInt0 > yInt, "Wrong vertical starting point" );
@@ -2497,7 +2519,9 @@ bool InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio,
       Pel* tempBuf = buffer + ( yInt - yInt0 ) * tmpStride;
 
       JVET_J0090_SET_CACHE_ENABLE( false );
-      m_if.filterVer( compID, tempBuf + ( ( vFilterSize >> 1 ) - 1 ) * tmpStride, tmpStride, dst + row * dstStride, dstStride, width, 1, yFrac, false, rndRes, chFmt, clpRng, yFilter, false, useAltHpelIf && scalingRatio.second == 1 << SCALE_RATIO_BITS );
+      m_if.filterVer(compID, tempBuf + ((vFilterSize >> 1) - 1) * tmpStride, tmpStride, dst + row * dstStride,
+                     dstStride, width, 1, yFrac, false, rndRes, clpRng, yFilter, false,
+                     useAltHpelIf && scalingRatio.second == 1 << SCALE_RATIO_BITS);
       JVET_J0090_SET_CACHE_ENABLE( true );
     }
   }
diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h
index 6b67d430fbef514b8b34903db31209e541b9368c..0c9a69509b2e2d53fd977022d1f869233da7d91d 100644
--- a/source/Lib/CommonLib/InterPrediction.h
+++ b/source/Lib/CommonLib/InterPrediction.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -137,7 +137,11 @@ protected:
   void xCalcBIOPar              (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth);
   void xCalcBlkGradient         (int sx, int sy, int    *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize);
   void xWeightedAverage         ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, const bool lumaOnly = false, const bool chromaOnly = false, PelUnitBuf* yuvDstTmp = NULL );
+#if GDR_ENABLED
+  bool xPredAffineBlk           ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const bool genChromaMv = false, const std::pair<int, int> scalingRatio = SCALE_1X );
+#else
   void xPredAffineBlk           ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const bool genChromaMv = false, const std::pair<int, int> scalingRatio = SCALE_1X );
+#endif
 
   static bool xCheckIdenticalMotion( const PredictionUnit& pu );
 
diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp
index cfbd190a0ac991aced717d4831f578df2e2b9600..eb79626c68e5b8971917ea5723ea30d3407f3522 100644
--- a/source/Lib/CommonLib/InterpolationFilter.cpp
+++ b/source/Lib/CommonLib/InterpolationFilter.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -289,26 +289,6 @@ const TFilterCoeff InterpolationFilter::m_chromaFilterRPR2[CHROMA_INTERPOLATION_
   { -1, 18, 30, 17 }
 };
 
-const TFilterCoeff InterpolationFilter::m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR] =
-{
-  { 64,  0, },
-  { 60,  4, },
-  { 56,  8, },
-  { 52, 12, },
-  { 48, 16, },
-  { 44, 20, },
-  { 40, 24, },
-  { 36, 28, },
-  { 32, 32, },
-  { 28, 36, },
-  { 24, 40, },
-  { 20, 44, },
-  { 16, 48, },
-  { 12, 52, },
-  { 8, 56, },
-  { 4, 60, },
-};
-
 const TFilterCoeff InterpolationFilter::m_bilinearFilterPrec4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR] =
 {
   { 16,  0, },
@@ -733,10 +713,11 @@ void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int sr
  * \param  height     Height of block
  * \param  frac       Fractional sample offset
  * \param  isLast     Flag indicating whether it is the last filtering operation
- * \param  fmt        Chroma format
  * \param  bitDepth   Bit depth
  */
-void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR, bool useAltHpelIf)
+void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride,
+                                    int width, int height, int frac, bool isLast, const ClpRng &clpRng, int nFilterIdx,
+                                    bool biMCForDMVR, bool useAltHpelIf)
 {
   if( frac == 0 && nFilterIdx < 2 )
   {
@@ -784,19 +765,21 @@ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, in
   }
   else
   {
-    const uint32_t csx = getComponentScaleX( compID, fmt );
-    CHECK( frac < 0 || csx >= 2 || ( frac << ( 1 - csx ) ) >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" );
+    CHECK(frac < 0 || frac >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction");
     if( nFilterIdx == 3 )
     {
-      filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR1[frac << ( 1 - csx )], biMCForDMVR );
+      filterHor<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR1[frac],
+                              biMCForDMVR);
     }
     else if( nFilterIdx == 4 )
     {
-      filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR2[frac << ( 1 - csx )], biMCForDMVR );
+      filterHor<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilterRPR2[frac],
+                              biMCForDMVR);
     }
     else
     {
-      filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac << ( 1 - csx )], biMCForDMVR );
+      filterHor<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac],
+                              biMCForDMVR);
     }
   }
 }
@@ -815,10 +798,11 @@ void InterpolationFilter::filterHor(const ComponentID compID, Pel const *src, in
  * \param  frac       Fractional sample offset
  * \param  isFirst    Flag indicating whether it is the first filtering operation
  * \param  isLast     Flag indicating whether it is the last filtering operation
- * \param  fmt        Chroma format
  * \param  bitDepth   Bit depth
  */
-void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMCForDMVR, bool useAltHpelIf)
+void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride,
+                                    int width, int height, int frac, bool isFirst, bool isLast, const ClpRng &clpRng,
+                                    int nFilterIdx, bool biMCForDMVR, bool useAltHpelIf)
 {
   if( frac == 0 && nFilterIdx < 2 )
   {
@@ -866,19 +850,21 @@ void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, in
   }
   else
   {
-    const uint32_t csy = getComponentScaleY( compID, fmt );
-    CHECK( frac < 0 || csy >= 2 || ( frac << ( 1 - csy ) ) >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" );
+    CHECK(frac < 0 || frac >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction");
     if( nFilterIdx == 3 )
     {
-      filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilterRPR1[frac << ( 1 - csy )], biMCForDMVR );
+      filterVer<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast,
+                              m_chromaFilterRPR1[frac], biMCForDMVR);
     }
     else if( nFilterIdx == 4 )
     {
-      filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilterRPR2[frac << ( 1 - csy )], biMCForDMVR );
+      filterVer<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast,
+                              m_chromaFilterRPR2[frac], biMCForDMVR);
     }
     else
     {
-      filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilter[frac << ( 1 - csy )], biMCForDMVR );
+      filterVer<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast,
+                              m_chromaFilter[frac], biMCForDMVR);
     }
   }
 }
diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h
index 278e7cf87e67885465c06d6547ed180964d1821e..373bc1cbb74d02936ad764deb7abc2d586e5869f 100644
--- a/source/Lib/CommonLib/InterpolationFilter.h
+++ b/source/Lib/CommonLib/InterpolationFilter.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -68,7 +68,6 @@ public:
   static const TFilterCoeff m_affineLumaFilterRPR2[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps 2x
 private:
   static const TFilterCoeff m_lumaAltHpelIFilter[NTAPS_LUMA]; ///< Luma filter taps
-  static const TFilterCoeff m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps
   static const TFilterCoeff m_bilinearFilterPrec4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps
 public:
   template<bool isFirst, bool isLast>
@@ -102,8 +101,12 @@ public:
   template <X86_VEXT vext>
   void _initInterpolationFilterX86();
 #endif
-  void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac,               bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMCForDMVR = false, bool useAltHpelIf = false);
-  void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMCForDMVR = false, bool useAltHpelIf = false);
+  void filterHor(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width,
+                 int height, int frac, bool isLast, const ClpRng &clpRng, int nFilterIdx = 0, bool biMCForDMVR = false,
+                 bool useAltHpelIf = false);
+  void filterVer(const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width,
+                 int height, int frac, bool isFirst, bool isLast, const ClpRng &clpRng, int nFilterIdx = 0,
+                 bool biMCForDMVR = false, bool useAltHpelIf = false);
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   void cacheAssign( CacheModel *cache ) { m_cacheModel = cache; }
 #endif
diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp
index c70917b889ea91a2f1820d787289a70801a4b292..9a167074801fd36d011e205b84a49bd73beff30d 100644
--- a/source/Lib/CommonLib/IntraPrediction.cpp
+++ b/source/Lib/CommonLib/IntraPrediction.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -182,7 +182,7 @@ Pel IntraPrediction::xGetPredValDc( const CPelBuf &pSrc, const Size &dstSize )
 
 int IntraPrediction::getModifiedWideAngle( int width, int height, int predMode )
 {
-  //The function returns a 'modified' wide angle index, given that it is not necessary 
+  //The function returns a 'modified' wide angle index, given that it is not necessary
   //in this software implementation to reserve the values 0 and 1 for Planar and DC to generate the prediction signal.
   //It should only be used to obtain the intraPredAngle parameter.
   //To simply obtain the wide angle index, the function PU::getWideAngle should be used instead.
@@ -196,7 +196,7 @@ int IntraPrediction::getModifiedWideAngle( int width, int height, int predMode )
     }
     else if (height > width && predMode > VDIA_IDX - modeShift[deltaSize])
     {
-      predMode -= (VDIA_IDX - 1); 
+      predMode -= (VDIA_IDX - 1);
     }
   }
   return predMode;
diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h
index ff4c6d12e7f6c5ad0fdd889b7f19668a82232cef..8f5d504d507441911f18aede3105cacb08063233 100644
--- a/source/Lib/CommonLib/IntraPrediction.h
+++ b/source/Lib/CommonLib/IntraPrediction.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/MCTS.cpp b/source/Lib/CommonLib/MCTS.cpp
index b0795736dcb039e78d8a37a4ae5fd080995dbdd6..1690bd4bd16b7c08bec79087717eed762b5781ca 100644
--- a/source/Lib/CommonLib/MCTS.cpp
+++ b/source/Lib/CommonLib/MCTS.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/MCTS.h b/source/Lib/CommonLib/MCTS.h
index 3d9d3bf52e4207673645aabed5160ecc2fc41c5a..315e8dad9dc74acfad9dc32016bbd84789c1799d 100644
--- a/source/Lib/CommonLib/MCTS.h
+++ b/source/Lib/CommonLib/MCTS.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/MatrixIntraPrediction.cpp b/source/Lib/CommonLib/MatrixIntraPrediction.cpp
index b8296b1bd01a768c3a02d7709019938856f80c37..62ac705053f3cab9c9cd5e029f9f37f491ff0e77 100644
--- a/source/Lib/CommonLib/MatrixIntraPrediction.cpp
+++ b/source/Lib/CommonLib/MatrixIntraPrediction.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/MatrixIntraPrediction.h b/source/Lib/CommonLib/MatrixIntraPrediction.h
index 4bbd750d3b90b06ab5142b5bbf5e89614a8b30b7..67f6c5fd343bfb7e9819596831aefe1861415b22 100644
--- a/source/Lib/CommonLib/MatrixIntraPrediction.h
+++ b/source/Lib/CommonLib/MatrixIntraPrediction.h
@@ -1,9 +1,9 @@
-ï»¿/* The copyright in this software is being made available under the BSD
+/* The copyright in this software is being made available under the BSD
 * License, included below. This software may be subject to other third party
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/MipData.h b/source/Lib/CommonLib/MipData.h
index bc7ea3e6222c9254e9c5ddd1ef900d13c814bca0..bc8cf49afb0d419268a6922dc2f06341cbbf7a16 100644
--- a/source/Lib/CommonLib/MipData.h
+++ b/source/Lib/CommonLib/MipData.h
@@ -1,9 +1,9 @@
-ï»¿/* The copyright in this software is being made available under the BSD
+/* The copyright in this software is being made available under the BSD
 * License, included below. This software may be subject to other third party
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/MotionInfo.h b/source/Lib/CommonLib/MotionInfo.h
index 20059c29ce0e150cb974be3494775170ce0383b1..5a9d76e9f051609683b9fff51fb38770454d2c4a 100644
--- a/source/Lib/CommonLib/MotionInfo.h
+++ b/source/Lib/CommonLib/MotionInfo.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -48,12 +48,43 @@
 // ====================================================================================================================
 // Type definition
 // ====================================================================================================================
+#if GDR_ENABLED
+enum MvpType   // per-candidate tag type used by AMVPInfo::mvType and AffineAMVPInfo::mvType{LT,RT,LB}; NOTE(review): presumably records where a predictor came from - confirm
+{
+  MVP_LEFT,
+  MVP_ABOVE,
+  MVP_ABOVE_RIGHT,
+  MVP_BELOW_LEFT,
+  MVP_ABOVE_LEFT,
+
+  MVP_BELOW_RIGHT,
+  MVP_COMPOSITE,
+
+  MVP_TMVP_C0,
+  MVP_TMVP_C1,
+  MVP_HMVP,
+  MVP_ZERO,
+
+  AFFINE_INHERIT,
+  AFFINE_INHERIT_LB_RB,
+
+  NUM_MVPTYPES   // last enumerator: its value equals the number of enumerators declared above (13)
+};
+#endif
 
 /// parameters for AMVP
 struct AMVPInfo
 {
   Mv       mvCand[ AMVP_MAX_NUM_CANDS_MEM ];  ///< array of motion vector predictor candidates
   unsigned numCand;                       ///< number of motion vector predictor candidates
+#if GDR_ENABLED
+  bool     allCandSolidInAbove;               ///< GDR builds only; NOTE(review): meaning of "solid" is not visible in this header - confirm
+  bool     mvSolid[AMVP_MAX_NUM_CANDS_MEM];   ///< same extent as mvCand; presumably one flag per candidate - confirm
+  bool     mvValid[AMVP_MAX_NUM_CANDS_MEM];   ///< same extent as mvCand; presumably one flag per candidate - confirm
+
+  Position mvPos[AMVP_MAX_NUM_CANDS_MEM];     ///< same extent as mvCand; presumably one position per candidate - confirm
+  MvpType  mvType[AMVP_MAX_NUM_CANDS_MEM];    ///< same extent as mvCand; presumably one MvpType tag per candidate - confirm
+#endif
 };
 
 struct AffineAMVPInfo
@@ -62,6 +93,25 @@ struct AffineAMVPInfo
   Mv       mvCandRT[ AMVP_MAX_NUM_CANDS_MEM ];  ///< array of affine motion vector predictor candidates for right-top corner
   Mv       mvCandLB[ AMVP_MAX_NUM_CANDS_MEM ];  ///< array of affine motion vector predictor candidates for left-bottom corner
   unsigned numCand;                       ///< number of motion vector predictor candidates
+#if GDR_ENABLED
+  bool     allCandSolidInAbove;  
+
+  bool     mvSolidLT[AMVP_MAX_NUM_CANDS_MEM];
+  bool     mvSolidRT[AMVP_MAX_NUM_CANDS_MEM];
+  bool     mvSolidLB[AMVP_MAX_NUM_CANDS_MEM];
+
+  bool     mvValidLT[AMVP_MAX_NUM_CANDS_MEM];
+  bool     mvValidRT[AMVP_MAX_NUM_CANDS_MEM];
+  bool     mvValidLB[AMVP_MAX_NUM_CANDS_MEM];
+
+  MvpType  mvTypeLT[AMVP_MAX_NUM_CANDS_MEM];
+  MvpType  mvTypeRT[AMVP_MAX_NUM_CANDS_MEM];
+  MvpType  mvTypeLB[AMVP_MAX_NUM_CANDS_MEM];
+  
+  Position mvPosLT[AMVP_MAX_NUM_CANDS_MEM];
+  Position mvPosRT[AMVP_MAX_NUM_CANDS_MEM];
+  Position mvPosLB[AMVP_MAX_NUM_CANDS_MEM];
+#endif
 };
 
 // ====================================================================================================================
@@ -109,6 +159,11 @@ struct MotionInfo
   int16_t   refIdx [ NUM_REF_PIC_LIST_01 ];
   uint8_t         BcwIdx;
   Mv      bv;
+#if GDR_ENABLED
+  bool      sourceClean;  // source Position is clean/dirty
+  Position  sourcePos;    // source Position of Mv
+#endif
+
   MotionInfo() : isInter(false), isIBCmot(false), interDir(0), useAltHpelIf(false), sliceIdx(0), refIdx{ NOT_VALID, NOT_VALID }, BcwIdx(0) { }
   // ensure that MotionInfo(0) produces '\x000....' bit pattern - needed to work with AreaBuf - don't use this constructor for anything else
   MotionInfo(int i) : isInter(i != 0), isIBCmot(false), interDir(0), useAltHpelIf(false), sliceIdx(0), refIdx{ 0,         0 }, BcwIdx(0) { CHECKD(i != 0, "The argument for this constructor has to be '0'"); }
@@ -149,12 +204,20 @@ class BcwMotionParam
   bool       m_readOnly[2][33];       // 2 RefLists, 33 RefFrams
   Mv         m_mv[2][33];
   Distortion m_dist[2][33];
+  
+#if GDR_ENABLED
+  bool       m_mvSolid[2][33];
+#endif
 
   bool       m_readOnlyAffine[2][2][33];
   Mv         m_mvAffine[2][2][33][3];
   Distortion m_distAffine[2][2][33];
   int        m_mvpIdx[2][2][33];
 
+#if GDR_ENABLED
+  bool       m_mvAffineSolid[2][2][33][3];
+#endif
+
 public:
 
   void reset()
@@ -176,6 +239,14 @@ public:
     memset(m_readOnlyAffine, false, 2 * 2 * 33 * sizeof(bool));
     memset(m_distAffine, -1, 2 * 2 * 33 * sizeof(Distortion));
     memset( m_mvpIdx, 0, 2 * 2 * 33 * sizeof( int ) );
+
+#if GDR_ENABLED
+    memset(m_mvSolid, true, sizeof(m_mvSolid));   // size from the array itself: m_mvSolid is [2][33], so a 2*2*33 count would write past its end
+#endif
+
+#if GDR_ENABLED
+    memset(m_mvAffineSolid, true, sizeof(m_mvAffineSolid));   // size from the array itself: [2][2][33][3], so a 2*2*33 count would leave two thirds unset
+#endif
   }
 
   void setReadMode(bool b, uint32_t uiRefList, uint32_t uiRefIdx) { m_readOnly[uiRefList][uiRefIdx] = b; }
@@ -192,12 +263,30 @@ public:
     m_dist[uiRefList][uiRefIdx] = uiDist;
   }
 
+#if GDR_ENABLED
+  void copyFrom(Mv& rcMv, bool& rcMvSolid, Distortion uiDist, uint32_t uiRefList, uint32_t uiRefIdx)
+  {   // GDR overload: stores the MV, its distortion and its solid flag for (uiRefList, uiRefIdx)
+    m_mvSolid[uiRefList][uiRefIdx] = rcMvSolid;
+    m_dist[uiRefList][uiRefIdx]    = uiDist;
+    m_mv[uiRefList][uiRefIdx]      = rcMv;
+  }
+#endif
+
   void copyTo(Mv& rcMv, Distortion& ruiDist, uint32_t uiRefList, uint32_t uiRefIdx)
   {
     rcMv = m_mv[uiRefList][uiRefIdx];
     ruiDist = m_dist[uiRefList][uiRefIdx];
   }
 
+#if GDR_ENABLED
+  void copyTo(Mv& rcMv, bool& rcMvSolid, Distortion& ruiDist, uint32_t uiRefList, uint32_t uiRefIdx)
+  {   // GDR overload: reads back the MV, its distortion and its solid flag for (uiRefList, uiRefIdx)
+    rcMvSolid = m_mvSolid[uiRefList][uiRefIdx];
+    ruiDist   = m_dist[uiRefList][uiRefIdx];
+    rcMv      = m_mv[uiRefList][uiRefIdx];
+  }
+#endif
+
   Mv& getAffineMv(uint32_t uiRefList, uint32_t uiRefIdx, uint32_t uiAffineMvIdx, int bP4) { return m_mvAffine[bP4][uiRefList][uiRefIdx][uiAffineMvIdx]; }
 
   void copyAffineMvFrom(Mv(&racAffineMvs)[3], Distortion uiDist, uint32_t uiRefList, uint32_t uiRefIdx, int bP4
@@ -217,6 +306,26 @@ public:
     ruiDist = m_distAffine[bP4][uiRefList][uiRefIdx];
     mvpIdx  = m_mvpIdx[bP4][uiRefList][uiRefIdx];
   }
+
+#if GDR_ENABLED
+  void copyAffineMvFrom(Mv(&racAffineMvs)[3], bool(&racAffineMvsSolid)[3], Distortion uiDist, uint32_t uiRefList, uint32_t uiRefIdx, int bP4, const int mvpIdx)
+  {   // GDR overload: stores three affine MVs, their solid flags, the distortion and the MVP index
+    m_mvpIdx[bP4][uiRefList][uiRefIdx]     = mvpIdx;
+    m_distAffine[bP4][uiRefList][uiRefIdx] = uiDist;
+    memcpy(m_mvAffineSolid[bP4][uiRefList][uiRefIdx], racAffineMvsSolid, sizeof(racAffineMvsSolid));   // array reference: sizeof is 3 * sizeof(bool)
+    memcpy(m_mvAffine[bP4][uiRefList][uiRefIdx],      racAffineMvs,      sizeof(racAffineMvs));        // array reference: sizeof is 3 * sizeof(Mv)
+  }
+#endif
+
+#if GDR_ENABLED
+  void copyAffineMvTo(Mv acAffineMvs[3], bool acAffineMvsSolid[3], Distortion& ruiDist, uint32_t uiRefList, uint32_t uiRefIdx, int bP4, int& mvpIdx)
+  {   // GDR overload: reads back three affine MVs, their solid flags, the distortion and the MVP index
+    mvpIdx  = m_mvpIdx[bP4][uiRefList][uiRefIdx];
+    ruiDist = m_distAffine[bP4][uiRefList][uiRefIdx];
+    memcpy(acAffineMvsSolid, m_mvAffineSolid[bP4][uiRefList][uiRefIdx], 3 * sizeof(bool));   // parameters decay to pointers, so the count stays explicit
+    memcpy(acAffineMvs,      m_mvAffine[bP4][uiRefList][uiRefIdx],      3 * sizeof(Mv));
+  }
+#endif
 };
 struct LutMotionCand
 {
diff --git a/source/Lib/CommonLib/Mv.cpp b/source/Lib/CommonLib/Mv.cpp
index 1058862974656e64f76bd5958c9e17277f69af71..4aa433be2c7079d1318d7fc7893650a3dbb501d7 100644
--- a/source/Lib/CommonLib/Mv.cpp
+++ b/source/Lib/CommonLib/Mv.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/Mv.h b/source/Lib/CommonLib/Mv.h
index 6ac57e1a9b4ecfbc9ebb4c3d0e72c077773efee8..7c8e71ac564823cae188e5d4a89e4d8440924dd0 100644
--- a/source/Lib/CommonLib/Mv.h
+++ b/source/Lib/CommonLib/Mv.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/NAL.h b/source/Lib/CommonLib/NAL.h
index 19585c7489052ea2a9997ea29d184ea9600d3ca1..ec10c28fcfaff2e0c0489dc0c9faa493852daf21 100644
--- a/source/Lib/CommonLib/NAL.h
+++ b/source/Lib/CommonLib/NAL.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/ParameterSetManager.cpp b/source/Lib/CommonLib/ParameterSetManager.cpp
index 5fb051c6bee8c485beb5f7773a18572f04472029..49975baf0c0b12fa12917ae2d05c488079bcb48e 100644
--- a/source/Lib/CommonLib/ParameterSetManager.cpp
+++ b/source/Lib/CommonLib/ParameterSetManager.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/ParameterSetManager.h b/source/Lib/CommonLib/ParameterSetManager.h
index 6f3a0b615e46e93054a0a610f5028fbdd0e85095..47e863d6f5e669a2c45273528029e55d7349107c 100644
--- a/source/Lib/CommonLib/ParameterSetManager.h
+++ b/source/Lib/CommonLib/ParameterSetManager.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -162,7 +162,7 @@ public:
       }
       else
       {
-        CHECK( true, "Wrong APS type" );
+        THROW("Wrong APS type");
       }
     }
     else
diff --git a/source/Lib/CommonLib/PicYuvMD5.cpp b/source/Lib/CommonLib/PicYuvMD5.cpp
index 8c13bd7bcd5315b2cdf43dafe6ee834c8402ee1a..a15f81ac5b56662a7364562f8e676088f95fb973 100644
--- a/source/Lib/CommonLib/PicYuvMD5.cpp
+++ b/source/Lib/CommonLib/PicYuvMD5.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -253,9 +253,7 @@ int calcAndPrintHashStatus(const CPelUnitBuf& pic, const SEIDecodedPictureHash*
 
   if (pictureHashSEI)
   {
-#if FIX_TICKET_1405
     CHECK ((uint32_t)pic.bufs.size() != ( pictureHashSEI->singleCompFlag ? 1 : 3 ), "The value of dph_sei_single_component_flag shall be equal to (ChromaFormatIdc == 0).");
-#endif
     switch (pictureHashSEI->method)
     {
       case HASHTYPE_MD5:
diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp
index a7205badc3cc42c2e685d439d1bcf19415668d42..675e817aab9399fd635e64daa6c660a69809f257 100644
--- a/source/Lib/CommonLib/Picture.cpp
+++ b/source/Lib/CommonLib/Picture.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -40,130 +40,6 @@
 #include "ChromaFormat.h"
 #include "CommonLib/InterpolationFilter.h"
 
-
-#if ENABLE_SPLIT_PARALLELISM
-
-int g_wppThreadId( 0 );
-#pragma omp threadprivate(g_wppThreadId)
-
-#if ENABLE_SPLIT_PARALLELISM
-int g_splitThreadId( 0 );
-#pragma omp threadprivate(g_splitThreadId)
-
-int g_splitJobId( 0 );
-#pragma omp threadprivate(g_splitJobId)
-#endif
-
-Scheduler::Scheduler() :
-#if ENABLE_SPLIT_PARALLELISM
-  m_numSplitThreads( 1 )
-#endif
-{
-}
-
-Scheduler::~Scheduler()
-{
-}
-
-#if ENABLE_SPLIT_PARALLELISM
-unsigned Scheduler::getSplitDataId( int jobId ) const
-{
-  if( m_numSplitThreads > 1 && m_hasParallelBuffer )
-  {
-    int splitJobId = jobId == CURR_THREAD_ID ? g_splitJobId : jobId;
-
-    return ( g_wppThreadId * NUM_RESERVERD_SPLIT_JOBS ) + splitJobId;
-  }
-  else
-  {
-    return 0;
-  }
-}
-
-unsigned Scheduler::getSplitPicId( int tId /*= CURR_THREAD_ID */ ) const
-{
-  if( m_numSplitThreads > 1 && m_hasParallelBuffer )
-  {
-    int threadId = tId == CURR_THREAD_ID ? g_splitThreadId : tId;
-
-    return ( g_wppThreadId * m_numSplitThreads ) + threadId;
-  }
-  else
-  {
-    return 0;
-  }
-}
-
-unsigned Scheduler::getSplitJobId() const
-{
-  if( m_numSplitThreads > 1 )
-  {
-    return g_splitJobId;
-  }
-  else
-  {
-    return 0;
-  }
-}
-
-void Scheduler::setSplitJobId( const int jobId )
-{
-  CHECK( g_splitJobId != 0 && jobId != 0, "Need to reset the jobId after usage!" );
-  g_splitJobId = jobId;
-}
-
-void Scheduler::startParallel()
-{
-  m_hasParallelBuffer = true;
-}
-
-void Scheduler::finishParallel()
-{
-  m_hasParallelBuffer = false;
-}
-
-void Scheduler::setSplitThreadId( const int tId )
-{
-  g_splitThreadId = tId == CURR_THREAD_ID ? omp_get_thread_num() : tId;
-}
-
-#endif
-
-
-
-unsigned Scheduler::getDataId() const
-{
-#if ENABLE_SPLIT_PARALLELISM
-  if( m_numSplitThreads > 1 )
-  {
-    return getSplitDataId();
-  }
-#endif
-  return 0;
-}
-
-bool Scheduler::init( const int ctuYsize, const int ctuXsize, const int numWppThreadsRunning, const int numWppExtraLines, const int numSplitThreads )
-{
-#if ENABLE_SPLIT_PARALLELISM
-  m_numSplitThreads = numSplitThreads;
-#endif
-
-  return true;
-}
-
-
-int Scheduler::getNumPicInstances() const
-{
-#if !ENABLE_SPLIT_PARALLELISM
-  return 1;
-#else
-  return ( m_numSplitThreads > 1 ? m_numSplitThreads : 1 );
-#endif
-}
-
-#endif
-
-
 // ---------------------------------------------------------------------------
 // picture methods
 // ---------------------------------------------------------------------------
@@ -186,6 +62,8 @@ Picture::Picture()
   fieldPic             = false;
   topField             = false;
   precedingDRAP        = false;
+  edrapRapId           = -1;
+  m_colourTranfParams  = NULL;
   nonReferencePictureFlag = false;
 
   for( int i = 0; i < MAX_NUM_CHANNEL_TYPE; i++ )
@@ -225,40 +103,43 @@ void Picture::create( const ChromaFormat &_chromaFormat, const Size &size, const
 
 void Picture::destroy()
 {
-#if ENABLE_SPLIT_PARALLELISM
-  for( int jId = 0; jId < PARL_SPLIT_MAX_NUM_THREADS; jId++ )
-#endif
+  for (uint32_t t = 0; t < NUM_PIC_TYPES; t++)
   {
-    for (uint32_t t = 0; t < NUM_PIC_TYPES; t++)
-    {
-      M_BUFS(jId, t).destroy();
-    }
-    m_hashMap.clearAll();
-    if (cs)
+    M_BUFS(0, t).destroy();   // first macro argument is ignored; do not name the removed jId loop variable
+  }
+  m_hashMap.clearAll();
+  if (cs)
+  {
+#if GDR_ENABLED
+    if (cs->picHeader)
     {
-      cs->destroy();
-      delete cs;
-      cs = nullptr;
+      delete cs->picHeader;
     }
+    cs->picHeader = nullptr;
+#endif
+    cs->destroy();
+    delete cs;
+    cs = nullptr;
+  }
 
-    for (auto &ps: slices)
-    {
-      delete ps;
-    }
-    slices.clear();
+  for (auto &ps: slices)
+  {
+    delete ps;
+  }
+  slices.clear();
 
-    for (auto &psei: SEIs)
-    {
-      delete psei;
-    }
-    SEIs.clear();
+  for (auto &psei: SEIs)
+  {
+    delete psei;
+  }
+  SEIs.clear();
 
-    if (m_spliceIdx)
-    {
-      delete[] m_spliceIdx;
-      m_spliceIdx = NULL;
-    }
+  if (m_spliceIdx)
+  {
+    delete[] m_spliceIdx;
+    m_spliceIdx = NULL;
   }
+  m_invColourTransfBuf = NULL;
 }
 
 void Picture::createTempBuffers( const unsigned _maxCUSize )
@@ -269,21 +150,8 @@ void Picture::createTempBuffers( const unsigned _maxCUSize )
   const Area a = m_ctuArea.Y();
 #endif
 
-#if ENABLE_SPLIT_PARALLELISM
-  scheduler.startParallel();
-
-  for( int jId = 0; jId < scheduler.getNumPicInstances(); jId++ )
-#endif
-  {
-    M_BUFS( jId, PIC_PREDICTION                   ).create( chromaFormat, a,   _maxCUSize );
-    M_BUFS( jId, PIC_RESIDUAL                     ).create( chromaFormat, a,   _maxCUSize );
-#if ENABLE_SPLIT_PARALLELISM
-    if (jId > 0)
-    {
-      M_BUFS(jId, PIC_RECONSTRUCTION).create(chromaFormat, Y(), _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE);
-    }
-#endif
-  }
+  M_BUFS( 0, PIC_PREDICTION                     ).create( chromaFormat, a,   _maxCUSize );   // prediction signal buffer
+  M_BUFS( 0, PIC_RESIDUAL                       ).create( chromaFormat, a,   _maxCUSize );   // residual signal buffer
 
   if (cs)
   {
@@ -293,24 +161,11 @@ void Picture::createTempBuffers( const unsigned _maxCUSize )
 
 void Picture::destroyTempBuffers()
 {
-#if ENABLE_SPLIT_PARALLELISM
-  scheduler.finishParallel();
-
-  for( int jId = 0; jId < scheduler.getNumPicInstances(); jId++ )
-#endif
+  for (uint32_t t = 0; t < NUM_PIC_TYPES; t++)
   {
-    for (uint32_t t = 0; t < NUM_PIC_TYPES; t++)
+    if (t == PIC_RESIDUAL || t == PIC_PREDICTION)
     {
-      if (t == PIC_RESIDUAL || t == PIC_PREDICTION)
-      {
-        M_BUFS(jId, t).destroy();
-      }
-#if ENABLE_SPLIT_PARALLELISM
-      if (t == PIC_RECONSTRUCTION && jId > 0)
-      {
-        M_BUFS(jId, t).destroy();
-      }
-#endif
+      M_BUFS(0, t).destroy();
     }
   }
 
@@ -329,6 +184,8 @@ const CPelUnitBuf Picture::getOrigBuf()                     const { return M_BUF
 
        PelBuf     Picture::getOrigBuf(const ComponentID compID)       { return getBuf(compID, PIC_ORIGINAL); }
 const CPelBuf     Picture::getOrigBuf(const ComponentID compID) const { return getBuf(compID, PIC_ORIGINAL); }
+       PelBuf     Picture::getTrueOrigBuf(const ComponentID compID)       { return getBuf(compID, PIC_TRUE_ORIGINAL); }
+const CPelBuf     Picture::getTrueOrigBuf(const ComponentID compID) const { return getBuf(compID, PIC_TRUE_ORIGINAL); }
        PelUnitBuf Picture::getTrueOrigBuf()                           { return M_BUFS(0, PIC_TRUE_ORIGINAL); }
 const CPelUnitBuf Picture::getTrueOrigBuf()                     const { return M_BUFS(0, PIC_TRUE_ORIGINAL); }
        PelBuf     Picture::getTrueOrigBuf(const CompArea &blk)        { return getBuf(blk, PIC_TRUE_ORIGINAL); }
@@ -388,6 +245,9 @@ void Picture::finalInit( const VPS* vps, const SPS& sps, const PPS& pps, PicHead
   cs->pps     = &pps;
   picHeader->setSPSId( sps.getSPSId() );
   picHeader->setPPSId( pps.getPPSId() );
+#if GDR_ENABLED
+  picHeader->setPic(this);
+#endif
   cs->picHeader = picHeader;
   memcpy(cs->alfApss, alfApss, sizeof(cs->alfApss));
   cs->lmcsAps = lmcsAps;
@@ -395,6 +255,8 @@ void Picture::finalInit( const VPS* vps, const SPS& sps, const PPS& pps, PicHead
   cs->pcv     = pps.pcv;
   m_conformanceWindow = pps.getConformanceWindow();
   m_scalingWindow = pps.getScalingWindow();
+  mixedNaluTypesInPicFlag = pps.getMixedNaluTypesInPicFlag();
+  nonReferencePictureFlag = picHeader->getNonReferencePictureFlag();
 
   if (m_spliceIdx == NULL)
   {
@@ -424,10 +286,10 @@ void Picture::fillSliceLossyLosslessArray(std::vector<uint16_t> sliceLosslessInd
 {
   uint16_t numElementsinsliceLosslessIndexArray = (uint16_t)sliceLosslessIndexArray.size();
   uint32_t numSlices = this->cs->pps->getNumSlicesInPic();
-  m_lossylosslessSliceArray.assign(numSlices, true); // initialize to all slices are lossless 
+  m_lossylosslessSliceArray.assign(numSlices, true); // initialize to all slices are lossless
   if (mixedLossyLossless)
   {
+    m_lossylosslessSliceArray.assign(numSlices, false); // initialize to all slices are lossy
+    m_lossylosslessSliceArray.assign(numSlices, false); // initialize to all slices are lossless
     CHECK(numElementsinsliceLosslessIndexArray == 0 , "sliceLosslessArray is empty, must need to configure for mixed lossy/lossless");
 
     // mixed lossy/lossless slices, set only lossless slices;
@@ -436,7 +298,7 @@ void Picture::fillSliceLossyLosslessArray(std::vector<uint16_t> sliceLosslessInd
         CHECK(sliceLosslessIndexArray[i] >= numSlices || sliceLosslessIndexArray[i] < 0, "index of lossless slice is out of slice index bound");
         m_lossylosslessSliceArray[sliceLosslessIndexArray[i]] = true;
     }
-  } 
+  }
   CHECK(m_lossylosslessSliceArray.size() < numSlices, "sliceLosslessArray size is less than number of slices");
 }
 
@@ -467,23 +329,6 @@ void Picture::clearSliceBuffer()
   slices.clear();
 }
 
-#if ENABLE_SPLIT_PARALLELISM
-void Picture::finishParallelPart( const UnitArea& area )
-{
-  const UnitArea clipdArea = clipArea( area, *this );
-  const int      sourceID  = scheduler.getSplitPicId( 0 );
-  CHECK( scheduler.getSplitJobId() > 0, "Finish-CU cannot be called from within a mode- or split-parallelized block!" );
-
-  // distribute the reconstruction across all of the parallel workers
-  for( int tId = 1; tId < scheduler.getNumSplitThreads(); tId++ )
-  {
-    const int destID = scheduler.getSplitPicId( tId );
-
-    M_BUFS( destID, PIC_RECONSTRUCTION ).subBuf( clipdArea ).copyFrom( M_BUFS( sourceID, PIC_RECONSTRUCTION ).subBuf( clipdArea ) );
-  }
-}
-#endif
-
 const TFilterCoeff DownsamplingFilterSRC[8][16][12] =
 {
     { // D = 1
@@ -1237,9 +1082,6 @@ PelBuf Picture::getBuf( const CompArea &blk, const PictureType &type )
     return PelBuf();
   }
 
-#if ENABLE_SPLIT_PARALLELISM
-  const int jId = ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL || type == PIC_ORIGINAL_INPUT || type == PIC_TRUE_ORIGINAL_INPUT ) ? 0 : scheduler.getSplitPicId();
-#endif
 #if !KEEP_PRED_AND_RESI_SIGNALS
   if( type == PIC_RESIDUAL || type == PIC_PREDICTION )
   {
@@ -1261,10 +1103,6 @@ const CPelBuf Picture::getBuf( const CompArea &blk, const PictureType &type ) co
     return PelBuf();
   }
 
-#if ENABLE_SPLIT_PARALLELISM
-  const int jId = ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL ) ? 0 : scheduler.getSplitPicId();
-
-#endif
 #if !KEEP_PRED_AND_RESI_SIGNALS
   if( type == PIC_RESIDUAL || type == PIC_PREDICTION )
   {
@@ -1305,9 +1143,6 @@ const CPelUnitBuf Picture::getBuf( const UnitArea &unit, const PictureType &type
 
 Pel* Picture::getOrigin( const PictureType &type, const ComponentID compID ) const
 {
-#if ENABLE_SPLIT_PARALLELISM
-  const int jId = ( type == PIC_ORIGINAL || type == PIC_TRUE_ORIGINAL ) ? 0 : scheduler.getSplitPicId();
-#endif
   return M_BUFS( jId, type ).getOrigin( compID );
 }
 
@@ -1386,3 +1221,44 @@ void Picture::addPictureToHashMapForInter()
     }
   }
 }
+void Picture::createColourTransfProcessor(bool firstPictureInSequence, SEIColourTransformApply* ctiCharacteristics, PelStorage* ctiBuf, int width, int height, ChromaFormat fmt, int bitDepth)
+{
+  m_colourTranfParams = ctiCharacteristics;
+  m_invColourTransfBuf = ctiBuf;
+  if (firstPictureInSequence)
+  {
+    // One-time setup on the first picture: create and initialize the Colour Transform Processor
+    m_colourTranfParams->create(width, height, fmt, bitDepth);
+
+    // Frame-level buffer the colour transform is applied to; NOTE(review): sized with chromaFormat, not fmt - confirm
+    m_invColourTransfBuf->create(UnitArea(chromaFormat, Area(0, 0, width, height)));
+  }
+}
+
+PelUnitBuf Picture::getDisplayBuf()
+{
+  // Copies the reconstruction into m_invColourTransfBuf, applies the inverse colour transform, returns that buffer.
+  // Both members are set by createColourTransfProcessor(); fail loudly instead of dereferencing NULL.
+  CHECK(m_colourTranfParams == NULL || m_invColourTransfBuf == NULL, "Colour transform processor not created");
+  for (std::list<SEI*>::iterator message = SEIs.begin(); message != SEIs.end(); ++message)
+  {
+    if ((*message)->payloadType() == SEI::COLOUR_TRANSFORM_INFO)
+    {
+      // re-init parameters from the first CTI SEI, but only if storage for them exists
+      if (m_colourTranfParams->m_pColourTransfParams != NULL)
+      {
+        *m_colourTranfParams->m_pColourTransfParams = *static_cast<SEIColourTransformInfo*>(*message);
+      }
+      break;
+    }
+  }
+
+  m_invColourTransfBuf->copyFrom(getRecoBuf());
+
+  if (m_colourTranfParams->m_pColourTransfParams != NULL)
+  {
+    m_colourTranfParams->generateColourTransfLUTs();
+    m_colourTranfParams->inverseColourTransform(m_invColourTransfBuf);
+  }
+  return *m_invColourTransfBuf;
+}
diff --git a/source/Lib/CommonLib/Picture.h b/source/Lib/CommonLib/Picture.h
index 66073bf619aede2aab4ebc95f81fff37e8c47667..2b6c9d732806bf978afa1839ee3a1584d81781fc 100644
--- a/source/Lib/CommonLib/Picture.h
+++ b/source/Lib/CommonLib/Picture.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -48,51 +48,16 @@
 #include "CodingStructure.h"
 #include "Hash.h"
 #include "MCTS.h"
+#include "SEIColourTransform.h"
 #include <deque>
 
-#if ENABLE_SPLIT_PARALLELISM
-
-#define CURR_THREAD_ID -1
-
-class Scheduler
-{
-public:
-  Scheduler();
-  ~Scheduler();
-
-#if ENABLE_SPLIT_PARALLELISM
-  unsigned getSplitDataId( int jobId = CURR_THREAD_ID ) const;
-  unsigned getSplitPicId ( int tId   = CURR_THREAD_ID ) const;
-  unsigned getSplitJobId () const;
-  void     setSplitJobId ( const int jobId );
-  void     startParallel ();
-  void     finishParallel();
-  void     setSplitThreadId( const int tId = CURR_THREAD_ID );
-  unsigned getNumSplitThreads() const { return m_numSplitThreads; };
-#endif
-  unsigned getDataId     () const;
-  bool init              ( const int ctuYsize, const int ctuXsize, const int numWppThreadsRunning, const int numWppExtraLines, const int numSplitThreads );
-  int  getNumPicInstances() const;
-#if ENABLE_SPLIT_PARALLELISM
-
-  int   m_numSplitThreads;
-  bool  m_hasParallelBuffer;
-#endif
-};
-#endif
 
 class SEI;
 class AQpLayer;
 
 typedef std::list<SEI*> SEIMessages;
 
-
-
-#if ENABLE_SPLIT_PARALLELISM
-#define M_BUFS(JID,PID) m_bufs[JID][PID]
-#else
 #define M_BUFS(JID,PID) m_bufs[PID]
-#endif
 
 struct Picture : public UnitArea
 {
@@ -104,6 +69,10 @@ struct Picture : public UnitArea
 
   void createTempBuffers( const unsigned _maxCUSize );
   void destroyTempBuffers();
+  SEIColourTransformApply* m_colourTranfParams  = nullptr;   // set by createColourTransfProcessor()
+  PelStorage*              m_invColourTransfBuf = nullptr;   // set by createColourTransfProcessor(); starts out null
+  void              createColourTransfProcessor(bool firstPictureInSequence, SEIColourTransformApply* ctiCharacteristics, PelStorage* ctiBuf, int width, int height, ChromaFormat fmt, int bitDepth);
+  PelUnitBuf        getDisplayBuf();
 
          PelBuf     getOrigBuf(const CompArea &blk);
   const CPelBuf     getOrigBuf(const CompArea &blk) const;
@@ -113,6 +82,8 @@ struct Picture : public UnitArea
   const CPelUnitBuf getOrigBuf() const;
          PelBuf     getOrigBuf(const ComponentID compID);
   const CPelBuf     getOrigBuf(const ComponentID compID) const;
+         PelBuf     getTrueOrigBuf(const ComponentID compID);
+  const CPelBuf     getTrueOrigBuf(const ComponentID compID) const;
          PelUnitBuf getTrueOrigBuf();
   const CPelUnitBuf getTrueOrigBuf() const;
         PelBuf      getTrueOrigBuf(const CompArea &blk);
@@ -160,6 +131,8 @@ struct Picture : public UnitArea
   void setPictureType(const NalUnitType val)        { m_pictureType = val;          }
   void setBorderExtension( bool bFlag)              { m_bIsBorderExtended = bFlag;}
   Pel* getOrigin( const PictureType &type, const ComponentID compID ) const;
+  int  getEdrapRapId()                        const { return edrapRapId ; }
+  void setEdrapRapId(const int val)                 { edrapRapId = val; }
 
   void setLossyQPValue(int i)                 { m_lossyQP = i; }
   int getLossyQPValue()                       const { return m_lossyQP; }
@@ -217,6 +190,7 @@ public:
   bool fieldPic;
   int  m_prevQP[MAX_NUM_CHANNEL_TYPE];
   bool precedingDRAP; // preceding a DRAP picture in decoding order
+  int  edrapRapId;
   bool nonReferencePictureFlag;
 
   int  poc;
@@ -233,13 +207,9 @@ public:
   int m_lossyQP;
   std::vector<bool> m_lossylosslessSliceArray;
   bool interLayerRefPicFlag;
+  bool mixedNaluTypesInPicFlag;
 
-
-#if ENABLE_SPLIT_PARALLELISM
-  PelStorage m_bufs[PARL_SPLIT_MAX_NUM_JOBS][NUM_PIC_TYPES];
-#else
   PelStorage m_bufs[NUM_PIC_TYPES];
-#endif
   const Picture*           unscaledPic;
 
   TComHash           m_hashMap;
@@ -277,15 +247,6 @@ private:
   UnitArea m_ctuArea;
 #endif
 
-#if ENABLE_SPLIT_PARALLELISM
-public:
-  void finishParallelPart   ( const UnitArea& ctuArea );
-#endif
-#if ENABLE_SPLIT_PARALLELISM
-public:
-  Scheduler                  scheduler;
-#endif
-
 public:
   SAOBlkParam    *getSAO(int id = 0)                        { return &m_sao[id][0]; };
   void            resizeSAO(unsigned numEntries, int dstid) { m_sao[dstid].resize(numEntries); }
diff --git a/source/Lib/CommonLib/ProfileLevelTier.cpp b/source/Lib/CommonLib/ProfileLevelTier.cpp
index a277b36b9f9e262068c4d2419979f5f520e77624..831ed4df2eaf1a5ff325b895d1742013faa8cda4 100644
--- a/source/Lib/CommonLib/ProfileLevelTier.cpp
+++ b/source/Lib/CommonLib/ProfileLevelTier.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -70,9 +70,7 @@ static const LevelTierFeatures mainLevelTierInfo[] =
     { Level::LEVEL6  , 35651584, {    80000,   240000 },      600,      440,       20, 1069547520ULL, {   60000,   240000 }, { 8, 4} },
     { Level::LEVEL6_1, 35651584, {   120000,   480000 },      600,      440,       20, 2139095040ULL, {  120000,   480000 }, { 8, 4} },
     { Level::LEVEL6_2, 35651584, {   180000,   800000 },      600,      440,       20, 4278190080ULL, {  240000,   800000 }, { 8, 4} },
-#if JVET_T0065_LEVEL_6_3
     { Level::LEVEL6_3, 80216064, {   240000,   800000 },     1000,      990,       30, 4812963840ULL, {  320000,   800000 }, { 8, 4} },
-#endif
     { Level::LEVEL15_5, MAX_UINT,{ MAX_UINT, MAX_UINT }, MAX_UINT, MAX_UINT, MAX_UINT, MAX_CNFUINT64, {MAX_UINT, MAX_UINT }, { 0, 0} },
     { Level::NONE    }
 };
@@ -94,6 +92,17 @@ static const ProfileFeatures validProfiles[] = {
   { Profile::MAIN_10_444, "Main_444_10", 10, CHROMA_444, false, 2500, 2750, 3750, 75, mainLevelTierInfo, false },
   { Profile::MULTILAYER_MAIN_10_444, "Multilayer_Main_444_10", 10, CHROMA_444, false, 2500, 2750, 3750, 75,
     mainLevelTierInfo, false },
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  { Profile::MAIN_12, "Main_12", 12, CHROMA_420, true, 1500, 1650, 2250, 100, mainLevelTierInfo, false },
+  { Profile::MAIN_12_INTRA, "Main_12_Intra", 12, CHROMA_420, true, 1500, 1650, 2250, 100, mainLevelTierInfo, false },
+  { Profile::MAIN_12_STILL_PICTURE, "Main_12_Still_Picture", 12, CHROMA_420, true, 1500, 1650, 2250, 100, mainLevelTierInfo, false },
+  { Profile::MAIN_12_444, "Main_12_444", 12, CHROMA_444, true, 3000, 3300, 4500, 50, mainLevelTierInfo, false },
+  { Profile::MAIN_12_444_INTRA, "Main_12_444_Intra", 12, CHROMA_444, true, 3000, 3300, 4500, 50, mainLevelTierInfo, false },
+  { Profile::MAIN_12_444_STILL_PICTURE, "Main_12_444_Still_Picture", 12, CHROMA_444, true, 3000, 3300, 4500, 50, mainLevelTierInfo, false },
+  { Profile::MAIN_16_444, "Main_16_444", 16, CHROMA_444, true, 4000, 4400, 6000, 50, mainLevelTierInfo, false },
+  { Profile::MAIN_16_444_INTRA, "Main_16_444_Intra", 16, CHROMA_444, true, 4000, 4400, 6000, 50, mainLevelTierInfo, false },
+  { Profile::MAIN_16_444_STILL_PICTURE, "Main_16_444_Still_Picture", 16, CHROMA_444, true, 4000, 4400, 6000, 50, mainLevelTierInfo, false },
+#endif
   { Profile::NONE, 0 },
 };
 
@@ -146,16 +155,40 @@ ProfileLevelTierFeatures::extractPTLInformation(const SPS &sps)
       }
     }
   }
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  if (m_pProfile)
+  {
+    Profile::Name profile = m_pProfile->profile;
+    if (profile == Profile::MAIN_10 || profile == Profile::MAIN_10_444 ||
+        profile == Profile::MULTILAYER_MAIN_10 || profile == Profile::MULTILAYER_MAIN_10_444 ||
+        profile == Profile::MAIN_12 || profile == Profile::MAIN_12_444 || profile == Profile::MAIN_16_444)
+    {
+      m_hbrFactor = 1;
+    }
+    else
+    {
+      m_hbrFactor = 2 - sps.getProfileTierLevel()->getConstraintInfo()->getLowerBitRateConstraintFlag();
+    }
+  }
+#endif
 }
 
 double ProfileLevelTierFeatures::getMinCr() const
 {
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  return (m_pLevelTier!=0 && m_pProfile!=0) ? (m_pProfile->minCrScaleFactorx100 * m_pLevelTier->minCrBase[m_tier?1:0] / m_hbrFactor)/100.0 : 0.0 ;
+#else
   return (m_pLevelTier!=0 && m_pProfile!=0) ? (m_pProfile->minCrScaleFactorx100 * m_pLevelTier->minCrBase[m_tier?1:0])/100.0 : 0.0 ;
+#endif
 }
 
 uint64_t ProfileLevelTierFeatures::getCpbSizeInBits() const
 {
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  return (m_pLevelTier!=0 && m_pProfile!=0) ? uint64_t(m_pProfile->cpbVclFactor) * m_pLevelTier->maxCpb[m_tier?1:0] * m_hbrFactor : uint64_t(0);
+#else
   return (m_pLevelTier!=0 && m_pProfile!=0) ? uint64_t(m_pProfile->cpbVclFactor) * m_pLevelTier->maxCpb[m_tier?1:0] : uint64_t(0);
+#endif
 }
 
 uint32_t ProfileLevelTierFeatures::getMaxDpbSize( uint32_t picSizeMaxInSamplesY ) const
diff --git a/source/Lib/CommonLib/ProfileLevelTier.h b/source/Lib/CommonLib/ProfileLevelTier.h
index ab3f349a93602bb6fba0d22b2dcc54c9f0ca5a45..ceffdf809383dbc51320a2f4c30cb8114292a90d 100644
--- a/source/Lib/CommonLib/ProfileLevelTier.h
+++ b/source/Lib/CommonLib/ProfileLevelTier.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -88,6 +88,9 @@ class ProfileLevelTierFeatures
     const ProfileFeatures   *m_pProfile;
     const LevelTierFeatures *m_pLevelTier;
     Level::Tier              m_tier;
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    int                      m_hbrFactor = 1;   // divisor in getMinCr(); 1 until extractPTLInformation() sets it
+#endif
   public:
     ProfileLevelTierFeatures() : m_pProfile(nullptr), m_pLevelTier(nullptr), m_tier(Level::MAIN) {}
 
diff --git a/source/Lib/CommonLib/Quant.cpp b/source/Lib/CommonLib/Quant.cpp
index 789dad2c1afa3de18a572287d2f00c7934b595b7..5b09d9426b7f533af0162972b56b39d8597f0ca9 100644
--- a/source/Lib/CommonLib/Quant.cpp
+++ b/source/Lib/CommonLib/Quant.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -501,14 +501,6 @@ void Quant::init( uint32_t uiMaxTrSize,
   m_resetStore = true;
 }
 
-#if ENABLE_SPLIT_PARALLELISM
-void Quant::copyState( const Quant& other )
-{
-  m_dLambda = other.m_dLambda;
-  memcpy( m_lambdas, other.m_lambdas, sizeof( m_lambdas ) );
-}
-#endif
-
 /** set quantized matrix coefficient for encode
  * \param scalingList            quantized matrix address
  * \param format                 chroma format
@@ -527,7 +519,9 @@ void Quant::setScalingList(ScalingList *scalingList, const int maxLog2TrDynamicR
     for(uint32_t list = 0; list < SCALING_LIST_NUM; list++)
     {
       if (size == SCALING_LIST_2x2 && list < 4)   // skip 2x2 luma
+      {
         continue;
+      }
       scalingListId = g_scalingListId[size][list];
       if (scalingList->getChromaScalingListPresentFlag() || scalingList->isLumaScalingList(scalingListId))
       {
@@ -548,7 +542,11 @@ void Quant::setScalingList(ScalingList *scalingList, const int maxLog2TrDynamicR
   {
     for (uint32_t sizeh = 0; sizeh <= SCALING_LIST_LAST_CODED; sizeh++) //7
     {
-      if (sizew == sizeh || (sizew == SCALING_LIST_1x1 && sizeh<SCALING_LIST_4x4) || (sizeh == SCALING_LIST_1x1 && sizew<SCALING_LIST_4x4)) continue;
+      if (sizew == sizeh || (sizew == SCALING_LIST_1x1 && sizeh < SCALING_LIST_4x4)
+          || (sizeh == SCALING_LIST_1x1 && sizew < SCALING_LIST_4x4))
+      {
+        continue;
+      }
       for (uint32_t list = 0; list < SCALING_LIST_NUM; list++) //9
       {
         int largerSide = (sizew > sizeh) ? sizew : sizeh;
@@ -579,7 +577,9 @@ void Quant::setScalingListDec(const ScalingList &scalingList)
     for(uint32_t list = 0; list < SCALING_LIST_NUM; list++)
     {
       if (size == SCALING_LIST_2x2 && list < 4)   // skip 2x2 luma
+      {
         continue;
+      }
       scalingListId = g_scalingListId[size][list];
       for(int qp = minimumQp; qp < maximumQp; qp++)
       {
@@ -593,7 +593,11 @@ void Quant::setScalingListDec(const ScalingList &scalingList)
   {
     for (uint32_t sizeh = 0; sizeh <= SCALING_LIST_LAST_CODED; sizeh++) //7
     {
-      if (sizew == sizeh || (sizew == SCALING_LIST_1x1 && sizeh<SCALING_LIST_4x4) || (sizeh == SCALING_LIST_1x1 && sizew<SCALING_LIST_4x4)) continue;
+      if (sizew == sizeh || (sizew == SCALING_LIST_1x1 && sizeh < SCALING_LIST_4x4)
+          || (sizeh == SCALING_LIST_1x1 && sizew < SCALING_LIST_4x4))
+      {
+        continue;
+      }
       for (uint32_t list = 0; list < SCALING_LIST_NUM; list++) //9
       {
         int largerSide = (sizew > sizeh) ? sizew : sizeh;
@@ -866,7 +870,9 @@ void Quant::processScalingListDec( const int *coeff, int *dequantcoeff, int invQ
     }
     int largeOne = (width > height) ? width : height;
     if (largeOne > 8)
+    {
       dequantcoeff[0] = invQuantScales * dc;
+    }
     return;
   }
   for (uint32_t j = 0; j<height; j++)
@@ -946,7 +952,10 @@ void Quant::xInitScalingList( const Quant* other )
  */
 void Quant::xDestroyScalingList()
 {
-  if( !m_isScalingListOwner ) return;
+  if (!m_isScalingListOwner)
+  {
+    return;
+  }
 
   delete[] m_quantCoef[0][0][0][0];
 }
@@ -1170,7 +1179,7 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp
   const int            channelBitDepth        = sps.getBitDepth(toChannelType(compID));
   const int            iTransformShift        = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
   const int            scalingListType        = getScalingListType(tu.cu->predMode, compID);
-  
+
   const bool           disableSMForLFNST = tu.cs->slice->getExplicitScalingListUsed() ? tu.cs->slice->getSPS()->getDisableScalingMatrixForLfnstBlks() : false;
   const bool           isLfnstApplied = tu.cu->lfnstIdx > 0 && (tu.cu->isSepTree() ? true : isLuma(compID));
   const bool           disableSMForACT = tu.cs->slice->getSPS()->getScalingMatrixForAlternativeColourSpaceDisabledFlag() && (tu.cs->slice->getSPS()->getScalingMatrixDesignatedColourSpaceFlag() == tu.cu->colorTransform);
diff --git a/source/Lib/CommonLib/Quant.h b/source/Lib/CommonLib/Quant.h
index 752b20d9321b4754370bc4845477d299ee3c52b1..b9b0d56c297409efdf721f2d9ce209c1fe2cc2d7 100644
--- a/source/Lib/CommonLib/Quant.h
+++ b/source/Lib/CommonLib/Quant.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -49,12 +49,6 @@
 //! \ingroup CommonLib
 //! \{
 
-// ====================================================================================================================
-// Constants
-// ====================================================================================================================
-
-#define QP_BITS                 15
-
 // ====================================================================================================================
 // Class definition
 // ====================================================================================================================
@@ -146,10 +140,6 @@ public:
   // de-quantization
   virtual void dequant           ( const TransformUnit &tu, CoeffBuf &dstCoeff, const ComponentID &compID, const QpParam &cQP );
 
-#if ENABLE_SPLIT_PARALLELISM
-  virtual void copyState         ( const Quant& other );
-#endif
-
 protected:
 
 #if T0196_SELECTIVE_RDOQ
diff --git a/source/Lib/CommonLib/QuantRDOQ.cpp b/source/Lib/CommonLib/QuantRDOQ.cpp
index 9b9eccf286785f4c5758cadfb14fbeb4a38c2b1b..a57388e299938bd9fc1f3fc28351c1ebcc853c36 100644
--- a/source/Lib/CommonLib/QuantRDOQ.cpp
+++ b/source/Lib/CommonLib/QuantRDOQ.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -302,18 +302,18 @@ inline double QuantRDOQ::xGetRateSigCoeffGroup( const BinFracBits& fracBitsSigCG
 */
 inline double QuantRDOQ::xGetRateLast( const int* lastBitsX, const int* lastBitsY, unsigned PosX, unsigned PosY ) const
 {
-  uint32_t    CtxX  = g_uiGroupIdx[PosX];
-  uint32_t    CtxY  = g_uiGroupIdx[PosY];
-  double  Cost  = lastBitsX[ CtxX ] + lastBitsY[ CtxY ];
-  if( CtxX > 3 )
+  uint32_t ctxX = g_groupIdx[PosX];
+  uint32_t ctxY = g_groupIdx[PosY];
+  double   cost = lastBitsX[ctxX] + lastBitsY[ctxY];
+  if (ctxX > 3)
   {
-    Cost += xGetIEPRate() * ((CtxX-2)>>1);
+    cost += xGetIEPRate() * ((ctxX - 2) >> 1);
   }
-  if( CtxY > 3 )
+  if (ctxY > 3)
   {
-    Cost += xGetIEPRate() * ((CtxY-2)>>1);
+    cost += xGetIEPRate() * ((ctxY - 2) >> 1);
   }
-  return xGetICost( Cost );
+  return xGetICost(cost);
 }
 
 
@@ -643,8 +643,19 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
 
   const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
   const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;
-  
+
   CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag());
+  int baseLevel = cctx.getBaseLevel();
+  if (tu.cs->slice->getSPS()->getSpsRangeExtension().getPersistentRiceAdaptationEnabledFlag())
+  {
+    unsigned riceStats = ctx.getGRAdaptStats((unsigned)compID);
+    TCoeff historyValue = (TCoeff)1 << riceStats;
+    cctx.setHistValue(historyValue);
+  }
+  else
+  {
+    cctx.setHistValue(0);
+  }
   const int    iCGSizeM1      = (1 << cctx.log2CGSize()) - 1;
 
   int     iCGLastScanPos      = -1;
@@ -736,9 +747,8 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
         uint32_t    goRiceZero    = 0;
         if( remRegBins < 4 )
         {
-          unsigned  sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 );
-          goRiceParam             = g_auiGoRiceParsCoeff   [ sumAbs ];
-          goRiceZero              = g_auiGoRicePosCoeff0(0, goRiceParam);
+          goRiceParam = (cctx.*(cctx.deriveRiceRRC))(iScanPos, piDstCoeff, 0);
+          goRiceZero       = g_goRicePosCoeff0(0, goRiceParam);
         }
 
         const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx );
@@ -793,8 +803,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
         }
         else if( remRegBins >= 4 )
         {
-          int  sumAll = cctx.templateAbsSum(iScanPos, piDstCoeff, 4);
-          goRiceParam = g_auiGoRiceParsCoeff[sumAll];
+          goRiceParam = (cctx.*(cctx.deriveRiceRRC))(iScanPos, piDstCoeff, baseLevel);
           remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos);
         }
       }
@@ -954,7 +963,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
     int bitsY = 0;
     int ctxId;
     //X-coordinate
-    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim1-1]; ctxId++)
+    for (ctxId = 0; ctxId < g_groupIdx[dim1 - 1]; ctxId++)
     {
       const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId(ctxId) );
       lastBitsX[ ctxId ]   = bitsX + fB.intBits[ 0 ];
@@ -962,7 +971,7 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
     }
     lastBitsX[ctxId] = bitsX;
     //Y-coordinate
-    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim2-1]; ctxId++)
+    for (ctxId = 0; ctxId < g_groupIdx[dim2 - 1]; ctxId++)
     {
       const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId(ctxId) );
       lastBitsY[ ctxId ]   = bitsY + fB.intBits[ 0 ];
@@ -971,6 +980,16 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
     lastBitsY[ctxId] = bitsY;
   }
 
+#if JVET_W0046_RLSCP
+  unsigned zoTbWdith  = std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, cctx.width());
+  unsigned zoTbHeight = std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, cctx.height());
+  if (tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[compID].width <= 32 && tu.blocks[compID].height <= 32
+      && compID == COMPONENT_Y)
+  {
+    zoTbWdith  = (tu.blocks[compID].width == 32) ? 16 : zoTbWdith;
+    zoTbHeight = (tu.blocks[compID].height == 32) ? 16 : zoTbHeight;
+  }
+#endif
 
   bool bFoundLast = false;
   for (int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
@@ -997,6 +1016,13 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
         {
           uint32_t   uiPosY = uiBlkPos >> uiLog2BlockWidth;
           uint32_t   uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
+#if JVET_W0046_RLSCP
+          if (tu.cu->slice->getReverseLastSigCoeffFlag())
+          {
+            uiPosX = zoTbWdith - 1 - uiPosX;
+            uiPosY = zoTbHeight - 1 - uiPosY;
+          }
+#endif
           double d64CostLast  = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY );
 
           double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
@@ -1300,6 +1326,10 @@ void QuantRDOQ::xRateDistOptQuantTS( TransformUnit &tu, const ComponentID &compI
       const BinFracBits fracBitsPar = fracBits.getFracBitsArray( cctx.parityCtxIdAbsTS() );
 
       goRiceParam = cctx.templateAbsSumTS( scanPos, dstCoeff );
+      if (tu.cu->slice->getSPS()->getSpsRangeExtension().getTSRCRicePresentFlag() && tu.mtsIdx[compID] == MTS_SKIP)
+      {
+        goRiceParam = goRiceParam + tu.cu->slice->get_tsrc_index();
+      }
       unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, 0);
       const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
       const uint8_t     sign         = srcCoeff[ blkPos ] < 0 ? 1 : 0;
@@ -1519,6 +1549,10 @@ void QuantRDOQ::forwardBDPCM(TransformUnit &tu, const ComponentID &compID, const
       const BinFracBits fracBitsPar = fracBits.getFracBitsArray(cctx.parityCtxIdAbsTS());
 
       goRiceParam = cctx.templateAbsSumTS(scanPos, dstCoeff);
+      if (tu.cu->slice->getSPS()->getSpsRangeExtension().getTSRCRicePresentFlag() && tu.mtsIdx[compID] == MTS_SKIP)
+      {
+        goRiceParam = goRiceParam + tu.cu->slice->get_tsrc_index();
+      }
       unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, dirMode);
       const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
       const uint8_t     sign = srcCoeff[blkPos] - predCoeff < 0 ? 1 : 0;
diff --git a/source/Lib/CommonLib/QuantRDOQ.h b/source/Lib/CommonLib/QuantRDOQ.h
index f0e8dee72e67e2c3a442f427562a8374577c0a08..c6ae8d11cefcde176a1011ec7643e1d66afa5268 100644
--- a/source/Lib/CommonLib/QuantRDOQ.h
+++ b/source/Lib/CommonLib/QuantRDOQ.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/RdCost.cpp b/source/Lib/CommonLib/RdCost.cpp
index 59b97786af9a4dce56ba3ca80b61e7639dab6b3e..4c8c56564f82f2739c9c65a41af4b977f5ca5943 100644
--- a/source/Lib/CommonLib/RdCost.cpp
+++ b/source/Lib/CommonLib/RdCost.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -222,26 +222,6 @@ void RdCost::init()
   m_pairCheck    = 0;
 }
 
-
-#if ENABLE_SPLIT_PARALLELISM
-
-void RdCost::copyState( const RdCost& other )
-{
-  m_costMode      = other.m_costMode;
-  m_dLambda       = other.m_dLambda;
-  m_DistScale     = other.m_DistScale;
-  memcpy( m_distortionWeight, other.m_distortionWeight, sizeof( m_distortionWeight ) );
-  m_mvPredictor   = other.m_mvPredictor;
-  m_motionLambda  = other.m_motionLambda;
-  m_iCostScale    = other.m_iCostScale;
-  m_dLambdaMotionSAD = other.m_dLambdaMotionSAD;
-#if WCG_EXT
-  m_dLambda_unadjusted  = other.m_dLambda_unadjusted ;
-  m_DistScaleUnadjusted = other.m_DistScaleUnadjusted;
-#endif
-}
-#endif
-
 void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRefY, int iRefStride, int bitDepth, ComponentID compID, int subShiftMode, int step, bool useHadamard )
 {
   rcDP.bitDepth   = bitDepth;
@@ -2950,11 +2930,16 @@ void RdCost::saveUnadjustedLambda()
   m_DistScaleUnadjusted = m_DistScale;
 }
 
-void RdCost::initLumaLevelToWeightTable()
+void RdCost::initLumaLevelToWeightTable(int bitDepth)
 {
-  for (int i = 0; i < LUMA_LEVEL_TO_DQP_LUT_MAXSIZE; i++)
+  int lutSize = 1 << bitDepth;
+  if (m_lumaLevelToWeightPLUT.empty())
+  {
+    m_lumaLevelToWeightPLUT.resize(lutSize, 1.0);
+  }
+  for (int i = 0; i < lutSize; i++)
   {
-    double x = i;
+    double x = bitDepth < 10 ? i << (10 - bitDepth) : bitDepth > 10 ? i >> (bitDepth - 10) : i;
     double y;
 
     y = 0.015 * x - 1.5
diff --git a/source/Lib/CommonLib/RdCost.h b/source/Lib/CommonLib/RdCost.h
index b6da1c65fefd779ba7e1d69bb1e9a8758120aa65..5737cdd56c33a34ee07e3a69ef6c18e00637fdae 100644
--- a/source/Lib/CommonLib/RdCost.h
+++ b/source/Lib/CommonLib/RdCost.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -293,10 +293,6 @@ public:
     return length;
   }
 
-#if ENABLE_SPLIT_PARALLELISM
-  void copyState( const RdCost& other );
-#endif
-
   // for motion cost
   static uint32_t    xGetExpGolombNumberOfBits( int iVal )
   {
@@ -315,7 +311,7 @@ public:
   uint32_t           getBitsOfVectorWithPredictor( const int x, const int y, const unsigned imvShift )  { return xGetExpGolombNumberOfBits(((x << m_iCostScale) - m_mvPredictor.getHor())>>imvShift) + xGetExpGolombNumberOfBits(((y << m_iCostScale) - m_mvPredictor.getVer())>>imvShift); }
 #if WCG_EXT
          void    saveUnadjustedLambda       ();
-         void    initLumaLevelToWeightTable ();
+         void    initLumaLevelToWeightTable (int bitDepth);
   inline double  getWPSNRLumaLevelWeight    (int val) { return m_lumaLevelToWeightPLUT[val]; }
   void           initLumaLevelToWeightTableReshape();
   void           updateReshapeLumaLevelToWeightTableChromaMD (std::vector<Pel>& ILUT);
@@ -393,6 +389,10 @@ private:
   static Distortion xGetSSE_SIMD    ( const DistParam& pcDtParam );
   template<int iWidth, X86_VEXT vext>
   static Distortion xGetSSE_NxN_SIMD( const DistParam& pcDtParam );
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+  template<X86_VEXT vext>
+  static Distortion xGetSSE_HBD_SIMD(const DistParam& pcDtParam);
+#endif
 
   template<X86_VEXT vext>
   static Distortion xGetSAD_SIMD    ( const DistParam& pcDtParam );
@@ -400,12 +400,23 @@ private:
   static Distortion xGetSAD_NxN_SIMD( const DistParam& pcDtParam );
   template<X86_VEXT vext>
   static Distortion xGetSAD_IBD_SIMD( const DistParam& pcDtParam );
-
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+  template<X86_VEXT vext>
+  static Distortion xGetHADs_HBD_SIMD(const DistParam& pcDtParam);
+#else
   template<X86_VEXT vext>
   static Distortion xGetHADs_SIMD   ( const DistParam& pcDtParam );
+#endif
 
   template< X86_VEXT vext >
   static Distortion xGetSADwMask_SIMD( const DistParam& pcDtParam );
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+  template<X86_VEXT vext>
+  static Distortion xGetSAD_HBD_SIMD(const DistParam& pcDtParam);
+
+  template< X86_VEXT vext >
+  static Distortion xGetSADwMask_HBD_SIMD(const DistParam& pcDtParam);
+#endif
 #endif
 
 public:
diff --git a/source/Lib/CommonLib/RdCostWeightPrediction.cpp b/source/Lib/CommonLib/RdCostWeightPrediction.cpp
index f88f665c0cf3074255305c6931c0032d8c4e2d95..ad77316afececdcfdd56839dd30a8fe7e8c1bc2f 100644
--- a/source/Lib/CommonLib/RdCostWeightPrediction.cpp
+++ b/source/Lib/CommonLib/RdCostWeightPrediction.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/RdCostWeightPrediction.h b/source/Lib/CommonLib/RdCostWeightPrediction.h
index cf7d55e6f2fb26f2df631611d1e0b3417be50268..7ef14fd6e7ec2c1004ff7a5f5cce4c695058d910 100644
--- a/source/Lib/CommonLib/RdCostWeightPrediction.h
+++ b/source/Lib/CommonLib/RdCostWeightPrediction.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/Reshape.cpp b/source/Lib/CommonLib/Reshape.cpp
index 21371171a815d25ed71f8b9b7b85ca0e8463945c..eb24bbbb6360a4979da8b171f73a443c360ec426 100644
--- a/source/Lib/CommonLib/Reshape.cpp
+++ b/source/Lib/CommonLib/Reshape.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -65,22 +65,32 @@ void  Reshape::createDec(int bitDepth)
   m_lumaBD = bitDepth;
   m_reshapeLUTSize = 1 << m_lumaBD;
   m_initCW = m_reshapeLUTSize / PIC_CODE_CW_BINS;
-  if (m_fwdLUT.empty())
-    m_fwdLUT.resize(m_reshapeLUTSize, 0);
-  if (m_invLUT.empty())
-    m_invLUT.resize(m_reshapeLUTSize, 0);
+  m_fwdLUT.resize(m_reshapeLUTSize, 0);
+  m_invLUT.resize(m_reshapeLUTSize, 0);
   if (m_binCW.empty())
+  {
     m_binCW.resize(PIC_CODE_CW_BINS, 0);
+  }
   if (m_inputPivot.empty())
+  {
     m_inputPivot.resize(PIC_CODE_CW_BINS + 1, 0);
+  }
   if (m_fwdScaleCoef.empty())
+  {
     m_fwdScaleCoef.resize(PIC_CODE_CW_BINS, 1 << FP_PREC);
+  }
   if (m_invScaleCoef.empty())
+  {
     m_invScaleCoef.resize(PIC_CODE_CW_BINS, 1 << FP_PREC);
+  }
   if (m_reshapePivot.empty())
+  {
     m_reshapePivot.resize(PIC_CODE_CW_BINS + 1, 0);
+  }
   if (m_chromaAdjHelpLUT.empty())
+  {
     m_chromaAdjHelpLUT.resize(PIC_CODE_CW_BINS, 1<<CSCALE_FP_PREC);
+  }
 }
 
 void  Reshape::destroy()
@@ -228,9 +238,13 @@ void Reshape::copySliceReshaperInfo(SliceReshapeInfo& tInfo, SliceReshapeInfo& s
   }
   tInfo.sliceReshaperEnableFlag = sInfo.sliceReshaperEnableFlag;
   if (sInfo.sliceReshaperEnableFlag)
+  {
     tInfo.enableChromaAdj = sInfo.enableChromaAdj;
+  }
   else
+  {
     tInfo.enableChromaAdj = 0;
+  }
 }
 
 /** Construct reshaper from syntax
@@ -243,11 +257,17 @@ void Reshape::constructReshaper()
   int pwlFwdBinLen = m_reshapeLUTSize / PIC_CODE_CW_BINS;
 
   for (int i = 0; i < m_sliceReshapeInfo.reshaperModelMinBinIdx; i++)
+  {
     m_binCW[i] = 0;
+  }
   for (int i = m_sliceReshapeInfo.reshaperModelMaxBinIdx + 1; i < PIC_CODE_CW_BINS; i++)
+  {
     m_binCW[i] = 0;
+  }
   for (int i = m_sliceReshapeInfo.reshaperModelMinBinIdx; i <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; i++)
+  {
     m_binCW[i] = (uint16_t)(m_sliceReshapeInfo.reshaperModelBinCWDelta[i] + (int)m_initCW);
+  }
 
   for (int i = 0; i < pwlFwdLUTsize; i++)
   {
@@ -265,6 +285,22 @@ void Reshape::constructReshaper()
       m_chromaAdjHelpLUT[i] = (int32_t)(m_initCW * (1 << FP_PREC) / ( m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset ) );
     }
   }
+
+  int sumBinCW = 0;
+  for (int i = m_sliceReshapeInfo.reshaperModelMinBinIdx; i <= m_sliceReshapeInfo.reshaperModelMaxBinIdx; i++)
+  {
+    sumBinCW += m_binCW[i];
+    if (m_binCW[i] != 0)
+    {
+      CHECK((m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset) < (m_initCW >> 3) || (m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset) > ((m_initCW << 3) - 1),
+        "It is a requirement of bitstream conformance that, when lmcsCW[ i ] is not equal to 0, ( lmcsCW[ i ] + lmcsDeltaCrs ) shall be in the range of ( OrgCW >> 3 ) to ( ( OrgCW << 3 ) - 1 ), inclusive.");
+    }
+    CHECK(m_binCW[i] < (m_initCW >> 3) || m_binCW[i] > ((m_initCW << 3) - 1), " lmcsCW[ i ] shall be in the range of ( OrgCW >> 3 ) to ( ( OrgCW << 3 ) - 1 if not equal to 0 ).");
+    CHECK((((m_reshapePivot[i] % (1 << (m_lumaBD - 5))) != 0) && ((m_reshapePivot[i] >> (m_lumaBD - 5)) == (m_reshapePivot[i + 1] >> (m_lumaBD - 5)))),
+      "It is a requirement of bitstream conformance that, for i = lmcs_min_bin_idx..LmcsMaxBinIdx, when the value of LmcsPivot[ i ] is not a multiple of 1 << ( BitDepth - 5 ), the value of(LmcsPivot[i] >> (BitDepth - 5)) shall not be equal to the value of(LmcsPivot[i + 1] >> (BitDepth - 5)).");
+  }
+  CHECK(sumBinCW > ((1 << m_lumaBD) - 1), "It is a requirement of bitstream conformance that the following condition is true: Sum_(i = 0) ^ 15 [lmcsCW[i]] <= (1 << BitDepth) - 1.");
+
   for (int lumaSample = 0; lumaSample < m_reshapeLUTSize; lumaSample++)
   {
     int idxY = lumaSample / m_initCW;
diff --git a/source/Lib/CommonLib/Reshape.h b/source/Lib/CommonLib/Reshape.h
index 6b6e9d58e17adcafa6551c32a41e15f896cfc0ac..2d28d97196a9dba42cc424c30609ef9951beff8f 100644
--- a/source/Lib/CommonLib/Reshape.h
+++ b/source/Lib/CommonLib/Reshape.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -74,11 +74,7 @@ protected:
   int                     m_vpduY;
 public:
   Reshape();
-#if ENABLE_SPLIT_PARALLELISM
-  virtual ~Reshape();
-#else
   ~Reshape();
-#endif
 
   void createDec(int bitDepth);
   void destroy();
diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp
index dc1c29aedeb1bd14078082f975495715280ea495..72e8a697fd963e33abfcdefc8302e766a7356255 100644
--- a/source/Lib/CommonLib/Rom.cpp
+++ b/source/Lib/CommonLib/Rom.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -70,9 +70,7 @@ const char* nalUnitTypeToString(NalUnitType type)
   case NAL_UNIT_CODED_SLICE_IDR_N_LP:   return "IDR_N_LP";
   case NAL_UNIT_CODED_SLICE_CRA:        return "CRA";
   case NAL_UNIT_CODED_SLICE_GDR:        return "GDR";
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   case NAL_UNIT_OPI:                    return "OPI";
-#endif
   case NAL_UNIT_DCI:                    return "DCI";
   case NAL_UNIT_VPS:                    return "VPS";
   case NAL_UNIT_SPS:                    return "SPS";
@@ -496,35 +494,13 @@ const int g_invQuantScales[2][SCALING_LIST_REM_NUM] = // can be represented as a
 // Intra prediction
 // ====================================================================================================================
 
-const uint8_t g_aucIntraModeNumFast_UseMPM_2D[7 - MIN_CU_LOG2 + 1][7 - MIN_CU_LOG2 + 1] =
-{
-  {3, 3, 3, 3, 2, 2},  //   4x4,   4x8,   4x16,   4x32,   4x64,   4x128,
-  {3, 3, 3, 3, 3, 2},  //   8x4,   8x8,   8x16,   8x32,   8x64,   8x128,
-  {3, 3, 3, 3, 3, 2},  //  16x4,  16x8,  16x16,  16x32,  16x64,  16x128,
-  {3, 3, 3, 3, 3, 2},  //  32x4,  32x8,  32x16,  32x32,  32x64,  32x128,
-  {2, 3, 3, 3, 3, 2},  //  64x4,  64x8,  64x16,  64x32,  64x64,  64x128,
-  {2, 2, 2, 2, 2, 3},  // 128x4, 128x8, 128x16, 128x32, 128x64, 128x128,
-};
-
-const uint8_t g_aucIntraModeNumFast_UseMPM[MAX_CU_DEPTH] =
-{
-  3,  //   2x2
-  8,  //   4x4
-  8,  //   8x8
-  3,  //  16x16
-  3,  //  32x32
-  3,  //  64x64
-  3   // 128x128
-};
-const uint8_t g_aucIntraModeNumFast_NotUseMPM[MAX_CU_DEPTH] =
-{
-  3,  //   2x2
-  9,  //   4x4
-  9,  //   8x8
-  4,  //  16x16   33
-  4,  //  32x32   33
-  5,  //  64x64   33
-  5   // 128x128
+const uint8_t g_intraModeNumFastUseMPM2D[7 - MIN_CU_LOG2 + 1][7 - MIN_CU_LOG2 + 1] = {
+  { 3, 3, 3, 3, 2, 2 },   //   4x4,   4x8,   4x16,   4x32,   4x64,   4x128,
+  { 3, 3, 3, 3, 3, 2 },   //   8x4,   8x8,   8x16,   8x32,   8x64,   8x128,
+  { 3, 3, 3, 3, 3, 2 },   //  16x4,  16x8,  16x16,  16x32,  16x64,  16x128,
+  { 3, 3, 3, 3, 3, 2 },   //  32x4,  32x8,  32x16,  32x32,  32x64,  32x128,
+  { 2, 3, 3, 3, 3, 2 },   //  64x4,  64x8,  64x16,  64x32,  64x64,  64x128,
+  { 2, 2, 2, 2, 2, 3 },   // 128x4, 128x8, 128x16, 128x32, 128x64, 128x128,
 };
 
 const uint8_t g_chroma422IntraAngleMappingTable[NUM_INTRA_MODE] =
@@ -547,17 +523,21 @@ UnitScale g_miScaling( MIN_CU_LOG2, MIN_CU_LOG2 );
 // ====================================================================================================================
 // Scanning order & context model mapping
 // ====================================================================================================================
-
+int g_riceT[4] = { 32,128, 512, 2048 };
+int g_riceShift[5] = { 0, 2, 4, 6, 8 };
 // scanning order table
 ScanElement *g_scanOrder[SCAN_NUMBER_OF_GROUP_TYPES][SCAN_NUMBER_OF_TYPES][MAX_CU_SIZE / 2 + 1][MAX_CU_SIZE / 2 + 1];
 ScanElement  g_coefTopLeftDiagScan8x8[ MAX_CU_SIZE / 2 + 1 ][ 64 ];
 
-const uint32_t g_uiMinInGroup[LAST_SIGNIFICANT_GROUPS] = { 0,1,2,3,4,6,8,12,16,24,32,48,64,96 };
-const uint32_t g_uiGroupIdx[MAX_TB_SIZEY] = { 0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9, 10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11 };
-const uint32_t g_auiGoRiceParsCoeff[32] =
-{
-  0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3
-};
+const uint32_t g_minInGroup[LAST_SIGNIFICANT_GROUPS] = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96 };
+
+const uint32_t g_groupIdx[MAX_TB_SIZEY] = { 0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,
+                                            8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,
+                                            10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+                                            11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11 };
+
+const uint32_t g_goRiceParsCoeff[32] = { 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2,
+                                         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 };
 const char *MatrixType[SCALING_LIST_SIZE_NUM][SCALING_LIST_NUM] =
 {
   {
@@ -799,3 +779,4 @@ int8_t    g_angle2mask[GEO_NUM_ANGLES] = { 0, -1, 1, 2, 3, 4, -1, -1, 5, -1, -1,
 int8_t    g_Dis[GEO_NUM_ANGLES] = { 8, 8, 8, 8, 4, 4, 2, 1, 0, -1, -2, -4, -4, -8, -8, -8, -8, -8, -8, -8, -4, -4, -2, -1, 0, 1, 2, 4, 4, 8, 8, 8 };
 int8_t    g_angle2mirror[GEO_NUM_ANGLES] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2 };
 //! \}
+
diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h
index e7352e3c102a64b9de7b1e59eeed547e92918496..42fa15f1bbace96a19d067941ac8ba467da5c968 100644
--- a/source/Lib/CommonLib/Rom.h
+++ b/source/Lib/CommonLib/Rom.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -87,11 +87,12 @@ static const int g_transformMatrixShift[TRANSFORM_NUMBER_OF_DIRECTIONS] = {  6,
 // ====================================================================================================================
 // Scanning order & context mapping table
 // ====================================================================================================================
-
-extern const uint32_t   g_uiGroupIdx[ MAX_TB_SIZEY ];
-extern const uint32_t   g_uiMinInGroup[ LAST_SIGNIFICANT_GROUPS ];
-extern const uint32_t   g_auiGoRiceParsCoeff     [ 32 ];
-inline uint32_t g_auiGoRicePosCoeff0(int st, uint32_t ricePar)
+extern int g_riceT[4];
+extern int g_riceShift[5];
+extern const uint32_t g_groupIdx[MAX_TB_SIZEY];
+extern const uint32_t g_minInGroup[LAST_SIGNIFICANT_GROUPS];
+extern const uint32_t g_goRiceParsCoeff[32];
+inline uint32_t       g_goRicePosCoeff0(int st, uint32_t ricePar)
 {
   return (st < 2 ? 1 : 2) << ricePar;
 }
@@ -100,9 +101,7 @@ inline uint32_t g_auiGoRicePosCoeff0(int st, uint32_t ricePar)
 // Intra prediction table
 // ====================================================================================================================
 
-extern const uint8_t  g_aucIntraModeNumFast_UseMPM_2D[7 - MIN_CU_LOG2 + 1][7 - MIN_CU_LOG2 + 1];
-extern const uint8_t  g_aucIntraModeNumFast_UseMPM   [MAX_CU_DEPTH];
-extern const uint8_t  g_aucIntraModeNumFast_NotUseMPM[MAX_CU_DEPTH];
+extern const uint8_t g_intraModeNumFastUseMPM2D[7 - MIN_CU_LOG2 + 1][7 - MIN_CU_LOG2 + 1];
 
 extern const uint8_t  g_chroma422IntraAngleMappingTable[NUM_INTRA_MODE];
 
diff --git a/source/Lib/CommonLib/RomLFNST.cpp b/source/Lib/CommonLib/RomLFNST.cpp
index d09e56cde128697853f9772d2f5b5bf06dd89fb3..10bcbbc621903ddd6edf9f48eaac6067192fe1a8 100644
--- a/source/Lib/CommonLib/RomLFNST.cpp
+++ b/source/Lib/CommonLib/RomLFNST.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/RomTr.cpp b/source/Lib/CommonLib/RomTr.cpp
index 294bfd3dd42b4385cff102472b02946d15ac0947..45690c36f8df70b6fcd9929b1f1e265acdba640e 100644
--- a/source/Lib/CommonLib/RomTr.cpp
+++ b/source/Lib/CommonLib/RomTr.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -387,7 +387,7 @@ const TMatrixCoeff g_trCoreDCT2P64[TRANSFORM_NUMBER_OF_DIRECTIONS][64][64] =
 // DCT-8
 const TMatrixCoeff g_trCoreDCT8P4[TRANSFORM_NUMBER_OF_DIRECTIONS][4][4] =
 {
-  DEFINE_DCT8_P4_MATRIX(21505, 18893, 14081,  7425),
+  DEFINE_DCT8_P4_MATRIX(21505, 18893, 14081,  7424),
   DEFINE_DCT8_P4_MATRIX(84,     74,     55,     29)
 };
 const TMatrixCoeff g_trCoreDCT8P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8] =
@@ -409,7 +409,7 @@ const TMatrixCoeff g_trCoreDCT8P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32] =
 // DST-7
 const TMatrixCoeff g_trCoreDST7P4[TRANSFORM_NUMBER_OF_DIRECTIONS][4][4] =
 {
-  DEFINE_DST7_P4_MATRIX( 7425, 14081, 18893, 21505),
+  DEFINE_DST7_P4_MATRIX(7424, 14081, 18893, 21505),
   DEFINE_DST7_P4_MATRIX(   29,    55,    74,    84)
 };
 const TMatrixCoeff g_trCoreDST7P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8] =
diff --git a/source/Lib/CommonLib/SEI.cpp b/source/Lib/CommonLib/SEI.cpp
index cb27029c4210aa708fae276ac813898c094c2557..6650a1b921d9934ef2757542ef98cf7599902469 100644
--- a/source/Lib/CommonLib/SEI.cpp
+++ b/source/Lib/CommonLib/SEI.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -149,6 +149,293 @@ void SEIPictureTiming::copyTo (SEIPictureTiming& target) const
   target.m_vclCpbDelayOffset = m_vclCpbDelayOffset;
 }
 
+bool SEIScalabilityDimensionInfo::isSDISameContent(SEIScalabilityDimensionInfo* sdiB)   // true iff sdiB is non-null and every field compared below matches this object
+{
+  if (!sdiB)
+  {
+    return false;
+  }
+  if (m_sdiNumViews != sdiB->m_sdiNumViews)
+  {
+    return false;
+  }
+  if (m_sdiMaxLayersMinus1 != sdiB->m_sdiMaxLayersMinus1)
+  {
+    return false;
+  }
+  if (m_sdiMultiviewInfoFlag != sdiB->m_sdiMultiviewInfoFlag)
+  {
+    return false;
+  }
+  if (m_sdiAuxiliaryInfoFlag != sdiB->m_sdiAuxiliaryInfoFlag)
+  {
+    return false;
+  }
+  if (m_sdiMultiviewInfoFlag || m_sdiAuxiliaryInfoFlag)   // per-layer data is compared only when at least one of the two flags is set
+  {
+    if (m_sdiMultiviewInfoFlag)
+    {
+      if (m_sdiViewIdLenMinus1 != sdiB->m_sdiViewIdLenMinus1)
+      {
+        return false;
+      }
+    }
+    for (int i = 0; i <= m_sdiMaxLayersMinus1; i++)   // inclusive bound: m_sdiMaxLayersMinus1 + 1 entries are visited
+    {
+      if (m_sdiMultiviewInfoFlag)
+      {
+        if (m_sdiViewIdVal[i] != sdiB->m_sdiViewIdVal[i])
+        {
+          return false;
+        }
+      }
+      if (m_sdiAuxiliaryInfoFlag)
+      {
+        if (m_sdiAuxId[i] != sdiB->m_sdiAuxId[i])
+        {
+          return false;
+        }
+        if (m_sdiAuxId[i] > 0)   // associated-primary-layer data is compared only for entries with a non-zero aux id
+        {
+          if (m_sdiNumAssociatedPrimaryLayersMinus1[i] != sdiB->m_sdiNumAssociatedPrimaryLayersMinus1[i])
+          {
+            return false;
+          }
+          for (int j = 0; j <= m_sdiNumAssociatedPrimaryLayersMinus1[i]; j++)
+          {
+            if (m_sdiAssociatedPrimaryLayerIdx[i][j] != sdiB->m_sdiAssociatedPrimaryLayerIdx[i][j])
+            {
+              return false;
+            }
+          }
+        }
+      }
+    }
+  }
+  return true;
+}
+
+uint32_t SEIMultiviewAcquisitionInfo::getMantissaFocalLengthXLen( int i ) const
+{
+  return xGetSyntaxElementLen( m_maiExponentFocalLengthX[i], m_maiPrecFocalLength, m_maiMantissaFocalLengthX[ i ] );   // length from entry i's X focal-length exponent and the focal-length precision
+}
+
+uint32_t SEIMultiviewAcquisitionInfo::getMantissaFocalLengthYLen( int i ) const
+{
+  return xGetSyntaxElementLen( m_maiExponentFocalLengthY[i], m_maiPrecFocalLength, m_maiMantissaFocalLengthY[ i ]  );   // length from entry i's Y focal-length exponent and the focal-length precision
+}
+
+
+uint32_t SEIMultiviewAcquisitionInfo::getMantissaPrincipalPointXLen( int i ) const
+{
+  return xGetSyntaxElementLen( m_maiExponentPrincipalPointX[i], m_maiPrecPrincipalPoint, m_maiMantissaPrincipalPointX[ i ]  );   // length from entry i's X principal-point exponent and the principal-point precision
+}
+
+uint32_t SEIMultiviewAcquisitionInfo::getMantissaPrincipalPointYLen( int i ) const
+{
+  return xGetSyntaxElementLen( m_maiExponentPrincipalPointY[i], m_maiPrecPrincipalPoint, m_maiMantissaPrincipalPointY[ i ] );   // length from entry i's Y principal-point exponent and the principal-point precision
+}
+
+uint32_t SEIMultiviewAcquisitionInfo::getMantissaSkewFactorLen( int i ) const
+{
+  return xGetSyntaxElementLen( m_maiExponentSkewFactor[ i ], m_maiPrecSkewFactor, m_maiMantissaSkewFactor[ i ] );   // length from entry i's skew-factor exponent and the skew-factor precision
+}
+
+uint32_t SEIMultiviewAcquisitionInfo::getMantissaRLen( int i, int j, int k ) const
+{
+  return xGetSyntaxElementLen( m_maiExponentR[ i ][ j ][ k ], m_maiPrecRotationParam, m_maiMantissaR[ i ][ j] [ k ] );   // length from the R[i][j][k] exponent and the rotation-parameter precision
+}
+
+uint32_t SEIMultiviewAcquisitionInfo::getMantissaTLen( int i, int j ) const
+{
+  return xGetSyntaxElementLen( m_maiExponentT[ i ][ j ], m_maiPrecTranslationParam, m_maiMantissaT[ i ][ j ] );   // length from the T[i][j] exponent and the translation-parameter precision
+}
+uint32_t SEIMultiviewAcquisitionInfo::xGetSyntaxElementLen( int expo, int prec, int val ) const
+{
+  uint32_t len;   // returned length: max(0, prec - 30) when expo == 0, otherwise max(0, expo + prec - 31)
+  if( expo == 0 )
+  {
+    len = std::max(0, prec - 30 );
+  }
+  else
+  {
+    len = std::max( 0, expo + prec - 31 );
+  }
+  // val must fit in len bits; for len >= 31 any non-negative int fits, and ( 1 << len ) would overflow int (undefined behaviour).
+  assert( val >= 0 );
+  assert( len >= 31 || val <= ( ( 1 << len )- 1) );
+  return len;
+}
+
+bool SEIMultiviewAcquisitionInfo::isMAISameContent(SEIMultiviewAcquisitionInfo *maiB)   // true iff maiB is non-null and every field compared below matches this object
+{
+  if (!maiB)
+  {
+    return false;
+  }
+  if (m_maiIntrinsicParamFlag != maiB->m_maiIntrinsicParamFlag)
+  {
+    return false;
+  }
+  if (m_maiExtrinsicParamFlag != maiB->m_maiExtrinsicParamFlag)
+  {
+    return false;
+  }
+  if (m_maiNumViewsMinus1 != maiB->m_maiNumViewsMinus1)
+  {
+    return false;
+  }
+  if (m_maiIntrinsicParamFlag)   // intrinsic fields are compared only when the flag is set (flag equality was checked above)
+  {
+    if (m_maiIntrinsicParamsEqualFlag != maiB->m_maiIntrinsicParamsEqualFlag)
+    {
+      return false;
+    }
+    if (m_maiPrecFocalLength != maiB->m_maiPrecFocalLength)
+    {
+      return false;
+    }
+    if (m_maiPrecPrincipalPoint != maiB->m_maiPrecPrincipalPoint)
+    {
+      return false;
+    }
+    if (m_maiPrecSkewFactor != maiB->m_maiPrecSkewFactor)
+    {
+      return false;
+    }
+    for (int i = 0; i <= (m_maiIntrinsicParamsEqualFlag ? 0 : m_maiNumViewsMinus1); i++)   // only index 0 is compared when m_maiIntrinsicParamsEqualFlag is set
+    {
+      if (m_maiSignFocalLengthX[i] != maiB->m_maiSignFocalLengthX[i])
+      {
+        return false;
+      }
+      if (m_maiExponentFocalLengthX[i] != maiB->m_maiExponentFocalLengthX[i])
+      {
+        return false;
+      }
+      if (m_maiMantissaFocalLengthX[i] != maiB->m_maiMantissaFocalLengthX[i])
+      {
+        return false;
+      }
+      if (m_maiSignFocalLengthY[i] != maiB->m_maiSignFocalLengthY[i])
+      {
+        return false;
+      }
+      if (m_maiExponentFocalLengthY[i] != maiB->m_maiExponentFocalLengthY[i])
+      {
+        return false;
+      }
+      if (m_maiMantissaFocalLengthY[i] != maiB->m_maiMantissaFocalLengthY[i])
+      {
+        return false;
+      }
+      if (m_maiSignPrincipalPointX[i] != maiB->m_maiSignPrincipalPointX[i])
+      {
+        return false;
+      }
+      if (m_maiExponentPrincipalPointX[i] != maiB->m_maiExponentPrincipalPointX[i])
+      {
+        return false;
+      }
+      if (m_maiMantissaPrincipalPointX[i] != maiB->m_maiMantissaPrincipalPointX[i])
+      {
+        return false;
+      }
+      if (m_maiSignPrincipalPointY[i] != maiB->m_maiSignPrincipalPointY[i])
+      {
+        return false;
+      }
+      if (m_maiExponentPrincipalPointY[i] != maiB->m_maiExponentPrincipalPointY[i])
+      {
+        return false;
+      }
+      if (m_maiMantissaPrincipalPointY[i] != maiB->m_maiMantissaPrincipalPointY[i])
+      {
+        return false;
+      }
+      if (m_maiSignSkewFactor[i] != maiB->m_maiSignSkewFactor[i])
+      {
+        return false;
+      }
+      if (m_maiExponentSkewFactor[i] != maiB->m_maiExponentSkewFactor[i])
+      {
+        return false;
+      }
+      if (m_maiMantissaSkewFactor[i] != maiB->m_maiMantissaSkewFactor[i])
+      {
+        return false;
+      }
+    }
+  }
+  if (m_maiExtrinsicParamFlag)   // extrinsic fields are compared only when the flag is set (flag equality was checked above)
+  {
+    if (m_maiPrecRotationParam != maiB->m_maiPrecRotationParam)
+    {
+      return false;
+    }
+    if (m_maiPrecTranslationParam != maiB->m_maiPrecTranslationParam)
+    {
+      return false;
+    }
+    for (int i = 0; i <= m_maiNumViewsMinus1; i++)   // inclusive bound: m_maiNumViewsMinus1 + 1 entries are visited
+    {
+      for (int j = 0; j < 3; j++)   // per entry i: R is compared as 3x3 values, T as 3 values
+      {
+        for (int k = 0; k < 3; k++)
+        {
+          if (m_maiSignR[i][j][k] != maiB->m_maiSignR[i][j][k])
+          {
+            return false;
+          }
+          if (m_maiExponentR[i][j][k] != maiB->m_maiExponentR[i][j][k])
+          {
+            return false;
+          }
+          if (m_maiMantissaR[i][j][k] != maiB->m_maiMantissaR[i][j][k])
+          {
+            return false;
+          }
+        }
+        if (m_maiSignT[i][j] != maiB->m_maiSignT[i][j])
+        {
+          return false;
+        }
+        if (m_maiExponentT[i][j] != maiB->m_maiExponentT[i][j])
+        {
+          return false;
+        }
+        if (m_maiMantissaT[i][j] != maiB->m_maiMantissaT[i][j])
+        {
+          return false;
+        }
+      }
+    }
+  }
+  return true;
+}
+
+#if JVET_W0078_MVP_SEI 
+bool SEIMultiviewViewPosition::isMVPSameContent(SEIMultiviewViewPosition *mvpB)   // true iff mvpB is non-null and its view count and view positions match this object
+{
+  if (!mvpB)
+  {
+    return false;
+  }
+  if (m_mvpNumViewsMinus1 != mvpB->m_mvpNumViewsMinus1)
+  {
+    return false;
+  }
+  for (int i = 0; i <= m_mvpNumViewsMinus1; i++)   // inclusive bound: m_mvpNumViewsMinus1 + 1 positions are compared
+  {
+    if (m_mvpViewPosition[i] != mvpB->m_mvpViewPosition[i])
+    {
+      return false;
+    }
+  }
+  return true;
+}
+#endif
+
 // Static member
 const char *SEI::getSEIMessageString(SEI::PayloadType payloadType)
 {
@@ -161,6 +448,7 @@ const char *SEI::getSEIMessageString(SEI::PayloadType payloadType)
     case SEI::USER_DATA_UNREGISTERED:               return "User data unregistered";
     case SEI::FILM_GRAIN_CHARACTERISTICS:           return "Film grain characteristics";           // not currently decoded
     case SEI::FRAME_PACKING:                        return "Frame packing arrangement";
+    case SEI::DISPLAY_ORIENTATION:                  return "Display orientation";
     case SEI::PARAMETER_SETS_INCLUSION_INDICATION:  return "Parameter sets inclusion indication";
     case SEI::DECODING_UNIT_INFO:                   return "Decoding unit information";
     case SEI::SCALABLE_NESTING:                     return "Scalable nesting";
@@ -173,13 +461,26 @@ const char *SEI::getSEIMessageString(SEI::PayloadType payloadType)
     case SEI::CONTENT_LIGHT_LEVEL_INFO:             return "Content light level information";
     case SEI::AMBIENT_VIEWING_ENVIRONMENT:          return "Ambient viewing environment";
     case SEI::CONTENT_COLOUR_VOLUME:                return "Content colour volume";
+    case SEI::COLOUR_TRANSFORM_INFO:                return "Colour transform information";
     case SEI::EQUIRECTANGULAR_PROJECTION:           return "Equirectangular projection";
     case SEI::SPHERE_ROTATION:                      return "Sphere rotation";
     case SEI::REGION_WISE_PACKING:                  return "Region wise packing information";
     case SEI::OMNI_VIEWPORT:                        return "Omni viewport";
     case SEI::GENERALIZED_CUBEMAP_PROJECTION:       return "Generalized cubemap projection";
+    case SEI::ALPHA_CHANNEL_INFO:                   return "Alpha channel information";
+    case SEI::DEPTH_REPRESENTATION_INFO:            return "Depth representation information";
+    case SEI::MULTIVIEW_ACQUISITION_INFO:           return "Multiview acquisition information";
+#if JVET_W0078_MVP_SEI 
+    case SEI::MULTIVIEW_VIEW_POSITION:              return "Multiview view position";
+#endif
     case SEI::SAMPLE_ASPECT_RATIO_INFO:             return "Sample aspect ratio information";
     case SEI::SUBPICTURE_LEVEL_INFO:                return "Subpicture level information";
+    case SEI::ANNOTATED_REGIONS:                    return "Annotated Region";
+    case SEI::SCALABILITY_DIMENSION_INFO:           return "Scalability dimension information";
+    case SEI::EXTENDED_DRAP_INDICATION:             return "Extended DRAP indication";
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+    case SEI::CONSTRAINED_RASL_ENCODING:            return "Constrained RASL encoding";
+#endif
     default:                                        return "Unknown";
   }
 }
diff --git a/source/Lib/CommonLib/SEI.h b/source/Lib/CommonLib/SEI.h
index cb33d7a0cfe65219ea33a6c2ffeb8ef7f7e0a95b..989745276832214d9f42c640feaced66919b233a 100644
--- a/source/Lib/CommonLib/SEI.h
+++ b/source/Lib/CommonLib/SEI.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -61,24 +61,38 @@ public:
     USER_DATA_UNREGISTERED               = 5,
     FILM_GRAIN_CHARACTERISTICS           = 19,
     FRAME_PACKING                        = 45,
+    DISPLAY_ORIENTATION                  = 47,
     PARAMETER_SETS_INCLUSION_INDICATION  = 129,
     DECODING_UNIT_INFO                   = 130,
     DECODED_PICTURE_HASH                 = 132,
     SCALABLE_NESTING                     = 133,
     MASTERING_DISPLAY_COLOUR_VOLUME      = 137,
+    COLOUR_TRANSFORM_INFO                = 142,
     DEPENDENT_RAP_INDICATION             = 145,
     EQUIRECTANGULAR_PROJECTION           = 150,
     SPHERE_ROTATION                      = 154,
     REGION_WISE_PACKING                  = 155,
     OMNI_VIEWPORT                        = 156,
     GENERALIZED_CUBEMAP_PROJECTION       = 153,
+    ALPHA_CHANNEL_INFO                   = 165,
     FRAME_FIELD_INFO                     = 168,
+    DEPTH_REPRESENTATION_INFO            = 177,
+    MULTIVIEW_ACQUISITION_INFO           = 179,
+#if JVET_W0078_MVP_SEI 
+    MULTIVIEW_VIEW_POSITION              = 180,
+#endif
     SUBPICTURE_LEVEL_INFO                = 203,
     SAMPLE_ASPECT_RATIO_INFO             = 204,
     CONTENT_LIGHT_LEVEL_INFO             = 144,
     ALTERNATIVE_TRANSFER_CHARACTERISTICS = 147,
     AMBIENT_VIEWING_ENVIRONMENT          = 148,
     CONTENT_COLOUR_VOLUME                = 149,
+    ANNOTATED_REGIONS                    = 202,
+    SCALABILITY_DIMENSION_INFO           = 205,
+    EXTENDED_DRAP_INDICATION             = 206,
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+    CONSTRAINED_RASL_ENCODING            = 207,
+#endif
   };
 
   SEI() {}
@@ -201,6 +215,201 @@ public:
   uint8_t              m_gcmpGuardBandSamplesMinus1;
 };
 
+class SEIScalabilityDimensionInfo : public SEI
+{
+public:
+  // Construction needs no work of its own: the scalar fields below declare
+  // zero/false defaults and the per-layer vectors start out empty.
+  SEIScalabilityDimensionInfo() {}
+  virtual ~SEIScalabilityDimensionInfo() {}
+
+  // Identifies this message as the scalability dimension information SEI.
+  PayloadType payloadType() const { return SCALABILITY_DIMENSION_INFO; }
+
+  // Content comparison against another SDI message.
+  // Only declared here; the body is provided outside this class definition.
+  bool isSDISameContent(SEIScalabilityDimensionInfo* sdiB);
+
+  int                   m_sdiNumViews = 0;
+  int                   m_sdiMaxLayersMinus1 = 0;
+  bool                  m_sdiMultiviewInfoFlag = false;
+  bool                  m_sdiAuxiliaryInfoFlag = false;
+  int                   m_sdiViewIdLenMinus1 = 0;
+  std::vector<int>      m_sdiLayerId;
+  std::vector<int>      m_sdiViewIdVal;
+  std::vector<int>      m_sdiAuxId;
+  std::vector<int>      m_sdiNumAssociatedPrimaryLayersMinus1;
+  std::vector<std::vector<int>> m_sdiAssociatedPrimaryLayerIdx;
+};
+
+class SEIMultiviewAcquisitionInfo : public SEI   // multiview acquisition information SEI (payload type MULTIVIEW_ACQUISITION_INFO)
+{
+public:
+  PayloadType payloadType( ) const { return MULTIVIEW_ACQUISITION_INFO; }
+  SEIMultiviewAcquisitionInfo ( ) { };
+  ~SEIMultiviewAcquisitionInfo( ) { };
+  SEI* getCopy( ) const { return new SEIMultiviewAcquisitionInfo(*this); };
+  bool isMAISameContent(SEIMultiviewAcquisitionInfo* maiB);
+  // Sizes every parameter array from m_maiNumViewsMinus1 and m_maiIntrinsicParamsEqualFlag, so set those two first.
+  void resizeArrays( )
+  {
+    int numViews = m_maiIntrinsicParamsEqualFlag ? 1 : m_maiNumViewsMinus1 + 1;
+    m_maiSignFocalLengthX       .resize( numViews );
+    m_maiExponentFocalLengthX   .resize( numViews );
+    m_maiMantissaFocalLengthX   .resize( numViews );
+    m_maiSignFocalLengthY       .resize( numViews );
+    m_maiExponentFocalLengthY   .resize( numViews );
+    m_maiMantissaFocalLengthY   .resize( numViews );
+    m_maiSignPrincipalPointX    .resize( numViews );
+    m_maiExponentPrincipalPointX.resize( numViews );
+    m_maiMantissaPrincipalPointX.resize( numViews );
+    m_maiSignPrincipalPointY    .resize( numViews );
+    m_maiExponentPrincipalPointY.resize( numViews );
+    m_maiMantissaPrincipalPointY.resize( numViews );
+    m_maiSignSkewFactor         .resize( numViews );
+    m_maiExponentSkewFactor     .resize( numViews );
+    m_maiMantissaSkewFactor     .resize( numViews );
+    // The R and T arrays always get one entry per view, independent of the intrinsic "equal" flag.
+    m_maiSignR                  .resize( m_maiNumViewsMinus1 + 1 );
+    m_maiExponentR              .resize( m_maiNumViewsMinus1 + 1 );
+    m_maiMantissaR              .resize( m_maiNumViewsMinus1 + 1 );
+    m_maiSignT                  .resize( m_maiNumViewsMinus1 + 1 );
+    m_maiExponentT              .resize( m_maiNumViewsMinus1 + 1 );
+    m_maiMantissaT              .resize( m_maiNumViewsMinus1 + 1 );
+    // Per view: R is sized 3x3 and T is sized 3.
+    for( int i = 0; i <= m_maiNumViewsMinus1 ; i++ )
+    {
+      m_maiSignR    [i].resize( 3 );
+      m_maiExponentR[i].resize( 3 );
+      m_maiMantissaR[i].resize( 3 );
+      m_maiSignT    [i].resize( 3 );
+      m_maiExponentT[i].resize( 3 );
+      m_maiMantissaT[i].resize( 3 );
+
+      for (int j = 0; j < 3; j++)
+      {
+        m_maiSignR    [i][j].resize( 3 );
+        m_maiExponentR[i][j].resize( 3 );
+        m_maiMantissaR[i][j].resize( 3 );
+      }
+    }
+  }
+  // Length queries for the individual mantissa elements; only declared here, bodies live outside this class definition.
+  uint32_t getMantissaFocalLengthXLen   (int i) const;
+  uint32_t getMantissaFocalLengthYLen   (int i) const;
+  uint32_t getMantissaPrincipalPointXLen(int i) const;
+  uint32_t getMantissaPrincipalPointYLen(int i) const;
+  uint32_t getMantissaSkewFactorLen     (int i) const;
+  uint32_t getMantissaRLen              (int i, int j, int k ) const;
+  uint32_t getMantissaTLen              (int i, int j )        const;
+  // The constructor body is empty, so the scalar fields carry in-class defaults instead of starting out indeterminate.
+  bool              m_maiIntrinsicParamFlag = false;
+  bool              m_maiExtrinsicParamFlag = false;
+  int               m_maiNumViewsMinus1 = 0;
+  bool              m_maiIntrinsicParamsEqualFlag = false;
+  int               m_maiPrecFocalLength = 0;
+  int               m_maiPrecPrincipalPoint = 0;
+  int               m_maiPrecSkewFactor = 0;
+  std::vector<bool> m_maiSignFocalLengthX;
+  std::vector<int>  m_maiExponentFocalLengthX;
+  std::vector<int>  m_maiMantissaFocalLengthX;
+  std::vector<bool> m_maiSignFocalLengthY;
+  std::vector<int>  m_maiExponentFocalLengthY;
+  std::vector<int>  m_maiMantissaFocalLengthY;
+  std::vector<bool> m_maiSignPrincipalPointX;
+  std::vector<int>  m_maiExponentPrincipalPointX;
+  std::vector<int>  m_maiMantissaPrincipalPointX;
+  std::vector<bool> m_maiSignPrincipalPointY;
+  std::vector<int>  m_maiExponentPrincipalPointY;
+  std::vector<int>  m_maiMantissaPrincipalPointY;
+  std::vector<bool> m_maiSignSkewFactor;
+  std::vector<int>  m_maiExponentSkewFactor;
+  std::vector<int>  m_maiMantissaSkewFactor;
+  int               m_maiPrecRotationParam = 0;
+  int               m_maiPrecTranslationParam = 0;
+  std::vector< std::vector<std::vector<bool>>> m_maiSignR;
+  std::vector< std::vector<std::vector<int>>>  m_maiExponentR;
+  std::vector< std::vector<std::vector<int>>>  m_maiMantissaR;
+  std::vector< std::vector<bool>> m_maiSignT;
+  std::vector< std::vector<int>>  m_maiExponentT;
+  std::vector< std::vector<int>>  m_maiMantissaT;
+private:
+  uint32_t xGetSyntaxElementLen( int expo, int prec, int val ) const;
+};
+
+#if JVET_W0078_MVP_SEI 
+class SEIMultiviewViewPosition : public SEI   // multiview view position SEI (payload type MULTIVIEW_VIEW_POSITION)
+{
+public:
+  PayloadType payloadType() const { return MULTIVIEW_VIEW_POSITION; }
+  SEIMultiviewViewPosition() { };
+  ~SEIMultiviewViewPosition() { };
+  bool isMVPSameContent(SEIMultiviewViewPosition* mvpB);
+  // The constructor is empty, so the counter gets an in-class default (0) rather than an indeterminate value.
+  int               m_mvpNumViewsMinus1 = 0;
+  std::vector<int>  m_mvpViewPosition;
+};
+#endif
+
+// Alpha channel information SEI message (payload type ALPHA_CHANNEL_INFO).
+// Plain data holder: every field is public and declares its own initial value.
+class SEIAlphaChannelInfo : public SEI
+{
+public:
+  // No constructor work is needed; the in-class defaults below give all flags
+  // the value false and all values 0, except the opaque value which is 255.
+  SEIAlphaChannelInfo() {}
+  virtual ~SEIAlphaChannelInfo() {}
+
+  // Identifies this message as the alpha channel information SEI.
+  PayloadType payloadType() const { return ALPHA_CHANNEL_INFO; }
+
+  // Message fields, listed in declaration order together with the value
+  // each one holds right after construction.
+  bool m_aciCancelFlag       = false;
+  int  m_aciUseIdc           = 0;
+  int  m_aciBitDepthMinus8   = 0;
+  int  m_aciTransparentValue = 0;
+  int  m_aciOpaqueValue      = 255;
+
+  bool m_aciIncrFlag         = false;
+  bool m_aciClipFlag         = false;
+  bool m_aciClipTypeFlag     = false;
+};
+
+// Depth representation information SEI message (payload type DEPTH_REPRESENTATION_INFO).
+// Plain data holder: every field is public and declares its own initial value.
+class SEIDepthRepresentationInfo : public SEI
+{
+public:
+  // No constructor work is needed; the in-class defaults below zero every
+  // scalar field, and the model vector starts out empty.
+  SEIDepthRepresentationInfo() {}
+  virtual ~SEIDepthRepresentationInfo() {}
+
+  // Identifies this message as the depth representation information SEI.
+  PayloadType payloadType() const { return DEPTH_REPRESENTATION_INFO; }
+
+  // Boolean flags.
+  bool m_driZNearFlag = false;
+  bool m_driZFarFlag = false;
+  bool m_driDMinFlag = false;
+  bool m_driDMaxFlag = false;
+  // Double-precision values.
+  double m_driZNear = 0.0;
+  double m_driZFar = 0.0;
+  double m_driDMin = 0.0;
+  double m_driDMax = 0.0;
+
+  // Integer fields.
+  int m_driDepthRepresentationType = 0;
+  int m_driDisparityRefViewId = 0;
+  int m_driDepthNonlinearRepresentationNumMinus1 = 0;
+
+  // Variable-length model data; empty after construction.
+  std::vector<int> m_driDepthNonlinearRepresentationModel;
+};
+
 class SEISampleAspectRatioInfo : public SEI
 {
 public:
@@ -244,9 +453,7 @@ public:
   virtual ~SEIDecodedPictureHash() {}
 
   HashType method;
-#if FIX_TICKET_1405
   bool     singleCompFlag;
-#endif
 
   PictureHash m_pictureHash;
 };
@@ -384,7 +591,7 @@ public:
   SEIDecodingUnitInfo()
     : m_decodingUnitIdx(0)
     , m_dpbOutputDuDelayPresentFlag(false)
-    , m_picSptDpbOutputDuDelay(0)
+    , m_picSptDpbOutputDuDelay(-1)
   {
     ::memset(m_duiSubLayerDelaysPresentFlag, 0, sizeof(m_duiSubLayerDelaysPresentFlag));
     ::memset(m_duSptCpbRemovalDelayIncrement, 0, sizeof(m_duSptCpbRemovalDelayIncrement));
@@ -456,6 +663,19 @@ public:
   bool m_upsampledAspectRatio;
 };
 
+class SEIDisplayOrientation : public SEI
+{
+public:
+  PayloadType payloadType() const { return DISPLAY_ORIENTATION; }
+
+  SEIDisplayOrientation() {}
+  virtual ~SEIDisplayOrientation() {}
+  // In-class defaults: the empty constructor would otherwise leave these fields indeterminate.
+  bool                  m_doCancelFlag      = false;
+  bool                  m_doPersistenceFlag = false;
+  int                   m_doTransformType   = 0;
+};
+
 class SEIParameterSetsInclusionIndication : public SEI
 {
 public:
@@ -619,6 +839,28 @@ public:
   uint16_t m_ambientLightY;
 };
 
+class SEIColourTransformInfo : public SEI
+{
+public:
+  PayloadType payloadType() const { return COLOUR_TRANSFORM_INFO; }
+  SEIColourTransformInfo() { }
+  virtual ~SEIColourTransformInfo() { }
+  // The constructor is empty, so every scalar field declares its own default; without them a
+  // default-constructed message would hold indeterminate values until each field is assigned.
+  uint16_t m_id = 0;
+  bool     m_signalInfoFlag = false;
+  bool     m_fullRangeFlag = false;
+  uint16_t m_primaries = 0;
+  uint16_t m_transferFunction = 0;
+  uint16_t m_matrixCoefs = 0;
+  bool     m_crossComponentFlag = false;
+  bool     m_crossComponentInferred = false;
+  uint16_t m_numberChromaLutMinus1 = 0;
+  int      m_chromaOffset = 0;
+  uint16_t m_bitdepth = 0;
+  uint16_t m_log2NumberOfPointsPerLut = 0;
+  LutModel m_lut[MAX_NUM_COMPONENT];
+};
 class SEIContentColourVolume : public SEI
 {
 public:
@@ -667,9 +909,95 @@ public:
   std::vector<std::vector<std::vector<int>>> m_refLevelFraction;
 };
 
+class SEIAnnotatedRegions : public SEI
+{
+public:
+  PayloadType payloadType() const { return ANNOTATED_REGIONS; }
+  SEIAnnotatedRegions() {}
+  virtual ~SEIAnnotatedRegions() {}
+  // Overwrites this message with a member-wise copy of the given one (header, objects and labels).
+  void copyFrom(const SEIAnnotatedRegions &seiAnnotatedRegions)
+  {
+    (*this) = seiAnnotatedRegions;
+  }
+  // One tracked object. Fields not set by the constructor carry in-class defaults so copies never read indeterminate values.
+  struct AnnotatedRegionObject
+  {
+    AnnotatedRegionObject() :
+      objectCancelFlag(false),
+      objectLabelValid(false),
+      boundingBoxValid(false)
+    { }
+    bool objectCancelFlag;
+
+    bool objectLabelValid;
+    uint32_t objLabelIdx = 0;        // only valid if objectLabelValid
+
+    bool boundingBoxValid;
+    uint32_t boundingBoxTop = 0;     // the four bounding box values are only valid if boundingBoxValid
+    uint32_t boundingBoxLeft = 0;
+    uint32_t boundingBoxWidth = 0;
+    uint32_t boundingBoxHeight = 0;
+
+    bool partialObjectFlag = false;  // NOTE(review): presumably gated by m_hdr.m_partialObjectFlagPresentFlag - confirm
+    uint32_t objectConfidence = 0;
+  };
+  struct AnnotatedRegionLabel
+  {
+    AnnotatedRegionLabel() : labelValid(false) { }
+    bool        labelValid;
+    std::string label;           // only valid if labelValid
+  };
+  // Message-level settings; a new header starts cancelled, with every other flag cleared.
+  struct AnnotatedRegionHeader
+  {
+    AnnotatedRegionHeader() : m_cancelFlag(true), m_receivedSettingsOnce(false) { }
+    bool      m_cancelFlag;
+    bool      m_receivedSettingsOnce; // used for decoder conformance checking. Other confidence flags must be unchanged once this flag is set.
+
+    bool      m_notOptimizedForViewingFlag = false;
+    bool      m_trueMotionFlag = false;
+    bool      m_occludedObjectFlag = false;
+    bool      m_partialObjectFlagPresentFlag = false;
+    bool      m_objectLabelPresentFlag = false;
+    bool      m_objectConfidenceInfoPresentFlag = false;
+    uint32_t      m_objectConfidenceLength = 0;         // Only valid if m_objectConfidenceInfoPresentFlag
+    bool      m_objectLabelLanguagePresentFlag = false; // Only valid if m_objectLabelPresentFlag
+    std::string m_annotatedRegionsObjectLabelLang;
+  };
+  typedef uint32_t AnnotatedRegionObjectIndex;
+  typedef uint32_t AnnotatedRegionLabelIndex;
+
+  AnnotatedRegionHeader m_hdr;
+  std::vector<std::pair<AnnotatedRegionObjectIndex, AnnotatedRegionObject> > m_annotatedRegions;
+  std::vector<std::pair<AnnotatedRegionLabelIndex,  AnnotatedRegionLabel>  > m_annotatedLabels;
+};
+
+class SEIExtendedDrapIndication : public SEI
+{
+public:
+  PayloadType payloadType() const { return EXTENDED_DRAP_INDICATION; }
 
+  SEIExtendedDrapIndication() {}
+  virtual ~SEIExtendedDrapIndication() {}
+  // In-class defaults: the empty constructor would otherwise leave the scalar fields indeterminate.
+  int               m_edrapIndicationRapIdMinus1 = 0;
+  bool              m_edrapIndicationLeadingPicturesDecodableFlag = false;
+  int               m_edrapIndicationReservedZero12Bits = 0;
+  int               m_edrapIndicationNumRefRapPicsMinus1 = 0;
+  std::vector<int>  m_edrapIndicationRefRapId;
+};
 
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+class SEIConstrainedRaslIndication : public SEI   // declares no data members; only the payload type identifies it
+{
+public:
+  SEIConstrainedRaslIndication() { }
+  virtual ~SEIConstrainedRaslIndication() { }
 
+  PayloadType payloadType() const { return CONSTRAINED_RASL_ENCODING; }
+};
+#endif
 //! \}
 
 
diff --git a/source/Lib/CommonLib/SEIColourTransform.cpp b/source/Lib/CommonLib/SEIColourTransform.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..721da6604fc402046e302d83de451e03104740a1
--- /dev/null
+++ b/source/Lib/CommonLib/SEIColourTransform.cpp
@@ -0,0 +1,199 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ /** \file     SEIColourTransform.cpp
+     \brief    Colour transform SEI
+ */
+
+#include "SEIColourTransform.h"
+
+#include "SEI.h"
+#include "Unit.h"
+#include "Buffer.h"
+
+SEIColourTransformApply::SEIColourTransformApply()
+  : m_width               (0)
+  , m_height              (0)
+  , m_chromaFormat        (NUM_CHROMA_FORMAT)
+  , m_bitDepth            (0)
+  , m_lutSize             (0)      // gives the LUT size a defined value even if create() is never called
+  , m_pColourTransfParams (NULL)   // nothing is allocated until create() is called
+{}
+
+void SEIColourTransformApply::create(uint32_t width, uint32_t height, ChromaFormat fmt, uint8_t bitDepth)
+{
+  m_width               = width;
+  m_height              = height;
+  m_chromaFormat        = fmt;
+  m_bitDepth            = bitDepth;
+  m_pColourTransfParams = new SEIColourTransformInfo;   // released again by the destructor
+  m_lutSize             = 1 << bitDepth;                // 2^bitDepth entries per LUT
+  for (std::vector<Pel> &componentLut : m_mapLut)       // visits all MAX_NUM_COMPONENT tables
+  {
+    componentLut.resize(m_lutSize, 0);                  // newly added entries are zero
+  }
+}
+
+SEIColourTransformApply::~SEIColourTransformApply()
+{
+  delete m_pColourTransfParams;   // frees the object allocated in create(); a no-op while the pointer is still NULL
+}
+
+void SEIColourTransformApply::inverseColourTransform(PelStorage* transformBuf)
+{
+  // Remaps the samples of transformBuf in place through m_mapLut; a zero chroma format value selects one component only.
+  const uint8_t componentCount = m_chromaFormat ? MAX_NUM_COMPONENT : 1;
+  PelBuf&       lumaPlane      = transformBuf->Y();
+  PelBuf&       cbPlane        = transformBuf->Cb();
+  PelBuf&       crPlane        = transformBuf->Cr();
+  if (componentCount == 3)
+  {
+    if (!m_pColourTransfParams->m_crossComponentFlag)
+    {
+      cbPlane.applyLumaCTI(m_mapLut[COMPONENT_Cb]);   // direct mapping: same routine as luma, but with the Cb/Cr tables
+      crPlane.applyLumaCTI(m_mapLut[COMPONENT_Cr]);
+    }
+    else
+    {
+      cbPlane.applyChromaCTI(lumaPlane.buf, lumaPlane.stride, m_mapLut[COMPONENT_Cb], m_bitDepth, m_chromaFormat, false);
+      crPlane.applyChromaCTI(lumaPlane.buf, lumaPlane.stride, m_mapLut[COMPONENT_Cr], m_bitDepth, m_chromaFormat, false);
+    }
+  }
+  lumaPlane.applyLumaCTI(m_mapLut[COMPONENT_Y]);      // luma goes last, so the cross-component calls above read it unmodified
+}
+
+void SEIColourTransformApply::generateColourTransfLUTs()   // fills m_mapLut[] from the LUT data held in m_pColourTransfParams
+{
+  uint8_t numComp     = m_chromaFormat ? MAX_NUM_COMPONENT : 1;
+  int numPreLutPoints = 1 << m_pColourTransfParams->m_log2NumberOfPointsPerLut;
+  int dynamicRange    = 1 << m_bitDepth;
+  const int orgCW     = dynamicRange / numPreLutPoints;
+  int scalingPreLut   = 1 << ( 11 - (int)floorLog2(orgCW) ); // scale-up values from cfg file (chroma preLut is scaled down in cfg)
+  // NOTE(review): the rounding shifts by (floorLog2(orgCW) - 1) below presumably rely on orgCW >= 2 - confirm the allowed range.
+  std::vector<Pel> pivotInPoints;
+  std::vector<Pel> pivotMappedPointsY(numPreLutPoints+1);
+  std::vector<Pel> pivotMappedPointsX(numPreLutPoints+1);
+
+  // Create Inverse Luma LUT - same for all possible combinations of ctiCrossComp and ctiChromaLutInferred
+  // invScale[k] is the slope of segment k: lutValues[k + 1] scaled by 2^FP_PREC and divided, with rounding, by 2^floorLog2(orgCW).
+  std::vector<int> invScale(numPreLutPoints);
+
+  pivotInPoints = m_pColourTransfParams->m_lut[0].lutValues;
+  pivotMappedPointsX[0] = pivotInPoints[0];
+  pivotMappedPointsY[0] = 0;
+  for (int j = 1; j < numPreLutPoints; j++)
+  {
+    pivotMappedPointsX[j] = pivotMappedPointsX[j - 1] + pivotInPoints[j];
+    pivotMappedPointsY[j] = j * orgCW;
+  }
+
+  for (int i = 0; i < numPreLutPoints; i++)
+  {
+    invScale[i] = ((int32_t)m_pColourTransfParams->m_lut[0].lutValues[i + 1] * (1 << FP_PREC) + (1 << (floorLog2(orgCW) - 1))) >> floorLog2(orgCW);
+  }
+
+  for (int i = 0; i < dynamicRange; i++)
+  {
+    int idx = i / orgCW;
+    int tempVal = pivotMappedPointsX[idx] + ((invScale[idx] * (i - pivotMappedPointsY[idx]) + (1 << (FP_PREC - 1))) >> FP_PREC);
+    m_mapLut[0][i] = Clip3((Pel)0, (Pel)(dynamicRange - 1), (Pel)(tempVal));
+  }
+
+  //  calculate chroma LUTs
+  if (m_pColourTransfParams->m_crossComponentInferred == 0)
+  {
+    for (int i = 1; i < numComp; i++)
+    { // loop for U and V
+      if (m_pColourTransfParams->m_crossComponentFlag == 1)
+      {
+        // cross-component U and V LUT
+        for (int j = 0; j < dynamicRange; j++)
+        {
+          int     idx     = j / orgCW;
+          int  slope = scalingPreLut * (m_pColourTransfParams->m_lut[i].lutValues[idx + 1] - m_pColourTransfParams->m_lut[i].lutValues[idx]);
+          m_mapLut[i][j] = scalingPreLut * m_pColourTransfParams->m_lut[i].lutValues[idx] + slope * (j - pivotMappedPointsY[idx]) / orgCW;
+        }
+      }
+      else
+      {
+        // intra-component Chroma (U and V) LUT
+        // initialize pivot points
+        pivotInPoints = m_pColourTransfParams->m_lut[i].lutValues;
+        pivotMappedPointsX[0] = pivotInPoints[0];
+        for (int j = 1; j <= numPreLutPoints; j++)
+        {
+          pivotMappedPointsX[j] = pivotMappedPointsX[j-1] + pivotInPoints[j];
+        }
+        // Segment slopes come from this component's own LUT (index i), matching the pivots built just above; k avoids shadowing i.
+        for (int k = 0; k < numPreLutPoints; k++)
+        {
+          invScale[k] = ((int32_t)m_pColourTransfParams->m_lut[i].lutValues[k + 1] * (1 << FP_PREC) + (1 << (floorLog2(orgCW) - 1))) >> floorLog2(orgCW);
+        }
+
+        for (int j = 0; j < dynamicRange; j++)
+        {
+          int idx = j / orgCW;
+          int tempVal = pivotMappedPointsX[idx] + ((invScale[idx] * (j - pivotMappedPointsY[idx]) + (1 << (FP_PREC - 1))) >> FP_PREC);
+          m_mapLut[i][j] = Clip3((Pel)0, (Pel)(dynamicRange - 1), (Pel)(tempVal));
+        }
+      }
+    }
+  }
+  else
+  {
+    int chrOffset = m_pColourTransfParams->m_chromaOffset;
+
+    std::vector<int> chromaAdjHelpLUT (numPreLutPoints);
+    for (int i = 0; i < numPreLutPoints; i++)
+    {
+      chromaAdjHelpLUT[i] = (m_pColourTransfParams->m_lut[0].lutValues[i + 1] == 0) ? (1 << CSCALE_FP_PREC) : ((int32_t)((m_pColourTransfParams->m_lut[0].lutValues[i + 1] + chrOffset) * (1 << FP_PREC) / orgCW));
+    }
+
+    // generate smoothed chroma LUT as done by JVET-U0078
+    std::vector<int> interpLut(numPreLutPoints + 1);
+    for (int i = 1; i < numPreLutPoints; i++)
+    {
+      interpLut[i] = (chromaAdjHelpLUT[i] + chromaAdjHelpLUT[i - 1] + 1) / 2;
+    }
+    interpLut[0]                = chromaAdjHelpLUT[0];
+    interpLut[numPreLutPoints]  = chromaAdjHelpLUT[numPreLutPoints - 1];
+    // Both chroma tables receive the same interpolated values.
+    for (int i = 0; i < dynamicRange; i++)
+    {
+      int idx = i / orgCW;
+      int slope = interpLut[idx + 1] - interpLut[idx];
+      m_mapLut[1][i] = interpLut[idx] + slope * (i - pivotMappedPointsY[idx]) / orgCW;
+      m_mapLut[2][i] = m_mapLut[1][i];
+    }
+  }
+}
diff --git a/source/Lib/CommonLib/SEIColourTransform.h b/source/Lib/CommonLib/SEIColourTransform.h
new file mode 100644
index 0000000000000000000000000000000000000000..feebfe7ae9d71f754d2e614aea39a6ec3a883318
--- /dev/null
+++ b/source/Lib/CommonLib/SEIColourTransform.h
@@ -0,0 +1,71 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ /** \file     SEIColourTransform.h
+     \brief    Colour transform SEI
+ */
+
+#ifndef __SEIFILMCOLOURTRANFORMAPPLY__
+#define __SEIFILMCOLOURTRANFORMAPPLY__
+
+#include "CommonDef.h"
+//! \ingroup CommonLib
+//! \{
+
+struct PelStorage;
+class SEIColourTransformInfo;
+// Holds one sample look-up table per component together with the colour transform SEI parameters they are generated from.
+class SEIColourTransformApply
+{
+private:
+  uint32_t                     m_width;
+  uint32_t                     m_height;
+  ChromaFormat                 m_chromaFormat;
+  uint8_t                      m_bitDepth;
+  uint32_t                     m_lutSize;
+  std::vector<Pel>             m_mapLut[MAX_NUM_COMPONENT];
+
+public:
+  SEIColourTransformInfo*      m_pColourTransfParams;   // allocated by create(), deleted by the destructor
+
+public:
+  SEIColourTransformApply();
+  virtual ~SEIColourTransformApply();
+  // create() allocates m_pColourTransfParams and sizes the tables; the other two calls dereference or index them.
+  void create                   (uint32_t width, uint32_t height, ChromaFormat fmt, uint8_t bitDepth);
+  void inverseColourTransform   (PelStorage* transformBuf);
+  void generateColourTransfLUTs ();
+
+};// END CLASS DEFINITION SEIColourTransformApply
+//! \}
+#endif
diff --git a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp
index a34b44676ae08a1e7ef54515f7498e84c8ce5fd4..1eed30383742d6858d0fb3df47e8d5e0025aebc0 100644
--- a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp
+++ b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/SampleAdaptiveOffset.h b/source/Lib/CommonLib/SampleAdaptiveOffset.h
index b8b47d48f7bb27590bb75e2ed1a8b1a7a0d84cbd..e2487056bd9de75979a135567a9d8d60e8674242 100644
--- a/source/Lib/CommonLib/SampleAdaptiveOffset.h
+++ b/source/Lib/CommonLib/SampleAdaptiveOffset.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp
index 7531b345cff586c13a649251e511337e7b3d19bb..55461434f20fda7a3ba60e7578d05ff6111d837f 100644
--- a/source/Lib/CommonLib/Slice.cpp
+++ b/source/Lib/CommonLib/Slice.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -74,11 +74,17 @@ Slice::Slice()
 , m_deblockingFilterCrBetaOffsetDiv2( 0 )
 , m_deblockingFilterCrTcOffsetDiv2  ( 0 )
 , m_depQuantEnabledFlag             ( false )
+#if JVET_W0046_RLSCP
+, m_reverseLastSigCoeffFlag         ( false )
+#endif
 , m_signDataHidingEnabledFlag       ( false )
 , m_tsResidualCodingDisabledFlag  ( false )
 , m_pendingRasInit                ( false )
 , m_bCheckLDC                     ( false )
 , m_biDirPred                    ( false )
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+, m_lmChromaCheckDisable          ( false )
+#endif
 , m_iSliceQpDelta                 ( 0 )
 , m_iDepth                        ( 0 )
 , m_pcSPS                         ( NULL )
@@ -98,11 +104,21 @@ Slice::Slice()
 , m_substreamSizes                ( )
 , m_numEntryPoints                ( 0 )
 , m_cabacInitFlag                 ( false )
- , m_sliceSubPicId               ( 0 )
+, m_sliceSubPicId                 ( 0 )
 , m_encCABACTableIdx              (I_SLICE)
 , m_iProcessingStartTime          ( 0 )
 , m_dProcessingTime               ( 0 )
+, m_tsrc_index                    ( 0 )
 {
+  for (uint32_t i = 0; i < MAX_TSRC_RICE; i++)
+  {
+    m_riceBit[i] = 0;
+  }
+
+#if JVET_W0046_RLSCP
+  m_cnt_right_bottom = 0;
+#endif
+
   for(uint32_t i=0; i<NUM_REF_PIC_LIST_01; i++)
   {
     m_aiNumRefIdx[i] = 0;
@@ -141,9 +157,9 @@ Slice::Slice()
 
   memset(m_alfApss, 0, sizeof(m_alfApss));
   m_ccAlfFilterParam.reset();
-  resetTileGroupAlfEnabledFlag();
-  resetTileGroupCcAlCbfEnabledFlag();
-  resetTileGroupCcAlCrfEnabledFlag();
+  resetAlfEnabledFlag();
+  resetCcAlCbfEnabledFlag();
+  resetCcAlCrfEnabledFlag();
 
   m_sliceMap.initSliceMap();
 }
@@ -171,6 +187,9 @@ void Slice::initSlice()
   m_bCheckLDC = false;
 
   m_biDirPred = false;
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  m_lmChromaCheckDisable = false;
+#endif
   m_symRefIdx[0] = -1;
   m_symRefIdx[1] = -1;
 
@@ -189,12 +208,20 @@ void Slice::initSlice()
   m_useLTforDRAP         = false;
   m_isDRAP               = false;
   m_latestDRAPPOC        = MAX_INT;
-  resetTileGroupAlfEnabledFlag();
+  m_edrapRapId           = 0;
+  m_enableEdrapSEI       = false;
+  m_edrapRapId           = 0;
+  m_useLTforEdrap        = false;
+  m_edrapNumRefRapPics   = 0;
+  m_edrapRefRapIds.resize(0);
+  m_latestEDRAPPOC       = MAX_INT;
+  m_latestEdrapLeadingPicDecodableFlag = false;
+  resetAlfEnabledFlag();
   m_ccAlfFilterParam.reset();
-  m_tileGroupCcAlfCbEnabledFlag = 0;
-  m_tileGroupCcAlfCrEnabledFlag = 0;
-  m_tileGroupCcAlfCbApsId = -1;
-  m_tileGroupCcAlfCrApsId = -1;
+  m_ccAlfCbEnabledFlag = 0;
+  m_ccAlfCrEnabledFlag = 0;
+  m_ccAlfCbApsId = -1;
+  m_ccAlfCrApsId = -1;
   m_nuhLayerId = 0;
 }
 
@@ -238,16 +265,16 @@ void Slice::inheritFromPicHeader( PicHeader *picHeader, const PPS *pps, const SP
   setSaoEnabledFlag(CHANNEL_TYPE_LUMA,     picHeader->getSaoEnabledFlag(CHANNEL_TYPE_LUMA));
   setSaoEnabledFlag(CHANNEL_TYPE_CHROMA,   picHeader->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA));
 
-  setTileGroupAlfEnabledFlag(COMPONENT_Y,  picHeader->getAlfEnabledFlag(COMPONENT_Y));
-  setTileGroupAlfEnabledFlag(COMPONENT_Cb, picHeader->getAlfEnabledFlag(COMPONENT_Cb));
-  setTileGroupAlfEnabledFlag(COMPONENT_Cr, picHeader->getAlfEnabledFlag(COMPONENT_Cr));
-  setTileGroupNumAps(picHeader->getNumAlfAps());
-  setAlfAPSs(picHeader->getAlfAPSs());
-  setTileGroupApsIdChroma(picHeader->getAlfApsIdChroma());
-  setTileGroupCcAlfCbEnabledFlag(picHeader->getCcAlfEnabledFlag(COMPONENT_Cb));
-  setTileGroupCcAlfCrEnabledFlag(picHeader->getCcAlfEnabledFlag(COMPONENT_Cr));
-  setTileGroupCcAlfCbApsId(picHeader->getCcAlfCbApsId());
-  setTileGroupCcAlfCrApsId(picHeader->getCcAlfCrApsId());
+  setAlfEnabledFlag(COMPONENT_Y,  picHeader->getAlfEnabledFlag(COMPONENT_Y));
+  setAlfEnabledFlag(COMPONENT_Cb, picHeader->getAlfEnabledFlag(COMPONENT_Cb));
+  setAlfEnabledFlag(COMPONENT_Cr, picHeader->getAlfEnabledFlag(COMPONENT_Cr));
+  setNumAlfApsIdsLuma(picHeader->getNumAlfApsIdsLuma());
+  setAlfApsIdsLuma(picHeader->getAlfApsIdsLuma());
+  setAlfApsIdChroma(picHeader->getAlfApsIdChroma());
+  setCcAlfCbEnabledFlag(picHeader->getCcAlfEnabledFlag(COMPONENT_Cb));
+  setCcAlfCrEnabledFlag(picHeader->getCcAlfEnabledFlag(COMPONENT_Cr));
+  setCcAlfCbApsId(picHeader->getCcAlfCbApsId());
+  setCcAlfCrApsId(picHeader->getCcAlfCrApsId());
   m_ccAlfFilterParam.ccAlfFilterEnabled[COMPONENT_Cb - 1] = picHeader->getCcAlfEnabledFlag(COMPONENT_Cb);
   m_ccAlfFilterParam.ccAlfFilterEnabled[COMPONENT_Cr - 1] = picHeader->getCcAlfEnabledFlag(COMPONENT_Cr);
 }
@@ -490,8 +517,7 @@ void Slice::constructRefPicList(PicList& rcListPic)
       pcRefPic = xGetRefPic( rcListPic, getPOC(), refLayerId );
       pcRefPic->longTerm = true;
     }
-    else
-    if (!m_RPL1.isRefPicLongterm(ii))
+    else if (!m_RPL1.isRefPicLongterm(ii))
     {
       pcRefPic = xGetRefPic(rcListPic, getPOC() + m_RPL1.getRefPicIdentifier(ii), m_pcPic->layerId);
       pcRefPic->longTerm = false;
@@ -614,7 +640,7 @@ void Slice::checkRPL(const ReferencePictureList* pRPL0, const ReferencePictureLi
 
   int irapPOC = getAssociatedIRAPPOC();
 
-  const int numEntries[] = { pRPL0->getNumberOfShorttermPictures() + pRPL0->getNumberOfLongtermPictures() + pRPL0->getNumberOfInterLayerPictures(), pRPL1->getNumberOfShorttermPictures() + pRPL1->getNumberOfLongtermPictures() + pRPL1->getNumberOfInterLayerPictures() };
+  int numEntries[] = { pRPL0->getNumberOfShorttermPictures() + pRPL0->getNumberOfLongtermPictures() + pRPL0->getNumberOfInterLayerPictures(), pRPL1->getNumberOfShorttermPictures() + pRPL1->getNumberOfLongtermPictures() + pRPL1->getNumberOfInterLayerPictures() };
   const int numActiveEntries[] = { getNumRefIdx( REF_PIC_LIST_0 ), getNumRefIdx( REF_PIC_LIST_1 ) };
   const ReferencePictureList* rpl[] = { pRPL0, pRPL1 };
   const bool fieldSeqFlag = getSPS()->getFieldSeqFlag();
@@ -666,9 +692,7 @@ void Slice::checkRPL(const ReferencePictureList* pRPL0, const ReferencePictureLi
         }
 
         // Generated reference picture does not have picture header
-        const bool isGeneratedRefPic = pcRefPic->slices[0]->getPicHeader() ? false : true;
-
-        const bool nonReferencePictureFlag = isGeneratedRefPic ? pcRefPic->slices[0]->getPicHeader()->getNonReferencePictureFlag() : pcRefPic->nonReferencePictureFlag;
+        const bool nonReferencePictureFlag = pcRefPic->nonReferencePictureFlag;
         CHECK( pcRefPic == m_pcPic || nonReferencePictureFlag, "The picture referred to by each entry in RefPicList[ 0 ] or RefPicList[ 1 ] shall not be the current picture and shall have ph_non_ref_pic_flag equal to 0" );
 
         if( i < numActiveEntries[refPicList] )
@@ -685,23 +709,20 @@ void Slice::checkRPL(const ReferencePictureList* pRPL0, const ReferencePictureLi
           if( m_eNalUnitType == NAL_UNIT_CODED_SLICE_RADL )
           {
             CHECK( refPicDecodingOrderNumber < associatedIRAPDecodingOrderNumber, "RADL picture detected that violate the rule that no active entry in RefPicList[] shall precede the associated IRAP picture in decoding order" );
-#if JVET_S0084_S0110_RADL
-            // Checking this: "When the current picture is a RADL picture, there shall be no active entry in RefPicList[ 0 ] or 
+            // Checking this: "When the current picture is a RADL picture, there shall be no active entry in RefPicList[ 0 ] or
             // RefPicList[ 1 ] that is any of the following: A RASL picture with pps_mixed_nalu_types_in_pic_flag is equal to 0
             for (int i = 0; i < pcRefPic->numSlices; i++)
             {
-              if (pcRefPic->slices[i]->getPPS()->getMixedNaluTypesInPicFlag() == 0)
+              if (!pcRefPic->mixedNaluTypesInPicFlag)
               {
                 CHECK(pcRefPic->slices[i]->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL, "When the current picture is a RADL picture, there shall be no active entry in RefPicList[ 0 ] or RefPicList[ 1 ] that is a RASL picture with pps_mixed_nalu_types_in_pic_flag is equal to 0");
               }
             }
-#endif
 
           }
 
           CHECK( pcRefPic->temporalId > m_pcPic->temporalId, "The picture referred to by each active entry in RefPicList[ 0 ] or RefPicList[ 1 ] shall be present in the DPB and shall have TemporalId less than or equal to that of the current picture." );
         }
-#if JVET_R0046_IRAP_ASPECT2
         // Add a constraint on an ILRP being either an IRAP picture or having TemporalId less than or equal to
         // Max (0, vps_max_tid_il_ref_pics_plus1[ refPicVpsLayerId ] - 1 ), with refPicVpsLayerId equal to the value of
         // the nuh_layer_id of the referenced picture.
@@ -710,12 +731,13 @@ void Slice::checkRPL(const ReferencePictureList* pRPL0, const ReferencePictureLi
           bool cond1      = (pcRefPic->getPictureType() == NAL_UNIT_CODED_SLICE_GDR);
           bool cond2      = (pcRefPic->slices[0]->getPicHeader()->getRecoveryPocCnt() == 0);
           bool cond3      = (pcRefPic->cs->slice->isIRAP());
-          
+
           const VPS *vps                  = pcRefPic->cs->vps;
-          const int  maxTidILRefPicsPlus1 = vps->getMaxTidIlRefPicsPlus1(layerIdx, pcRefPic->layerId);
+          const int  maxTidILRefPicsPlus1 =
+            vps->getMaxTidIlRefPicsPlus1(layerIdx, vps->getGeneralLayerIdx(pcRefPic->layerId));
           bool cond4 = (pcRefPic->temporalId < maxTidILRefPicsPlus1);
 
-          CHECK((cond1 && cond2) || cond3 || cond4,
+          CHECK(!((cond1 && cond2) || cond3 || cond4),
                 "Either of the following conditions shall apply for the picture referred to by each ILRP entry, when "
                 "present, in RefPicList[ 0 ] or RefPicList[ 1 ] of a slice of the current picture:-The picture is a "
                 "GDR picture with "
@@ -724,7 +746,6 @@ void Slice::checkRPL(const ReferencePictureList* pRPL0, const ReferencePictureLi
                 "where currLayerIdx and refLayerIdx are equal to "
                 "GeneralLayerIdx[ nuh_layer_id ] and GeneralLayerIdx[ refpicLayerId ], respectively. ");
         }
-#endif
       }
     }
   }
@@ -901,6 +922,16 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll)
   m_depQuantEnabledFlag               = pSrc->m_depQuantEnabledFlag;
   m_signDataHidingEnabledFlag         = pSrc->m_signDataHidingEnabledFlag;
   m_tsResidualCodingDisabledFlag      = pSrc->m_tsResidualCodingDisabledFlag;
+  m_tsrc_index                        = pSrc->m_tsrc_index;
+
+  for (i = 0; i < MAX_TSRC_RICE; i++)
+  {
+    m_riceBit[i] = pSrc->m_riceBit[i];
+  }
+#if JVET_W0046_RLSCP
+  m_reverseLastSigCoeffFlag = pSrc->m_reverseLastSigCoeffFlag;
+  m_cnt_right_bottom        = pSrc->m_cnt_right_bottom;
+#endif
 
   for (i = 0; i < NUM_REF_PIC_LIST_01; i++)
   {
@@ -916,6 +947,9 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll)
   m_iSliceQpDelta        = pSrc->m_iSliceQpDelta;
 
   m_biDirPred = pSrc->m_biDirPred;
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  m_lmChromaCheckDisable = pSrc->m_lmChromaCheckDisable;;
+#endif
   m_symRefIdx[0] = pSrc->m_symRefIdx[0];
   m_symRefIdx[1] = pSrc->m_symRefIdx[1];
 
@@ -935,20 +969,35 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll)
     }
     m_bIsUsedAsLongTerm[i][MAX_NUM_REF] = pSrc->m_bIsUsedAsLongTerm[i][MAX_NUM_REF];
   }
-  if( cpyAlmostAll ) m_iDepth = pSrc->m_iDepth;
+  if (cpyAlmostAll)
+  {
+    m_iDepth = pSrc->m_iDepth;
+  }
 
   // access channel
-  if (cpyAlmostAll) m_RPL0 = pSrc->m_RPL0;
-  if (cpyAlmostAll) m_RPL1 = pSrc->m_RPL1;
+  if (cpyAlmostAll)
+  {
+    m_RPL0 = pSrc->m_RPL0;
+  }
+  if (cpyAlmostAll)
+  {
+    m_RPL1 = pSrc->m_RPL1;
+  }
   m_iLastIDR             = pSrc->m_iLastIDR;
 
-  if( cpyAlmostAll ) m_pcPic  = pSrc->m_pcPic;
+  if (cpyAlmostAll)
+  {
+    m_pcPic = pSrc->m_pcPic;
+  }
 
   m_pcPicHeader          = pSrc->m_pcPicHeader;
   m_colFromL0Flag        = pSrc->m_colFromL0Flag;
   m_colRefIdx            = pSrc->m_colRefIdx;
 
-  if( cpyAlmostAll ) setLambdas(pSrc->getLambdas());
+  if (cpyAlmostAll)
+  {
+    setLambdas(pSrc->getLambdas());
+  }
 
   for (i = 0; i < NUM_REF_PIC_LIST_01; i++)
   {
@@ -988,14 +1037,17 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll)
 
   m_cabacInitFlag                 = pSrc->m_cabacInitFlag;
   memcpy(m_alfApss, pSrc->m_alfApss, sizeof(m_alfApss)); // this might be quite unsafe
-  memcpy( m_tileGroupAlfEnabledFlag, pSrc->m_tileGroupAlfEnabledFlag, sizeof(m_tileGroupAlfEnabledFlag));
-  m_tileGroupNumAps               = pSrc->m_tileGroupNumAps;
-  m_tileGroupLumaApsId            = pSrc->m_tileGroupLumaApsId;
-  m_tileGroupChromaApsId          = pSrc->m_tileGroupChromaApsId;
+  memcpy( m_alfEnabledFlag, pSrc->m_alfEnabledFlag, sizeof(m_alfEnabledFlag));
+  m_numAlfApsIdsLuma              = pSrc->m_numAlfApsIdsLuma;
+  m_alfApsIdsLuma                 = pSrc->m_alfApsIdsLuma;
+  m_alfApsIdChroma                = pSrc->m_alfApsIdChroma;
   m_disableSATDForRd              = pSrc->m_disableSATDForRd;
   m_isLossless = pSrc->m_isLossless;
 
-  if( cpyAlmostAll ) m_encCABACTableIdx  = pSrc->m_encCABACTableIdx;
+  if (cpyAlmostAll)
+  {
+    m_encCABACTableIdx = pSrc->m_encCABACTableIdx;
+  }
   for( int i = 0; i < NUM_REF_PIC_LIST_01; i ++ )
   {
     for (int j = 0; j < MAX_NUM_REF_PICS; j ++ )
@@ -1006,10 +1058,10 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll)
   m_ccAlfFilterParam                        = pSrc->m_ccAlfFilterParam;
   m_ccAlfFilterControl[0]                   = pSrc->m_ccAlfFilterControl[0];
   m_ccAlfFilterControl[1]                   = pSrc->m_ccAlfFilterControl[1];
-  m_tileGroupCcAlfCbEnabledFlag             = pSrc->m_tileGroupCcAlfCbEnabledFlag;
-  m_tileGroupCcAlfCrEnabledFlag             = pSrc->m_tileGroupCcAlfCrEnabledFlag;
-  m_tileGroupCcAlfCbApsId                   = pSrc->m_tileGroupCcAlfCbApsId;
-  m_tileGroupCcAlfCrApsId                   = pSrc->m_tileGroupCcAlfCrApsId;
+  m_ccAlfCbEnabledFlag             = pSrc->m_ccAlfCbEnabledFlag;
+  m_ccAlfCrEnabledFlag             = pSrc->m_ccAlfCrEnabledFlag;
+  m_ccAlfCbApsId                   = pSrc->m_ccAlfCbApsId;
+  m_ccAlfCrApsId                   = pSrc->m_ccAlfCrApsId;
 }
 
 
@@ -1061,7 +1113,7 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic, const PPS& pps)
   if(this->getAssociatedIRAPPOC() > this->getPOC())
   {
     //check this only when pps_mixed_nalu_types_in_pic_flag is equal to 0
-    if (pps.getMixedNaluTypesInPicFlag() == 0)
+    if (!pps.getMixedNaluTypesInPicFlag())
     {
       // Do not check IRAP pictures since they may get a POC lower than their associated IRAP
       if (nalUnitType < NAL_UNIT_CODED_SLICE_IDR_W_RADL ||
@@ -1075,7 +1127,7 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic, const PPS& pps)
 
   if (this->getAssociatedIRAPPOC() <= this->getPOC())
   {
-    if (pps.getMixedNaluTypesInPicFlag() == 0)
+    if (!pps.getMixedNaluTypesInPicFlag())
     {
       CHECK(nalUnitType == NAL_UNIT_CODED_SLICE_RASL || nalUnitType == NAL_UNIT_CODED_SLICE_RADL, "When a picture is not a leading picture, it shall not be a RADL or RASL picture.");
     }
@@ -1113,41 +1165,44 @@ void Slice::checkLeadingPictureRestrictions(PicList& rcListPic, const PPS& pps)
     }
     const Slice* pcSlice = pcPic->slices[0];
 
-    if (pcSlice->getPicHeader()->getPicOutputFlag() == 1 && !this->getNoOutputOfPriorPicsFlag() && pcPic->layerId == this->m_nuhLayerId)
+    if(pcSlice->getPicHeader()) // Generated reference picture does not have picture header
     {
-      if ((nalUnitType == NAL_UNIT_CODED_SLICE_CRA || nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP || nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL) && !pps.getMixedNaluTypesInPicFlag())
+      if (pcSlice->getPicHeader()->getPicOutputFlag() == 1 && !this->getNoOutputOfPriorPicsFlag() && pcPic->layerId == this->m_nuhLayerId)
       {
-        CHECK(pcPic->poc >= this->getPOC(), "Any picture, with nuh_layer_id equal to a particular value layerId, that precedes an IRAP picture with nuh_layer_id "
-              "equal to layerId in decoding order shall precede the IRAP picture in output order.");
+        if ((nalUnitType == NAL_UNIT_CODED_SLICE_CRA || nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP || nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL) && !pps.getMixedNaluTypesInPicFlag())
+        {
+          CHECK(pcPic->poc >= this->getPOC(), "Any picture, with nuh_layer_id equal to a particular value layerId, that precedes an IRAP picture with nuh_layer_id "
+                "equal to layerId in decoding order shall precede the IRAP picture in output order.");
+        }
       }
-    }
 
-    if (pcSlice->getPicHeader()->getPicOutputFlag() == 1 && pcPic->layerId == this->m_nuhLayerId)
-    {
-      if (nalUnitType == NAL_UNIT_CODED_SLICE_RADL)
+      if (pcSlice->getPicHeader()->getPicOutputFlag() == 1 && pcPic->layerId == this->m_nuhLayerId)
       {
-        if (this->getAssociatedIRAPPOC() > pcSlice->getAssociatedIRAPPOC() && !pps.getMixedNaluTypesInPicFlag())
+        if (nalUnitType == NAL_UNIT_CODED_SLICE_RADL)
         {
-          if (this->getAssociatedIRAPPOC() != pcPic->poc)
+          if (this->getAssociatedIRAPPOC() > pcSlice->getAssociatedIRAPPOC() && !pps.getMixedNaluTypesInPicFlag())
           {
-            CHECK(pcPic->poc >= this->getPOC(), "Any picture, with nuh_layer_id equal to a particular value layerId, that precedes an IRAP picture with nuh_layer_id "
-                  "equal to layerId in decoding order shall precede any RADL picture associated with the IRAP picture in output order.");
+            if (this->getAssociatedIRAPPOC() != pcPic->poc)
+            {
+              CHECK(pcPic->poc >= this->getPOC(), "Any picture, with nuh_layer_id equal to a particular value layerId, that precedes an IRAP picture with nuh_layer_id "
+                    "equal to layerId in decoding order shall precede any RADL picture associated with the IRAP picture in output order.");
+            }
           }
         }
       }
-    }
 
-    if (pcSlice->getPicHeader()->getPicOutputFlag() == 1 && !this->getPicHeader()->getNoOutputBeforeRecoveryFlag() && pcPic->layerId == this->m_nuhLayerId
-        && nalUnitType != NAL_UNIT_CODED_SLICE_GDR && this->getPicHeader()->getRecoveryPocCnt() != -1)
-    {
-      if (this->getPOC() == this->getPicHeader()->getRecoveryPocCnt() + this->getPrevGDRInSameLayerPOC())
+      if (pcSlice->getPicHeader()->getPicOutputFlag() == 1 && !this->getPicHeader()->getNoOutputBeforeRecoveryFlag() && pcPic->layerId == this->m_nuhLayerId
+          && nalUnitType != NAL_UNIT_CODED_SLICE_GDR && this->getPicHeader()->getRecoveryPocCnt() != -1)
       {
-        CHECK(pcPic->poc >= this->getPOC(), "Any picture, with nuh_layer_id equal to a particular value layerId, that precedes a recovery point picture with "
-              "nuh_layer_id equal to layerId in decoding order shall precede the recovery point picture in output order.");
+        if (this->getPOC() == this->getPicHeader()->getRecoveryPocCnt() + this->getPrevGDRInSameLayerPOC())
+        {
+          CHECK(pcPic->poc >= this->getPOC(), "Any picture, with nuh_layer_id equal to a particular value layerId, that precedes a recovery point picture with "
+                "nuh_layer_id equal to layerId in decoding order shall precede the recovery point picture in output order.");
+        }
       }
     }
 
-    if ((nalUnitType == NAL_UNIT_CODED_SLICE_RASL || nalUnitType == NAL_UNIT_CODED_SLICE_RADL) && 
+    if ((nalUnitType == NAL_UNIT_CODED_SLICE_RASL || nalUnitType == NAL_UNIT_CODED_SLICE_RADL) &&
       (pcSlice->getNalUnitType() != NAL_UNIT_CODED_SLICE_RASL && pcSlice->getNalUnitType() != NAL_UNIT_CODED_SLICE_RADL) && !pps.getMixedNaluTypesInPicFlag())
     {
       if (pcSlice->getAssociatedIRAPPOC() == this->getAssociatedIRAPPOC() && pcPic->layerId == this->m_nuhLayerId)
@@ -1254,14 +1309,18 @@ void Slice::checkSubpicTypeConstraints(PicList& rcListPic, const ReferencePictur
       bool isBufPicOutput = false;
       int bufSubpicType = NAL_UNIT_INVALID;
       int bufSubpicPrevIRAPSubpicPOC = 0;
-      for (int i = 0; i < bufPic->numSlices; i++)
+
+      if (bufPic->slices[0]->getPicHeader() != NULL) // Generated reference picture does not have picture header
       {
-        if (bufPic->sliceSubpicIdx[i] == curSubpicIdx)
+        for (int i = 0; i < bufPic->numSlices; i++)
         {
-          isBufPicOutput = bufPic->slices[i]->getPicHeader()->getPicOutputFlag();
-          bufSubpicType = bufPic->slices[i]->getNalUnitType();
-          bufSubpicPrevIRAPSubpicPOC = bufPic->slices[i]->getPrevIRAPSubpicPOC();
-          break;
+          if (bufPic->sliceSubpicIdx[i] == curSubpicIdx)
+          {
+            isBufPicOutput = bufPic->slices[i]->getPicHeader()->getPicOutputFlag();
+            bufSubpicType = bufPic->slices[i]->getNalUnitType();
+            bufSubpicPrevIRAPSubpicPOC = bufPic->slices[i]->getPrevIRAPSubpicPOC();
+            break;
+          }
         }
       }
 
@@ -1494,7 +1553,7 @@ void Slice::applyReferencePictureListBasedMarking( PicList& rcListPic, const Ref
   int i, isReference;
   checkLeadingPictureRestrictions(rcListPic, pps);
 
-  bool isNeedToCheck = (this->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP || this->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL) ? false : true;
+  bool isNeedToCheck = (this->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP || this->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL) && !pps.getMixedNaluTypesInPicFlag() ? false : true;
 
   // mark long-term reference pictures in List0
   for( i = 0; i < pRPL0->getNumberOfShorttermPictures() + pRPL0->getNumberOfLongtermPictures() + pRPL0->getNumberOfInterLayerPictures(); i++ )
@@ -1635,7 +1694,9 @@ void Slice::applyReferencePictureListBasedMarking( PicList& rcListPic, const Ref
     Picture* pcPic = *(iterPic++);
 
     if (!pcPic->referenced)
+    {
       continue;
+    }
 
     isReference = 0;
     // loop through all pictures in the Reference Picture Set
@@ -1752,7 +1813,10 @@ int Slice::checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePi
   int isAvailable = 0;
   int notPresentPoc = 0;
 
-  if (this->isIDRorBLA()) return 0; //Assume that all pic in the DPB will be flushed anyway so no need to check.
+  if (this->isIDRorBLA())
+  {
+    return 0;   // Assume that all pic in the DPB will be flushed anyway so no need to check.
+  }
 
   int numberOfPictures = pRPL->getNumberOfLongtermPictures() + pRPL->getNumberOfShorttermPictures() + pRPL->getNumberOfInterLayerPictures();
   //Check long term ref pics
@@ -1816,7 +1880,7 @@ int Slice::checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePi
     {
       if (printErrors)
       {
-        msg(ERROR, "\nCurrent picture: %d Long-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC(), notPresentPoc);
+        msg(ERROR, "Error: Current picture: %d Long-term reference picture with POC = %3d seems to have been removed or not correctly decoded.\n", this->getPOC(), notPresentPoc);
       }
       return notPresentPoc;
     }
@@ -1828,7 +1892,9 @@ int Slice::checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePi
   for (int ii = 0; ii < numberOfPictures; ii++)
   {
     if (pRPL->isRefPicLongterm(ii))
+    {
       continue;
+    }
 
     notPresentPoc = this->getPOC() + pRPL->getRefPicIdentifier(ii);
     isAvailable = 0;
@@ -1847,7 +1913,7 @@ int Slice::checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePi
     {
       if (printErrors)
       {
-        msg(ERROR, "\nCurrent picture: %d Short-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC(), notPresentPoc);
+        msg(ERROR, "Error: Current picture: %d Short-term reference picture with POC = %3d seems to have been removed or not correctly decoded.\n", this->getPOC(), notPresentPoc);
       }
       return notPresentPoc;
     }
@@ -1926,7 +1992,7 @@ int Slice::checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePi
     {
       if (printErrors)
       {
-        msg(ERROR, "\nCurrent picture: %d Long-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC(), notPresentPoc);
+        msg(ERROR, "Error: Current picture: %d Long-term reference picture with POC = %3d seems to have been removed or not correctly decoded.\n", this->getPOC(), notPresentPoc);
       }
       *refPicIndex = ii;
       return notPresentPoc;
@@ -1939,7 +2005,9 @@ int Slice::checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePi
   for (int ii = 0; ii < numberOfPictures; ii++)
   {
     if (pRPL->isRefPicLongterm(ii))
+    {
       continue;
+    }
 
     notPresentPoc = this->getPOC() + pRPL->getRefPicIdentifier(ii);
     isAvailable = 0;
@@ -1958,7 +2026,7 @@ int Slice::checkThatAllRefPicsAreAvailable(PicList& rcListPic, const ReferencePi
     {
       if (printErrors)
       {
-        msg(ERROR, "\nCurrent picture: %d Short-term reference picture with POC = %3d seems to have been removed or not correctly decoded.", this->getPOC(), notPresentPoc);
+        msg(ERROR, "Error: Current picture: %d Short-term reference picture with POC = %3d seems to have been removed or not correctly decoded.\n", this->getPOC(), notPresentPoc);
       }
       *refPicIndex = ii;
       return notPresentPoc;
@@ -1981,8 +2049,7 @@ bool Slice::isPOCInRefPicList(const ReferencePictureList *rpl, int poc )
         return true;
       }
     }
-    else
-    if (rpl->isRefPicLongterm(i))
+    else if (rpl->isRefPicLongterm(i))
     {
       if (poc == rpl->getRefPicIdentifier(i))
       {
@@ -2010,6 +2077,15 @@ bool Slice::isPocRestrictedByDRAP( int poc, bool precedingDRAPInDecodingOrder )
          ( cvsHasPreviousDRAP() && getPOC() > getLatestDRAPPOC() && (precedingDRAPInDecodingOrder || poc < getLatestDRAPPOC()) );
 }
 
+// True only when getEnableEdrapSEI() holds, getEdrapRapId() is positive,
+// and poc differs from getAssociatedIRAPPOC().
+bool Slice::isPocRestrictedByEdrap( int poc )
+{
+  const bool edrapSlice = getEnableEdrapSEI() && getEdrapRapId() > 0;
+  // Short-circuit: the associated IRAP POC is only queried for EDRAP slices.
+  return edrapSlice && poc != getAssociatedIRAPPOC();
+}
+
 void Slice::checkConformanceForDRAP( uint32_t temporalId )
 {
   if (!(isDRAP() || cvsHasPreviousDRAP()))
@@ -2071,6 +2147,59 @@ void Slice::checkConformanceForDRAP( uint32_t temporalId )
   }
 }
 
+// Warn (via msg) about EDRAP conformance problems; each check scans active entries of list 0, then list 1.
+void Slice::checkConformanceForEDRAP( uint32_t temporalId )
+{
+  if (getEdrapRapId() <= 0 && !cvsHasPreviousEDRAP())
+  {
+    return;
+  }
+  const RefPicList refLists[] = { REF_PIC_LIST_0, REF_PIC_LIST_1 };
+  if (getEdrapRapId() > 0)
+  {
+    if (getNalUnitType() != NalUnitType::NAL_UNIT_CODED_SLICE_TRAIL && getNalUnitType() != NalUnitType::NAL_UNIT_CODED_SLICE_STSA)
+    {
+      msg( WARNING, "Warning, non-conforming bitstream. The EDRAP picture should be a trailing picture.\n");
+    }
+    if (temporalId != 0)
+    {
+      msg( WARNING, "Warning, non-conforming bitstream. The EDRAP picture shall have a temporal sublayer identifier equal to 0.\n");
+    }
+    for (const RefPicList list : refLists)
+    {
+      for (int idx = 0; idx < getNumRefIdx(list); idx++)
+      {
+        if (getRefPic(list, idx)->getEdrapRapId() < 0)
+        {
+          msg( WARNING, "Warning, non-conforming bitstream. Any picture that is in the same layer and follows the EDRAP picture in both decoding order and output order does not include, in the active entries of its reference picture lists, any picture that is in the same layer and precedes the EDRAP picture in decoding order or output order, with the exception of the referenceablePictures.\n");
+        }
+      }
+    }
+  }
+
+  if (cvsHasPreviousEDRAP() && getPOC() > getLatestEDRAPPOC() && getLatestEdrapLeadingPicDecodableFlag())
+  {
+    for (const RefPicList list : refLists)
+    {
+      for (int idx = 0; idx < getNumRefIdx(list); idx++)
+      {
+        if (getRefPic(list, idx)->getPOC() < getLatestEDRAPPOC() && getRefPic(list, idx)->getEdrapRapId() < 0)
+        {
+          // The message text differs per list (RPL0 vs RPL1), so both literals are kept separately.
+          if (list == REF_PIC_LIST_0)
+          {
+            msg( WARNING, "Warning, non-conforming bitstream. Any picture that is in the same layer and follows the EDRAP picture in decoding order and precedes the EDRAP picture in output order does not include, in the active entries of its reference picture lists, any picture that is in the same layer and precedes the EDRAP picture in decoding order, with the exception of the referenceablePictures. Problem is POC %d in RPL0.\n", getRefPic(list, idx)->getPOC());
+          }
+          else
+          {
+            msg( WARNING, "Warning, non-conforming bitstream. Any picture that is in the same layer and follows the EDRAP picture in decoding order and precedes the EDRAP picture in output order does not include, in the active entries of its reference picture lists, any picture that is in the same layer and precedes the EDRAP picture in decoding order, with the exception of the referenceablePictures. Problem is POC %d in RPL1\n", getRefPic(list, idx)->getPOC());
+          }
+        }
+      }
+    }
+  }
+}
+
 
 //! get AC and DC values for weighted pred
 void  Slice::getWpAcDcParam(const WPACDCParam *&wp) const
@@ -2184,8 +2313,7 @@ unsigned Slice::getMinPictureDistance() const
   {
     minPicDist = 0;
   }
-  else
-  if( ! isIntra() )
+  else if (!isIntra())
   {
     const int currPOC  = getPOC();
     for (int refIdx = 0; refIdx < getNumRefIdx(REF_PIC_LIST_0); refIdx++)
@@ -2234,9 +2362,6 @@ VPS::VPS()
   {
     m_vpsLayerId[i] = 0;
     m_vpsIndependentLayerFlag[i] = true;
-#if !JVET_R0193
-    m_vpsMaxTidIlRefPicsPlus1[i] = 7;
-#endif
     m_generalLayerIdx[i] = 0;
     for (int j = 0; j < MAX_VPS_LAYERS; j++)
     {
@@ -2330,11 +2455,7 @@ void VPS::deriveOutputLayerSets()
 
   m_numOutputLayersInOls[0] = 1;
   m_outputLayerIdInOls[0][0] = m_vpsLayerId[0];
-#if JVET_R0193_S0141
   m_numSubLayersInLayerInOLS[0][0] = m_ptlMaxTemporalId[m_olsPtlIdx[0]] + 1;
-#else
-  m_numSubLayersInLayerInOLS[0][0] = m_vpsMaxSubLayers;
-#endif
   layerUsedAsOutputLayerFlag[0] = 1;
   for (int i = 1; i < m_maxLayers; i++)
   {
@@ -2353,7 +2474,6 @@ void VPS::deriveOutputLayerSets()
     {
       m_numOutputLayersInOls[i] = 1;
       m_outputLayerIdInOls[i][0] = m_vpsLayerId[i];
-#if JVET_R0193_S0141
       if (m_vpsEachLayerIsAnOlsFlag)
       {
         m_numSubLayersInLayerInOLS[i][0] = m_ptlMaxTemporalId[m_olsPtlIdx[i]] + 1;
@@ -2372,15 +2492,8 @@ void VPS::deriveOutputLayerSets()
               m_numSubLayersInLayerInOLS[i][k] = maxSublayerNeeded;
             }
           }
+        }
       }
-      }
-#else
-      for(int  j = 0; j < i  &&  ( m_vpsOlsModeIdc  ==  0 ); j++ )
-      {
-        m_numSubLayersInLayerInOLS[i][j] = m_vpsMaxTidIlRefPicsPlus1[i];
-      }
-      m_numSubLayersInLayerInOLS[i][i] = m_vpsMaxSubLayers;
-#endif
     }
     else if( m_vpsOlsModeIdc == 1 )
     {
@@ -2389,19 +2502,13 @@ void VPS::deriveOutputLayerSets()
       for( int j = 0; j < m_numOutputLayersInOls[i]; j++ )
       {
         m_outputLayerIdInOls[i][j] = m_vpsLayerId[j];
-#if JVET_R0193_S0141
         m_numSubLayersInLayerInOLS[i][j] = m_ptlMaxTemporalId[m_olsPtlIdx[i]] + 1;
-#else
-        m_numSubLayersInLayerInOLS[i][j] = m_vpsMaxSubLayers;
-#endif
       }
     }
     else if( m_vpsOlsModeIdc == 2 )
     {
       int j = 0;
-#if JVET_R0193
       int highestIncludedLayer = 0;
-#endif
       for( j = 0; j  <  m_maxLayers; j++ )
       {
         m_numSubLayersInLayerInOLS[i][j] = 0;
@@ -2412,17 +2519,11 @@ void VPS::deriveOutputLayerSets()
         if( m_vpsOlsOutputLayerFlag[i][k] )
         {
           layerIncludedInOlsFlag[i][k] = 1;
-#if JVET_R0193
           highestIncludedLayer = k;
-#endif
           layerUsedAsOutputLayerFlag[k] = 1;
           outputLayerIdx[i][j] = k;
           m_outputLayerIdInOls[i][j++] = m_vpsLayerId[k];
-#if JVET_R0193_S0141
           m_numSubLayersInLayerInOLS[i][k] = m_ptlMaxTemporalId[m_olsPtlIdx[i]] + 1;
-#else
-          m_numSubLayersInLayerInOLS[i][k] = m_vpsMaxSubLayers;
-#endif
         }
       }
       m_numOutputLayersInOls[i] = j;
@@ -2433,15 +2534,8 @@ void VPS::deriveOutputLayerSets()
         for( int k = 0; k < numRefLayers[idx]; k++ )
         {
           layerIncludedInOlsFlag[i][refLayerIdx[idx][k]] = 1;
-#if !JVET_R0193
-          if( m_numSubLayersInLayerInOLS[i][ refLayerIdx[idx][k] ] < m_vpsMaxTidIlRefPicsPlus1[ m_outputLayerIdInOls[i][j] ] )
-          {
-            m_numSubLayersInLayerInOLS[i][ refLayerIdx[idx][k] ] =  m_vpsMaxTidIlRefPicsPlus1[ m_outputLayerIdInOls[i][j] ];
-          }
-#endif
         }
       }
-#if JVET_R0193
       for (int k = highestIncludedLayer - 1; k >= 0; k--)
       {
         if (layerIncludedInOlsFlag[i][k] && !m_vpsOlsOutputLayerFlag[i][k])
@@ -2456,7 +2550,6 @@ void VPS::deriveOutputLayerSets()
           }
         }
       }
-#endif
     }
   }
   for (int i = 0; i < m_maxLayers; i++)
@@ -2547,7 +2640,6 @@ void VPS::deriveTargetOutputLayerSet( int targetOlsIdx )
   }
 }
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
 int VPS::deriveTargetOLSIdx(void)
 {
   int lowestIdx = 0;
@@ -2579,7 +2671,6 @@ uint32_t VPS::getMaxTidinTOls(int m_targetOlsIdx)
   return getPtlMaxTemporalId(getOlsPtlIdx(m_targetOlsIdx));
 }
 
-#endif
 
 // ------------------------------------------------------------------------------------------------
 // Picture Header
@@ -2619,9 +2710,9 @@ PicHeader::PicHeader()
 , m_profDisabledFlag                              ( 0 )
 , m_jointCbCrSignFlag                             ( 0 )
 , m_qpDelta                                       ( 0 )
-, m_numAlfAps                                     ( 0 )
-, m_alfApsId                                      ( 0 )
-, m_alfChromaApsId                                ( 0 )
+, m_numAlfApsIdsLuma                              ( 0 )
+, m_alfApsIdsLuma                                 ( 0 )
+, m_alfApsIdChroma                                ( 0 )
 , m_deblockingFilterOverrideFlag                  ( 0 )
 , m_deblockingFilterDisable                       ( 0 )
 , m_deblockingFilterBetaOffsetDiv2                ( 0 )
@@ -2661,14 +2752,14 @@ PicHeader::PicHeader()
   m_RPL1.setLtrpInSliceHeaderFlag(0);
   m_RPL1.setNumberOfInterLayerPictures( 0 );
 
-  m_alfApsId.resize(0);
+  m_alfApsIdsLuma.resize(0);
 
   resetWpScaling();
 }
 
 PicHeader::~PicHeader()
 {
-  m_alfApsId.resize(0);
+  m_alfApsIdsLuma.resize(0);   // shrink the luma ALF APS id container to zero elements
 }
 
 /**
@@ -2706,8 +2797,8 @@ void PicHeader::initPicHeader()
   m_profDisabledFlag                              = 0;
   m_jointCbCrSignFlag                             = 0;
   m_qpDelta                                       = 0;
-  m_numAlfAps                                     = 0;
-  m_alfChromaApsId                                = 0;
+  m_numAlfApsIdsLuma                              = 0;
+  m_alfApsIdChroma                                = 0;
   m_deblockingFilterOverrideFlag                  = 0;
   m_deblockingFilterDisable                       = 0;
   m_deblockingFilterBetaOffsetDiv2                = 0;
@@ -2744,7 +2835,11 @@ void PicHeader::initPicHeader()
   m_RPL1.setNumberOfLongtermPictures(0);
   m_RPL1.setLtrpInSliceHeaderFlag(0);
 
-  m_alfApsId.resize(0);
+  m_alfApsIdsLuma.resize(0);
+#if GDR_ENABLED
+  m_inGdrInterval      = false;
+  m_lastGdrIntervalPoc = -1;
+#endif
 }
 
 const WPScalingParam *PicHeader::getWpScaling(const RefPicList refPicList, const int refIdx) const
@@ -2798,9 +2893,14 @@ SPSRExt::SPSRExt()
  : m_transformSkipRotationEnabledFlag   (false)
  , m_transformSkipContextEnabledFlag    (false)
  , m_extendedPrecisionProcessingFlag    (false)
+ , m_tsrcRicePresentFlag                (false)
  , m_intraSmoothingDisabledFlag         (false)
  , m_highPrecisionOffsetsEnabledFlag    (false)
+ , m_rrcRiceExtensionEnableFlag(false)
  , m_persistentRiceAdaptationEnabledFlag(false)
+#if JVET_W0046_RLSCP
+ , m_reverseLastSigCoeffEnabledFlag     (false)
+#endif
  , m_cabacBypassAlignmentEnabledFlag    (false)
 {
 }
@@ -3112,6 +3212,7 @@ PPS::PPS()
 , m_conformanceWindowFlag            (false)
 , m_picWidthInLumaSamples(352)
 , m_picHeightInLumaSamples( 288 )
+, m_explicitScalingWindowFlag        (false)
 , m_wrapAroundEnabledFlag            (false)
 , m_picWidthMinusWrapAroundOffset    (0)
 , m_wrapAroundOffset                 (0)
@@ -3504,7 +3605,7 @@ void PPS::initSubPic(const SPS &sps)
     m_subPics[i].setSubPicWidthInCTUs(sps.getSubPicWidth(i));
     m_subPics[i].setSubPicHeightInCTUs(sps.getSubPicHeight(i));
 
-    uint32_t firstCTU = sps.getSubPicCtuTopLeftY(i) * m_picWidthInCtu + sps.getSubPicCtuTopLeftX(i); 	
+    uint32_t firstCTU = sps.getSubPicCtuTopLeftY(i) * m_picWidthInCtu + sps.getSubPicCtuTopLeftX(i);
     m_subPics[i].setFirstCTUInSubPic(firstCTU);
     uint32_t lastCTU = (sps.getSubPicCtuTopLeftY(i) + sps.getSubPicHeight(i) - 1) * m_picWidthInCtu + sps.getSubPicCtuTopLeftX(i) + sps.getSubPicWidth(i) - 1;
     m_subPics[i].setLastCTUInSubPic(lastCTU);
@@ -3551,7 +3652,7 @@ void PPS::initSubPic(const SPS &sps)
         {
           // add ctus in a slice to the subpicture it belongs to
           m_subPics[i].addCTUsToSubPic(m_sliceMap[j].getCtuAddrList());
-	  numSlicesInSubPic++;
+          numSlicesInSubPic++;
           idxLastSliceInSubpic = j;
         }
         else if (idxFirstSliceAfterSubpic == m_numSlicesInPic && idxLastSliceInSubpic != -1)
@@ -3811,42 +3912,39 @@ bool ScalingList::isNotDefaultScalingList()
   return !isAllDefault;
 }
 
-/** get scaling matrix from RefMatrixID
- * \param sizeId    size index
- * \param listId    index of input matrix
- * \param refListId index of reference matrix
- */
-int ScalingList::lengthUvlc(int uiCode)
+int ScalingList::lengthUvlc(int code)   // returns 2 * floor(log2(code + 1)) + 1 for code in [0, MAX_INT - 1]
 {
-  if (uiCode < 0) printf("Error UVLC! \n");
+  CHECK(code < 0,        "Unsigned VLC cannot be negative");
+  CHECK(code == MAX_INT, "Maximum supported UVLC code is MAX_INT-1");   // the ++code below would overflow for MAX_INT
 
-  int uiLength = 1;
-  int uiTemp = ++uiCode;
+  int length = 1;
+  int temp = ++code;   // temp = code + 1, which is >= 1 for every input accepted above
 
-  CHECK(!uiTemp, "Integer overflow");
 
-  while (1 != uiTemp)
+  while (1 != temp)   // one iteration per bit below the leading one of (code + 1)
   {
-    uiTemp >>= 1;
-    uiLength += 2;
+    temp >>= 1;
+    length += 2;
   }
-  return (uiLength >> 1) + ((uiLength + 1) >> 1);
+  return (length >> 1) + ((length + 1) >> 1);   // length is odd, so the two halves sum back to length
 }
-int ScalingList::lengthSvlc(int uiCode)
+
+int ScalingList::lengthSvlc(int code)   // returns 2 * floor(log2(v + 1)) + 1, where v = 2 * |code| for code <= 0 and 2 * code - 1 for code > 0
 {
-  uint32_t uiCode2 = uint32_t(uiCode <= 0 ? (-uiCode) << 1 : (uiCode << 1) - 1);
-  int uiLength = 1;
-  int uiTemp = ++uiCode2;
+  uint64_t code2 = code <= 0 ? uint64_t(-int64_t(code)) << 1 : (uint64_t(code) << 1) - 1;   // mapped in 64 bits: negating or shifting a large int would be signed-overflow UB
+  int length = 1;
+  int64_t temp = ++code2;   // temp = v + 1, always >= 1
 
-  CHECK(!uiTemp, "Integer overflow");
+  CHECK(temp > MAX_INT, "Integer overflow constructing SVLC code");   // v + 1 must not exceed MAX_INT; tested directly instead of relying on wrap-around
 
-  while (1 != uiTemp)
+  while (1 != temp)   // one iteration per bit below the leading one of (v + 1)
   {
-    uiTemp >>= 1;
-    uiLength += 2;
+    temp >>= 1;
+    length += 2;
   }
-  return (uiLength >> 1) + ((uiLength + 1) >> 1);
+  return (length >> 1) + ((length + 1) >> 1);   // length is odd, so the two halves sum back to length
 }
+
 void ScalingList::codePredScalingList(int* scalingList, const int* scalingListPred, int scalingListDC, int scalingListPredDC, int scalingListId, int& bitsCost) //sizeId, listId is current to-be-coded matrix idx
 {
   int deltaValue = 0;
@@ -3886,6 +3984,7 @@ void ScalingList::codePredScalingList(int* scalingList, const int* scalingListPr
     bitsCost += lengthSvlc(data);
   }
 }
+
 void ScalingList::codeScalingList(int* scalingList, int scalingListDC, int scalingListId, int& bitsCost) //sizeId, listId is current to-be-coded matrix idx
 {
   int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
@@ -3904,7 +4003,9 @@ void ScalingList::codeScalingList(int* scalingList, int scalingListDC, int scali
   for (int i = 0; i < coefNum; i++)
   {
     if (scalingListId >= SCALING_LIST_1D_START_64x64 && scan[i].x >= 4 && scan[i].y >= 4)
+    {
       continue;
+    }
     data = int8_t(src[scan[i].idx] - nextCoef);
     nextCoef = src[scan[i].idx];
 
@@ -3923,18 +4024,20 @@ void ScalingList::CheckBestPredScalingList(int scalingListId, int predListId, in
   int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
   int predMatrixSize = (predListId < SCALING_LIST_1D_START_4x4) ? 2 : (predListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
 
-  if (matrixSize != predMatrixSize) printf("Predictor size mismatch! \n");
+  CHECK(matrixSize != predMatrixSize, "Predictor size mismatch");
 
   bitsCost = 2 + lengthUvlc(scalingListId - predListId);
   //copy-flag + predictor-mode-flag + deltaListId
   codePredScalingList(scalingList, scalingListPred, scalingListDC, scalingListPredDC, scalingListId, bitsCost);
   BitsCount = bitsCost;
 }
+
 void ScalingList::processRefMatrix(uint32_t scalinListId, uint32_t refListId)
 {
   int matrixSize = (scalinListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalinListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
   ::memcpy(getScalingListAddress(scalinListId), ((scalinListId == refListId) ? getScalingListDefaultAddress(refListId) : getScalingListAddress(refListId)), sizeof(int)*matrixSize*matrixSize);
 }
+
 void ScalingList::checkPredMode(uint32_t scalingListId)
 {
   int bestBitsCount = MAX_INT;
@@ -3949,7 +4052,9 @@ void ScalingList::checkPredMode(uint32_t scalingListId)
     int matrixSize = (scalingListId < SCALING_LIST_1D_START_4x4) ? 2 : (scalingListId < SCALING_LIST_1D_START_8x8) ? 4 : 8;
     int predMatrixSize = (predListIdx < SCALING_LIST_1D_START_4x4) ? 2 : (predListIdx < SCALING_LIST_1D_START_8x8) ? 4 : 8;
     if (((scalingListId == SCALING_LIST_1D_START_2x2 || scalingListId == SCALING_LIST_1D_START_4x4 || scalingListId == SCALING_LIST_1D_START_8x8) && predListIdx != (int)scalingListId) || matrixSize != predMatrixSize)
+    {
       continue;
+    }
     const int* refScalingList = (scalingListId == predListIdx) ? getScalingListDefaultAddress(predListIdx) : getScalingListAddress(predListIdx);
     const int refDC = (predListIdx < SCALING_LIST_1D_START_16x16) ? refScalingList[0] : (scalingListId == predListIdx) ? 16 : getScalingListDC(predListIdx);
     if (!::memcmp(getScalingListAddress(scalingListId), refScalingList, sizeof(int)*matrixSize*matrixSize) // check value of matrix
@@ -4230,9 +4335,13 @@ uint32_t PreCalcValues::getValIdx( const Slice &slice, const ChannelType chType
 uint32_t PreCalcValues::getMaxBtDepth( const Slice &slice, const ChannelType chType ) const
 {
   if ( slice.getPicHeader()->getSplitConsOverrideFlag() )
+  {   // override flag set: take the MTT hierarchy depth stored in the picture header
     return slice.getPicHeader()->getMaxMTTHierarchyDepth( slice.getSliceType(), ISingleTree ? CHANNEL_TYPE_LUMA : chType);
+  }
   else
-  return maxBtDepth[getValIdx( slice, chType )];
+  {   // no override: use this object's maxBtDepth table, indexed via getValIdx()
+    return maxBtDepth[getValIdx(slice, chType)];
+  }
 }
 
 uint32_t PreCalcValues::getMinBtSize( const Slice &slice, const ChannelType chType ) const
@@ -4243,9 +4352,13 @@ uint32_t PreCalcValues::getMinBtSize( const Slice &slice, const ChannelType chTy
 uint32_t PreCalcValues::getMaxBtSize( const Slice &slice, const ChannelType chType ) const
 {
   if (slice.getPicHeader()->getSplitConsOverrideFlag())
+  {   // override flag set: take the maximum BT size stored in the picture header
     return slice.getPicHeader()->getMaxBTSize( slice.getSliceType(), ISingleTree ? CHANNEL_TYPE_LUMA : chType);
+  }
   else
+  {   // no override: use this object's maxBtSize table, indexed via getValIdx()
     return maxBtSize[getValIdx(slice, chType)];
+  }
 }
 
 uint32_t PreCalcValues::getMinTtSize( const Slice &slice, const ChannelType chType ) const
@@ -4256,16 +4369,24 @@ uint32_t PreCalcValues::getMinTtSize( const Slice &slice, const ChannelType chTy
 uint32_t PreCalcValues::getMaxTtSize( const Slice &slice, const ChannelType chType ) const
 {
   if (slice.getPicHeader()->getSplitConsOverrideFlag())
+  {   // override flag set: take the maximum TT size stored in the picture header
     return slice.getPicHeader()->getMaxTTSize( slice.getSliceType(), ISingleTree ? CHANNEL_TYPE_LUMA : chType);
+  }
   else
-  return maxTtSize[getValIdx( slice, chType )];
+  {   // no override: use this object's maxTtSize table, indexed via getValIdx()
+    return maxTtSize[getValIdx(slice, chType)];
+  }
 }
 uint32_t PreCalcValues::getMinQtSize( const Slice &slice, const ChannelType chType ) const
 {
   if (slice.getPicHeader()->getSplitConsOverrideFlag())
+  {   // override flag set: take the minimum QT size stored in the picture header
     return slice.getPicHeader()->getMinQTSize( slice.getSliceType(), ISingleTree ? CHANNEL_TYPE_LUMA : chType);
+  }
   else
-  return minQtSize[getValIdx( slice, chType )];
+  {   // no override: use this object's minQtSize table, indexed via getValIdx()
+    return minQtSize[getValIdx(slice, chType)];
+  }
 }
 
 void Slice::scaleRefPicList( Picture *scaledRefPic[ ], PicHeader *picHeader, APS** apss, APS* lmcsAps, APS* scalingListAps, const bool isDecoder )
@@ -4451,64 +4572,233 @@ bool Slice::checkRPR()
 
 bool             operator == (const ConstraintInfo& op1, const ConstraintInfo& op2)
 {
-  if( op1.m_intraOnlyConstraintFlag                      != op2.m_intraOnlyConstraintFlag                        ) return false;
-  if( op1.m_maxBitDepthConstraintIdc                     != op2.m_maxBitDepthConstraintIdc                       ) return false;
-  if( op1.m_maxChromaFormatConstraintIdc                 != op2.m_maxChromaFormatConstraintIdc                   ) return false;
-  if( op1.m_onePictureOnlyConstraintFlag                 != op2.m_onePictureOnlyConstraintFlag                   ) return false;
-  if( op1.m_lowerBitRateConstraintFlag                   != op2.m_lowerBitRateConstraintFlag                     ) return false;
-  if (op1.m_allLayersIndependentConstraintFlag           != op2.m_allLayersIndependentConstraintFlag             ) return false;
-  if (op1.m_noMrlConstraintFlag                          != op2.m_noMrlConstraintFlag                            ) return false;
-  if (op1.m_noIspConstraintFlag                          != op2.m_noIspConstraintFlag                            ) return false;
-  if (op1.m_noMipConstraintFlag                          != op2.m_noMipConstraintFlag                            ) return false;
-  if (op1.m_noLfnstConstraintFlag                        != op2.m_noLfnstConstraintFlag                          ) return false;
-  if (op1.m_noMmvdConstraintFlag                         != op2.m_noMmvdConstraintFlag                           ) return false;
-  if (op1.m_noSmvdConstraintFlag                         != op2.m_noSmvdConstraintFlag                           ) return false;
-  if (op1.m_noProfConstraintFlag                         != op2.m_noProfConstraintFlag                           ) return false;
-  if (op1.m_noPaletteConstraintFlag                      != op2.m_noPaletteConstraintFlag                        ) return false;
-  if (op1.m_noActConstraintFlag                          != op2.m_noActConstraintFlag                            ) return false;
-  if (op1.m_noLmcsConstraintFlag                         != op2.m_noLmcsConstraintFlag                           ) return false;
-  if (op1.m_noExplicitScaleListConstraintFlag            != op2.m_noExplicitScaleListConstraintFlag              ) return false;
-  if (op1.m_noVirtualBoundaryConstraintFlag              != op2.m_noVirtualBoundaryConstraintFlag                ) return false;
-  if (op1.m_noChromaQpOffsetConstraintFlag               != op2.m_noChromaQpOffsetConstraintFlag                 ) return false;
-  if (op1.m_noRprConstraintFlag                          != op2.m_noRprConstraintFlag                            ) return false;
-  if (op1.m_noResChangeInClvsConstraintFlag              != op2.m_noResChangeInClvsConstraintFlag                ) return false;
-  if (op1.m_noMttConstraintFlag                          != op2.m_noMttConstraintFlag                            ) return false;
-  if( op1.m_noQtbttDualTreeIntraConstraintFlag           != op2.m_noQtbttDualTreeIntraConstraintFlag             ) return false;
-  if( op1.m_noPartitionConstraintsOverrideConstraintFlag != op2.m_noPartitionConstraintsOverrideConstraintFlag   ) return false;
-  if( op1.m_noSaoConstraintFlag                          != op2.m_noSaoConstraintFlag                            ) return false;
-  if( op1.m_noAlfConstraintFlag                          != op2.m_noAlfConstraintFlag                            ) return false;
-  if( op1.m_noCCAlfConstraintFlag                        != op2.m_noCCAlfConstraintFlag                          ) return false;
-  if (op1.m_noWeightedPredictionConstraintFlag           != op2.m_noWeightedPredictionConstraintFlag             ) return false;
-  if( op1.m_noRefWraparoundConstraintFlag                != op2.m_noRefWraparoundConstraintFlag                  ) return false;
-  if( op1.m_noTemporalMvpConstraintFlag                  != op2.m_noTemporalMvpConstraintFlag                    ) return false;
-  if( op1.m_noSbtmvpConstraintFlag                       != op2.m_noSbtmvpConstraintFlag                         ) return false;
-  if( op1.m_noAmvrConstraintFlag                         != op2.m_noAmvrConstraintFlag                           ) return false;
-  if( op1.m_noBdofConstraintFlag                         != op2.m_noBdofConstraintFlag                           ) return false;
-  if( op1.m_noDmvrConstraintFlag                         != op2.m_noDmvrConstraintFlag                           ) return false;
-  if( op1.m_noCclmConstraintFlag                         != op2.m_noCclmConstraintFlag                           ) return false;
-  if( op1.m_noMtsConstraintFlag                          != op2.m_noMtsConstraintFlag                            ) return false;
-  if( op1.m_noSbtConstraintFlag                          != op2.m_noSbtConstraintFlag                            ) return false;
-  if( op1.m_noAffineMotionConstraintFlag                 != op2.m_noAffineMotionConstraintFlag                   ) return false;
-  if( op1.m_noBcwConstraintFlag                          != op2.m_noBcwConstraintFlag                            ) return false;
-  if( op1.m_noIbcConstraintFlag                          != op2.m_noIbcConstraintFlag                            ) return false;
-  if( op1.m_noCiipConstraintFlag                         != op2.m_noCiipConstraintFlag                           ) return false;
-  if( op1.m_noLadfConstraintFlag                         != op2.m_noLadfConstraintFlag                           ) return false;
-  if( op1.m_noTransformSkipConstraintFlag                != op2.m_noTransformSkipConstraintFlag                  ) return false;
-  if( op1.m_noBDPCMConstraintFlag                        != op2.m_noBDPCMConstraintFlag                          ) return false;
-  if( op1.m_noJointCbCrConstraintFlag                    != op2.m_noJointCbCrConstraintFlag                      ) return false;
-  if( op1.m_noCuQpDeltaConstraintFlag                    != op2.m_noCuQpDeltaConstraintFlag                      ) return false;
-  if( op1.m_noDepQuantConstraintFlag                     != op2.m_noDepQuantConstraintFlag                       ) return false;
-  if( op1.m_noSignDataHidingConstraintFlag               != op2.m_noSignDataHidingConstraintFlag                 ) return false;
-  if( op1.m_noTrailConstraintFlag                        != op2.m_noTrailConstraintFlag                          ) return false;
-  if( op1.m_noStsaConstraintFlag                         != op2.m_noStsaConstraintFlag                           ) return false;
-  if( op1.m_noRaslConstraintFlag                         != op2.m_noRaslConstraintFlag                           ) return false;
-  if( op1.m_noRadlConstraintFlag                         != op2.m_noRadlConstraintFlag                           ) return false;
-  if( op1.m_noIdrConstraintFlag                          != op2.m_noIdrConstraintFlag                            ) return false;
-  if( op1.m_noCraConstraintFlag                          != op2.m_noCraConstraintFlag                            ) return false;
-  if( op1.m_noGdrConstraintFlag                          != op2.m_noGdrConstraintFlag                            ) return false;
-  if( op1.m_noApsConstraintFlag                          != op2.m_noApsConstraintFlag                            ) return false;
+  if (op1.m_intraOnlyConstraintFlag != op2.m_intraOnlyConstraintFlag)   // member-by-member comparison: the first differing member makes the objects unequal
+  {
+    return false;
+  }
+  if (op1.m_maxBitDepthConstraintIdc != op2.m_maxBitDepthConstraintIdc)
+  {
+    return false;
+  }
+  if (op1.m_maxChromaFormatConstraintIdc != op2.m_maxChromaFormatConstraintIdc)
+  {
+    return false;
+  }
+  if (op1.m_onePictureOnlyConstraintFlag != op2.m_onePictureOnlyConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_lowerBitRateConstraintFlag != op2.m_lowerBitRateConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_allLayersIndependentConstraintFlag != op2.m_allLayersIndependentConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noMrlConstraintFlag != op2.m_noMrlConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noIspConstraintFlag != op2.m_noIspConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noMipConstraintFlag != op2.m_noMipConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noLfnstConstraintFlag != op2.m_noLfnstConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noMmvdConstraintFlag != op2.m_noMmvdConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noSmvdConstraintFlag != op2.m_noSmvdConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noProfConstraintFlag != op2.m_noProfConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noPaletteConstraintFlag != op2.m_noPaletteConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noActConstraintFlag != op2.m_noActConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noLmcsConstraintFlag != op2.m_noLmcsConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noExplicitScaleListConstraintFlag != op2.m_noExplicitScaleListConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noVirtualBoundaryConstraintFlag != op2.m_noVirtualBoundaryConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noChromaQpOffsetConstraintFlag != op2.m_noChromaQpOffsetConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noRprConstraintFlag != op2.m_noRprConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noResChangeInClvsConstraintFlag != op2.m_noResChangeInClvsConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noMttConstraintFlag != op2.m_noMttConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noQtbttDualTreeIntraConstraintFlag != op2.m_noQtbttDualTreeIntraConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noPartitionConstraintsOverrideConstraintFlag != op2.m_noPartitionConstraintsOverrideConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noSaoConstraintFlag != op2.m_noSaoConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noAlfConstraintFlag != op2.m_noAlfConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noCCAlfConstraintFlag != op2.m_noCCAlfConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noWeightedPredictionConstraintFlag != op2.m_noWeightedPredictionConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noRefWraparoundConstraintFlag != op2.m_noRefWraparoundConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noTemporalMvpConstraintFlag != op2.m_noTemporalMvpConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noSbtmvpConstraintFlag != op2.m_noSbtmvpConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noAmvrConstraintFlag != op2.m_noAmvrConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noBdofConstraintFlag != op2.m_noBdofConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noDmvrConstraintFlag != op2.m_noDmvrConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noCclmConstraintFlag != op2.m_noCclmConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noMtsConstraintFlag != op2.m_noMtsConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noSbtConstraintFlag != op2.m_noSbtConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noAffineMotionConstraintFlag != op2.m_noAffineMotionConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noBcwConstraintFlag != op2.m_noBcwConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noIbcConstraintFlag != op2.m_noIbcConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noCiipConstraintFlag != op2.m_noCiipConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noLadfConstraintFlag != op2.m_noLadfConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noTransformSkipConstraintFlag != op2.m_noTransformSkipConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noBDPCMConstraintFlag != op2.m_noBDPCMConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noJointCbCrConstraintFlag != op2.m_noJointCbCrConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noCuQpDeltaConstraintFlag != op2.m_noCuQpDeltaConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noDepQuantConstraintFlag != op2.m_noDepQuantConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noSignDataHidingConstraintFlag != op2.m_noSignDataHidingConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noTrailConstraintFlag != op2.m_noTrailConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noStsaConstraintFlag != op2.m_noStsaConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noRaslConstraintFlag != op2.m_noRaslConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noRadlConstraintFlag != op2.m_noRadlConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noIdrConstraintFlag != op2.m_noIdrConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noCraConstraintFlag != op2.m_noCraConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noGdrConstraintFlag != op2.m_noGdrConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_noApsConstraintFlag != op2.m_noApsConstraintFlag)
+  {
+    return false;
+  }
   return true;
 }
+
 bool             operator != (const ConstraintInfo& op1, const ConstraintInfo& op2)
 {
   return !(op1 == op2);
@@ -4516,14 +4806,38 @@ bool             operator != (const ConstraintInfo& op1, const ConstraintInfo& o
 
 bool             operator == (const ProfileTierLevel& op1, const ProfileTierLevel& op2)
 {
-  if (op1.m_tierFlag        != op2.m_tierFlag) return false;
-  if (op1.m_profileIdc      != op2.m_profileIdc) return false;
-  if (op1.m_numSubProfile   != op2.m_numSubProfile) return false;
-  if (op1.m_levelIdc        != op2.m_levelIdc) return false;
-  if (op1.m_frameOnlyConstraintFlag != op2.m_frameOnlyConstraintFlag) return false;
-  if (op1.m_multiLayerEnabledFlag   != op2.m_multiLayerEnabledFlag) return false;
-  if (op1.m_constraintInfo  != op2.m_constraintInfo) return false;
-  if (op1.m_subProfileIdc   != op2.m_subProfileIdc) return false;
+  if (op1.m_tierFlag != op2.m_tierFlag)
+  {
+    return false;
+  }
+  if (op1.m_profileIdc != op2.m_profileIdc)
+  {
+    return false;
+  }
+  if (op1.m_numSubProfile != op2.m_numSubProfile)
+  {
+    return false;
+  }
+  if (op1.m_levelIdc != op2.m_levelIdc)
+  {
+    return false;
+  }
+  if (op1.m_frameOnlyConstraintFlag != op2.m_frameOnlyConstraintFlag)
+  {
+    return false;
+  }
+  if (op1.m_multiLayerEnabledFlag != op2.m_multiLayerEnabledFlag)
+  {
+    return false;
+  }
+  if (op1.m_constraintInfo != op2.m_constraintInfo)
+  {
+    return false;
+  }
+  if (op1.m_subProfileIdc != op2.m_subProfileIdc)
+  {
+    return false;
+  }
 
   for (int i = 0; i < MAX_TLAYER - 1; i++)
   {
@@ -4541,6 +4855,7 @@ bool             operator == (const ProfileTierLevel& op1, const ProfileTierLeve
   }
   return true;
 }
+
 bool             operator != (const ProfileTierLevel& op1, const ProfileTierLevel& op2)
 {
   return !(op1 == op2);
@@ -4573,12 +4888,10 @@ void xTraceVPSHeader()
   DTRACE( g_trace_ctx, D_HEADER, "=========== Video Parameter Set     ===========\n" );
 }
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
 void xTraceOPIHeader()
 {
   DTRACE(g_trace_ctx, D_HEADER, "=========== Operating Point Information     ===========\n");
 }
-#endif
 
 void xTraceDCIHeader()
 {
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index fc7fbeeb2b76d84a6cc59788270fbf8a70e4a878..f328bc56f2659c59185b758711724c06f1ae120d 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -923,7 +923,6 @@ public:
   }
 };
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
 class OPI
 {
 private:
@@ -952,7 +951,6 @@ public:
   void setOpiHtidPlus1(uint32_t val) { m_opihtidplus1 = val; }
 
 };
-#endif
 
 class VPS
 {
@@ -967,11 +965,7 @@ private:
   uint32_t              m_vpsCfgPredDirection[MAX_VPS_SUBLAYERS];
   bool                  m_vpsIndependentLayerFlag[MAX_VPS_LAYERS];
   bool                  m_vpsDirectRefLayerFlag[MAX_VPS_LAYERS][MAX_VPS_LAYERS];
-#if JVET_R0193
   std::vector<std::vector<uint32_t>>              m_vpsMaxTidIlRefPicsPlus1;
-#else
-  uint32_t              m_vpsMaxTidIlRefPicsPlus1[MAX_VPS_LAYERS];
-#endif
   bool                  m_vpsEachLayerIsAnOlsFlag;
   uint32_t              m_vpsOlsModeIdc;
   uint32_t              m_vpsNumOutputLayerSets;
@@ -1045,14 +1039,19 @@ public:
 
   bool              getIndependentLayerFlag(uint32_t layerIdx) const { return m_vpsIndependentLayerFlag[layerIdx]; }
   void              setIndependentLayerFlag(uint32_t layerIdx, bool t) { m_vpsIndependentLayerFlag[layerIdx] = t; }
-#if JVET_R0193
-  uint32_t          getMaxTidIlRefPicsPlus1(uint32_t layerIdx, uint32_t refLayerIdx) const { return m_vpsMaxTidIlRefPicsPlus1[layerIdx][refLayerIdx]; }
-  void              setMaxTidIlRefPicsPlus1(uint32_t layerIdx, uint32_t refLayerIdx, uint32_t i) { m_vpsMaxTidIlRefPicsPlus1[layerIdx][refLayerIdx] = i; }
+  uint32_t getMaxTidIlRefPicsPlus1(const uint32_t layerIdx, const uint32_t refLayerIdx) const   // bounds-checked read of m_vpsMaxTidIlRefPicsPlus1[layerIdx][refLayerIdx]
+  {
+    CHECK(layerIdx >= m_vpsMaxTidIlRefPicsPlus1.size(), "layerIdx out of bounds");   // outer index is validated first because the next line indexes that row
+    CHECK(refLayerIdx >= m_vpsMaxTidIlRefPicsPlus1[layerIdx].size(), "refLayerIdx out of bounds");
+    return m_vpsMaxTidIlRefPicsPlus1[layerIdx][refLayerIdx];
+  }
+  void setMaxTidIlRefPicsPlus1(const uint32_t layerIdx, const uint32_t refLayerIdx, const uint32_t i)   // bounds-checked write of one entry; never resizes the table
+  {
+    CHECK(layerIdx >= m_vpsMaxTidIlRefPicsPlus1.size(), "layerIdx out of bounds");   // outer index is validated first because the next line indexes that row
+    CHECK(refLayerIdx >= m_vpsMaxTidIlRefPicsPlus1[layerIdx].size(), "refLayerIdx out of bounds");
+    m_vpsMaxTidIlRefPicsPlus1[layerIdx][refLayerIdx] = i;   // i is the new value, not an index
+  }
   void              setMaxTidIlRefPicsPlus1(std::vector<std::vector<uint32_t>> i) { m_vpsMaxTidIlRefPicsPlus1 = i; }
-#else
-  uint32_t          getMaxTidIlRefPicsPlus1(uint32_t layerIdx) const { return m_vpsMaxTidIlRefPicsPlus1[layerIdx]; }
-  void              setMaxTidIlRefPicsPlus1(uint32_t layerIdx, uint32_t i) { m_vpsMaxTidIlRefPicsPlus1[layerIdx] = i; }
-#endif
 
   bool              getDirectRefLayerFlag(uint32_t layerIdx, uint32_t refLayerIdx) const { return m_vpsDirectRefLayerFlag[layerIdx][refLayerIdx]; }
   void              setDirectRefLayerFlag(uint32_t layerIdx, uint32_t refLayerIdx, bool t) { m_vpsDirectRefLayerFlag[layerIdx][refLayerIdx] = t; }
@@ -1132,10 +1131,8 @@ public:
 
   void              deriveOutputLayerSets();
   void              deriveTargetOutputLayerSet( int targetOlsIdx );
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   int               deriveTargetOLSIdx();
   uint32_t          getMaxTidinTOls(int m_targetOlsIdx);
-#endif
 
   void              checkVPS();
 
@@ -1305,11 +1302,17 @@ private:
   bool             m_transformSkipRotationEnabledFlag;
   bool             m_transformSkipContextEnabledFlag;
   bool             m_extendedPrecisionProcessingFlag;
+  bool             m_tsrcRicePresentFlag;
   bool             m_intraSmoothingDisabledFlag;
   bool             m_highPrecisionOffsetsEnabledFlag;
+  bool             m_rrcRiceExtensionEnableFlag;
   bool             m_persistentRiceAdaptationEnabledFlag;
+#if JVET_W0046_RLSCP
+  bool             m_reverseLastSigCoeffEnabledFlag;
+#endif
   bool             m_cabacBypassAlignmentEnabledFlag;
 
+
 public:
   SPSRExt();
 
@@ -1318,9 +1321,14 @@ public:
     return getTransformSkipRotationEnabledFlag()
         || getTransformSkipContextEnabledFlag()
         || getExtendedPrecisionProcessingFlag()
+        || getTSRCRicePresentFlag()
         || getIntraSmoothingDisabledFlag()
         || getHighPrecisionOffsetsEnabledFlag()
+        || getRrcRiceExtensionEnableFlag()
         || getPersistentRiceAdaptationEnabledFlag()
+#if JVET_W0046_RLSCP
+        || getReverseLastSigCoeffEnabledFlag()
+#endif
         || getCabacBypassAlignmentEnabledFlag();
   }
 
@@ -1334,15 +1342,26 @@ public:
   bool getExtendedPrecisionProcessingFlag() const                                      { return m_extendedPrecisionProcessingFlag;      }
   void setExtendedPrecisionProcessingFlag(bool value)                                  { m_extendedPrecisionProcessingFlag = value;     }
 
+  bool getTSRCRicePresentFlag() const                                                  { return m_tsrcRicePresentFlag;                  }
+  void setTSRCRicePresentFlag(bool b)                                                  { m_tsrcRicePresentFlag = b;                     }
+
   bool getIntraSmoothingDisabledFlag() const                                           { return m_intraSmoothingDisabledFlag;           }
   void setIntraSmoothingDisabledFlag(bool bValue)                                      { m_intraSmoothingDisabledFlag=bValue;           }
 
   bool getHighPrecisionOffsetsEnabledFlag() const                                      { return m_highPrecisionOffsetsEnabledFlag;      }
   void setHighPrecisionOffsetsEnabledFlag(bool value)                                  { m_highPrecisionOffsetsEnabledFlag = value;     }
 
+  bool getRrcRiceExtensionEnableFlag()                                                 const { return m_rrcRiceExtensionEnableFlag; }
+  void setRrcRiceExtensionEnableFlag(const bool value)                                       { m_rrcRiceExtensionEnableFlag = value; }
+
   bool getPersistentRiceAdaptationEnabledFlag() const                                  { return m_persistentRiceAdaptationEnabledFlag;  }
   void setPersistentRiceAdaptationEnabledFlag(const bool value)                        { m_persistentRiceAdaptationEnabledFlag = value; }
 
+#if JVET_W0046_RLSCP
+  bool getReverseLastSigCoeffEnabledFlag() const                                       { return m_reverseLastSigCoeffEnabledFlag;       }
+  void setReverseLastSigCoeffEnabledFlag(bool value)                                   { m_reverseLastSigCoeffEnabledFlag = value;      }
+#endif
+
   bool getCabacBypassAlignmentEnabledFlag() const                                      { return m_cabacBypassAlignmentEnabledFlag;      }
   void setCabacBypassAlignmentEnabledFlag(const bool value)                            { m_cabacBypassAlignmentEnabledFlag = value;     }
 };
@@ -1712,8 +1731,11 @@ public:
   void                    setEntropyCodingSyncEnabledFlag(bool val)                                       { m_entropyCodingSyncEnabledFlag = val;                                }
   bool                    getEntryPointsPresentFlag() const                                               { return m_entryPointPresentFlag;                                      }
   void                    setEntryPointsPresentFlag(bool val)                                             { m_entryPointPresentFlag = val;                                       }
-  int                     getMaxLog2TrDynamicRange(ChannelType channelType) const                         { return getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ? std::max<int>(15, int(m_bitDepths.recon[channelType] + 6)) : 15; }
-
+#if JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS
+  int                     getMaxLog2TrDynamicRange(ChannelType channelType) const                         { return getSpsRangeExtension().getExtendedPrecisionProcessingFlag() ? std::min<int>(20, int(m_bitDepths.recon[channelType] + 6)) : 15; }
+#else
+  int                     getMaxLog2TrDynamicRange(ChannelType channelType) const                         { return getSpsRangeExtension().getExtendedPrecisionProcessingFlag() && int(m_bitDepths.recon[channelType]) > 10 ? std::min<int>(20, int(m_bitDepths.recon[channelType] + 6)) : 15; }
+#endif
   int                     getDifferentialLumaChromaBitDepth() const                                       { return int(m_bitDepths.recon[CHANNEL_TYPE_LUMA]) - int(m_bitDepths.recon[CHANNEL_TYPE_CHROMA]); }
   int                     getQpBDOffset(ChannelType type) const                                           { return m_qpBDOffset[type];                                           }
   void                    setQpBDOffset(ChannelType type, int i)                                          { m_qpBDOffset[type] = i;                                              }
@@ -2001,6 +2023,7 @@ private:
   uint32_t         m_picWidthInLumaSamples;
   uint32_t         m_picHeightInLumaSamples;
   Window           m_conformanceWindow;
+  bool             m_explicitScalingWindowFlag;
   Window           m_scalingWindow;
 
   bool             m_wrapAroundEnabledFlag;               //< reference wrap around enabled or not
@@ -2243,11 +2266,13 @@ public:
   const Window&           getConformanceWindow() const                                    { return  m_conformanceWindow; }
   void                    setConformanceWindow( Window& conformanceWindow )               { m_conformanceWindow = conformanceWindow; }
 
+  void                    setExplicitScalingWindowFlag(bool flag)                         { m_explicitScalingWindowFlag = flag; }
+  bool                    getExplicitScalingWindowFlag() const                            { return m_explicitScalingWindowFlag; }
   Window&                 getScalingWindow()                                              { return  m_scalingWindow; }
   const Window&           getScalingWindow()                                        const { return  m_scalingWindow; }
   void                    setScalingWindow( Window& scalingWindow )                       { m_scalingWindow = scalingWindow; }
 
-  int                     getMixedNaluTypesInPicFlag() const                              { return m_mixedNaluTypesInPicFlag; }
+  bool                    getMixedNaluTypesInPicFlag() const                              { return m_mixedNaluTypesInPicFlag; }
   void                    setMixedNaluTypesInPicFlag( const bool flag )                   { m_mixedNaluTypesInPicFlag = flag; }
 };
 
@@ -2335,6 +2360,10 @@ private:
   bool                        m_nonReferencePictureFlag;                                //!< non-reference picture flag
   bool                        m_gdrOrIrapPicFlag;                                       //!< gdr or irap picture flag
   bool                        m_gdrPicFlag;                                             //!< gradual decoding refresh picture flag
+#if GDR_ENABLED
+  bool                        m_inGdrInterval;
+  int                         m_lastGdrIntervalPoc;
+#endif
   uint32_t                    m_recoveryPocCnt;                                         //!< recovery POC count
   bool                        m_noOutputBeforeRecoveryFlag;                             //!< NoOutputBeforeRecoveryFlag
   bool                        m_handleCraAsCvsStartFlag;                                //!< HandleCraAsCvsStartFlag
@@ -2374,9 +2403,9 @@ private:
   int                         m_qpDelta;                                                //!< value of Qp delta
   bool                        m_saoEnabledFlag[MAX_NUM_CHANNEL_TYPE];                   //!< sao enabled flags for each channel
   bool                        m_alfEnabledFlag[MAX_NUM_COMPONENT];                      //!< alf enabled flags for each component
-  int                         m_numAlfAps;                                              //!< number of alf aps active for the picture
-  std::vector<int>            m_alfApsId;                                               //!< list of alf aps for the picture
-  int                         m_alfChromaApsId;                                         //!< chroma alf aps ID
+  int                         m_numAlfApsIdsLuma;                                       //!< number of alf aps active for the picture
+  std::vector<int>            m_alfApsIdsLuma;                                          //!< list of alf aps for the picture
+  int                         m_alfApsIdChroma;                                         //!< chroma alf aps ID
   bool m_ccalfEnabledFlag[MAX_NUM_COMPONENT];
   int  m_ccalfCbApsId;
   int  m_ccalfCrApsId;
@@ -2421,6 +2450,10 @@ public:
   bool                        getGdrOrIrapPicFlag() const                               { return m_gdrOrIrapPicFlag;                                                                   }
   void                        setGdrPicFlag( bool b )                                   { m_gdrPicFlag = b;                                                                            }
   bool                        getGdrPicFlag() const                                     { return m_gdrPicFlag;                                                                         }
+#if GDR_ENABLED
+  void                        setInGdrInterval(bool b)                                  { m_inGdrInterval = b;                                                                         }
+  bool                        getInGdrInterval() const                                  { return m_inGdrInterval;                                                                      }  
+#endif
   void                        setRecoveryPocCnt( uint32_t u )                           { m_recoveryPocCnt = u;                                                                        }
   uint32_t                    getRecoveryPocCnt() const                                 { return m_recoveryPocCnt;                                                                     }
   void                        setSPSId( uint32_t u )                                    { m_spsId = u;                                                                                 }
@@ -2492,17 +2525,16 @@ public:
   bool                        getSaoEnabledFlag(ChannelType chType) const               { return m_saoEnabledFlag[chType];                                                             }
   void                        setAlfEnabledFlag(ComponentID compId, bool b)             { m_alfEnabledFlag[compId] = b;                                                                }
   bool                        getAlfEnabledFlag(ComponentID compId) const               { return m_alfEnabledFlag[compId];                                                             }
-  void                        setNumAlfAps(int i)                                       { m_numAlfAps = i;                                                                             }
-  int                         getNumAlfAps() const                                      { return m_numAlfAps;                                                                          }
-  void                        setAlfApsIdChroma(int i)                                  { m_alfChromaApsId = i;                                                                        }
-  int                         getAlfApsIdChroma() const                                 { return m_alfChromaApsId;                                                                     }
-  void setCcAlfEnabledFlag(ComponentID compId, bool b) { m_ccalfEnabledFlag[compId] = b; }
-  bool getCcAlfEnabledFlag(ComponentID compId) const { return m_ccalfEnabledFlag[compId]; }
-
-  void setCcAlfCbApsId(int i) { m_ccalfCbApsId = i; }
-  int  getCcAlfCbApsId() const { return m_ccalfCbApsId; }
-  void setCcAlfCrApsId(int i) { m_ccalfCrApsId = i; }
-  int  getCcAlfCrApsId() const { return m_ccalfCrApsId; }
+  void                        setNumAlfApsIdsLuma(int i)                                { m_numAlfApsIdsLuma = i;                                                                      }
+  int                         getNumAlfApsIdsLuma() const                               { return m_numAlfApsIdsLuma;                                                                   }
+  void                        setAlfApsIdChroma(int i)                                  { m_alfApsIdChroma = i;                                                                        }
+  int                         getAlfApsIdChroma() const                                 { return m_alfApsIdChroma;                                                                     }
+  void                        setCcAlfEnabledFlag(ComponentID compId, bool b)           { m_ccalfEnabledFlag[compId] = b; }
+  bool                        getCcAlfEnabledFlag(ComponentID compId) const             { return m_ccalfEnabledFlag[compId]; }
+  void                        setCcAlfCbApsId(int i)                                    { m_ccalfCbApsId = i; }
+  int                         getCcAlfCbApsId() const                                   { return m_ccalfCbApsId; }
+  void                        setCcAlfCrApsId(int i)                                    { m_ccalfCrApsId = i; }
+  int                         getCcAlfCrApsId() const                                   { return m_ccalfCrApsId; }
   void                        setDeblockingFilterOverrideFlag( bool b )                 { m_deblockingFilterOverrideFlag = b;                                                          }
   bool                        getDeblockingFilterOverrideFlag() const                   { return m_deblockingFilterOverrideFlag;                                                       }
   void                        setDeblockingFilterDisable( bool b )                      { m_deblockingFilterDisable= b;                                                                }
@@ -2561,14 +2593,14 @@ public:
   unsigned                    getMaxTTSize(SliceType   slicetype,
                                        ChannelType chType = CHANNEL_TYPE_LUMA) const    { return slicetype == I_SLICE ? (chType == CHANNEL_TYPE_LUMA ? m_maxTTSize[0] : m_maxTTSize[2]) : m_maxTTSize[1];                                  }
 
-  void                        setAlfAPSs(std::vector<int> apsIDs)                       { m_alfApsId.resize(m_numAlfAps);
-                                                                                          for (int i = 0; i < m_numAlfAps; i++)
+  void                        setAlfApsIdsLuma(std::vector<int> apsIDs)                 { m_alfApsIdsLuma.resize(m_numAlfApsIdsLuma);
+                                                                                          for (int i = 0; i < m_numAlfApsIdsLuma; i++)
                                                                                           {
-                                                                                            m_alfApsId[i] = apsIDs[i];
+                                                                                            m_alfApsIdsLuma[i] = apsIDs[i];
                                                                                           }
                                                                                         }
 
-  std::vector<int>            getAlfAPSs() const                                        { return m_alfApsId; }
+  std::vector<int>            getAlfApsIdsLuma() const                                  { return m_alfApsIdsLuma; }
 
   void                        setWpScaling(WPScalingParam *wp)
   {
@@ -2610,6 +2642,13 @@ private:
   bool                       m_useLTforDRAP;
   bool                       m_isDRAP;
   int                        m_latestDRAPPOC;
+  bool                       m_enableEdrapSEI;
+  int                        m_edrapRapId;
+  bool                       m_useLTforEdrap;
+  int                        m_edrapNumRefRapPics;
+  std::vector<int>           m_edrapRefRapIds;
+  int                        m_latestEDRAPPOC;
+  bool                       m_latestEdrapLeadingPicDecodableFlag;
   ReferencePictureList        m_RPL0;            //< RPL for L0 when present in slice header
   ReferencePictureList        m_RPL1;            //< RPL for L1 when present in slice header
   int                         m_rpl0Idx;              //< index of used RPL in the SPS or -1 for local RPL in the slice header
@@ -2633,6 +2672,10 @@ private:
   int                        m_deblockingFilterCrBetaOffsetDiv2;  //< beta offset for deblocking filter
   int                        m_deblockingFilterCrTcOffsetDiv2;    //< tc offset for deblocking filter
   bool                       m_depQuantEnabledFlag;               //!< dependent quantization enabled flag
+  int                        m_riceBaseLevelValue;                //!< base level value for abs_remainder
+#if JVET_W0046_RLSCP
+  bool                       m_reverseLastSigCoeffFlag;
+#endif
   bool                       m_signDataHidingEnabledFlag;         //!< sign data hiding enabled flag
   bool                       m_tsResidualCodingDisabledFlag;
   int                        m_list1IdxToList0Idx[MAX_NUM_REF];
@@ -2642,6 +2685,9 @@ private:
   bool                       m_bCheckLDC;
 
   bool                       m_biDirPred;
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  bool                       m_lmChromaCheckDisable;
+#endif
   int                        m_symRefIdx[2];
 
   //  Data
@@ -2700,16 +2746,21 @@ private:
 
   int                        m_rpPicOrderCntVal;
   APS*                       m_alfApss[ALF_CTB_MAX_NUM_APS];
-  bool                       m_tileGroupAlfEnabledFlag[MAX_NUM_COMPONENT];
-  int                        m_tileGroupNumAps;
-  std::vector<int>           m_tileGroupLumaApsId;
-  int                        m_tileGroupChromaApsId;
-  bool                       m_tileGroupCcAlfCbEnabledFlag;
-  bool                       m_tileGroupCcAlfCrEnabledFlag;
-  int                        m_tileGroupCcAlfCbApsId;
-  int                        m_tileGroupCcAlfCrApsId;
+  bool                       m_alfEnabledFlag[MAX_NUM_COMPONENT];
+  int                        m_numAlfApsIdsLuma;
+  std::vector<int>           m_alfApsIdsLuma;
+  int                        m_alfApsIdChroma;
+  bool                       m_ccAlfCbEnabledFlag;
+  bool                       m_ccAlfCrEnabledFlag;
+  int                        m_ccAlfCbApsId;
+  int                        m_ccAlfCrApsId;
   bool                       m_disableSATDForRd;
   bool                       m_isLossless;
+  int                        m_tsrc_index;
+  unsigned                   m_riceBit[8];
+#if JVET_W0046_RLSCP
+  int                        m_cnt_right_bottom;
+#endif
 public:
                               Slice();
   virtual                     ~Slice();
@@ -2825,6 +2876,12 @@ public:
   void                        setDeblockingFilterCrTcOffsetDiv2( int i )             { m_deblockingFilterCrTcOffsetDiv2 = i;                           }
   void                        setDepQuantEnabledFlag( bool b )                       { m_depQuantEnabledFlag = b;                                                                   }
   bool                        getDepQuantEnabledFlag() const                         { return m_depQuantEnabledFlag;                                                                }  
+  void                        setRiceBaseLevel(int b) { m_riceBaseLevelValue = b; }
+  int                         getRiceBaseLevel() const { return m_riceBaseLevelValue; }
+#if JVET_W0046_RLSCP
+  void                        setReverseLastSigCoeffFlag( bool b )                   { m_reverseLastSigCoeffFlag = b;                                }
+  bool                        getReverseLastSigCoeffFlag() const                     { return m_reverseLastSigCoeffFlag;                             }
+#endif
   void                        setSignDataHidingEnabledFlag( bool b )                 { m_signDataHidingEnabledFlag = b;                                                             }
   bool                        getSignDataHidingEnabledFlag() const                   { return m_signDataHidingEnabledFlag;                                                          }  
   void                        setTSResidualCodingDisabledFlag(bool b) { m_tsResidualCodingDisabledFlag = b; }
@@ -2843,11 +2900,18 @@ public:
 
   void                        setBiDirPred( bool b, int refIdx0, int refIdx1 ) { m_biDirPred = b; m_symRefIdx[0] = refIdx0; m_symRefIdx[1] = refIdx1; }
   bool                        getBiDirPred() const { return m_biDirPred; }
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  void                        setDisableLmChromaCheck( bool b )  { m_lmChromaCheckDisable = b; }
+  bool                        getDisableLmChromaCheck() const { return m_lmChromaCheckDisable; }
+#endif
   int                         getSymRefIdx( int refList ) const { return m_symRefIdx[refList]; }
 
   bool                        isIntra() const                                        { return m_eSliceType == I_SLICE;                               }
   bool                        isInterB() const                                       { return m_eSliceType == B_SLICE;                               }
   bool                        isInterP() const                                       { return m_eSliceType == P_SLICE;                               }
+#if GDR_ENABLED
+  bool                        isInterGDR() const { return (m_eSliceType == B_SLICE && m_eNalUnitType == NAL_UNIT_CODED_SLICE_GDR); } //!< true only for a B slice whose NAL unit type is NAL_UNIT_CODED_SLICE_GDR
+#endif
 
   bool                        getEnableDRAPSEI () const                              { return m_enableDRAPSEI;                                       }
   void                        setEnableDRAPSEI ( bool b )                            { m_enableDRAPSEI = b;                                          }
@@ -2861,6 +2925,24 @@ public:
   bool                        isPocRestrictedByDRAP( int poc, bool precedingDRAPinDecodingOrder );
   bool                        isPOCInRefPicList( const ReferencePictureList *rpl, int poc );
   void                        checkConformanceForDRAP( uint32_t temporalId );
+  bool                        getEnableEdrapSEI () const                             { return m_enableEdrapSEI; }
+  void                        setEnableEdrapSEI ( bool b )                           { m_enableEdrapSEI = b; }
+  int                         getEdrapRapId () const                                 { return m_edrapRapId; }
+  void                        setEdrapRapId (int i)                                  { m_edrapRapId = i; }
+  bool                        getUseLTforEdrap () const                              { return m_useLTforEdrap; }
+  void                        setUseLTforEdrap ( bool b )                            { m_useLTforEdrap = b; }
+  int                         getEdrapNumRefRapPics () const                         { return m_edrapNumRefRapPics; }
+  void                        setEdrapNumRefRapPics (int i)                          { m_edrapNumRefRapPics = i; }
+  int                         getEdrapRefRapId (int idx) const                       { return m_edrapRefRapIds[idx]; } //!< unchecked access: idx must be a valid index into m_edrapRefRapIds
+  void                        addEdrapRefRapIds (int i)                              { m_edrapRefRapIds.push_back(i); } //!< appends only; m_edrapNumRefRapPics is NOT updated here
+  void                        deleteEdrapRefRapIds (int i)                           { m_edrapRefRapIds.erase(m_edrapRefRapIds.begin() + i); m_edrapNumRefRapPics--; } //!< erases entry at position i and decrements m_edrapNumRefRapPics
+  bool                        isPocRestrictedByEdrap( int poc );
+  void                        setLatestEDRAPPOC ( int i )                            { m_latestEDRAPPOC = i; }
+  int                         getLatestEDRAPPOC () const                             { return m_latestEDRAPPOC; }
+  bool                        cvsHasPreviousEDRAP() const                            { return m_latestEDRAPPOC != MAX_INT; }
+  void                        setLatestEdrapLeadingPicDecodableFlag ( bool b )       { m_latestEdrapLeadingPicDecodableFlag = b; }
+  bool                        getLatestEdrapLeadingPicDecodableFlag () const         { return m_latestEdrapLeadingPicDecodableFlag; }
+  void                        checkConformanceForEDRAP( uint32_t temporalId );
 
   void                        setLambdas( const double lambdas[MAX_NUM_COMPONENT] )  { for (int component = 0; component < MAX_NUM_COMPONENT; component++) m_lambdas[component] = lambdas[component]; }
   const double*               getLambdas() const                                     { return m_lambdas;                                             }
@@ -2974,34 +3056,34 @@ public:
   void resetProcessingTime()       { m_dProcessingTime = m_iProcessingStartTime = 0; }
   double getProcessingTime() const { return m_dProcessingTime; }
 
-  void                        resetTileGroupAlfEnabledFlag() { memset(m_tileGroupAlfEnabledFlag, 0, sizeof(m_tileGroupAlfEnabledFlag)); }
-  bool                        getTileGroupAlfEnabledFlag(ComponentID compId) const { return m_tileGroupAlfEnabledFlag[compId]; }
-  void                        setTileGroupAlfEnabledFlag(ComponentID compId, bool b) { m_tileGroupAlfEnabledFlag[compId] = b; }
-  int                         getTileGroupNumAps() const { return m_tileGroupNumAps; }
-  void                        setTileGroupNumAps(int i) { m_tileGroupNumAps = i; }
-  int                         getTileGroupApsIdChroma() const { return m_tileGroupChromaApsId; }
-  void                        setTileGroupApsIdChroma(int i) { m_tileGroupChromaApsId = i; }
-  std::vector<int32_t>        getTileGroupApsIdLuma() const { return m_tileGroupLumaApsId; }
-  void                        setAlfAPSs(std::vector<int> ApsIDs)
+  void                        resetAlfEnabledFlag() { memset(m_alfEnabledFlag, 0, sizeof(m_alfEnabledFlag)); }
+  bool                        getAlfEnabledFlag(ComponentID compId) const { return m_alfEnabledFlag[compId]; }
+  void                        setAlfEnabledFlag(ComponentID compId, bool b) { m_alfEnabledFlag[compId] = b; }
+  int                         getNumAlfApsIdsLuma() const { return m_numAlfApsIdsLuma; }
+  void                        setNumAlfApsIdsLuma(int i) { m_numAlfApsIdsLuma = i; }
+  int                         getAlfApsIdChroma() const { return m_alfApsIdChroma; }
+  void                        setAlfApsIdChroma(int i) { m_alfApsIdChroma = i; }
+  std::vector<int>            getAlfApsIdsLuma() const { return m_alfApsIdsLuma; }
+  void                        setAlfApsIdsLuma(std::vector<int> apsIDs) //!< copies apsIDs[0 .. m_numAlfApsIdsLuma-1]; apsIDs must hold at least that many entries
   {
-    m_tileGroupLumaApsId.resize(m_tileGroupNumAps);
-    for (int i = 0; i < m_tileGroupNumAps; i++)
+    m_alfApsIdsLuma.resize(m_numAlfApsIdsLuma); // sized from the stored count, not from apsIDs.size()
+    for (int i = 0; i < m_numAlfApsIdsLuma; i++)
     {
-      m_tileGroupLumaApsId[i] = ApsIDs[i];
+      m_alfApsIdsLuma[i] = apsIDs[i];
     }
   }
-  void resetTileGroupCcAlCbfEnabledFlag() { m_tileGroupCcAlfCbEnabledFlag = 0; }
-  void resetTileGroupCcAlCrfEnabledFlag() { m_tileGroupCcAlfCrEnabledFlag = 0; }
-
-  void setTileGroupCcAlfCbEnabledFlag(bool b) { m_tileGroupCcAlfCbEnabledFlag = b; }
-  void setTileGroupCcAlfCrEnabledFlag(bool b) { m_tileGroupCcAlfCrEnabledFlag = b; }
-  void setTileGroupCcAlfCbApsId(int i) { m_tileGroupCcAlfCbApsId = i; }
-  void setTileGroupCcAlfCrApsId(int i) { m_tileGroupCcAlfCrApsId = i; }
-
-  bool getTileGroupCcAlfCbEnabledFlag() { return m_tileGroupCcAlfCbEnabledFlag; }
-  bool getTileGroupCcAlfCrEnabledFlag() { return m_tileGroupCcAlfCrEnabledFlag; }
-  int  getTileGroupCcAlfCbApsId() { return m_tileGroupCcAlfCbApsId; }
-  int  getTileGroupCcAlfCrApsId() { return m_tileGroupCcAlfCrApsId; }
+  void resetCcAlCbfEnabledFlag() { m_ccAlfCbEnabledFlag = false; } //!< clears m_ccAlfCbEnabledFlag
+  void resetCcAlCrfEnabledFlag() { m_ccAlfCrEnabledFlag = false; } //!< clears m_ccAlfCrEnabledFlag
+
+  void setCcAlfCbEnabledFlag(bool b) { m_ccAlfCbEnabledFlag = b; }
+  void setCcAlfCrEnabledFlag(bool b) { m_ccAlfCrEnabledFlag = b; }
+  void setCcAlfCbApsId(int i) { m_ccAlfCbApsId = i; }
+  void setCcAlfCrApsId(int i) { m_ccAlfCrApsId = i; }
+
+  bool getCcAlfCbEnabledFlag() const { return m_ccAlfCbEnabledFlag; } //!< read-only accessor, callable on a const Slice
+  bool getCcAlfCrEnabledFlag() const { return m_ccAlfCrEnabledFlag; } //!< read-only accessor, callable on a const Slice
+  int  getCcAlfCbApsId() const { return m_ccAlfCbApsId; }             //!< read-only accessor, callable on a const Slice
+  int  getCcAlfCrApsId() const { return m_ccAlfCrApsId; }             //!< read-only accessor, callable on a const Slice
   void                        setDisableSATDForRD(bool b) { m_disableSATDForRd = b; }
   bool                        getDisableSATDForRD() { return m_disableSATDForRd; }
   void                        setLossless(bool b) { m_isLossless = b; }
@@ -3017,6 +3099,14 @@ public:
 
   CcAlfFilterParam            m_ccAlfFilterParam;
   uint8_t*                    m_ccAlfFilterControl[2];
+  void                        set_tsrc_index(int v) { m_tsrc_index = v; }
+  int                         get_tsrc_index() const { return m_tsrc_index; }
+  void                        setRiceBit(int idx, int i) { m_riceBit[idx] = i; }
+  unsigned                    getRiceBit(int idx) const { return m_riceBit[idx]; }
+#if JVET_W0046_RLSCP
+  void                        updateCntRightBottom(int v) { m_cnt_right_bottom += v; } //!< adds v (may be negative) to the running counter
+  int                         getCntRightBottom() const { return m_cnt_right_bottom; } //!< current counter value; const so it can be read through a const Slice
+#endif
 
 protected:
   Picture*              xGetRefPic( PicList& rcListPic, const int poc, const int layerId );
@@ -3111,9 +3201,7 @@ public:
 
 #if ENABLE_TRACING
 void xTraceVPSHeader();
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
 void xTraceOPIHeader();
-#endif
 void xTraceDCIHeader();
 void xTraceSPSHeader();
 void xTracePPSHeader();
diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp
index 95e339fff0df4479f975aa8fd7ce52af541afe61..59d4c22de826684bbe3eb403ec2f66e4ecf7ded4 100644
--- a/source/Lib/CommonLib/TrQuant.cpp
+++ b/source/Lib/CommonLib/TrQuant.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -208,13 +208,6 @@ TrQuant::~TrQuant()
   }
 }
 
-#if ENABLE_SPLIT_PARALLELISM
-void TrQuant::copyState( const TrQuant& other )
-{
-  m_quant->copyState( *other.m_quant );
-}
-#endif
-
 void TrQuant::xDeQuant(const TransformUnit &tu,
                              CoeffBuf      &dstCoeff,
                        const ComponentID   &compID,
diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h
index 72c951024f16ce361c2fa3508dede65b246a91c6..f93ab67decf38b4b27c0eddc7678f3653137baf7 100644
--- a/source/Lib/CommonLib/TrQuant.h
+++ b/source/Lib/CommonLib/TrQuant.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -115,10 +115,6 @@ public:
   void   lambdaAdjustColorTrans(bool forward) { m_quant->lambdaAdjustColorTrans(forward); }
   void   resetStore() { m_quant->resetStore(); }
 
-#if ENABLE_SPLIT_PARALLELISM
-  void    copyState( const TrQuant& other );
-#endif
-
 protected:
   TCoeff   m_tempCoeff[MAX_TB_SIZEY * MAX_TB_SIZEY];
 
diff --git a/source/Lib/CommonLib/TrQuant_EMT.cpp b/source/Lib/CommonLib/TrQuant_EMT.cpp
index fd2e3917dad1c457a132a2ac9ddf9ba7c129fa94..7c37001cee50463b99db271b5de2236f2abae66a 100644
--- a/source/Lib/CommonLib/TrQuant_EMT.cpp
+++ b/source/Lib/CommonLib/TrQuant_EMT.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/TrQuant_EMT.h b/source/Lib/CommonLib/TrQuant_EMT.h
index d6e6a2a2d207bfc3efebb9fed1c1214239ff13fb..11d30ff740448568da360c75274056212f6bba43 100644
--- a/source/Lib/CommonLib/TrQuant_EMT.h
+++ b/source/Lib/CommonLib/TrQuant_EMT.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 9a224f53dfd986022600e906a5040bbfec26490f..39d73bb224056b9d2e86d34bf48f638d593717f6 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -53,26 +53,41 @@
 // clang-format off
 
 //########### place macros to be removed in next cycle below this line ###############
+#define JVET_W0133_CONSTRAINED_RASL_ENCODING              1 // SEI message for Constrained RASL encoding for bitstream switching
 
 #define JVET_S0176_ITEM5                                  1 // JVET-S0176 #5: When an SLI SEI message is present for a CVS, the value of sps_num_subpics_minus1 shall be the same for all the SPSs referenced by the pictures in the layers with multiple subpictures per picture.
-#define JVET_S0096_RPL_CONSTRAINT                         1// JVET-S0096 aspect 1: When pps_rpl_info_in_ph_flag is equal to 1 and ph_inter_slice_allowed_flag is equal to 1, the value of num_ref_entries[ 0 ][ RplsIdx[ 0 ] ] shall be greater than 0.
-#define JVET_S0078_NOOUTPUTPRIORPICFLAG                   0 // JVET-S0078: Handling of NoOutputOfPriorPicsFlag in output process
-#define JVET_S0219_ASPECT1                                1 // JVET-S0219 aspect1 : removal non-referred APS parameter set in the non-output layer.
-#define JVET_R0193                                        1 // JVET-R0193: signalling of the number of maximum sublayers used for inter-layer prediction for each layer
-#define JVET_R0193_S0141                                  1 // JVET-S0141 item 47 : item 47: In the general sub-bitstream extraction process, specify the conditions under which an output sub-bitstream is required to be a conforming bitstream such that the value of tIdTarget is specified to be in the range of 0 to vps_ptl_max_tid[ vps_ols_ptl_idx[ targetOlsIdx ] ], inclusive (instead of 0 to 6 inclusive). (JVET-S0158 aspect 1)
-#define JVET_T0065_LEVEL_6_3                              1 // JVET-T0065: Add level 6.3
-#define JVET_T0091_LMCS_ENC_OVERFLOW_FIX                  1 // JVET-T0091: LMCS encoder overflow fix at high bit-depth for SDR
-#define JVET_S0163_ON_TARGETOLS_SUBLAYERS                 1 // JVET-S0163: On target OLS and sublayers for decoding (OPI NAL Unit)
-#define JVET_R0266_GCI                                    1 // JVET-R0266 #5: Specify that no_gdr_constraint_flag equal to 1 specifies that sps_gdr_enabled_flag shall be equal to 0
-#define JVET_S0084_S0110_RADL                             1 // When the current picture is a RADL picture, allow RASL pictures with pps_mixed_nalu_types_in_pic_flag is equal to 1 in active entries in RefPicList[ 0 ] or RefPicList[ 1 ]
-#define FIX_TICKET_1405                                   1 // Add dph_sei_single_component_flag and dph_sei_reserved_zero_7bits syntax to decoded picture hash SEI message
-#define FIX_SUBPICS_W_RPR                                 1 // Fix handling of RPR with subpictures (via scaling windows with no resolution change)
-#define JVET_S0175_ASPECT5                                1 // use u(8) instead of u(4) for (ffi_)display_elemental_periods_minus1 and pt_display_elemental_periods_minus1
-#define JVET_S0175_ASPECT6                                1 // The general_nal_hrd_params_present_flag and general_vcl_hrd_params_present_flag are allowed to both be equal to 0
-#define JVET_R0046_IRAP_ASPECT2                           1 // Add a constraint on an ILRP being either an IRAP picture or having TemporalId less than or equal to Max (0, vps_max_tid_il_ref_pics_plus1[ refPicVpsLayerId ] - 1 )
-#define JVET_T0064                                        1 // JVET-T0064: control of filter strength for ALF
+
+#define JVET_W0078_MVP_SEI                                1 // JVET-W0078 Multiview view position SEI message
+
+#define JVET_W0129_ENABLE_ALF_TRUEORG                    1 // Using true original samples for ALF as default setting
+
+#define JVET_W0134_UNIFORM_METRICS_LOG                    1 // change metrics output for easy parsing
+
+#define JVET_W0070_W0121_SPSRE_CLEANUP                    1 // JVET-W0070 Proposal 3 & JVET-W0121 Option 1 : condition the signaling of sps_ts_residual_coding_rice_present_in_sh_flag
+
+#define JVET_W0043                                        1 // Alignment of smooth block QP control with adaptive QP in VTM
+
+#define JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS              1 // JVET-W0178: bitstream constraints on RExt tools for low bit-depth (bit-depth <=10)
+
+#define JVET_W0046_RLSCP                                  1 // JVET-W0046: CE1.1 reverse last significant coefficient position
+
+#define JVET_W2005_RANGE_EXTENSION_PROFILES               1 // JVET-W2005 (JVET-W0136 profile plus meeting decisions)
+
+#define JVET_S0154_ASPECT9_AND_S0158_ASPECT4              1 // JVET-S0154 #9:  In the subpicture sub-bitstream extraction process, insert SEI NAL units to directly contain those SEI messages that were scalable-nested HRD-related SEI messages that apply to the output bitstream, and remove their original container SEI NAL units from the output bitstream. When the target OLS includes only one layer, apply the same for scalable-nested non-HRD-related SEI messages.
+                                                            // JVET-S0158 #4c: Insert SEI NAL units to directly contain those SEI messages that were scalable-nested HRD-related SEI messages that apply to the output bitstream, and remove their original container SEI NAL units from the output bitstream. When the target OLS includes only one layer, apply the same for scalable-nested non-HRD-related SEI messages.
+
+
+#define JVET_S0117_VB                                     1 // sub-picture extraction VB rewriting
 
 //########### place macros to be be kept below this line ###############
+#define GDR_ENABLED   1
+
+#if GDR_ENABLED
+#define GDR_LEAK_TEST  0
+#define GDR_ENC_TRACE  0
+#define GDR_DEC_TRACE  0
+#endif
+
 #define JVET_S0257_DUMP_360SEI_MESSAGE                    1 // Software support of 360 SEI messages
 
 #define JVET_R0351_HIGH_BIT_DEPTH_ENABLED                 0 // JVET-R0351: high bit depth coding enabled (increases accuracies of some calculations, e.g. transforms)
@@ -110,17 +125,6 @@ typedef std::pair<int, int>  TrCost;
 #define JVET_O0756_CALCULATE_HDRMETRICS                   1
 #endif
 
-#ifndef ENABLE_SPLIT_PARALLELISM
-#define ENABLE_SPLIT_PARALLELISM                          0
-#endif
-#if ENABLE_SPLIT_PARALLELISM
-#define PARL_SPLIT_MAX_NUM_JOBS                           6                             // number of parallel jobs that can be defined and need memory allocated
-#define NUM_RESERVERD_SPLIT_JOBS                        ( PARL_SPLIT_MAX_NUM_JOBS + 1 )  // number of all data structures including the merge thread (0)
-#define PARL_SPLIT_MAX_NUM_THREADS                        PARL_SPLIT_MAX_NUM_JOBS
-#define NUM_SPLIT_THREADS_IF_MSVC                         4
-
-#endif
-
 // clang-format on
 
 // ====================================================================================================================
@@ -201,7 +205,7 @@ typedef std::pair<int, int>  TrCost;
 
 // SIMD optimizations
 #define SIMD_ENABLE                                       1
-#define ENABLE_SIMD_OPT                                 ( SIMD_ENABLE && !RExt__HIGH_BIT_DEPTH_SUPPORT )    ///< SIMD optimizations, no impact on RD performance
+#define ENABLE_SIMD_OPT                                 SIMD_ENABLE                                         ///< SIMD optimizations, no impact on RD performance
 #define ENABLE_SIMD_OPT_MCIF                            ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for the interpolation filter, no impact on RD performance
 #define ENABLE_SIMD_OPT_BUFFER                          ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for the buffer operations, no impact on RD performance
 #define ENABLE_SIMD_OPT_DIST                            ( 1 && ENABLE_SIMD_OPT )                            ///< SIMD optimization for the distortion calculations(SAD,SSE,HADAMARD), no impact on RD performance
@@ -243,9 +247,11 @@ typedef std::pair<int, int>  TrCost;
 #if RExt__HIGH_BIT_DEPTH_SUPPORT
 #define FULL_NBIT                                         1 ///< When enabled, use distortion measure derived from all bits of source data, otherwise discard (bitDepth - 8) least-significant bits of distortion
 #define RExt__HIGH_PRECISION_FORWARD_TRANSFORM            1 ///< 0 use original 6-bit transform matrices for both forward and inverse transform, 1 (default) = use original matrices for inverse transform and high precision matrices for forward transform
+#define JVET_V0106_DEP_QUANT_ENC_OPT                      1 ///< 0 use original g_goRiceBits[4][32] LUT for codeword length estimation at encoder, 1 (default) use extended g_goRiceBits[16][64] LUT for codeword length estimation at encoder
 #else
 #define FULL_NBIT                                         1 ///< When enabled, use distortion measure derived from all bits of source data, otherwise discard (bitDepth - 8) least-significant bits of distortion
 #define RExt__HIGH_PRECISION_FORWARD_TRANSFORM            0 ///< 0 (default) use original 6-bit transform matrices for both forward and inverse transform, 1 = use original matrices for inverse transform and high precision matrices for forward transform
+#define JVET_V0106_DEP_QUANT_ENC_OPT                      0 ///< 0 (default) use original g_goRiceBits[4][32] LUT for codeword length estimation at encoder, 1 - use extended g_goRiceBits[16][64] LUT for codeword length estimation at encoder
 #endif
 
 #if FULL_NBIT
@@ -692,6 +698,9 @@ namespace Profile
   enum Name
   {
     NONE                                 = 0,
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    INTRA                                = 8,
+#endif
     STILL_PICTURE                        = 64,
     MAIN_10                              = 1,
     MAIN_10_STILL_PICTURE                = MAIN_10 | STILL_PICTURE,
@@ -701,6 +710,17 @@ namespace Profile
     MAIN_10_444_STILL_PICTURE            = MAIN_10_444 | STILL_PICTURE,
     MULTILAYER_MAIN_10_444               = 49,
     MULTILAYER_MAIN_10_444_STILL_PICTURE = MULTILAYER_MAIN_10_444 | STILL_PICTURE,
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    MAIN_12                              = 2,
+    MAIN_12_444                          = 34,
+    MAIN_16_444                          = 36,
+    MAIN_12_INTRA                        = MAIN_12 | INTRA,
+    MAIN_12_444_INTRA                    = MAIN_12_444 | INTRA,
+    MAIN_16_444_INTRA                    = MAIN_16_444 | INTRA,
+    MAIN_12_STILL_PICTURE                = MAIN_12 | STILL_PICTURE,
+    MAIN_12_444_STILL_PICTURE            = MAIN_12_444 | STILL_PICTURE,
+    MAIN_16_444_STILL_PICTURE            = MAIN_16_444 | STILL_PICTURE,
+#endif
   };
 }
 
@@ -730,9 +750,7 @@ namespace Level
     LEVEL6   = 96,
     LEVEL6_1 = 99,
     LEVEL6_2 = 102,
-#if JVET_T0065_LEVEL_6_3
     LEVEL6_3 = 105,
-#endif
     LEVEL15_5 = 255,
   };
 }
@@ -791,11 +809,7 @@ enum NalUnitType
   NAL_UNIT_CODED_SLICE_GDR,         // 10
 
   NAL_UNIT_RESERVED_IRAP_VCL_11,
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   NAL_UNIT_OPI,                     // 12
-#else
-  NAL_UNIT_RESERVED_IRAP_VCL_12,
-#endif
   NAL_UNIT_DCI,                     // 13
   NAL_UNIT_VPS,                     // 14
   NAL_UNIT_SPS,                     // 15
@@ -920,8 +934,12 @@ struct LFCUParam
   bool leftEdge;                         ///< indicates left edge
   bool topEdge;                          ///< indicates top edge
 };
-
-
+struct LutModel   // NOTE(review): presumably describes one signalled look-up table - confirm against the users of this type
+{
+  bool             presentFlag = false;   // default-initialised to false, i.e. "not present" until set
+  int              numLutValues = 0;      // default 0; presumably the number of valid lutValues entries - TODO confirm
+  std::vector<Pel> lutValues;             // table entries; empty on construction
+};
 
 struct PictureHash
 {
@@ -1229,20 +1247,8 @@ template<typename T>
 class dynamic_cache
 {
   std::vector<T*> m_cache;
-#if ENABLE_SPLIT_PARALLELISM
-  int64_t         m_cacheId;
-#endif
 
 public:
-
-#if ENABLE_SPLIT_PARALLELISM
-  dynamic_cache()
-  {
-    static int cacheId = 0;
-    m_cacheId = cacheId++;
-  }
-
-#endif
   ~dynamic_cache()
   {
     deleteEntries();
@@ -1267,48 +1273,22 @@ public:
     {
       ret = m_cache.back();
       m_cache.pop_back();
-#if ENABLE_SPLIT_PARALLELISM
-      CHECK( ret->cacheId != m_cacheId, "Putting item into wrong cache!" );
-      CHECK( !ret->cacheUsed,           "Fetched an element that should've been in cache!!" );
-#endif
     }
     else
     {
       ret = new T;
     }
 
-#if ENABLE_SPLIT_PARALLELISM
-    ret->cacheId   = m_cacheId;
-    ret->cacheUsed = false;
-
-#endif
     return ret;
   }
 
   void cache( T* el )
   {
-#if ENABLE_SPLIT_PARALLELISM
-    CHECK( el->cacheId != m_cacheId, "Putting item into wrong cache!" );
-    CHECK( el->cacheUsed,            "Putting cached item back into cache!" );
-
-    el->cacheUsed = true;
-
-#endif
     m_cache.push_back( el );
   }
 
   void cache( std::vector<T*>& vel )
   {
-#if ENABLE_SPLIT_PARALLELISM
-    for( auto el : vel )
-    {
-      CHECK( el->cacheId != m_cacheId, "Putting item into wrong cache!" );
-      CHECK( el->cacheUsed,            "Putting cached item back into cache!" );
-
-      el->cacheUsed = true;
-    }
-
-#endif
     m_cache.insert( m_cache.end(), vel.begin(), vel.end() );
     vel.clear();
   }
diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp
index ab3fc757ee617944be57f41123f304fbafd39e76..8f9787c6cf69d206445dc46e8a91b63100163fa9 100644
--- a/source/Lib/CommonLib/Unit.cpp
+++ b/source/Lib/CommonLib/Unit.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -557,6 +557,10 @@ void PredictionUnit::initData()
     for ( uint32_t j = 0; j < 3; j++ )
     {
       mvAffi[i][j].setZero();
+#if GDR_ENABLED
+      mvAffiSolid[i][j] = true;
+      mvAffiValid[i][j] = true;
+#endif
     }
   }
   ciipFlag = false;
diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h
index 8a6858fdf56d8e8d3138122f45e00e3a88fb386f..b6b0c44e4c537477c49b7cd00f68f368626f91ee 100644
--- a/source/Lib/CommonLib/Unit.h
+++ b/source/Lib/CommonLib/Unit.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -350,11 +350,7 @@ struct CodingUnit : public UnitArea
 
   TransformUnit *firstTU;
   TransformUnit *lastTU;
-#if ENABLE_SPLIT_PARALLELISM
 
-  int64_t cacheId;
-  bool    cacheUsed;
-#endif
   const uint8_t     getSbtIdx() const { assert( ( ( sbtInfo >> 0 ) & 0xf ) < NUMBER_SBT_IDX ); return ( sbtInfo >> 0 ) & 0xf; }
   const uint8_t     getSbtPos() const { return ( sbtInfo >> 4 ) & 0x3; }
   void              setSbtIdx( uint8_t idx ) { CHECK( idx >= NUMBER_SBT_IDX, "sbt_idx wrong" ); sbtInfo = ( idx << 0 ) + ( sbtInfo & 0xf0 ); }
@@ -394,12 +390,25 @@ struct InterPredictionData
   uint8_t     mvpNum  [NUM_REF_PIC_LIST_01];
   Mv        mvd     [NUM_REF_PIC_LIST_01];
   Mv        mv      [NUM_REF_PIC_LIST_01];
+#if GDR_ENABLED
+  bool      mvSolid[NUM_REF_PIC_LIST_01];
+  bool      mvValid[NUM_REF_PIC_LIST_01];
+  bool      mvpSolid[NUM_REF_PIC_LIST_01];
+  MvpType   mvpType[NUM_REF_PIC_LIST_01];
+  Position  mvpPos[NUM_REF_PIC_LIST_01];
+#endif
   int16_t     refIdx  [NUM_REF_PIC_LIST_01];
   MergeType mergeType;
   bool      mvRefine;
   Mv        mvdL0SubPu[MAX_NUM_SUBCU_DMVR];
   Mv        mvdAffi [NUM_REF_PIC_LIST_01][3];
   Mv        mvAffi[NUM_REF_PIC_LIST_01][3];
+#if GDR_ENABLED
+  bool      mvAffiSolid[NUM_REF_PIC_LIST_01][3];
+  bool      mvAffiValid[NUM_REF_PIC_LIST_01][3];
+  MvpType   mvAffiType[NUM_REF_PIC_LIST_01][3];
+  Position  mvAffiPos[NUM_REF_PIC_LIST_01][3];
+#endif
   bool      ciipFlag;
 
   Mv        bv;                             // block vector for IBC
@@ -434,12 +443,6 @@ struct PredictionUnit : public UnitArea, public IntraPredictionData, public Inte
   const MotionInfo& getMotionInfo( const Position& pos ) const;
   MotionBuf         getMotionBuf();
   CMotionBuf        getMotionBuf() const;
-
-#if ENABLE_SPLIT_PARALLELISM
-
-  int64_t cacheId;
-  bool    cacheUsed;
-#endif
 };
 
 // ---------------------------------------------------------------------------
@@ -490,11 +493,6 @@ struct TransformUnit : public UnitArea
         Pel*      getPLTIndex(const ComponentID id);
         bool*     getRunTypes(const ComponentID id);
 
-#if ENABLE_SPLIT_PARALLELISM
-  int64_t cacheId;
-  bool    cacheUsed;
-
-#endif
 private:
   TCoeff *m_coeffs[ MAX_NUM_TBLOCKS ];
   Pel    *m_pcmbuf[ MAX_NUM_TBLOCKS ];
diff --git a/source/Lib/CommonLib/UnitPartitioner.cpp b/source/Lib/CommonLib/UnitPartitioner.cpp
index 7c2a0c02b2ede863cc88b498c407ebb3914435bc..afdb0e978879e8e30589fdaf87146bc0b03796e5 100644
--- a/source/Lib/CommonLib/UnitPartitioner.cpp
+++ b/source/Lib/CommonLib/UnitPartitioner.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/UnitPartitioner.h b/source/Lib/CommonLib/UnitPartitioner.h
index 4ed645937b0dc1478522cb45a4d32455b7b3d84f..6b5651036d5a6f196525efe8036f9b0cafc71302 100644
--- a/source/Lib/CommonLib/UnitPartitioner.h
+++ b/source/Lib/CommonLib/UnitPartitioner.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp
index f76f64ea236e1ec1e806d7d09a0b91f1f08633db..f759784f30b2366ed78c3616714a39050b28f077 100644
--- a/source/Lib/CommonLib/UnitTools.cpp
+++ b/source/Lib/CommonLib/UnitTools.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -101,48 +101,86 @@ void CS::setRefinedMotionField(CodingStructure &cs)
 
 bool CU::getRprScaling( const SPS* sps, const PPS* curPPS, Picture* refPic, int& xScale, int& yScale )
 {
-  const Window& curScalingWindow = curPPS->getScalingWindow();
-  int curPicWidth = curPPS->getPicWidthInLumaSamples()   - SPS::getWinUnitX( sps->getChromaFormatIdc() ) * (curScalingWindow.getWindowLeftOffset() + curScalingWindow.getWindowRightOffset());
-  int curPicHeight = curPPS->getPicHeightInLumaSamples() - SPS::getWinUnitY( sps->getChromaFormatIdc() ) * (curScalingWindow.getWindowTopOffset()  + curScalingWindow.getWindowBottomOffset());
-
-  const Window& refScalingWindow = refPic->getScalingWindow();
-  int refPicWidth = refPic->getPicWidthInLumaSamples()   - SPS::getWinUnitX( sps->getChromaFormatIdc() ) * (refScalingWindow.getWindowLeftOffset() + refScalingWindow.getWindowRightOffset());
-  int refPicHeight = refPic->getPicHeightInLumaSamples() - SPS::getWinUnitY( sps->getChromaFormatIdc() ) * (refScalingWindow.getWindowTopOffset()  + refScalingWindow.getWindowBottomOffset());
-
-  xScale = ( ( refPicWidth << SCALE_RATIO_BITS ) + ( curPicWidth >> 1 ) ) / curPicWidth;
-  yScale = ( ( refPicHeight << SCALE_RATIO_BITS ) + ( curPicHeight >> 1 ) ) / curPicHeight;
-
-  int curSeqMaxPicWidthY = sps->getMaxPicWidthInLumaSamples();                  // sps_pic_width_max_in_luma_samples
-  int curSeqMaxPicHeightY = sps->getMaxPicHeightInLumaSamples();                // sps_pic_height_max_in_luma_samples
-  int curPicWidthY = curPPS->getPicWidthInLumaSamples();                        // pps_pic_width_in_luma_samples
-  int curPicHeightY = curPPS->getPicHeightInLumaSamples();                      // pps_pic_height_in_luma_samples
-  int max8MinCbSizeY = std::max((int)8, (1<<sps->getLog2MinCodingBlockSize())); // Max(8, MinCbSizeY)
+  const int subWidthC  = SPS::getWinUnitX(sps->getChromaFormatIdc());
+  const int subHeightC = SPS::getWinUnitY(sps->getChromaFormatIdc());
 
-  CHECK((curPicWidth * curSeqMaxPicWidthY) < refPicWidth * (curPicWidthY - max8MinCbSizeY), "(curPicWidth * curSeqMaxPicWidthY) should be greater than or equal to refPicWidth * (curPicWidthY - max8MinCbSizeY))");
-  CHECK((curPicHeight * curSeqMaxPicHeightY) < refPicHeight * (curPicHeightY - max8MinCbSizeY), "(curPicHeight * curSeqMaxPicHeightY) should be greater than or equal to refPicHeight * (curPicHeightY - max8MinCbSizeY))");
-
-  CHECK(curPicWidth * 2 < refPicWidth, "curPicWidth * 2 shall be greater than or equal to refPicWidth");
-  CHECK(curPicHeight * 2 < refPicHeight, "curPicHeight * 2 shall be greater than or equal to refPicHeight");
-  CHECK(curPicWidth > refPicWidth * 8, "curPicWidth shall be less than or equal to refPicWidth * 8");
-  CHECK(curPicHeight > refPicHeight * 8, "curPicHeight shall be less than or equal to refPicHeight * 8");
+  const Window& curScalingWindow = curPPS->getScalingWindow();
 
-  int subWidthC = SPS::getWinUnitX(sps->getChromaFormatIdc());
-  int subHeightC = SPS::getWinUnitY(sps->getChromaFormatIdc());
+  const int curLeftOffset   = subWidthC * curScalingWindow.getWindowLeftOffset();
+  const int curRightOffset  = subWidthC * curScalingWindow.getWindowRightOffset();
+  const int curTopOffset    = subHeightC * curScalingWindow.getWindowTopOffset();
+  const int curBottomOffset = subHeightC * curScalingWindow.getWindowBottomOffset();
 
-  CHECK(subWidthC * curScalingWindow.getWindowLeftOffset() < (-curPicWidthY) * 15, "The value of SubWidthC * pps_scaling_win_left_offset shall be greater than or equal to -pps_pic_width_in_luma_samples * 15");
-  CHECK(subWidthC * curScalingWindow.getWindowLeftOffset() >= curPicWidthY, "The value of SubWidthC * pps_scaling_win_left_offset shall be less than pps_pic_width_in_luma_samples");
-  CHECK(subWidthC * curScalingWindow.getWindowRightOffset() < (-curPicWidthY) * 15, "The value of SubWidthC * pps_scaling_win_right_offset shall be greater than or equal to -pps_pic_width_in_luma_samples * 15");
-  CHECK(subWidthC * curScalingWindow.getWindowRightOffset() >= curPicWidthY, "The value of SubWidthC * pps_scaling_win_right_offset shall be less than pps_pic_width_in_luma_samples");
+  // Note: 64-bit integers are used for sizes such as to avoid possible overflows in corner cases
+  const int64_t curPicScalWinWidth  = curPPS->getPicWidthInLumaSamples() - (curLeftOffset + curRightOffset);
+  const int64_t curPicScalWinHeight = curPPS->getPicHeightInLumaSamples() - (curTopOffset + curBottomOffset);
 
-  CHECK(subHeightC * curScalingWindow.getWindowTopOffset() < (-curPicHeightY) * 15, "The value of SubHeightC * pps_scaling_win_top_offset shall be greater than or equal to -pps_pic_height_in_luma_samples * 15");
-  CHECK(subHeightC * curScalingWindow.getWindowTopOffset() >= curPicHeightY, "The value of SubHeightC * pps_scaling_win_top_offset shall be less than pps_pic_height_in_luma_samples");
-  CHECK(subHeightC * curScalingWindow.getWindowBottomOffset() < (-curPicHeightY) * 15, "The value of SubHeightC *pps_scaling_win_bottom_offset shall be greater than or equal to -pps_pic_height_in_luma_samples * 15");
-  CHECK(subHeightC * curScalingWindow.getWindowBottomOffset() >= curPicHeightY, "The value of SubHeightC *pps_scaling_win_bottom_offset shall be less than pps_pic_height_in_luma_samples");
+  const Window& refScalingWindow = refPic->getScalingWindow();
 
-  CHECK(subWidthC * (curScalingWindow.getWindowLeftOffset() + curScalingWindow.getWindowRightOffset()) < (-curPicWidthY) * 15, "The value of SubWidthC * ( pps_scaling_win_left_offset + pps_scaling_win_right_offset ) shall be greater than or equal to -pps_pic_width_in_luma_samples * 15");
-  CHECK(subWidthC * (curScalingWindow.getWindowLeftOffset() + curScalingWindow.getWindowRightOffset()) >= curPicWidthY, "The value of SubWidthC * ( pps_scaling_win_left_offset + pps_scaling_win_right_offset ) shall be less than pps_pic_width_in_luma_samples");
-  CHECK(subHeightC * (curScalingWindow.getWindowTopOffset() + curScalingWindow.getWindowBottomOffset()) < (-curPicHeightY) * 15, "The value of SubHeightC * ( pps_scaling_win_top_offset + pps_scaling_win_bottom_offset ) shall be greater than or equal to -pps_pic_height_in_luma_samples * 15");
-  CHECK(subHeightC * (curScalingWindow.getWindowTopOffset() + curScalingWindow.getWindowBottomOffset()) >= curPicHeightY, "The value of SubHeightC * ( pps_scaling_win_top_offset + pps_scaling_win_bottom_offset ) shall be less than pps_pic_height_in_luma_samples");
+  const int refLeftOffset   = subWidthC * refScalingWindow.getWindowLeftOffset();
+  const int refRightOffset  = subWidthC * refScalingWindow.getWindowRightOffset();
+  const int refTopOffset    = subHeightC * refScalingWindow.getWindowTopOffset();
+  const int refBottomOffset = subHeightC * refScalingWindow.getWindowBottomOffset();
+
+  const int64_t refPicScalWinWidth  = refPic->getPicWidthInLumaSamples() - (refLeftOffset + refRightOffset);
+  const int64_t refPicScalWinHeight = refPic->getPicHeightInLumaSamples() - (refTopOffset + refBottomOffset);
+
+  CHECK(curPicScalWinWidth * 2 < refPicScalWinWidth,
+        "curPicScalWinWidth * 2 shall be greater than or equal to refPicScalWinWidth");
+  CHECK(curPicScalWinHeight * 2 < refPicScalWinHeight,
+        "curPicScalWinHeight * 2 shall be greater than or equal to refPicScalWinHeight");
+  CHECK(curPicScalWinWidth > refPicScalWinWidth * 8,
+        "curPicScalWinWidth shall be less than or equal to refPicScalWinWidth * 8");
+  CHECK(curPicScalWinHeight > refPicScalWinHeight * 8,
+        "curPicScalWinHeight shall be less than or equal to refPicScalWinHeight * 8");
+
+  xScale = (int) (((refPicScalWinWidth << SCALE_RATIO_BITS) + (curPicScalWinWidth >> 1)) / curPicScalWinWidth);
+  yScale = (int) (((refPicScalWinHeight << SCALE_RATIO_BITS) + (curPicScalWinHeight >> 1)) / curPicScalWinHeight);
+
+  const int maxPicWidth  = sps->getMaxPicWidthInLumaSamples();    // sps_pic_width_max_in_luma_samples
+  const int maxPicHeight = sps->getMaxPicHeightInLumaSamples();   // sps_pic_height_max_in_luma_samples
+  const int curPicWidth  = curPPS->getPicWidthInLumaSamples();    // pps_pic_width_in_luma_samples
+  const int curPicHeight = curPPS->getPicHeightInLumaSamples();   // pps_pic_height_in_luma_samples
+
+  const int picSizeIncrement = std::max((int) 8, (1 << sps->getLog2MinCodingBlockSize()));   // Max(8, MinCbSizeY)
+
+  CHECK((curPicScalWinWidth * maxPicWidth) < refPicScalWinWidth * (curPicWidth - picSizeIncrement),
+        "(curPicScalWinWidth * maxPicWidth) should be greater than or equal to refPicScalWinWidth * (curPicWidth - "
+        "picSizeIncrement))");
+  CHECK((curPicScalWinHeight * maxPicHeight) < refPicScalWinHeight * (curPicHeight - picSizeIncrement),
+        "(curPicScalWinHeight * maxPicHeight) should be greater than or equal to refPicScalWinHeight * (curPicHeight - "
+        "picSizeIncrement))");
+
+  CHECK(curLeftOffset < -curPicWidth * 15, "The value of SubWidthC * pps_scaling_win_left_offset shall be greater "
+                                           "than or equal to -pps_pic_width_in_luma_samples * 15");
+  CHECK(curLeftOffset >= curPicWidth,
+        "The value of SubWidthC * pps_scaling_win_left_offset shall be less than pps_pic_width_in_luma_samples");
+  CHECK(curRightOffset < -curPicWidth * 15, "The value of SubWidthC * pps_scaling_win_right_offset shall be greater "
+                                            "than or equal to -pps_pic_width_in_luma_samples * 15");
+  CHECK(curRightOffset >= curPicWidth,
+        "The value of SubWidthC * pps_scaling_win_right_offset shall be less than pps_pic_width_in_luma_samples");
+
+  CHECK(curTopOffset < -curPicHeight * 15, "The value of SubHeightC * pps_scaling_win_top_offset shall be greater "
+                                           "than or equal to -pps_pic_height_in_luma_samples * 15");
+  CHECK(curTopOffset >= curPicHeight,
+        "The value of SubHeightC * pps_scaling_win_top_offset shall be less than pps_pic_height_in_luma_samples");
+  CHECK(curBottomOffset < (-curPicHeight) * 15, "The value of SubHeightC * pps_scaling_win_bottom_offset shall be "
+                                                "greater than or equal to -pps_pic_height_in_luma_samples * 15");
+  CHECK(curBottomOffset >= curPicHeight,
+        "The value of SubHeightC * pps_scaling_win_bottom_offset shall be less than pps_pic_height_in_luma_samples");
+
+  CHECK(curLeftOffset + curRightOffset < -curPicWidth * 15,
+        "The value of SubWidthC * ( pps_scaling_win_left_offset + pps_scaling_win_right_offset ) shall be greater than "
+        "or equal to -pps_pic_width_in_luma_samples * 15");
+  CHECK(curLeftOffset + curRightOffset >= curPicWidth,
+        "The value of SubWidthC * ( pps_scaling_win_left_offset + pps_scaling_win_right_offset ) shall be less than "
+        "pps_pic_width_in_luma_samples");
+  CHECK(curTopOffset + curBottomOffset < -curPicHeight * 15,
+        "The value of SubHeightC * ( pps_scaling_win_top_offset + pps_scaling_win_bottom_offset ) shall be greater "
+        "than or equal to -pps_pic_height_in_luma_samples * 15");
+  CHECK(curTopOffset + curBottomOffset >= curPicHeight,
+        "The value of SubHeightC * ( pps_scaling_win_top_offset + pps_scaling_win_bottom_offset ) shall be less than "
+        "pps_pic_height_in_luma_samples");
 
   return refPic->isRefScaled( curPPS );
 }
@@ -170,7 +208,7 @@ void CU::checkConformanceILRP(Slice *slice)
     return;
   }
 
-  //constraint 1: The picture referred to by each active entry in RefPicList[ 0 ] or RefPicList[ 1 ] has the same subpicture layout as the current picture 
+  //constraint 1: The picture referred to by each active entry in RefPicList[ 0 ] or RefPicList[ 1 ] has the same subpicture layout as the current picture
   bool isAllRefSameSubpicLayout = true;
   for (int refList = 0; refList < numRefList; refList++) // loop over l0 and l1
   {
@@ -349,6 +387,10 @@ void CU::saveMotionInHMVP( const CodingUnit& cu, const bool isToBeDone )
   {
     MotionInfo mi = pu.getMotionInfo();
 
+#if GDR_ENABLED
+    mi.sourcePos   = pu.lumaPos();
+    mi.sourceClean = pu.cs->isClean(mi.sourcePos, CHANNEL_TYPE_LUMA);
+#endif
     mi.BcwIdx = (mi.interDir == 3) ? cu.BcwIdx : BCW_DEFAULT;
 
     const unsigned log2ParallelMergeLevel = (pu.cs->sps->getLog2ParallelMergeLevelMinus2() + 2);
@@ -788,7 +830,7 @@ uint32_t PU::getCoLocatedIntraLumaMode(const PredictionUnit &pu)
 
 int PU::getWideAngle( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID )
 {
-  //This function returns a wide angle index taking into account that the values 0 and 1 are reserved 
+  //This function returns a wide angle index taking into account that the values 0 and 1 are reserved
   //for Planar and DC respectively, as defined in the Spec. Text.
   if( dirMode < 2 )
   {
@@ -817,7 +859,12 @@ int PU::getWideAngle( const TransformUnit &tu, const uint32_t dirMode, const Com
 bool PU::addMergeHMVPCand(const CodingStructure &cs, MergeCtx &mrgCtx, const int &mrgCandIdx,
                           const uint32_t maxNumMergeCandMin1, int &cnt, const bool isAvailableA1,
                           const MotionInfo miLeft, const bool isAvailableB1, const MotionInfo miAbove,
-                          const bool ibcFlag, const bool isGt4x4)
+                          const bool ibcFlag, const bool isGt4x4
+#if GDR_ENABLED
+                         ,const PredictionUnit &pu
+                         ,bool &allCandSolidInAbove
+#endif
+)
 {
   const Slice& slice = *cs.slice;
   MotionInfo miNeighbor;
@@ -825,9 +872,26 @@ bool PU::addMergeHMVPCand(const CodingStructure &cs, MergeCtx &mrgCtx, const int
   auto &lut = ibcFlag ? cs.motionLut.lutIbc : cs.motionLut.lut;
   int num_avai_candInLUT = (int)lut.size();
 
+#if GDR_ENABLED
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+
+  bool  vbOnCtuBoundary = true;
+  if (isEncodeGdrClean)
+  {
+    vbOnCtuBoundary = (pu.cs->picHeader->getNumVerVirtualBoundaries() == 0) || (pu.cs->picHeader->getVirtualBoundariesPosX(0) % pu.cs->sps->getMaxCUWidth() == 0);
+    allCandSolidInAbove = allCandSolidInAbove && vbOnCtuBoundary;
+  }
+#endif
   for (int mrgIdx = 1; mrgIdx <= num_avai_candInLUT; mrgIdx++)
   {
     miNeighbor = lut[num_avai_candInLUT - mrgIdx];
+#if GDR_ENABLED
+    Position sourcePos = Position(0, 0);
+    if (isEncodeGdrClean)
+    {
+      sourcePos = miNeighbor.sourcePos;
+    }
+#endif
 
     if ( mrgIdx > 2 || ((mrgIdx > 1 || !isGt4x4) && ibcFlag)
       || ((!isAvailableA1 || (miLeft != miNeighbor)) && (!isAvailableB1 || (miAbove != miNeighbor))) )
@@ -837,9 +901,28 @@ bool PU::addMergeHMVPCand(const CodingStructure &cs, MergeCtx &mrgCtx, const int
       mrgCtx.BcwIdx            [cnt] = (miNeighbor.interDir == 3) ? miNeighbor.BcwIdx : BCW_DEFAULT;
 
       mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miNeighbor.mv[0], miNeighbor.refIdx[0]);
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        // note: cannot guarantee the order/value in the LUT if any LUT entry is in the dirty area
+        mrgCtx.mvPos[(cnt << 1) + 0]   = sourcePos;
+        mrgCtx.mvSolid[(cnt << 1) + 0] = allCandSolidInAbove && vbOnCtuBoundary;
+        mrgCtx.mvValid[(cnt << 1) + 0] = cs.isClean(pu.Y().bottomRight(), miNeighbor.mv[0], REF_PIC_LIST_0, miNeighbor.refIdx[0]);
+        allCandSolidInAbove = allCandSolidInAbove && vbOnCtuBoundary;
+      }
+#endif
       if (slice.isInterB())
       {
         mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miNeighbor.mv[1], miNeighbor.refIdx[1]);
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          mrgCtx.mvPos[(cnt << 1) + 1]   = sourcePos;
+          mrgCtx.mvSolid[(cnt << 1) + 1] = allCandSolidInAbove && vbOnCtuBoundary;
+          mrgCtx.mvValid[(cnt << 1) + 1] = cs.isClean(pu.Y().bottomRight(), miNeighbor.mv[1], REF_PIC_LIST_1, miNeighbor.refIdx[1]);
+          allCandSolidInAbove = allCandSolidInAbove && vbOnCtuBoundary;
+        }
+#endif
       }
 
       if (mrgCandIdx == cnt)
@@ -867,6 +950,11 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const
 {
   const CodingStructure &cs = *pu.cs;
   const uint32_t maxNumMergeCand = pu.cs->sps->getMaxNumIBCMergeCand();
+#if GDR_ENABLED
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool  allCandSolidInAbove = true;
+#endif
+
   for (uint32_t ui = 0; ui < maxNumMergeCand; ++ui)
   {
     mrgCtx.BcwIdx[ui] = BCW_DEFAULT;
@@ -874,6 +962,15 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const
     mrgCtx.mrgTypeNeighbours[ui] = MRG_TYPE_IBC;
     mrgCtx.mvFieldNeighbours[ui * 2].refIdx = NOT_VALID;
     mrgCtx.mvFieldNeighbours[ui * 2 + 1].refIdx = NOT_VALID;
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      mrgCtx.mvSolid[(ui << 1) + 0] = true;
+      mrgCtx.mvSolid[(ui << 1) + 1] = true;
+      mrgCtx.mvValid[(ui << 1) + 0] = true;
+      mrgCtx.mvValid[(ui << 1) + 1] = true;
+    }
+#endif
     mrgCtx.useAltHpelIf[ui] = false;
   }
 
@@ -899,6 +996,12 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const
     mrgCtx.interDirNeighbours[cnt] = miLeft.interDir;
     // get Mv from Left
     mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]);
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      mrgCtx.mvSolid[(cnt << 1) + 0] = cs.isClean(posLB.offset(-1, 0), pu.chType);
+    }
+#endif
     if (mrgCandIdx == cnt)
     {
       return;
@@ -925,6 +1028,12 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const
       mrgCtx.interDirNeighbours[cnt] = miAbove.interDir;
       // get Mv from Above
       mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miAbove.mv[0], miAbove.refIdx[0]);
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        mrgCtx.mvSolid[(cnt << 1) + 0] = cs.isClean(posRT.offset(0, -1), pu.chType);
+      }
+#endif
       if (mrgCandIdx == cnt)
       {
         return;
@@ -942,8 +1051,19 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const
 
   if (cnt != maxNumMergeCand)
   {
+#if GDR_ENABLED
+    bool allCandSolidInAbove = true;
+    bool bFound = addMergeHMVPCand(cs, mrgCtx, mrgCandIdx, maxNumMergeCand, cnt
+      , isAvailableA1, miLeft, isAvailableB1, miAbove
+      , true
+      , isGt4x4
+      , pu
+      , allCandSolidInAbove
+    );
+#else
     bool bFound = addMergeHMVPCand(cs, mrgCtx, mrgCandIdx, maxNumMergeCand, cnt, isAvailableA1, miLeft, isAvailableB1,
                                    miAbove, true, isGt4x4);
+#endif
 
     if (bFound)
     {
@@ -955,6 +1075,14 @@ void PU::getIBCMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx, const
     {
       mrgCtx.mvFieldNeighbours[cnt * 2].setMvField(Mv(0, 0), MAX_NUM_REF);
       mrgCtx.interDirNeighbours[cnt] = 1;
+#if GDR_ENABLED
+      // GDR: zero mv(0,0)
+      if (isEncodeGdrClean)
+      {
+        mrgCtx.mvSolid[cnt << 1] = true && allCandSolidInAbove;
+        allCandSolidInAbove       = true && allCandSolidInAbove;
+      }
+#endif
       if (mrgCandIdx == cnt)
       {
         return;
@@ -974,6 +1102,10 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
   const Slice &slice         = *pu.cs->slice;
   const uint32_t maxNumMergeCand = pu.cs->sps->getMaxNumMergeCand();
   CHECK (maxNumMergeCand > MRG_MAX_NUM_CANDS, "selected maximum number of merge candidate exceeds global limit");
+#if GDR_ENABLED
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool  allCandSolidInAbove = true;
+#endif
   for (uint32_t ui = 0; ui < maxNumMergeCand; ++ui)
   {
     mrgCtx.BcwIdx[ui] = BCW_DEFAULT;
@@ -981,6 +1113,17 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
     mrgCtx.mrgTypeNeighbours [ui] = MRG_TYPE_DEFAULT_N;
     mrgCtx.mvFieldNeighbours[(ui << 1)    ].refIdx = NOT_VALID;
     mrgCtx.mvFieldNeighbours[(ui << 1) + 1].refIdx = NOT_VALID;
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      mrgCtx.mvSolid[(ui << 1) + 0] = true;
+      mrgCtx.mvSolid[(ui << 1) + 1] = true;
+      mrgCtx.mvValid[(ui << 1) + 0] = true;
+      mrgCtx.mvValid[(ui << 1) + 1] = true;
+      mrgCtx.mvPos[(ui << 1) + 0] = Position(0, 0);
+      mrgCtx.mvPos[(ui << 1) + 1] = Position(0, 0);
+    }
+#endif
     mrgCtx.useAltHpelIf[ui] = false;
   }
 
@@ -1010,9 +1153,27 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
     mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAbove->cu->BcwIdx : BCW_DEFAULT;
     mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miAbove.mv[0], miAbove.refIdx[0]);
 
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      Position pos = puAbove->lumaPos();
+      mrgCtx.mvPos[(cnt << 1) + 0] = pos;
+      mrgCtx.mvSolid[(cnt << 1) + 0] = cs.isClean(pos, pu.chType);
+      mrgCtx.mvValid[(cnt << 1) + 0] = cs.isClean(pu.Y().bottomRight(), miAbove.mv[0], REF_PIC_LIST_0, miAbove.refIdx[0]);
+    }
+#endif
     if (slice.isInterB())
     {
       mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miAbove.mv[1], miAbove.refIdx[1]);
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        Position pos = puAbove->lumaPos();
+        mrgCtx.mvPos[(cnt << 1) + 1] = pos;
+        mrgCtx.mvSolid[(cnt << 1) + 1] = cs.isClean(pos, pu.chType);
+        mrgCtx.mvValid[(cnt << 1) + 1] = cs.isClean(pu.Y().bottomRight(), miAbove.mv[1], REF_PIC_LIST_1, miAbove.refIdx[1]);
+      }
+#endif
     }
     if (mrgCandIdx == cnt)
     {
@@ -1045,10 +1206,28 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
       mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puLeft->cu->BcwIdx : BCW_DEFAULT;
       // get Mv from Left
       mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]);
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        Position pos = puLeft->lumaPos();
+        mrgCtx.mvPos[(cnt << 1) + 0] = pos;
+        mrgCtx.mvSolid[(cnt << 1) + 0] = cs.isClean(pos, pu.chType);
+        mrgCtx.mvValid[(cnt << 1) + 0] = cs.isClean(pu.Y().bottomRight(), miLeft.mv[0], REF_PIC_LIST_0, miLeft.refIdx[0]);
+      }
+#endif
 
       if (slice.isInterB())
       {
         mrgCtx.mvFieldNeighbours[(cnt << 1) + 1].setMvField(miLeft.mv[1], miLeft.refIdx[1]);
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          Position pos = puLeft->lumaPos();
+          mrgCtx.mvPos[(cnt << 1) + 1] = pos;
+          mrgCtx.mvSolid[(cnt << 1) + 1] = cs.isClean(pos, pu.chType);
+          mrgCtx.mvValid[(cnt << 1) + 1] = cs.isClean(pu.Y().bottomRight(), miLeft.mv[1], REF_PIC_LIST_1, miLeft.refIdx[1]);
+        }
+#endif
       }
       if (mrgCandIdx == cnt)
       {
@@ -1083,10 +1262,28 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
       // get Mv from Above-right
       mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAboveRight->cu->BcwIdx : BCW_DEFAULT;
       mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAboveRight.mv[0], miAboveRight.refIdx[0] );
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        Position pos = puAboveRight->lumaPos();
+        mrgCtx.mvPos[(cnt << 1) + 0] = pos;
+        mrgCtx.mvSolid[(cnt << 1) + 0] = cs.isClean(pos, pu.chType);
+        mrgCtx.mvValid[(cnt << 1) + 0] = cs.isClean(pu.Y().bottomRight(), miAboveRight.mv[0], REF_PIC_LIST_0, miAboveRight.refIdx[0]);
+      }
+#endif
 
       if( slice.isInterB() )
       {
         mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miAboveRight.mv[1], miAboveRight.refIdx[1] );
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          Position pos = puAboveRight->lumaPos();
+          mrgCtx.mvPos[(cnt << 1) + 1] = pos;
+          mrgCtx.mvSolid[(cnt << 1) + 1] = cs.isClean(pos, pu.chType);
+          mrgCtx.mvValid[(cnt << 1) + 1] = cs.isClean(pu.Y().bottomRight(), miAboveRight.mv[1], REF_PIC_LIST_1, miAboveRight.refIdx[1]);
+        }
+#endif
       }
 
       if (mrgCandIdx == cnt)
@@ -1120,10 +1317,28 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
       mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puLeftBottom->cu->BcwIdx : BCW_DEFAULT;
       // get Mv from Bottom-Left
       mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miBelowLeft.mv[0], miBelowLeft.refIdx[0] );
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        Position pos = puLeftBottom->lumaPos();
+        mrgCtx.mvPos[(cnt << 1) + 0] = pos;
+        mrgCtx.mvSolid[(cnt << 1) + 0] = cs.isClean(pos, pu.chType);
+        mrgCtx.mvValid[(cnt << 1) + 0] = cs.isClean(pu.Y().bottomRight(), miBelowLeft.mv[0], REF_PIC_LIST_0, miBelowLeft.refIdx[0]);
+      }
+#endif
 
       if( slice.isInterB() )
       {
         mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miBelowLeft.mv[1], miBelowLeft.refIdx[1] );
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          Position pos = puLeftBottom->lumaPos();
+          mrgCtx.mvPos[(cnt << 1) + 1] = pos;
+          mrgCtx.mvSolid[(cnt << 1) + 1] = cs.isClean(pos, pu.chType);
+          mrgCtx.mvValid[(cnt << 1) + 1] = cs.isClean(pu.Y().bottomRight(), miBelowLeft.mv[1], REF_PIC_LIST_1, miBelowLeft.refIdx[1]);
+        }
+#endif
       }
 
       if (mrgCandIdx == cnt)
@@ -1160,10 +1375,28 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
         mrgCtx.BcwIdx[cnt] = (mrgCtx.interDirNeighbours[cnt] == 3) ? puAboveLeft->cu->BcwIdx : BCW_DEFAULT;
         // get Mv from Above-Left
         mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAboveLeft.mv[0], miAboveLeft.refIdx[0] );
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          Position pos = puAboveLeft->lumaPos();
+          mrgCtx.mvPos[(cnt << 1) + 0] = pos;
+          mrgCtx.mvSolid[(cnt << 1) + 0] = cs.isClean(pos, pu.chType);
+          mrgCtx.mvValid[(cnt << 1) + 0] = cs.isClean(pu.Y().bottomRight(), miAboveLeft.mv[0], REF_PIC_LIST_0, miAboveLeft.refIdx[0]);
+        }
+#endif
 
         if( slice.isInterB() )
         {
           mrgCtx.mvFieldNeighbours[( cnt << 1 ) + 1].setMvField( miAboveLeft.mv[1], miAboveLeft.refIdx[1] );
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            Position pos = puAboveLeft->lumaPos();
+            mrgCtx.mvPos[(cnt << 1) + 1] = pos;
+            mrgCtx.mvSolid[(cnt << 1) + 1] = cs.isClean(pos, pu.chType);
+            mrgCtx.mvValid[(cnt << 1) + 1] = cs.isClean(pu.Y().bottomRight(), miAboveLeft.mv[1], REF_PIC_LIST_1, miAboveLeft.refIdx[1]);
+          }
+#endif
         }
 
         if (mrgCandIdx == cnt)
@@ -1187,6 +1420,12 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
     // offset the pos to be sure to "point" to the same position the uiAbsPartIdx would've pointed to
     Position posRB = pu.Y().bottomRight().offset( -3, -3 );
     const PreCalcValues& pcv = *cs.pcv;
+#if GDR_ENABLED
+    bool posC0inCurPicSolid = true;
+    bool posC1inCurPicSolid = true;
+    bool posC0inRefPicSolid = true;
+    bool posC1inRefPicSolid = true;
+#endif
 
     Position posC0;
     Position posC1 = pu.Y().center();
@@ -1218,6 +1457,24 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
     {
       dir     |= 1;
       mrgCtx.mvFieldNeighbours[2 * uiArrayAddr].setMvField(cColMv, iRefIdx);
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        Mv ccMv;
+
+        posC0inCurPicSolid = cs.isClean(posC0, CHANNEL_TYPE_LUMA);
+        posC1inCurPicSolid = cs.isClean(posC1, CHANNEL_TYPE_LUMA);
+        posC0inRefPicSolid = cs.isClean(posC0, REF_PIC_LIST_0, iRefIdx);
+        posC1inRefPicSolid = cs.isClean(posC1, REF_PIC_LIST_0, iRefIdx);
+
+        bool isMVP0exist = C0Avail && getColocatedMVP(pu, REF_PIC_LIST_0, posC0, ccMv, iRefIdx, false);
+
+        Position pos = isMVP0exist ? posC0 : posC1;
+        mrgCtx.mvPos[2 * uiArrayAddr] = pos;
+        mrgCtx.mvSolid[2 * uiArrayAddr] = isMVP0exist ? (posC0inCurPicSolid && posC0inRefPicSolid) : (posC1inCurPicSolid && posC1inRefPicSolid);
+        mrgCtx.mvValid[2 * uiArrayAddr] = cs.isClean(pu.Y().bottomRight(), ccMv, REF_PIC_LIST_0, iRefIdx);
+      }
+#endif
     }
 
     if (slice.isInterB())
@@ -1228,6 +1485,24 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
       {
         dir     |= 2;
         mrgCtx.mvFieldNeighbours[2 * uiArrayAddr + 1].setMvField(cColMv, iRefIdx);
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          Mv ccMv;
+
+          posC0inCurPicSolid = cs.isClean(posC0, CHANNEL_TYPE_LUMA);
+          posC1inCurPicSolid = cs.isClean(posC1, CHANNEL_TYPE_LUMA);
+          posC0inRefPicSolid = cs.isClean(posC0, REF_PIC_LIST_1, iRefIdx);
+          posC1inRefPicSolid = cs.isClean(posC1, REF_PIC_LIST_1, iRefIdx);
+
+          bool isMVP0exist = C0Avail && getColocatedMVP(pu, REF_PIC_LIST_1, posC0, ccMv, iRefIdx, false);
+
+          Position pos = isMVP0exist ? posC0 : posC1;
+          mrgCtx.mvPos[2 * uiArrayAddr + 1] = pos;
+          mrgCtx.mvSolid[2 * uiArrayAddr + 1] = isMVP0exist ? (posC0inCurPicSolid && posC0inRefPicSolid) : (posC1inCurPicSolid && posC1inRefPicSolid);
+          mrgCtx.mvValid[2 * uiArrayAddr + 1] = cs.isClean(pu.Y().bottomRight(), ccMv, REF_PIC_LIST_1, iRefIdx);
+        }
+#endif
       }
     }
 
@@ -1259,8 +1534,16 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
   if (cnt != maxNumMergeCandMin1)
   {
     bool isGt4x4 = true;
+#if GDR_ENABLED
+    allCandSolidInAbove = true;
+#endif
+#if GDR_ENABLED
+    bool bFound  = addMergeHMVPCand(cs, mrgCtx, mrgCandIdx, maxNumMergeCandMin1, cnt, isAvailableA1, miLeft,
+                                   isAvailableB1, miAbove, CU::isIBC(*pu.cu), isGt4x4, pu, allCandSolidInAbove);
+#else
     bool bFound  = addMergeHMVPCand(cs, mrgCtx, mrgCandIdx, maxNumMergeCandMin1, cnt, isAvailableA1, miLeft,
                                    isAvailableB1, miAbove, CU::isIBC(*pu.cu), isGt4x4);
+#endif
 
     if (bFound)
     {
@@ -1283,6 +1566,12 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
         const short refIdxI = mrgCtx.mvFieldNeighbours[0 * 2 + refListId].refIdx;
         const short refIdxJ = mrgCtx.mvFieldNeighbours[1 * 2 + refListId].refIdx;
 
+#if GDR_ENABLED
+        // GDR: Pairwise average candidate
+        bool mvISolid = mrgCtx.mvSolid[0 * 2 + refListId];
+        bool mvJSolid = mrgCtx.mvSolid[1 * 2 + refListId];
+        bool mvSolid = true;
+#endif
         // both MVs are invalid, skip
         if( (refIdxI == NOT_VALID) && (refIdxJ == NOT_VALID) )
         {
@@ -1302,17 +1591,53 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
           roundAffineMv(avgMv.hor, avgMv.ver, 1);
 
           mrgCtx.mvFieldNeighbours[cnt * 2 + refListId].setMvField( avgMv, refIdxI );
+#if GDR_ENABLED
+          // GDR: Pairwise candidate, average of I and J
+          if (isEncodeGdrClean)
+          {
+            mvSolid = mvISolid && mvJSolid && allCandSolidInAbove;
+
+            mrgCtx.mvPos[cnt * 2 + refListId] = Position(0, 0);
+            mrgCtx.mvSolid[cnt * 2 + refListId] = mvSolid && allCandSolidInAbove;
+            mrgCtx.mvValid[cnt * 2 + refListId] = cs.isClean(pu.Y().bottomRight(), avgMv, (RefPicList)refListId, refIdxI);
+            allCandSolidInAbove = mvSolid && allCandSolidInAbove;
+          }
+#endif
         }
         // only one MV is valid, take the only one MV
         else if( refIdxI != NOT_VALID )
         {
           Mv singleMv = mrgCtx.mvFieldNeighbours[0 * 2 + refListId].mv;
           mrgCtx.mvFieldNeighbours[cnt * 2 + refListId].setMvField( singleMv, refIdxI );
+#if GDR_ENABLED
+          // GDR: Pairwise candidate, only I is valid (single MV taken from I)
+          if (isEncodeGdrClean)
+          {
+            mvSolid = mvISolid && allCandSolidInAbove;
+
+            mrgCtx.mvPos[cnt * 2 + refListId] = Position(0, 0);
+            mrgCtx.mvSolid[cnt * 2 + refListId] = mvSolid && allCandSolidInAbove;
+            mrgCtx.mvValid[cnt * 2 + refListId] = cs.isClean(pu.Y().bottomRight(), singleMv, (RefPicList)refListId, refIdxI);
+            allCandSolidInAbove = mvSolid && allCandSolidInAbove;
+          }
+#endif
         }
         else if( refIdxJ != NOT_VALID )
         {
           Mv singleMv = mrgCtx.mvFieldNeighbours[1 * 2 + refListId].mv;
           mrgCtx.mvFieldNeighbours[cnt * 2 + refListId].setMvField( singleMv, refIdxJ );
+#if GDR_ENABLED
+          // GDR: Pairwise candidate, only J is valid (single MV taken from J)
+          if (isEncodeGdrClean)
+          {
+            mvSolid = mvJSolid && allCandSolidInAbove;
+
+            mrgCtx.mvPos[cnt * 2 + refListId] = Position(0, 0);
+            mrgCtx.mvSolid[cnt * 2 + refListId] = mvSolid && allCandSolidInAbove;
+            mrgCtx.mvValid[cnt * 2 + refListId] = cs.isClean(pu.Y().bottomRight(), singleMv, (RefPicList)refListId, refIdxJ);
+            allCandSolidInAbove = mvSolid && allCandSolidInAbove;
+          }
+#endif
         }
       }
 
@@ -1343,10 +1668,30 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
     mrgCtx.mvFieldNeighbours  [uiArrayAddr << 1].setMvField(Mv(0, 0), r);
     mrgCtx.useAltHpelIf[uiArrayAddr] = false;
 
+#if GDR_ENABLED
+    // GDR: zero mv(0,0)
+    if (isEncodeGdrClean)
+    {
+      mrgCtx.mvPos[uiArrayAddr << 1] = Position(0, 0);
+      mrgCtx.mvSolid[uiArrayAddr << 1] = true && allCandSolidInAbove;
+      mrgCtx.mvValid[uiArrayAddr << 1] = cs.isClean(pu.Y().bottomRight(), Mv(0, 0), REF_PIC_LIST_0, r);
+      allCandSolidInAbove = true && allCandSolidInAbove;
+    }
+#endif
     if (slice.isInterB())
     {
       mrgCtx.interDirNeighbours [ uiArrayAddr          ] = 3;
       mrgCtx.mvFieldNeighbours  [(uiArrayAddr << 1) + 1].setMvField(Mv(0, 0), r);
+#if GDR_ENABLED
+      // GDR: zero mv(0,0)
+      if (isEncodeGdrClean)
+      {
+        mrgCtx.mvPos[(uiArrayAddr << 1) + 1] = Position(0, 0);
+        mrgCtx.mvSolid[(uiArrayAddr << 1) + 1] = true && allCandSolidInAbove;
+        mrgCtx.mvValid[(uiArrayAddr << 1) + 1] = cs.isClean(pu.Y().bottomRight(), Mv(0, 0), (RefPicList)REF_PIC_LIST_1, r);
+        allCandSolidInAbove = true && allCandSolidInAbove;
+      }
+#endif
     }
 
     if ( mrgCtx.interDirNeighbours[uiArrayAddr] == 1 && pu.cs->slice->getRefPic(REF_PIC_LIST_0, mrgCtx.mvFieldNeighbours[uiArrayAddr << 1].refIdx)->getPOC() == pu.cs->slice->getPOC())
@@ -1464,6 +1809,20 @@ void PU::getInterMMVDMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx,
   int currBaseNum = 0;
   const uint16_t maxNumMergeCand = mrgCtx.numValidMergeCand;
 
+#if GDR_ENABLED
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
+
+#if GDR_ENABLED
+  for (int k = 0; k < MMVD_BASE_MV_NUM; k++)
+  {
+    mrgCtx.mmvdSolid[k][0] = true;
+    mrgCtx.mmvdSolid[k][1] = true;
+    mrgCtx.mmvdValid[k][0] = true;
+    mrgCtx.mmvdValid[k][1] = true;
+  }
+#endif
   for (k = 0; k < maxNumMergeCand; k++)
   {
     if (mrgCtx.mrgTypeNeighbours[k] == MRG_TYPE_DEFAULT_N)
@@ -1475,16 +1834,37 @@ void PU::getInterMMVDMergeCandidates(const PredictionUnit &pu, MergeCtx& mrgCtx,
       {
         mrgCtx.mmvdBaseMv[currBaseNum][0] = mrgCtx.mvFieldNeighbours[(k << 1)];
         mrgCtx.mmvdBaseMv[currBaseNum][1] = mrgCtx.mvFieldNeighbours[(k << 1) + 1];
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          mrgCtx.mmvdSolid[currBaseNum][0] = mrgCtx.mvSolid[(k << 1) + 0];
+          mrgCtx.mmvdSolid[currBaseNum][1] = mrgCtx.mvSolid[(k << 1) + 1];
+        }
+#endif
       }
       else if (refIdxList0 >= 0)
       {
         mrgCtx.mmvdBaseMv[currBaseNum][0] = mrgCtx.mvFieldNeighbours[(k << 1)];
         mrgCtx.mmvdBaseMv[currBaseNum][1] = MvField(Mv(0, 0), -1);
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          mrgCtx.mmvdSolid[currBaseNum][0] = mrgCtx.mvSolid[(k << 1) + 0];
+          mrgCtx.mmvdSolid[currBaseNum][1] = true;
+        }
+#endif
       }
       else if (refIdxList1 >= 0)
       {
         mrgCtx.mmvdBaseMv[currBaseNum][0] = MvField(Mv(0, 0), -1);
         mrgCtx.mmvdBaseMv[currBaseNum][1] = mrgCtx.mvFieldNeighbours[(k << 1) + 1];
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          mrgCtx.mmvdSolid[currBaseNum][0] = true;
+          mrgCtx.mmvdSolid[currBaseNum][1] = mrgCtx.mvSolid[(k << 1) + 1];
+        }
+#endif
       }
       mrgCtx.mmvdUseAltHpelIf[currBaseNum] = mrgCtx.useAltHpelIf[k];
 
@@ -1826,6 +2206,10 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in
   Position posRT = pu.Y().topRight();
   Position posLB = pu.Y().bottomLeft();
 
+#if GDR_ENABLED
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool &allCandSolidInAbove = amvpInfo.allCandSolidInAbove;
+#endif
   {
     bool bAdded = addMVPCandUnscaled( pu, eRefPicList, refIdx, posLB, MD_BELOW_LEFT, *pInfo );
 
@@ -1896,6 +2280,24 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in
     }
     if ( ( C0Avail && getColocatedMVP( pu, eRefPicList, posC0, cColMv, refIdx_Col, false ) ) || getColocatedMVP( pu, eRefPicList, posC1, cColMv, refIdx_Col, false ) )
     {
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        Mv   ccMv;
+        bool posC0inCurPicSolid = cs.isClean(posC0, CHANNEL_TYPE_LUMA);
+        bool posC1inCurPicSolid = cs.isClean(posC1, CHANNEL_TYPE_LUMA);
+        bool posC0inRefPicSolid = cs.isClean(posC0, REF_PIC_LIST_0, refIdx_Col);
+        bool posC1inRefPicSolid = cs.isClean(posC1, REF_PIC_LIST_0, refIdx_Col);
+
+        bool isMVP0exist = C0Avail && getColocatedMVP(pu, eRefPicList, posC0, ccMv, refIdx_Col, false);
+
+        Position pos = isMVP0exist ? posC0 : posC1;
+        pInfo->mvPos[pInfo->numCand]   = pos;
+        pInfo->mvType[pInfo->numCand]  = isMVP0exist ? MVP_TMVP_C0 : MVP_TMVP_C1;
+        pInfo->mvSolid[pInfo->numCand] = allCandSolidInAbove && (isMVP0exist ? (posC0inCurPicSolid && posC0inRefPicSolid) : (posC1inCurPicSolid && posC1inRefPicSolid));
+        allCandSolidInAbove = allCandSolidInAbove && pInfo->mvSolid[pInfo->numCand];
+      }
+#endif
       cColMv.roundTransPrecInternal2Amvr(pu.cu->imv);
       pInfo->mvCand[pInfo->numCand++] = cColMv;
     }
@@ -1914,6 +2316,13 @@ void PU::fillMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, const in
 
   while (pInfo->numCand < AMVP_MAX_NUM_CANDS)
   {
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      pInfo->mvType[pInfo->numCand] = MVP_ZERO;
+      allCandSolidInAbove = pInfo->mvSolid[pInfo->numCand] = true && allCandSolidInAbove;
+    }
+#endif
     pInfo->mvCand[pInfo->numCand] = Mv( 0, 0 );
     pInfo->numCand++;
   }
@@ -1930,6 +2339,9 @@ bool PU::addAffineMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &r
   const PredictionUnit *neibPU = NULL;
   Position neibPos;
 
+#if GDR_ENABLED
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
   switch ( dir )
   {
   case MD_LEFT:
@@ -1959,6 +2371,11 @@ bool PU::addAffineMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &r
   }
 
   Mv outputAffineMv[3];
+#if GDR_ENABLED
+  bool     outputAffineMvSolid[3];
+  MvpType  outputAffineMvType[3];
+  Position outputAffineMvPos[3];
+#endif
   const MotionInfo& neibMi = neibPU->getMotionInfo( neibPos );
 
   const int        currRefPOC = cs.slice->getRefPic( refPicList, refIdx )->getPOC();
@@ -1974,15 +2391,54 @@ bool PU::addAffineMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &r
       continue;
     }
 
+#if GDR_ENABLED
+    // note: get MV from neighbor of neibPU (LB, RB) and save to outputAffineMv
+    if (isEncodeGdrClean)
+    {
+      xInheritedAffineMv(pu, neibPU, eRefPicListIndex, outputAffineMv, outputAffineMvSolid, outputAffineMvType, outputAffineMvPos);
+    }
+    else
+    {
+      xInheritedAffineMv(pu, neibPU, eRefPicListIndex, outputAffineMv);
+    }
+#else
     xInheritedAffineMv( pu, neibPU, eRefPicListIndex, outputAffineMv );
+#endif
     outputAffineMv[0].roundAffinePrecInternal2Amvr(pu.cu->imv);
     outputAffineMv[1].roundAffinePrecInternal2Amvr(pu.cu->imv);
     affiAMVPInfo.mvCandLT[affiAMVPInfo.numCand] = outputAffineMv[0];
     affiAMVPInfo.mvCandRT[affiAMVPInfo.numCand] = outputAffineMv[1];
+#if GDR_ENABLED
+    bool neighClean = true;
+
+    if (isEncodeGdrClean)
+    {
+      neighClean = cs.isClean(neibPU->Y().pos(), CHANNEL_TYPE_LUMA);
+
+      affiAMVPInfo.mvSolidLT[affiAMVPInfo.numCand] = neighClean && outputAffineMvSolid[0];
+      affiAMVPInfo.mvSolidRT[affiAMVPInfo.numCand] = neighClean && outputAffineMvSolid[1];
+
+      affiAMVPInfo.mvTypeLT[affiAMVPInfo.numCand]  = outputAffineMvType[0];
+      affiAMVPInfo.mvTypeRT[affiAMVPInfo.numCand]  = outputAffineMvType[1];
+
+      affiAMVPInfo.mvPosLT[affiAMVPInfo.numCand]   = outputAffineMvPos[0];
+      affiAMVPInfo.mvPosRT[affiAMVPInfo.numCand]   = outputAffineMvPos[1];
+    }
+#endif
     if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
     {
       outputAffineMv[2].roundAffinePrecInternal2Amvr(pu.cu->imv);
       affiAMVPInfo.mvCandLB[affiAMVPInfo.numCand] = outputAffineMv[2];
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        neighClean = cs.isClean(neibPU->Y().pos(), CHANNEL_TYPE_LUMA);
+
+        affiAMVPInfo.mvSolidLB[affiAMVPInfo.numCand] = neighClean && outputAffineMvSolid[2];
+        affiAMVPInfo.mvTypeLB[affiAMVPInfo.numCand]  = outputAffineMvType[2];
+        affiAMVPInfo.mvPosLB[affiAMVPInfo.numCand]   = outputAffineMvPos[2];
+      }
+#endif
     }
     affiAMVPInfo.numCand++;
     return true;
@@ -1990,6 +2446,132 @@ bool PU::addAffineMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &r
 
   return false;
 }
+#if GDR_ENABLED
+// GDR-aware overload: derives the control-point MVs rcMv of pu from the affine neighbour
+// puNeighbour for eRefPicList. When GDR clean-area encoding is active (isEncodeGdrClean) it
+// also reports, per control point, a clean flag (rcMvSolid), the candidate type (rcMvType)
+// and the source position (rcMvPos); otherwise these three arrays are left untouched.
+// rcMv[2] is written only when the current CU uses the 6-parameter affine model.
+void PU::xInheritedAffineMv(const PredictionUnit &pu, const PredictionUnit* puNeighbour, RefPicList eRefPicList, Mv rcMv[3], bool rcMvSolid[3], MvpType rcMvType[3], Position rcMvPos[3])
+{
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+
+  int posNeiX = puNeighbour->Y().pos().x;
+  int posNeiY = puNeighbour->Y().pos().y;
+  int posCurX = pu.Y().pos().x;
+  int posCurY = pu.Y().pos().y;
+
+  int neiW = puNeighbour->Y().width;
+  int curW = pu.Y().width;
+  int neiH = puNeighbour->Y().height;
+  int curH = pu.Y().height;
+
+  // start from the neighbour's stored affine control-point MVs
+  Mv mvLT = puNeighbour->mvAffi[eRefPicList][0];
+  Mv mvRT = puNeighbour->mvAffi[eRefPicList][1];
+  Mv mvLB = puNeighbour->mvAffi[eRefPicList][2];
+
+  bool neighClean = true;
+
+  if (isEncodeGdrClean)
+  {
+    neighClean = cs.isClean(puNeighbour->Y().pos(), CHANNEL_TYPE_LUMA);
+
+    rcMvSolid[0] = neighClean;
+    rcMvSolid[1] = neighClean;
+    rcMvSolid[2] = neighClean;
+
+    rcMvType[0] = AFFINE_INHERIT;
+    rcMvType[1] = AFFINE_INHERIT;
+    rcMvType[2] = AFFINE_INHERIT;
+
+    rcMvPos[0] = puNeighbour->Y().pos();
+    rcMvPos[1] = puNeighbour->Y().pos();
+    rcMvPos[2] = puNeighbour->Y().pos();
+  }
+
+  bool isTopCtuBoundary = false;
+  if ((posNeiY + neiH) % cs.sps->getCTUSize() == 0 && (posNeiY + neiH) == posCurY)
+  {
+    // use bottom-left and bottom-right sub-block MVs for inheritance
+    const Position posRB = puNeighbour->Y().bottomRight();
+    const Position posLB = puNeighbour->Y().bottomLeft();
+
+    mvLT = puNeighbour->getMotionInfo(posLB).mv[eRefPicList];
+    mvRT = puNeighbour->getMotionInfo(posRB).mv[eRefPicList];
+
+    if (isEncodeGdrClean)
+    {
+      // LT/RT now come from the bottom sub-blocks, so their clean flags follow those
+      // positions; neighClean (already queried above for the same position) covers index 2
+      rcMvSolid[0] = cs.isClean(posLB, CHANNEL_TYPE_LUMA);
+      rcMvSolid[1] = cs.isClean(posRB, CHANNEL_TYPE_LUMA);
+      rcMvSolid[2] = neighClean;
+
+      rcMvType[0]  = AFFINE_INHERIT_LB_RB;
+      rcMvType[1]  = AFFINE_INHERIT_LB_RB;
+      rcMvType[2]  = AFFINE_INHERIT_LB_RB;
+
+      rcMvPos[0]   = posLB;
+      rcMvPos[1]   = posRB;
+      rcMvPos[2]   = puNeighbour->Y().pos();
+    }
+
+    posNeiY += neiH;
+    isTopCtuBoundary = true;
+  }
+
+  // MV differences across the neighbour, scaled by 2^shift and divided by its width/height
+  int shift = MAX_CU_DEPTH;
+  int iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY;
+
+  iDMvHorX = (mvRT - mvLT).getHor() << (shift - floorLog2(neiW));
+  iDMvHorY = (mvRT - mvLT).getVer() << (shift - floorLog2(neiW));
+  if (puNeighbour->cu->affineType == AFFINEMODEL_6PARAM && !isTopCtuBoundary)
+  {
+    iDMvVerX = (mvLB - mvLT).getHor() << (shift - floorLog2(neiH));
+    iDMvVerY = (mvLB - mvLT).getVer() << (shift - floorLog2(neiH));
+  }
+  else
+  {
+    // no usable LB vector: the vertical gradient is the horizontal one rotated by 90 degrees
+    iDMvVerX = -iDMvHorY;
+    iDMvVerY = iDMvHorX;
+  }
+
+  int iMvScaleHor = mvLT.getHor() << shift;
+  int iMvScaleVer = mvLT.getVer() << shift;
+  int horTmp, verTmp;
+
+  // v0
+  horTmp = iMvScaleHor + iDMvHorX * (posCurX - posNeiX) + iDMvVerX * (posCurY - posNeiY);
+  verTmp = iMvScaleVer + iDMvHorY * (posCurX - posNeiX) + iDMvVerY * (posCurY - posNeiY);
+  roundAffineMv(horTmp, verTmp, shift);
+  rcMv[0].hor = horTmp;
+  rcMv[0].ver = verTmp;
+  rcMv[0].clipToStorageBitDepth();
+
+  // v1
+  horTmp = iMvScaleHor + iDMvHorX * (posCurX + curW - posNeiX) + iDMvVerX * (posCurY - posNeiY);
+  verTmp = iMvScaleVer + iDMvHorY * (posCurX + curW - posNeiX) + iDMvVerY * (posCurY - posNeiY);
+  roundAffineMv(horTmp, verTmp, shift);
+  rcMv[1].hor = horTmp;
+  rcMv[1].ver = verTmp;
+  rcMv[1].clipToStorageBitDepth();
+
+  // v2
+  if (pu.cu->affineType == AFFINEMODEL_6PARAM)
+  {
+    horTmp = iMvScaleHor + iDMvHorX * (posCurX - posNeiX) + iDMvVerX * (posCurY + curH - posNeiY);
+    verTmp = iMvScaleVer + iDMvHorY * (posCurX - posNeiX) + iDMvVerY * (posCurY + curH - posNeiY);
+    roundAffineMv(horTmp, verTmp, shift);
+    rcMv[2].hor = horTmp;
+    rcMv[2].ver = verTmp;
+    rcMv[2].clipToStorageBitDepth();
+  }
+}
+#endif
 
 void PU::xInheritedAffineMv( const PredictionUnit &pu, const PredictionUnit* puNeighbour, RefPicList eRefPicList, Mv rcMv[3] )
 {
@@ -2078,8 +2660,31 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
     return;
   }
 
+#if GDR_ENABLED
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool &allCandSolidInAbove = affiAMVPInfo.allCandSolidInAbove;
+
+  if (isEncodeGdrClean)
+  {
+    allCandSolidInAbove = true;
+
+    affiAMVPInfo.allCandSolidInAbove = true;
+    for (int i = 0; i < AMVP_MAX_NUM_CANDS_MEM; i++)
+    {
+      affiAMVPInfo.mvSolidLT[i] = true;
+      affiAMVPInfo.mvSolidRT[i] = true;
+      affiAMVPInfo.mvSolidLB[i] = true;
+    }
+  }
+#endif
   // insert inherited affine candidates
   Mv outputAffineMv[3];
+#if GDR_ENABLED
+  bool     outputAffineMvSolid[3];
+  MvpType  outputAffineMvType[3];
+  Position outputAffineMvPos[3];
+#endif
   Position posLT = pu.Y().topLeft();
   Position posRT = pu.Y().topRight();
   Position posLB = pu.Y().bottomLeft();
@@ -2117,6 +2722,18 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
   AMVPInfo amvpInfo0;
   amvpInfo0.numCand = 0;
 
+#if GDR_ENABLED
+  if (isEncodeGdrClean)
+  {
+    amvpInfo0.allCandSolidInAbove = true;
+    for (int i = 0; i < AMVP_MAX_NUM_CANDS_MEM; i++)
+    {
+      amvpInfo0.mvSolid[i] = true;
+      amvpInfo0.mvValid[i] = true;
+    }
+  }
+#endif
+
   // A->C: Above Left, Above, Left
   addMVPCandUnscaled( pu, eRefPicList, refIdx, posLT, MD_ABOVE_LEFT, amvpInfo0 );
   if ( amvpInfo0.numCand < 1 )
@@ -2133,6 +2750,18 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
   AMVPInfo amvpInfo1;
   amvpInfo1.numCand = 0;
 
+#if GDR_ENABLED
+  if (isEncodeGdrClean)
+  {
+    amvpInfo1.allCandSolidInAbove = true;
+    for (int i = 0; i < AMVP_MAX_NUM_CANDS_MEM; i++)
+    {
+      amvpInfo1.mvSolid[i] = true;
+      amvpInfo1.mvValid[i] = true;
+    }
+  }
+#endif
+
   // D->E: Above, Above Right
   addMVPCandUnscaled( pu, eRefPicList, refIdx, posRT, MD_ABOVE, amvpInfo1 );
   if ( amvpInfo1.numCand < 1 )
@@ -2145,6 +2774,18 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
   AMVPInfo amvpInfo2;
   amvpInfo2.numCand = 0;
 
+#if GDR_ENABLED
+  if (isEncodeGdrClean)
+  {
+    amvpInfo2.allCandSolidInAbove = true;
+    for (int i = 0; i < AMVP_MAX_NUM_CANDS_MEM; i++)
+    {
+      amvpInfo2.mvSolid[i] = true;
+      amvpInfo2.mvValid[i] = true;
+    }
+  }
+#endif
+
   // F->G: Left, Below Left
   addMVPCandUnscaled( pu, eRefPicList, refIdx, posLB, MD_LEFT, amvpInfo2 );
   if ( amvpInfo2.numCand < 1 )
@@ -2157,6 +2798,24 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
   outputAffineMv[1] = amvpInfo1.mvCand[0];
   outputAffineMv[2] = amvpInfo2.mvCand[0];
 
+#if GDR_ENABLED
+  if (isEncodeGdrClean)
+  {
+    outputAffineMvSolid[0] = amvpInfo0.mvSolid[0] && allCandSolidInAbove;
+    outputAffineMvSolid[1] = amvpInfo1.mvSolid[0] && allCandSolidInAbove;
+    outputAffineMvSolid[2] = amvpInfo2.mvSolid[0] && allCandSolidInAbove;
+
+    outputAffineMvPos[0] = amvpInfo0.mvPos[0];
+    outputAffineMvPos[1] = amvpInfo1.mvPos[0];
+    outputAffineMvPos[2] = amvpInfo2.mvPos[0];
+
+    outputAffineMvType[0] = amvpInfo0.mvType[0];
+    outputAffineMvType[1] = amvpInfo1.mvType[0];
+    outputAffineMvType[2] = amvpInfo2.mvType[0];
+
+    allCandSolidInAbove = allCandSolidInAbove && outputAffineMvSolid[0] && outputAffineMvSolid[1] && outputAffineMvSolid[2];
+  }
+#endif
   outputAffineMv[0].roundAffinePrecInternal2Amvr(pu.cu->imv);
   outputAffineMv[1].roundAffinePrecInternal2Amvr(pu.cu->imv);
   outputAffineMv[2].roundAffinePrecInternal2Amvr(pu.cu->imv);
@@ -2166,6 +2825,24 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
     affiAMVPInfo.mvCandLT[affiAMVPInfo.numCand] = outputAffineMv[0];
     affiAMVPInfo.mvCandRT[affiAMVPInfo.numCand] = outputAffineMv[1];
     affiAMVPInfo.mvCandLB[affiAMVPInfo.numCand] = outputAffineMv[2];
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      affiAMVPInfo.mvSolidLT[affiAMVPInfo.numCand] = outputAffineMvSolid[0] && allCandSolidInAbove;
+      affiAMVPInfo.mvSolidRT[affiAMVPInfo.numCand] = outputAffineMvSolid[1] && allCandSolidInAbove;
+      affiAMVPInfo.mvSolidLB[affiAMVPInfo.numCand] = outputAffineMvSolid[2] && allCandSolidInAbove;
+
+      affiAMVPInfo.mvPosLT[affiAMVPInfo.numCand] = outputAffineMvPos[0];
+      affiAMVPInfo.mvPosRT[affiAMVPInfo.numCand] = outputAffineMvPos[1];
+      affiAMVPInfo.mvPosLB[affiAMVPInfo.numCand] = outputAffineMvPos[2];
+
+      affiAMVPInfo.mvTypeLT[affiAMVPInfo.numCand] = outputAffineMvType[0];
+      affiAMVPInfo.mvTypeRT[affiAMVPInfo.numCand] = outputAffineMvType[1];
+      affiAMVPInfo.mvTypeLB[affiAMVPInfo.numCand] = outputAffineMvType[2];
+
+      allCandSolidInAbove = allCandSolidInAbove && outputAffineMvSolid[0] && outputAffineMvSolid[1] && outputAffineMvSolid[2];
+    }
+#endif
     affiAMVPInfo.numCand++;
   }
 
@@ -2179,6 +2856,24 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
         affiAMVPInfo.mvCandLT[affiAMVPInfo.numCand] = outputAffineMv[i];
         affiAMVPInfo.mvCandRT[affiAMVPInfo.numCand] = outputAffineMv[i];
         affiAMVPInfo.mvCandLB[affiAMVPInfo.numCand] = outputAffineMv[i];
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          affiAMVPInfo.mvSolidLT[affiAMVPInfo.numCand] = outputAffineMvSolid[i] && allCandSolidInAbove;
+          affiAMVPInfo.mvSolidRT[affiAMVPInfo.numCand] = outputAffineMvSolid[i] && allCandSolidInAbove;
+          affiAMVPInfo.mvSolidLB[affiAMVPInfo.numCand] = outputAffineMvSolid[i] && allCandSolidInAbove;
+
+          affiAMVPInfo.mvPosLT[affiAMVPInfo.numCand] = outputAffineMvPos[i];
+          affiAMVPInfo.mvPosRT[affiAMVPInfo.numCand] = outputAffineMvPos[i];
+          affiAMVPInfo.mvPosLB[affiAMVPInfo.numCand] = outputAffineMvPos[i];
+
+          affiAMVPInfo.mvTypeLT[affiAMVPInfo.numCand] = outputAffineMvType[i];
+          affiAMVPInfo.mvTypeRT[affiAMVPInfo.numCand] = outputAffineMvType[i];
+          affiAMVPInfo.mvTypeLB[affiAMVPInfo.numCand] = outputAffineMvType[i];
+
+          allCandSolidInAbove = allCandSolidInAbove && outputAffineMvSolid[i];
+        }
+#endif
         affiAMVPInfo.numCand++;
       }
     }
@@ -2218,6 +2913,52 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
         affiAMVPInfo.mvCandLT[affiAMVPInfo.numCand] = cColMv;
         affiAMVPInfo.mvCandRT[affiAMVPInfo.numCand] = cColMv;
         affiAMVPInfo.mvCandLB[affiAMVPInfo.numCand] = cColMv;
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          Mv ccMv;
+
+          bool posC0inCurPicSolid = cs.isClean(posC0, CHANNEL_TYPE_LUMA);
+          bool posC1inCurPicSolid = cs.isClean(posC1, CHANNEL_TYPE_LUMA);
+          bool posC0inRefPicSolid = cs.isClean(posC0, eRefPicList, refIdxCol);
+          bool posC1inRefPicSolid = cs.isClean(posC1, eRefPicList, refIdxCol);
+
+          bool isMVP0exist = C0Avail && getColocatedMVP(pu, eRefPicList, posC0, ccMv, refIdxCol, false);
+
+          if (isMVP0exist)
+          {
+            affiAMVPInfo.mvSolidLT[affiAMVPInfo.numCand] = posC0inCurPicSolid && posC0inRefPicSolid && allCandSolidInAbove;
+            affiAMVPInfo.mvSolidRT[affiAMVPInfo.numCand] = posC0inCurPicSolid && posC0inRefPicSolid && allCandSolidInAbove;
+            affiAMVPInfo.mvSolidLB[affiAMVPInfo.numCand] = posC0inCurPicSolid && posC0inRefPicSolid && allCandSolidInAbove;
+
+            affiAMVPInfo.mvPosLT[affiAMVPInfo.numCand] = posC0;
+            affiAMVPInfo.mvPosRT[affiAMVPInfo.numCand] = posC0;
+            affiAMVPInfo.mvPosLB[affiAMVPInfo.numCand] = posC0;
+
+            affiAMVPInfo.mvTypeLT[affiAMVPInfo.numCand] = MVP_TMVP_C0;
+            affiAMVPInfo.mvTypeRT[affiAMVPInfo.numCand] = MVP_TMVP_C0;
+            affiAMVPInfo.mvTypeLB[affiAMVPInfo.numCand] = MVP_TMVP_C0;
+
+            allCandSolidInAbove = allCandSolidInAbove && affiAMVPInfo.mvSolidLT[affiAMVPInfo.numCand] && affiAMVPInfo.mvSolidRT[affiAMVPInfo.numCand] && affiAMVPInfo.mvSolidLB[affiAMVPInfo.numCand];
+          }
+          else
+          {
+            affiAMVPInfo.mvSolidLT[affiAMVPInfo.numCand] = posC1inCurPicSolid && posC1inRefPicSolid && allCandSolidInAbove;
+            affiAMVPInfo.mvSolidRT[affiAMVPInfo.numCand] = posC1inCurPicSolid && posC1inRefPicSolid && allCandSolidInAbove;
+            affiAMVPInfo.mvSolidLB[affiAMVPInfo.numCand] = posC1inCurPicSolid && posC1inRefPicSolid && allCandSolidInAbove;
+
+            affiAMVPInfo.mvPosLT[affiAMVPInfo.numCand] = posC1;
+            affiAMVPInfo.mvPosRT[affiAMVPInfo.numCand] = posC1;
+            affiAMVPInfo.mvPosLB[affiAMVPInfo.numCand] = posC1;
+
+            affiAMVPInfo.mvTypeLT[affiAMVPInfo.numCand] = MVP_TMVP_C1;
+            affiAMVPInfo.mvTypeRT[affiAMVPInfo.numCand] = MVP_TMVP_C1;
+            affiAMVPInfo.mvTypeLB[affiAMVPInfo.numCand] = MVP_TMVP_C1;
+
+            allCandSolidInAbove = allCandSolidInAbove && affiAMVPInfo.mvSolidLT[affiAMVPInfo.numCand] && affiAMVPInfo.mvSolidRT[affiAMVPInfo.numCand] && affiAMVPInfo.mvSolidLB[affiAMVPInfo.numCand];
+          }
+        }
+#endif
         affiAMVPInfo.numCand++;
       }
     }
@@ -2230,6 +2971,24 @@ void PU::fillAffineMvpCand(PredictionUnit &pu, const RefPicList &eRefPicList, co
         affiAMVPInfo.mvCandLT[affiAMVPInfo.numCand].setZero();
         affiAMVPInfo.mvCandRT[affiAMVPInfo.numCand].setZero();
         affiAMVPInfo.mvCandLB[affiAMVPInfo.numCand].setZero();
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          affiAMVPInfo.mvSolidLT[affiAMVPInfo.numCand] = true && allCandSolidInAbove;
+          affiAMVPInfo.mvSolidRT[affiAMVPInfo.numCand] = true && allCandSolidInAbove;
+          affiAMVPInfo.mvSolidLB[affiAMVPInfo.numCand] = true && allCandSolidInAbove;
+
+          affiAMVPInfo.mvPosLT[affiAMVPInfo.numCand] = Position(0, 0);
+          affiAMVPInfo.mvPosRT[affiAMVPInfo.numCand] = Position(0, 0);
+          affiAMVPInfo.mvPosLB[affiAMVPInfo.numCand] = Position(0, 0);
+
+          affiAMVPInfo.mvTypeLT[affiAMVPInfo.numCand] = MVP_ZERO;
+          affiAMVPInfo.mvTypeRT[affiAMVPInfo.numCand] = MVP_ZERO;
+          affiAMVPInfo.mvTypeLB[affiAMVPInfo.numCand] = MVP_ZERO;
+
+          allCandSolidInAbove = allCandSolidInAbove && affiAMVPInfo.mvSolidLT[affiAMVPInfo.numCand] && affiAMVPInfo.mvSolidRT[affiAMVPInfo.numCand] && affiAMVPInfo.mvSolidLB[affiAMVPInfo.numCand];
+        }
+#endif
         affiAMVPInfo.numCand++;
       }
     }
@@ -2249,6 +3008,11 @@ bool PU::addMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &eRefPic
   const PredictionUnit *neibPU = NULL;
         Position neibPos;
 
+#if GDR_ENABLED
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool &allCandSolidInAbove = info.allCandSolidInAbove;
+#endif
+
   switch (eDir)
   {
   case MD_LEFT:
@@ -2289,6 +3053,18 @@ bool PU::addMVPCandUnscaled( const PredictionUnit &pu, const RefPicList &eRefPic
 
     if( neibRefIdx >= 0 && currRefPOC == cs.slice->getRefPOC( eRefPicListIndex, neibRefIdx ) )
     {
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        bool isSolid = cs.isClean(neibPos, CHANNEL_TYPE_LUMA);
+
+        info.mvSolid[info.numCand] = isSolid && allCandSolidInAbove;
+        info.mvType[info.numCand] = (MvpType)eDir;
+        info.mvPos[info.numCand] = neibPos;
+
+        allCandSolidInAbove = isSolid && allCandSolidInAbove;
+      }
+#endif
       info.mvCand[info.numCand++] = neibMi.mv[eRefPicListIndex];
       return true;
     }
@@ -2308,6 +3084,22 @@ void PU::addAMVPHMVPCand(const PredictionUnit &pu, const RefPicList eRefPicList,
   int num_allowedCand = std::min(MAX_NUM_HMVP_AVMPCANDS, num_avai_candInLUT);
   const RefPicList eRefPicList2nd = (eRefPicList == REF_PIC_LIST_0) ? REF_PIC_LIST_1 : REF_PIC_LIST_0;
 
+#if GDR_ENABLED
+  CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool &allCandSolidInAbove = info.allCandSolidInAbove;
+#endif
+
+
+#if GDR_ENABLED
+  bool vbOnCtuBoundary = true;
+  if (isEncodeGdrClean)
+  {
+    vbOnCtuBoundary = (pu.cs->picHeader->getNumVerVirtualBoundaries() == 0) || (pu.cs->picHeader->getVirtualBoundariesPosX(0) % pu.cs->sps->getMaxCUWidth() == 0);
+    allCandSolidInAbove = allCandSolidInAbove && vbOnCtuBoundary;
+  }
+#endif
+
   for (int mrgIdx = 1; mrgIdx <= num_allowedCand; mrgIdx++)
   {
     if (info.numCand >= AMVP_MAX_NUM_CANDS)
@@ -2326,6 +3118,15 @@ void PU::addAMVPHMVPCand(const PredictionUnit &pu, const RefPicList eRefPicList,
         Mv pmv = neibMi.mv[eRefPicListIndex];
         pmv.roundTransPrecInternal2Amvr(pu.cu->imv);
 
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          info.mvPos[info.numCand]   = neibMi.sourcePos;
+          info.mvType[info.numCand]  = MVP_HMVP;
+          info.mvSolid[info.numCand] = allCandSolidInAbove && vbOnCtuBoundary; //  cs.isClean(neibMi.soPos, CHANNEL_TYPE_LUMA);
+          allCandSolidInAbove = allCandSolidInAbove && vbOnCtuBoundary;
+        }
+#endif
         info.mvCand[info.numCand++] = pmv;
         if (info.numCand >= AMVP_MAX_NUM_CANDS)
         {
@@ -2458,9 +3259,7 @@ void PU::getAffineControlPointCand(const PredictionUnit &pu, MotionInfo mi[4], b
         cMv[l][1].clipToStorageBitDepth();
         break;
 
-      default:
-        CHECK( 1, "Invalid model index!\n" );
-        break;
+      default: THROW("Invalid model index!"); break;
       }
     }
     else
@@ -2554,6 +3353,9 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
   const Slice &slice = *pu.cs->slice;
   const uint32_t maxNumAffineMergeCand = slice.getPicHeader()->getMaxNumAffineMergeCand();
   const unsigned plevel = pu.cs->sps->getLog2ParallelMergeLevelMinus2() + 2;
+#if GDR_ENABLED
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
 
   for ( int i = 0; i < maxNumAffineMergeCand; i++ )
   {
@@ -2561,12 +3363,33 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
     {
       affMrgCtx.mvFieldNeighbours[(i << 1) + 0][mvNum].setMvField( Mv(), -1 );
       affMrgCtx.mvFieldNeighbours[(i << 1) + 1][mvNum].setMvField( Mv(), -1 );
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        affMrgCtx.mvSolid[(i << 1) + 0][mvNum] = true;
+        affMrgCtx.mvSolid[(i << 1) + 1][mvNum] = true;
+        affMrgCtx.mvValid[(i << 1) + 0][mvNum] = true;
+        affMrgCtx.mvValid[(i << 1) + 1][mvNum] = true;
+      }
+#endif
     }
     affMrgCtx.interDirNeighbours[i] = 0;
     affMrgCtx.affineType[i] = AFFINEMODEL_4PARAM;
     affMrgCtx.mergeType[i] = MRG_TYPE_DEFAULT_N;
     affMrgCtx.BcwIdx[i] = BCW_DEFAULT;
   }
+#if GDR_ENABLED
+  if (isEncodeGdrClean)
+  {
+    MergeCtx &mrgCtx = *affMrgCtx.mrgCtx;
+    int numMergeCand = MRG_MAX_NUM_CANDS << 1;
+    for (int i = 0; i < numMergeCand; i++)
+    {
+      mrgCtx.mvSolid[i] = true;
+      mrgCtx.mvValid[i] = true;
+    }
+  }
+#endif
 
   affMrgCtx.numValidMergeCand = 0;
   affMrgCtx.maxNumMergeCand = maxNumAffineMergeCand;
@@ -2603,6 +3426,14 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
       {
         mrgCtx.mvFieldNeighbours[(pos << 1) + 1].setMvField( miLeft.mv[1], miLeft.refIdx[1] );
       }
+#if GDR_ENABLED
+      // check whether puLeft lies in the clean area
+      if (isEncodeGdrClean)
+      {
+        mrgCtx.mvSolid[(pos << 1) + 0] = cs.isClean(puLeft->Y().bottomRight(), CHANNEL_TYPE_LUMA);
+        mrgCtx.mvSolid[(pos << 1) + 1] = cs.isClean(puLeft->Y().bottomRight(), CHANNEL_TYPE_LUMA);
+      }
+#endif
       pos++;
     }
 
@@ -2615,6 +3446,13 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
       {
         affMrgCtx.mvFieldNeighbours[(affMrgCtx.numValidMergeCand << 1) + 0][mvNum].setMvField( mrgCtx.mvFieldNeighbours[(pos << 1) + 0].mv, mrgCtx.mvFieldNeighbours[(pos << 1) + 0].refIdx );
         affMrgCtx.mvFieldNeighbours[(affMrgCtx.numValidMergeCand << 1) + 1][mvNum].setMvField( mrgCtx.mvFieldNeighbours[(pos << 1) + 1].mv, mrgCtx.mvFieldNeighbours[(pos << 1) + 1].refIdx );
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          affMrgCtx.mvSolid[(affMrgCtx.numValidMergeCand << 1) + 0][mvNum] = mrgCtx.mvSolid[(pos << 1) + 0];
+          affMrgCtx.mvSolid[(affMrgCtx.numValidMergeCand << 1) + 1][mvNum] = mrgCtx.mvSolid[(pos << 1) + 1];
+        }
+#endif
       }
       affMrgCtx.interDirNeighbours[affMrgCtx.numValidMergeCand] = mrgCtx.interDirNeighbours[pos];
 
@@ -2645,17 +3483,44 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
     {
       // derive Mv from Neigh affine PU
       Mv cMv[2][3];
+#if GDR_ENABLED
+      bool    cMvSolid[2][3] = { {true, true, true}, {true, true, true} };
+      MvpType cMvType[2][3];
+      Position cMvPos[2][3];
+#endif
       const PredictionUnit* puNeigh = npu[idx];
       pu.cu->affineType = puNeigh->cu->affineType;
       if ( puNeigh->interDir != 2 )
       {
-        xInheritedAffineMv( pu, puNeigh, REF_PIC_LIST_0, cMv[0] );
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          xInheritedAffineMv(pu, puNeigh, REF_PIC_LIST_0, cMv[0], cMvSolid[0], cMvType[0], cMvPos[0]);
+        }
+        else
+        {
+          xInheritedAffineMv(pu, puNeigh, REF_PIC_LIST_0, cMv[0]);
+        }
+#else
+        xInheritedAffineMv(pu, puNeigh, REF_PIC_LIST_0, cMv[0]);
+#endif
       }
       if ( slice.isInterB() )
       {
         if ( puNeigh->interDir != 1 )
         {
-          xInheritedAffineMv( pu, puNeigh, REF_PIC_LIST_1, cMv[1] );
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            xInheritedAffineMv(pu, puNeigh, REF_PIC_LIST_1, cMv[1], cMvSolid[1], cMvType[1], cMvPos[1]);
+          }
+          else
+          {
+            xInheritedAffineMv(pu, puNeigh, REF_PIC_LIST_1, cMv[1]);
+          }
+#else
+          xInheritedAffineMv(pu, puNeigh, REF_PIC_LIST_1, cMv[1]);
+#endif
         }
       }
 
@@ -2663,6 +3528,13 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
       {
         affMrgCtx.mvFieldNeighbours[(affMrgCtx.numValidMergeCand << 1) + 0][mvNum].setMvField( cMv[0][mvNum], puNeigh->refIdx[0] );
         affMrgCtx.mvFieldNeighbours[(affMrgCtx.numValidMergeCand << 1) + 1][mvNum].setMvField( cMv[1][mvNum], puNeigh->refIdx[1] );
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          affMrgCtx.mvSolid[(affMrgCtx.numValidMergeCand << 1) + 0][mvNum] = cMvSolid[0][mvNum]; // list 0 flags from the REF_PIC_LIST_0 derivation
+          affMrgCtx.mvSolid[(affMrgCtx.numValidMergeCand << 1) + 1][mvNum] = cMvSolid[1][mvNum]; // list 1 flags from the REF_PIC_LIST_1 derivation (was cMvSolid[0])
+        }
+#endif
       }
       affMrgCtx.interDirNeighbours[affMrgCtx.numValidMergeCand] = puNeigh->interDir;
       affMrgCtx.affineType[affMrgCtx.numValidMergeCand] = (EAffineModel)(puNeigh->cu->affineType);
@@ -2690,6 +3562,9 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
       int8_t neighBcw[2] = { BCW_DEFAULT, BCW_DEFAULT };
       // control point: LT B2->B3->A2
       const Position posLT[3] = { pu.Y().topLeft().offset( -1, -1 ), pu.Y().topLeft().offset( 0, -1 ), pu.Y().topLeft().offset( -1, 0 ) };
+#if GDR_ENABLED
+      bool miSolid[4] = { false, false, false, false };
+#endif
       for ( int i = 0; i < 3; i++ )
       {
         const Position pos = posLT[i];
@@ -2700,6 +3575,12 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
           isAvailable[0] = true;
           mi[0] = puNeigh->getMotionInfo( pos );
           neighBcw[0] = puNeigh->cu->BcwIdx;
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            miSolid[0] = cs.isClean(puNeigh->Y().topRight(), CHANNEL_TYPE_LUMA);
+          }
+#endif
           break;
         }
       }
@@ -2716,6 +3597,12 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
           isAvailable[1] = true;
           mi[1] = puNeigh->getMotionInfo( pos );
           neighBcw[1] = puNeigh->cu->BcwIdx;
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            miSolid[1] = cs.isClean(puNeigh->Y().topRight(), CHANNEL_TYPE_LUMA);
+          }
+#endif
           break;
         }
       }
@@ -2731,6 +3618,12 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
         {
           isAvailable[2] = true;
           mi[2] = puNeigh->getMotionInfo( pos );
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            miSolid[2] = cs.isClean(puNeigh->Y().topRight(), CHANNEL_TYPE_LUMA);
+          }
+#endif
           break;
         }
       }
@@ -2772,6 +3665,15 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
           mi[3].refIdx[0] = refIdx;
           mi[3].interDir = 1;
           isAvailable[3] = true;
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            bool posL0inCurPicSolid = cs.isClean(posC0, CHANNEL_TYPE_LUMA);
+            bool posL0inRefPicSolid = cs.isClean(posC0, REF_PIC_LIST_0, refIdx);
+
+            miSolid[3] = posL0inCurPicSolid && posL0inRefPicSolid;
+          }
+#endif
         }
 
         if ( slice.isInterB() )
@@ -2783,6 +3685,15 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
             mi[3].refIdx[1] = refIdx;
             mi[3].interDir |= 2;
             isAvailable[3] = true;
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              bool posL1inCurPicSolid = cs.isClean(posC0, CHANNEL_TYPE_LUMA);
+              bool posL1inRefPicSolid = cs.isClean(posC0, REF_PIC_LIST_1, refIdx);
+
+              miSolid[3] = (mi[3].interDir & 1) ? (miSolid[3] && posL1inCurPicSolid && posL1inRefPicSolid) : (posL1inCurPicSolid && posL1inRefPicSolid);
+            }
+#endif
           }
         }
       }
@@ -2799,12 +3710,36 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
         { 0, 2 },             // 5:  LT, LB
       };
 
+#if GDR_ENABLED
+      bool modelSolid[6] =
+      {
+        miSolid[0] && miSolid[1] && miSolid[2],
+        miSolid[0] && miSolid[1] && miSolid[3],
+        miSolid[0] && miSolid[2] && miSolid[3],
+        miSolid[1] && miSolid[2] && miSolid[3],
+        miSolid[0] && miSolid[1],
+        miSolid[0] && miSolid[2]
+      };
+#endif
       int verNum[6] = { 3, 3, 3, 3, 2, 2 };
       int startIdx = pu.cs->sps->getUseAffineType() ? 0 : 4;
       for ( int idx = startIdx; idx < modelNum; idx++ )
       {
         int modelIdx = order[idx];
+#if GDR_ENABLED
+        int affinNumValidCand = affMrgCtx.numValidMergeCand;
+#endif
         getAffineControlPointCand(pu, mi, isAvailable, model[modelIdx], ((modelIdx == 3) ? neighBcw[1] : neighBcw[0]), modelIdx, verNum[modelIdx], affMrgCtx);
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          for (int i = 0; i < 3; i++)
+          {
+            affMrgCtx.mvSolid[(affinNumValidCand << 1) + 0][i] = modelSolid[modelIdx];
+            affMrgCtx.mvSolid[(affinNumValidCand << 1) + 1][i] = modelSolid[modelIdx];
+          }
+        }
+#endif
         if ( affMrgCtx.numValidMergeCand != 0 && affMrgCtx.numValidMergeCand - 1 == mrgCandIdx )
         {
           return;
@@ -2827,6 +3762,12 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
     for ( int mvNum = 0; mvNum < 3; mvNum++ )
     {
       affMrgCtx.mvFieldNeighbours[(cnt << 1) + 0][mvNum].setMvField( Mv( 0, 0 ), 0 );
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        affMrgCtx.mvSolid[(cnt << 1) + 0][mvNum] = true;
+      }
+#endif
     }
     affMrgCtx.interDirNeighbours[cnt] = 1;
 
@@ -2835,6 +3776,12 @@ void PU::getAffineMergeCand( const PredictionUnit &pu, AffineMergeCtx& affMrgCtx
       for ( int mvNum = 0; mvNum < 3; mvNum++ )
       {
         affMrgCtx.mvFieldNeighbours[(cnt << 1) + 1][mvNum].setMvField( Mv( 0, 0 ), 0 );
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          affMrgCtx.mvSolid[(cnt << 1) + 1][mvNum] = true;
+        }
+#endif
       }
       affMrgCtx.interDirNeighbours[cnt] = 3;
     }
@@ -2971,15 +3918,32 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b
   const Picture *pColPic = slice.getRefPic(RefPicList(slice.isInterB() ? 1 - slice.getColFromL0Flag() : 0), slice.getColRefIdx());
   Mv cTMv;
 
+#if GDR_ENABLED
+  const CodingStructure& cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool isSubPuSolid[2] = { true, true };
+#endif
   if ( count )
   {
     if ( (mrgCtx.interDirNeighbours[0] & (1 << REF_PIC_LIST_0)) && slice.getRefPic( REF_PIC_LIST_0, mrgCtx.mvFieldNeighbours[REF_PIC_LIST_0].refIdx ) == pColPic )
     {
       cTMv = mrgCtx.mvFieldNeighbours[REF_PIC_LIST_0].mv;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        isSubPuSolid[REF_PIC_LIST_0] = mrgCtx.mvSolid[REF_PIC_LIST_0];
+      }
+#endif
     }
     else if ( slice.isInterB() && (mrgCtx.interDirNeighbours[0] & (1 << REF_PIC_LIST_1)) && slice.getRefPic( REF_PIC_LIST_1, mrgCtx.mvFieldNeighbours[REF_PIC_LIST_1].refIdx ) == pColPic )
     {
       cTMv = mrgCtx.mvFieldNeighbours[REF_PIC_LIST_1].mv;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        isSubPuSolid[REF_PIC_LIST_1] = mrgCtx.mvSolid[REF_PIC_LIST_1];
+      }
+#endif
     }
   }
 
@@ -3035,6 +3999,12 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b
         // set as default, for further motion vector field spanning
         mrgCtx.mvFieldNeighbours[(count << 1) + currRefListId].setMvField(cColMv, 0);
         mrgCtx.interDirNeighbours[count] |= (1 << currRefListId);
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          mrgCtx.mvSolid[(count << 1) + currRefListId] = cs.isClean(centerPos, currRefPicList, refIdx);
+        }
+#endif
         LICFlag = tempLICFlag;
         mrgCtx.BcwIdx[count] = BCW_DEFAULT;
         found = true;
@@ -3088,6 +4058,12 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b
               mi.refIdx[currRefListId] = 0;
               mi.mv[currRefListId]     = cColMv;
               found                    = true;
+#if GDR_ENABLED
+              if (isEncodeGdrClean)
+              {
+                isSubPuSolid[currRefPicList] = isSubPuSolid[currRefPicList] && cs.isClean(colPos, currRefPicList, refIdx);
+              }
+#endif
             }
           }
         }
@@ -3113,6 +4089,16 @@ bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, b
       }
     }
   }
+
+#if GDR_ENABLED
+  if (isEncodeGdrClean)
+  {
+    // fold the per-list sub-PU results into the candidate's final solid flags
+    mrgCtx.mvSolid[(count << 1) + 0] = mrgCtx.mvSolid[(count << 1) + 0] && isSubPuSolid[0];
+    mrgCtx.mvSolid[(count << 1) + 1] = mrgCtx.mvSolid[(count << 1) + 1] && isSubPuSolid[1];
+  }
+#endif
+
   return true;
 }
 
@@ -3290,6 +4276,10 @@ void PU::getGeoMergeCandidates( const PredictionUnit &pu, MergeCtx& geoMrgCtx )
   const uint32_t maxNumMergeCand = pu.cs->sps->getMaxNumMergeCand();
   geoMrgCtx.numValidMergeCand = 0;
 
+#if GDR_ENABLED
+  CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
   for (int32_t i = 0; i < GEO_MAX_NUM_UNI_CANDS; i++)
   {
     geoMrgCtx.BcwIdx[i] = BCW_DEFAULT;
@@ -3299,6 +4289,13 @@ void PU::getGeoMergeCandidates( const PredictionUnit &pu, MergeCtx& geoMrgCtx )
     geoMrgCtx.mvFieldNeighbours[(i << 1) + 1].refIdx = NOT_VALID;
     geoMrgCtx.mvFieldNeighbours[(i << 1)].mv = Mv();
     geoMrgCtx.mvFieldNeighbours[(i << 1) + 1].mv = Mv();
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      geoMrgCtx.mvSolid[(i << 1) + 0] = true;
+      geoMrgCtx.mvSolid[(i << 1) + 1] = true;
+    }
+#endif
     geoMrgCtx.useAltHpelIf[i] = false;
   }
 
@@ -3315,6 +4312,18 @@ void PU::getGeoMergeCandidates( const PredictionUnit &pu, MergeCtx& geoMrgCtx )
       geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + parity].mv = tmpMergeCtx.mvFieldNeighbours[(i << 1) + parity].mv;
       geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + !parity].refIdx = -1;
       geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + parity].refIdx = tmpMergeCtx.mvFieldNeighbours[(i << 1) + parity].refIdx;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        Mv  mv = tmpMergeCtx.mvFieldNeighbours[(i << 1) + parity].mv;
+        int refIdx = tmpMergeCtx.mvFieldNeighbours[(i << 1) + parity].refIdx;
+        RefPicList refPicList = parity ? REF_PIC_LIST_1 : REF_PIC_LIST_0;
+        geoMrgCtx.mvSolid[(geoMrgCtx.numValidMergeCand << 1) + !parity] = true;
+        geoMrgCtx.mvSolid[(geoMrgCtx.numValidMergeCand << 1) + parity] = tmpMergeCtx.mvSolid[(i << 1) + parity];
+        geoMrgCtx.mvValid[(geoMrgCtx.numValidMergeCand << 1) + !parity] = true;
+        geoMrgCtx.mvValid[(geoMrgCtx.numValidMergeCand << 1) + parity] = cs.isClean(pu.Y().bottomRight(), mv, refPicList, refIdx);
+      }
+#endif
       geoMrgCtx.numValidMergeCand++;
       if (geoMrgCtx.numValidMergeCand == GEO_MAX_NUM_UNI_CANDS)
       {
@@ -3331,6 +4340,18 @@ void PU::getGeoMergeCandidates( const PredictionUnit &pu, MergeCtx& geoMrgCtx )
       geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + parity].mv = Mv(0, 0);
       geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + !parity].refIdx = tmpMergeCtx.mvFieldNeighbours[(i << 1) + !parity].refIdx;
       geoMrgCtx.mvFieldNeighbours[(geoMrgCtx.numValidMergeCand << 1) + parity].refIdx = -1;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        Mv  mv = tmpMergeCtx.mvFieldNeighbours[(i << 1) + !parity].mv;
+        int refIdx = tmpMergeCtx.mvFieldNeighbours[(i << 1) + !parity].refIdx;
+        RefPicList refPicList = (!parity) ? REF_PIC_LIST_1 : REF_PIC_LIST_0;
+        geoMrgCtx.mvSolid[(geoMrgCtx.numValidMergeCand << 1) + !parity] = tmpMergeCtx.mvSolid[(i << 1) + !parity];
+        geoMrgCtx.mvSolid[(geoMrgCtx.numValidMergeCand << 1) + parity] = true;
+        geoMrgCtx.mvValid[(geoMrgCtx.numValidMergeCand << 1) + !parity] = cs.isClean(pu.Y().bottomRight(), mv, refPicList, refIdx);
+        geoMrgCtx.mvValid[(geoMrgCtx.numValidMergeCand << 1) + parity] = true;
+      }
+#endif
       geoMrgCtx.numValidMergeCand++;
       if (geoMrgCtx.numValidMergeCand == GEO_MAX_NUM_UNI_CANDS)
       {
@@ -3601,7 +4622,7 @@ uint8_t CU::targetSbtAllowed( uint8_t sbtIdx, uint8_t sbtAllowed )
   case SBT_HOR_HALF: val = ( ( sbtAllowed >> SBT_HOR_HALF ) & 0x1 ); break;
   case SBT_VER_QUAD: val = ( ( sbtAllowed >> SBT_VER_QUAD ) & 0x1 ); break;
   case SBT_HOR_QUAD: val = ( ( sbtAllowed >> SBT_HOR_QUAD ) & 0x1 ); break;
-  default:           CHECK( 1, "unknown SBT type" );
+  default: THROW("unknown SBT type");
   }
   return val;
 }
diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h
index c87fbdc93904b3319f2095ca38b1743015a5d79f..5b5e0ddc2f3fe9935eb7062bb4eefccc554a6307 100644
--- a/source/Lib/CommonLib/UnitTools.h
+++ b/source/Lib/CommonLib/UnitTools.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -148,11 +148,18 @@ namespace PU
   void fillIBCMvpCand                 (PredictionUnit &pu, AMVPInfo &amvpInfo);
   void fillAffineMvpCand              (      PredictionUnit &pu, const RefPicList &eRefPicList, const int &refIdx, AffineAMVPInfo &affiAMVPInfo);
   bool addMVPCandUnscaled             (const PredictionUnit &pu, const RefPicList &eRefPicList, const int &iRefIdx, const Position &pos, const MvpDir &eDir, AMVPInfo &amvpInfo);
+#if GDR_ENABLED
+  void xInheritedAffineMv(const PredictionUnit &pu, const PredictionUnit* puNeighbour, RefPicList eRefPicList, Mv rcMv[3], bool rcMvSolid[3], MvpType rcMvType[3], Position rcMvPos[3]);  
+#endif
   void xInheritedAffineMv             ( const PredictionUnit &pu, const PredictionUnit* puNeighbour, RefPicList eRefPicList, Mv rcMv[3] );
   bool addMergeHMVPCand               (const CodingStructure &cs, MergeCtx& mrgCtx, const int& mrgCandIdx, const uint32_t maxNumMergeCandMin1, int &cnt
     , const bool isAvailableA1, const MotionInfo miLeft, const bool isAvailableB1, const MotionInfo miAbove
     , const bool ibcFlag
     , const bool isGt4x4
+#if GDR_ENABLED
+    , const PredictionUnit &pu
+    , bool &allCandSolidInAbove  
+#endif
   );
   void addAMVPHMVPCand                (const PredictionUnit &pu, const RefPicList eRefPicList, const int currRefPOC, AMVPInfo &info);
   bool addAffineMVPCandUnscaled       ( const PredictionUnit &pu, const RefPicList &refPicList, const int &refIdx, const Position &pos, const MvpDir &dir, AffineAMVPInfo &affiAmvpInfo );
@@ -203,8 +210,8 @@ int getMipSizeId      (const Size& block);
 bool allowLfnstWithMip(const Size& block);
 
 template<typename T, size_t N>
-uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeList, static_vector<double, N>& candCostList
-  , size_t uiFastCandNum = N, int* iserttPos = nullptr)
+uint32_t updateCandList(T mode, double uiCost, static_vector<T, N> &candModeList,
+                        static_vector<double, N> &candCostList, size_t uiFastCandNum = N, int *iserttPos = nullptr)
 {
   CHECK( std::min( uiFastCandNum, candModeList.size() ) != std::min( uiFastCandNum, candCostList.size() ), "Sizes do not match!" );
   CHECK( uiFastCandNum > candModeList.capacity(), "The vector is to small to hold all the candidates!" );
@@ -225,7 +232,7 @@ uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeLi
       candModeList[currSize - i] = candModeList[currSize - 1 - i];
       candCostList[currSize - i] = candCostList[currSize - 1 - i];
     }
-    candModeList[currSize - shift] = uiMode;
+    candModeList[currSize - shift] = mode;
     candCostList[currSize - shift] = uiCost;
     if (iserttPos != nullptr)
     {
@@ -235,7 +242,7 @@ uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeLi
   }
   else if( currSize < uiFastCandNum )
   {
-    candModeList.insert( candModeList.end() - shift, uiMode );
+    candModeList.insert(candModeList.end() - shift, mode);
     candCostList.insert( candCostList.end() - shift, uiCost );
     if (iserttPos != nullptr)
     {
diff --git a/source/Lib/CommonLib/WeightPrediction.cpp b/source/Lib/CommonLib/WeightPrediction.cpp
index 1493d087b9c029715d2585198aace31ed6b87dfd..b9f0a550c5dd5b44d438b26d8acd47027c831da1 100644
--- a/source/Lib/CommonLib/WeightPrediction.cpp
+++ b/source/Lib/CommonLib/WeightPrediction.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -414,7 +414,10 @@ void  WeightPrediction::xWeightedPredictionBi(const PredictionUnit       &pu,
 
   CHECK( !pu.cs->pps->getWPBiPred(), "Weighted Bi-prediction disabled" );
 
-  if (iRefIdx0 < 0 && iRefIdx1 < 0) return;
+  if (iRefIdx0 < 0 && iRefIdx1 < 0)
+  {
+    return;
+  }
 
   getWpScaling(pu.cu->slice, iRefIdx0, iRefIdx1, pwp0, pwp1, maxNumComp);
 
diff --git a/source/Lib/CommonLib/WeightPrediction.h b/source/Lib/CommonLib/WeightPrediction.h
index 80915b6185ca5f66718f9cf76ea112846c1fc8c0..9f4a56785ac24122d500d57f1b78063caf4ae19e 100644
--- a/source/Lib/CommonLib/WeightPrediction.h
+++ b/source/Lib/CommonLib/WeightPrediction.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/dtrace.cpp b/source/Lib/CommonLib/dtrace.cpp
index 3500c6be69f4c885964cbec1cd10bc855144fa1b..c9b983cd1b46dcdec0dc39a8414f21b3d54eaa7e 100644
--- a/source/Lib/CommonLib/dtrace.cpp
+++ b/source/Lib/CommonLib/dtrace.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -47,83 +47,88 @@
 #include "dtrace_next.h"
 
 
-
 void Channel::update( std::map< CType, int > state )
 {
-
-    for( std::list<Rule>::iterator rules_iter = rule_list.begin();
-            rules_iter != rule_list.end();
-            ++rules_iter ) {
-        /* iterate over conditions, get the state of the condition type
-         * and check if contion is met:
-         *     if not -> go to next rule
-         *        yes -> go to next condition
-         * if all conditions are met: set channel active and return */
-        bool probe = true;
-
-        for( Rule::iterator cond_iter = rules_iter->begin();
-                cond_iter != rules_iter->end();
-                ++cond_iter ) {
-            int sVal = state[cond_iter->type];
-            if( !cond_iter->eval( cond_iter->rval, sVal ) ) {
-                probe = false;
-                break;
-            }
-        }
-        if( probe ) {
-            _active = true;
-            return;
-        }
+  for (std::list<Rule>::iterator rules_iter = rule_list.begin(); rules_iter != rule_list.end(); ++rules_iter)
+  {
+    /* iterate over conditions, get the state of the condition type
+     * and check if condition is met:
+     *     if not -> go to next rule
+     *        yes -> go to next condition
+     * if all conditions are met: set channel active and return */
+    bool probe = true;
+
+    for (Rule::iterator cond_iter = rules_iter->begin(); cond_iter != rules_iter->end(); ++cond_iter)
+    {
+      int sVal = state[cond_iter->type];
+      if (!cond_iter->eval(cond_iter->rval, sVal))
+      {
+        probe = false;
+        break;
+      }
     }
+    if (probe)
+    {
+      _active = true;
+      return;
+    }
+  }
 
-    _active = false;
+  _active = false;
 }
 
 void Channel::add( std::vector<Condition> rule )
 {
-    rule_list.push_back( rule );
+  rule_list.push_back(rule);
 }
 
 static inline
 std::vector<std::string> &split( const std::string &s, char delim, std::vector<std::string> &elems )
 {
-    std::stringstream ss( s );
-    std::string item;
-    while ( std::getline( ss, item, delim ) ) {
-        elems.push_back( item );
-    }
-    return elems;
+  std::stringstream ss(s);
+  std::string       item;
+  while (std::getline(ss, item, delim))
+  {
+    elems.push_back(item);
+  }
+  return elems;
 }
 
 static inline
 std::vector<std::string> split( const std::string &s, char delim )
 {
-    std::vector<std::string> elems;
-    split( s, delim, elems );
-    return elems;
+  std::vector<std::string> elems;
+  split(s, delim, elems);
+  return elems;
 }
 
 CDTrace::CDTrace( const char *filename, vstring channel_names )
     : copy(false), m_trace_file(NULL), m_error_code( 0 )
 {
-    if( filename )
-        m_trace_file = fopen( filename, "w" );
+  if (filename)
+  {
+    m_trace_file = fopen(filename, "w");
+  }
 
-    int i = 0;
-    for( vstring::iterator ci = channel_names.begin(); ci != channel_names.end(); ++ci ) {
-        deserializationTable[*ci] = i++;
-        chanRules.push_back( Channel() );
-    }
+  int i = 0;
+  for (vstring::iterator ci = channel_names.begin(); ci != channel_names.end(); ++ci)
+  {
+    deserializationTable[*ci] = i++;
+    chanRules.push_back(Channel());
+  }
 }
 
 CDTrace::CDTrace( const char *filename, const dtrace_channels_t& channels )
   : copy( false ), m_trace_file( NULL ), m_error_code( 0 )
 {
   if( filename )
+  {
     m_trace_file = fopen( filename, "w" );
+  }
 
   //int i = 0;
-  for( dtrace_channels_t::const_iterator ci = channels.begin(); ci != channels.end(); ++ci ) {
+  for (dtrace_channels_t::const_iterator ci = channels.begin(); ci != channels.end(); ++ci)
+  {
     deserializationTable[ci->channel_name] = ci->channel_number/*i++*/;
     chanRules.push_back( Channel() );
   }
@@ -131,13 +136,13 @@ CDTrace::CDTrace( const char *filename, const dtrace_channels_t& channels )
 
 CDTrace::CDTrace( const CDTrace& other )
 {
-    copy = true;
-    m_trace_file         = other.m_trace_file;
-    chanRules            = other.chanRules;
-    condition_types      = other.condition_types;
-    state                = other.state;
-    deserializationTable = other.deserializationTable;
-    m_error_code         = other.m_error_code;
+  copy                 = true;
+  m_trace_file         = other.m_trace_file;
+  chanRules            = other.chanRules;
+  condition_types      = other.condition_types;
+  state                = other.state;
+  deserializationTable = other.deserializationTable;
+  m_error_code         = other.m_error_code;
 }
 
 CDTrace::CDTrace( const std::string& sTracingFile, const std::string& sTracingRule, const dtrace_channels_t& channels )
@@ -152,28 +157,30 @@ CDTrace::CDTrace( const std::string& sTracingFile, const std::string& sTracingRu
 
 void CDTrace::swap( CDTrace& other )
 {
-    using std::swap;
-    CDTrace& first = *this;
-    CDTrace& second = other;
-    swap(first.copy,second.copy);
-    swap(first.m_trace_file,second.m_trace_file);
-    swap(first.chanRules,second.chanRules);
-    swap(first.condition_types,second.condition_types);
-    swap(first.state,second.state);
-    swap(first.deserializationTable,second.deserializationTable);
+  using std::swap;
+  CDTrace &first  = *this;
+  CDTrace &second = other;
+  swap(first.copy, second.copy);
+  swap(first.m_trace_file, second.m_trace_file);
+  swap(first.chanRules, second.chanRules);
+  swap(first.condition_types, second.condition_types);
+  swap(first.state, second.state);
+  swap(first.deserializationTable, second.deserializationTable);
 }
 
 CDTrace& CDTrace::operator=( const CDTrace& other )
 {
-    CDTrace tmp(other);
-    swap( tmp );
-    return *this;
+  CDTrace tmp(other);
+  swap(tmp);
+  return *this;
 }
 
 CDTrace::~CDTrace()
 {
-    if( !copy && m_trace_file )
-        fclose( m_trace_file );
+  if (!copy && m_trace_file)
+  {
+    fclose(m_trace_file);
+  }
 }
 
 bool _cf_eq ( int bound, int val ) { return ( val==bound ); }
@@ -183,65 +190,93 @@ bool _cf_ge ( int bound, int val ) { return ( val>=bound ); }
 
 int CDTrace::addRule( std::string rulestring )
 {
-    vstring chans_conds = split( rulestring, ':' );
-    vstring channels = split( chans_conds[0], ',' );
-    vstring conditions = split( chans_conds[1], ',' );
-
-    /* parse the rules first */
-    std::vector<Condition> rule;
-    for( vstring::iterator ci = conditions.begin(); ci != conditions.end(); ++ci ) {
-        /* find one of "==", "!=", "<=", ">=" */
-        const char *ops_[] = { "==", "!=", "<=", ">=" };
-        vstring operators( ops_,&ops_[sizeof( ops_ )/sizeof( ops_[0] )] );
-        vstring::iterator oi = operators.begin();
-        std::size_t pos = std::string::npos;
-        do {
-            if( ( pos = ci->find( *oi ) ) != std::string::npos ) break;
-        } while( ++oi != operators.end() );
-
-        /* No operator found, malformed rules string -> abort */
-        if( pos == std::string::npos ) return -2;
-
-        CType ctype( *ci,0,pos );
-        int value = std::atoi( ci->substr( pos+2, ci->length()-( pos+2 ) ).c_str() );
-        //if( condition_types.find( ctype ) == condition_types.end() ) return 0;
-
-        /* partially apply the condition value to the associated
-         * condtion function and append it to the rule */
-        bool ( *cfunc )( int,int );
-        if( "==" == *oi ) cfunc = _cf_eq;
-        else if( "!=" == *oi ) cfunc = _cf_neq;
-        else if( "<=" == *oi ) cfunc = _cf_le;
-        else if( ">=" == *oi ) cfunc = _cf_ge;
-        else return 0; // this is already taken care of
-
-        rule.push_back( Condition( ctype, cfunc, value ) );
+  vstring chans_conds = split(rulestring, ':');
+  vstring channels    = split(chans_conds[0], ',');
+  vstring conditions  = split(chans_conds[1], ',');
+
+  /* parse the rules first */
+  std::vector<Condition> rule;
+  for (vstring::iterator ci = conditions.begin(); ci != conditions.end(); ++ci)
+  {
+    /* find one of "==", "!=", "<=", ">=" */
+    const char *      ops_[] = { "==", "!=", "<=", ">=" };
+    vstring           operators(ops_, &ops_[sizeof(ops_) / sizeof(ops_[0])]);
+    vstring::iterator oi  = operators.begin();
+    std::size_t       pos = std::string::npos;
+    do
+    {
+      if ((pos = ci->find(*oi)) != std::string::npos)
+      {
+        break;
+      }
+    } while (++oi != operators.end());
+
+    /* No operator found, malformed rules string -> abort */
+    if (pos == std::string::npos)
+    {
+      return -2;
+    }
+
+    CType ctype(*ci, 0, pos);
+    int   value = std::atoi(ci->substr(pos + 2, ci->length() - (pos + 2)).c_str());
+    // if( condition_types.find( ctype ) == condition_types.end() ) return 0;
+
+    /* partially apply the condition value to the associated
+     * condition function and append it to the rule */
+    bool (*cfunc)(int, int);
+    if ("==" == *oi)
+    {
+      cfunc = _cf_eq;
+    }
+    else if ("!=" == *oi)
+    {
+      cfunc = _cf_neq;
+    }
+    else if ("<=" == *oi)
+    {
+      cfunc = _cf_le;
+    }
+    else if (">=" == *oi)
+    {
+      cfunc = _cf_ge;
+    }
+    else
+    {
+      return 0;   // this is already taken care of
     }
 
-    /* add the rule to each channel */
-    for( vstring::iterator chan_iter = channels.begin(); chan_iter != channels.end(); ++chan_iter ) {
-        std::map< Key, int>::iterator ichan = deserializationTable.find(*chan_iter);
-        if( ichan != deserializationTable.end() )
-            chanRules[ichan->second].add( rule );
-        else
-            return -3;
+    rule.push_back(Condition(ctype, cfunc, value));
+  }
+
+  /* add the rule to each channel */
+  for (vstring::iterator chan_iter = channels.begin(); chan_iter != channels.end(); ++chan_iter)
+  {
+    std::map<Key, int>::iterator ichan = deserializationTable.find(*chan_iter);
+    if (ichan != deserializationTable.end())
+    {
+      chanRules[ichan->second].add(rule);
     }
+    else
+    {
+      return -3;
+    }
+  }
 
-    //return (int)channels.size();
-    return 0;
+  // return (int)channels.size();
+  return 0;
 }
 
 bool CDTrace::update( state_type stateval )
 {
-    state[stateval.first] = stateval.second;
+  state[stateval.first] = stateval.second;
 
-    /* pass over all the channel rules */
-    for( std::vector< Channel >::iterator citer = chanRules.begin(); citer != chanRules.end(); ++citer )
-    {
-        citer->update( state );
-    }
+  /* pass over all the channel rules */
+  for (std::vector<Channel>::iterator citer = chanRules.begin(); citer != chanRules.end(); ++citer)
+  {
+    citer->update(state);
+  }
 
-    return true;
+  return true;
 }
 
 void CDTrace::getChannelsList( std::string& sChannels )
@@ -251,7 +286,9 @@ void CDTrace::getChannelsList( std::string& sChannels )
   if( deserializationTable.size() > 0 )
   {
     for( channel_map_t::iterator it = deserializationTable.begin(); it != deserializationTable.end(); ++it )
+    {
       sChannels += it->first + "\n";
+    }
   }
 }
 
@@ -261,8 +298,12 @@ const char* CDTrace::getChannelName( int channel_number )
   if( deserializationTable.size() > 0 )
   {
     for( channel_map_t::iterator it = deserializationTable.begin(); it != deserializationTable.end(); ++it )
+    {
       if( it->second == channel_number )
+      {
         return it->first.c_str();
+      }
+    }
   }
   return not_found;
 }
@@ -273,9 +314,13 @@ std::string CDTrace::getErrMessage()
   if( m_error_code )
   {
     if( m_error_code == -2 )
+    {
       str = ( " - DTrace ERROR: Add tracing rule failed: DECERR_DTRACE_BAD_RULE" );
+    }
     else if( m_error_code == -3 )
+    {
       str = ( " - DTrace ERROR: Add tracing rule failed: DECERR_DTRACE_UNKNOWN_CHANNEL" );
+    }
     else
     {
       str = " - DTrace ERROR: Undefined error";
@@ -296,7 +341,9 @@ void CDTrace::dtrace( int k, const char *format, /*va_list args*/... )
     fflush( m_trace_file );
     va_end ( args );
     if( bCount )
+    {
       chanRules[k].incrementCounter();
+    }
   }
   return;
 }
diff --git a/source/Lib/CommonLib/dtrace.h b/source/Lib/CommonLib/dtrace.h
index 9e10e201b70042294d934eb4bf4bd44ae78d8dae..f26c557a15f6b656ff9efe9dadee8937982b9ae4 100644
--- a/source/Lib/CommonLib/dtrace.h
+++ b/source/Lib/CommonLib/dtrace.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.cpp b/source/Lib/CommonLib/dtrace_blockstatistics.cpp
index 86ff4413df53ee30af6537c2ba16a31a02cc33ca..d92ae1e17a27bdf137349fbaa21b0d8258de8f8e 100644
--- a/source/Lib/CommonLib/dtrace_blockstatistics.cpp
+++ b/source/Lib/CommonLib/dtrace_blockstatistics.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -684,8 +684,8 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
                 {
                   const Mv mv = pixMi.mv[REF_PIC_LIST_0];
 #if BLOCK_STATS_AS_CSV
-                  g_trace_ctx->dtrace<false>( 
-                    D_BLOCK_STATISTICS_ALL, 
+                  g_trace_ctx->dtrace<false>(
+                    D_BLOCK_STATISTICS_ALL,
                     "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%4d;%4d\n",
                      cs.picture->poc,
                      pu.lx() + 4*x,
@@ -713,8 +713,8 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
                 {
                   const Mv mv = pixMi.mv[REF_PIC_LIST_1];
 #if BLOCK_STATS_AS_CSV
-                  g_trace_ctx->dtrace<false>( 
-                    D_BLOCK_STATISTICS_ALL, 
+                  g_trace_ctx->dtrace<false>(
+                    D_BLOCK_STATISTICS_ALL,
                     "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%4d;%4d\n",
                      cs.picture->poc,
                      pu.lx() + 4*x,
@@ -743,8 +743,8 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
                   {
                     const Mv mv = pixMi.mv[REF_PIC_LIST_0];
 #if BLOCK_STATS_AS_CSV
-                  g_trace_ctx->dtrace<false>( 
-                    D_BLOCK_STATISTICS_ALL, 
+                  g_trace_ctx->dtrace<false>(
+                    D_BLOCK_STATISTICS_ALL,
                     "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%4d;%4d\n",
                      cs.picture->poc,
                      pu.lx() + 4*x,
@@ -767,12 +767,12 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
                      mv.hor,
                      mv.ver);
 #endif
-                  }                
+                  }
                   {
                     const Mv mv = pixMi.mv[REF_PIC_LIST_1];
 #if BLOCK_STATS_AS_CSV
-                  g_trace_ctx->dtrace<false>( 
-                    D_BLOCK_STATISTICS_ALL, 
+                  g_trace_ctx->dtrace<false>(
+                    D_BLOCK_STATISTICS_ALL,
                     "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%4d;%4d\n",
                      cs.picture->poc,
                      pu.lx() + 4*x,
@@ -795,7 +795,7 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
                      mv.hor,
                      mv.ver);
 #endif
-                  }                                    
+                  }
                 }
               }
             }
@@ -1140,7 +1140,7 @@ void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea)
           }
           default:
           {
-            CHECK(1, "Invalid prediction mode");
+            THROW("Invalid prediction mode");
             break;
           }
         }
diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.h b/source/Lib/CommonLib/dtrace_blockstatistics.h
index 1a294fdbf631a43935d50fedfa2ac605fab4d760..f9eff9ce1ba202cf7fbdf833c0b4c85b4a67473a 100644
--- a/source/Lib/CommonLib/dtrace_blockstatistics.h
+++ b/source/Lib/CommonLib/dtrace_blockstatistics.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/dtrace_buffer.h b/source/Lib/CommonLib/dtrace_buffer.h
index afba4a3ced5a5e4e8db2d57faba03a055c53db1d..e30551076833bbfd2e0d979e767f2a126337519b 100644
--- a/source/Lib/CommonLib/dtrace_buffer.h
+++ b/source/Lib/CommonLib/dtrace_buffer.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/dtrace_codingstruct.h b/source/Lib/CommonLib/dtrace_codingstruct.h
index 656942903eb14621fe0db10cc3a721a465affb0c..672f7f441b48d8b19710c072087d6cf0a2ef3181 100644
--- a/source/Lib/CommonLib/dtrace_codingstruct.h
+++ b/source/Lib/CommonLib/dtrace_codingstruct.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/dtrace_next.h b/source/Lib/CommonLib/dtrace_next.h
index 7ef78cca960ba7d31b793788d4d0510bee803e52..8281ae7668945d082e225f1079f2bc113a77cbcc 100644
--- a/source/Lib/CommonLib/dtrace_next.h
+++ b/source/Lib/CommonLib/dtrace_next.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/version.h b/source/Lib/CommonLib/version.h
index 66cfa83e92a64a3066bcb45bb26a984601460460..f1c5cd405ebff9a75b69d8d2158a20b7a116188f 100644
--- a/source/Lib/CommonLib/version.h
+++ b/source/Lib/CommonLib/version.h
@@ -1,3 +1,3 @@
 #if ! defined( VTM_VERSION )
-#define VTM_VERSION "10.2"
+#define VTM_VERSION "14.0"
 #endif
diff --git a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
index 962620313e07cff1cfa16c0725b688c07fde36f5..a8962b1ebcc84e34abc715380c8b41b595e6a55d 100644
--- a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
+++ b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
@@ -40,7 +40,523 @@
 #else
 #include <x86intrin.h>
 #endif
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+static void simdFilter5x5Blk_HBD(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
+  const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+  const Pel *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+  int vbPos)
+// 5x5 ALF for one chroma component with 128-bit SIMD: filters area blk of recSrc into area blkDst of recDst, four 32-bit samples per vector. classifier and cs are not read here.
+{
+  CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2");
+  CHECK(!isChroma(compId), "ALF 5x5 filter is for chroma only");
+
+  const CPelBuf srcBuffer = recSrc.get(compId);
+  PelBuf        dstBuffer = recDst.get(compId);
+
+  const size_t srcStride = srcBuffer.stride;
+  const size_t dstStride = dstBuffer.stride;
+
+  constexpr int shift = AdaptiveLoopFilter::m_NUM_BITS - 1;
+  constexpr int round = 1 << (shift - 1);
+  const __m128i offset1 = _mm_set1_epi32((1 << ((shift + 3) - 1)) - round);   // added on top of `round` to get the rounding term of the (shift + 3) right shift used next to vbPos
+
+  const size_t width = blk.width;
+  const size_t height = blk.height;
+
+  constexpr size_t step_x = 4;   // samples written per vector store
+  constexpr size_t step_y = 4;
+
+  CHECK(blk.y % step_y, "Wrong startHeight in filtering");
+  CHECK(blk.x % step_x, "Wrong startWidth in filtering");
+  CHECK(height % step_y, "Wrong endHeight in filtering");
+  CHECK(width % step_x, "Wrong endWidth in filtering");
+
+  const Pel *src = srcBuffer.buf + blk.y * srcStride + blk.x;
+  Pel *      dst = dstBuffer.buf + blkDst.y * dstStride + blkDst.x;
+
+  const __m128i offset = _mm_set1_epi32(round);
+  const __m128i min = _mm_set1_epi32(clpRng.min);
+  const __m128i max = _mm_set1_epi32(clpRng.max);
+  const __m128i zeros = _mm_setzero_si128();
+
+  __m128i params[2][3];   // [0][k]: coefficients 2k and 2k+1, [1][k]: clipping values 2k and 2k+1, each laid out as {p0, p1, p0, p1}
+  __m128i fs = _mm_lddqu_si128((__m128i *) filterSet);   // loads eight 16-bit coefficients; only the first six are used below
+  params[0][0] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(fs, 0x00));
+  params[0][1] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(fs, 0x55));
+  params[0][2] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(fs, 0xaa));
+  __m128i fcLo = _mm_lddqu_si128((__m128i *) fClipSet);   // first 16 bytes of the clipping table
+  __m128i fcHi = _mm_loadl_epi64((__m128i *) (fClipSet + 4));   // next 8 bytes of the clipping table
+  params[1][0] = _mm_shuffle_epi32(fcLo, 0x44);
+  params[1][1] = _mm_shuffle_epi32(fcLo, 0xee);
+  params[1][2] = _mm_shuffle_epi32(fcHi, 0x44);
+
+  for (size_t i = 0; i < height; i += step_y)
+  {
+    for (size_t j = 0; j < width; j += step_x)
+    {
+      for (size_t ii = 0; ii < step_y; ii++)
+      {
+        const Pel *img0, *img1, *img2, *img3, *img4;
+
+        img0 = src + j + ii * srcStride;   // current row; img1/img3 are rows +1/+2, img2/img4 are rows -1/-2
+        img1 = img0 + srcStride;
+        img2 = img0 - srcStride;
+        img3 = img1 + srcStride;
+        img4 = img2 - srcStride;
+
+        const int yVb = (blkDst.y + i + ii) & (vbCTUHeight - 1);   // row offset inside the CTU; the mask is a modulo because vbCTUHeight is a power of two
+        if (yVb < vbPos && (yVb >= vbPos - 2))   // above
+        {   // clamp the row pointers symmetrically so that no row at or below vbPos is read
+          img1 = (yVb == vbPos - 1) ? img0 : img1;
+          img3 = (yVb >= vbPos - 2) ? img1 : img3;
+
+          img2 = (yVb == vbPos - 1) ? img0 : img2;
+          img4 = (yVb >= vbPos - 2) ? img2 : img4;
+        }
+        else if (yVb >= vbPos && (yVb <= vbPos + 1))   // bottom
+        {   // same clamping mirrored, so that no row above vbPos is read
+          img2 = (yVb == vbPos) ? img0 : img2;
+          img4 = (yVb <= vbPos + 1) ? img2 : img4;
+
+          img1 = (yVb == vbPos) ? img0 : img1;
+          img3 = (yVb <= vbPos + 1) ? img1 : img3;
+        }
+        __m128i cur = _mm_lddqu_si128((const __m128i *) img0);   // the four centre samples
+        __m128i accum = offset;   // accumulator starts at the rounding term
+
+        auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3) {   // handles one coefficient pair; this i is the pair index and shadows the row-loop i; ptr0/ptr1 use p0, ptr2/ptr3 use p1
+          const __m128i val00 = _mm_sub_epi32(_mm_lddqu_si128((const __m128i *) ptr0), cur);   // neighbour minus centre
+          const __m128i val10 = _mm_sub_epi32(_mm_lddqu_si128((const __m128i *) ptr2), cur);
+          const __m128i val01 = _mm_sub_epi32(_mm_lddqu_si128((const __m128i *) ptr1), cur);
+          const __m128i val11 = _mm_sub_epi32(_mm_lddqu_si128((const __m128i *) ptr3), cur);
+          __m128i val01A = _mm_unpacklo_epi32(val00, val10);   // interleave so lanes alternate p0-tap / p1-tap, matching the {p0, p1, p0, p1} parameter layout
+          __m128i val01B = _mm_unpackhi_epi32(val00, val10);
+          __m128i val01C = _mm_unpacklo_epi32(val01, val11);
+          __m128i val01D = _mm_unpackhi_epi32(val01, val11);
+
+          __m128i limit01A = params[1][i];
+
+          val01A = _mm_min_epi32(val01A, limit01A);   // clip each difference to at most +limit ...
+          val01B = _mm_min_epi32(val01B, limit01A);
+          val01C = _mm_min_epi32(val01C, limit01A);
+          val01D = _mm_min_epi32(val01D, limit01A);
+
+          limit01A = _mm_sub_epi32(zeros, limit01A);
+
+          val01A = _mm_max_epi32(val01A, limit01A);   // ... and at least -limit
+          val01B = _mm_max_epi32(val01B, limit01A);
+          val01C = _mm_max_epi32(val01C, limit01A);
+          val01D = _mm_max_epi32(val01D, limit01A);
+
+          val01A = _mm_add_epi32(val01A, val01C);   // the two taps that share a coefficient are summed before the multiply
+          val01B = _mm_add_epi32(val01B, val01D);
+
+          __m128i coeff01 = params[0][i];
+
+          val01A = _mm_mullo_epi32(val01A, coeff01);
+          val01B = _mm_mullo_epi32(val01B, coeff01);
+
+          accum = _mm_add_epi32(accum, _mm_hadd_epi32(val01A, val01B));   // hadd adds the p0 and p1 products of each sample, giving one sum per sample in sample order
+        };
+
+        process2coeffs(0, img3 + 0, img4 + 0, img1 + 1, img2 - 1);
+        process2coeffs(1, img1 + 0, img2 + 0, img1 - 1, img2 + 1);
+        process2coeffs(2, img0 + 2, img0 - 2, img0 + 1, img0 - 1);
+
+        bool isNearVBabove = yVb < vbPos && (yVb >= vbPos - 1);   // true only for yVb == vbPos - 1
+        bool isNearVBbelow = yVb >= vbPos && (yVb <= vbPos);   // true only for yVb == vbPos
+        if (!(isNearVBabove || isNearVBbelow))
+        {
+          accum = _mm_srai_epi32(accum, shift);
+        }
+        else
+        {   // the two rows adjacent to vbPos are shifted by three more bits
+          accum = _mm_srai_epi32(_mm_add_epi32(accum, offset1), shift + 3);
+        }
+        accum = _mm_add_epi32(accum, cur);   // add the correction to the centre samples
+        accum = _mm_min_epi32(max, _mm_max_epi32(accum, min));   // clip to [clpRng.min, clpRng.max]
+
+        _mm_storeu_si128((__m128i *) (dst + ii * dstStride + j), accum);
+      }
+    }
+
+    src += srcStride * step_y;
+    dst += dstStride * step_y;
+  }
+}
+
+static void simdDeriveClassificationBlk_HBD(AlfClassifier **classifier, int **laplacian[NUM_DIRECTIONS],
+  const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const int shift,
+  const int vbCTUHeight, int vbPos)
+{   // ALF block classification with 128-bit SIMD on 32-bit samples: derives one classIdx/transposeIdx pair per 4x4 samples of blk and writes it to classifier at blkDst. laplacian is not used.
+  CHECK((blk.height & 7) != 0, "Block height must be a multiple of 8");
+  CHECK((blk.width & 7) != 0, "Block width must be a multiple of 8");
+  CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2");
+
+  const size_t imgStride = srcLuma.stride;
+  const Pel *  srcExt = srcLuma.buf;
+
+  const int imgHExtended = blk.height + 4;
+  const int imgWExtended = blk.width + 4;
+
+  const int posX = blk.pos().x;
+  const int posY = blk.pos().y;
+
+  // 18x40 array
+  uint32_t colSums[(AdaptiveLoopFilter::m_CLASSIFICATION_BLK_SIZE + 4) >> 1]
+    [AdaptiveLoopFilter::m_CLASSIFICATION_BLK_SIZE + 8];
+
+  for (int i = 0; i < imgHExtended; i += 2)   // pass 1: one colSums row per two source rows, holding vertical/horizontal/diagonal second-difference magnitudes
+  {
+    const size_t offset = (i + posY - 3) * imgStride + posX - 3;   // NOTE(review): evaluated in size_t, so a negative row term relies on unsigned wrap-around; confirm against the buffer margins
+
+    const Pel *imgY0 = &srcExt[offset];
+    const Pel *imgY1 = &srcExt[offset + imgStride];
+    const Pel *imgY2 = &srcExt[offset + imgStride * 2];
+    const Pel *imgY3 = &srcExt[offset + imgStride * 3];
+
+    // pixel padding for gradient calculation
+    int pos = blkDst.pos().y - 2 + i;
+    int posInCTU = pos & (vbCTUHeight - 1);   // row offset inside the CTU (vbCTUHeight is a power of two)
+    if (pos > 0 && posInCTU == vbPos - 2)
+    {
+      imgY3 = imgY2;   // reuse the third row in place of the fourth
+    }
+    else if (pos > 0 && posInCTU == vbPos)
+    {
+      imgY0 = imgY1;   // reuse the second row in place of the first
+    }
+
+    __m128i prev_hv = _mm_setzero_si128();  __m128i prev_di = _mm_setzero_si128();
+
+    for (int j = 0; j < imgWExtended; j += 8)
+    {
+      const __m128i x0_lo = _mm_lddqu_si128((const __m128i *) (imgY0 + j));   // x0..x3: rows 0..3 at column offsets j and j + 4
+      const __m128i x0_hi = _mm_lddqu_si128((const __m128i *) (imgY0 + j + 4));
+      const __m128i x1_lo = _mm_lddqu_si128((const __m128i *) (imgY1 + j));
+      const __m128i x1_hi = _mm_lddqu_si128((const __m128i *) (imgY1 + j + 4));
+      const __m128i x2_lo = _mm_lddqu_si128((const __m128i *) (imgY2 + j));
+      const __m128i x2_hi = _mm_lddqu_si128((const __m128i *) (imgY2 + j + 4));
+      const __m128i x3_lo = _mm_lddqu_si128((const __m128i *) (imgY3 + j));
+      const __m128i x3_hi = _mm_lddqu_si128((const __m128i *) (imgY3 + j + 4));
+
+      const __m128i x4_lo = _mm_lddqu_si128((const __m128i *) (imgY0 + j + 2));   // x4..x7: the same rows two columns further right
+      const __m128i x4_hi = _mm_lddqu_si128((const __m128i *) (imgY0 + j + 6));
+      const __m128i x5_lo = _mm_lddqu_si128((const __m128i *) (imgY1 + j + 2));
+      const __m128i x5_hi = _mm_lddqu_si128((const __m128i *) (imgY1 + j + 6));
+      const __m128i x6_lo = _mm_lddqu_si128((const __m128i *) (imgY2 + j + 2));
+      const __m128i x6_hi = _mm_lddqu_si128((const __m128i *) (imgY2 + j + 6));
+      const __m128i x7_lo = _mm_lddqu_si128((const __m128i *) (imgY3 + j + 2));
+      const __m128i x7_hi = _mm_lddqu_si128((const __m128i *) (imgY3 + j + 6));
+
+      const __m128i nw_lo = _mm_blend_epi16(x0_lo, x1_lo, 0xcc);   // mask 0xcc takes 32-bit elements 1 and 3 from the second operand, 0x33 takes elements 0 and 2
+      const __m128i nw_hi = _mm_blend_epi16(x0_hi, x1_hi, 0xcc);
+      const __m128i n_lo = _mm_blend_epi16(x0_lo, x5_lo, 0x33);
+      const __m128i n_hi = _mm_blend_epi16(x0_hi, x5_hi, 0x33);
+      const __m128i ne_lo = _mm_blend_epi16(x4_lo, x5_lo, 0xcc);
+      const __m128i ne_hi = _mm_blend_epi16(x4_hi, x5_hi, 0xcc);
+      const __m128i w_lo = _mm_blend_epi16(x1_lo, x2_lo, 0xcc);
+      const __m128i w_hi = _mm_blend_epi16(x1_hi, x2_hi, 0xcc);
+      const __m128i e_lo = _mm_blend_epi16(x5_lo, x6_lo, 0xcc);
+      const __m128i e_hi = _mm_blend_epi16(x5_hi, x6_hi, 0xcc);
+      const __m128i sw_lo = _mm_blend_epi16(x2_lo, x3_lo, 0xcc);
+      const __m128i sw_hi = _mm_blend_epi16(x2_hi, x3_hi, 0xcc);
+      const __m128i s_lo = _mm_blend_epi16(x2_lo, x7_lo, 0x33);
+      const __m128i s_hi = _mm_blend_epi16(x2_hi, x7_hi, 0x33);
+      const __m128i se_lo = _mm_blend_epi16(x6_lo, x7_lo, 0xcc);
+      const __m128i se_hi = _mm_blend_epi16(x6_hi, x7_hi, 0xcc);
+
+      __m128i c_lo = _mm_slli_epi32(_mm_blend_epi16(x1_lo, x6_lo, 0x33), 1);   // twice the centre samples
+      __m128i c_hi = _mm_slli_epi32(_mm_blend_epi16(x1_hi, x6_hi, 0x33), 1);
+      __m128i d_lo = _mm_shuffle_epi32(c_lo, 0xb1);   // c with neighbouring 32-bit elements swapped pairwise
+      __m128i d_hi = _mm_shuffle_epi32(c_hi, 0xb1);
+
+      const __m128i ver_lo = _mm_abs_epi32(_mm_sub_epi32(c_lo, _mm_add_epi32(n_lo, s_lo)));   // |2 * centre - (n + s)|; the next lines do the same for w/e and the two diagonals
+      const __m128i ver_hi = _mm_abs_epi32(_mm_sub_epi32(c_hi, _mm_add_epi32(n_hi, s_hi)));
+      const __m128i hor_lo = _mm_abs_epi32(_mm_sub_epi32(d_lo, _mm_add_epi32(w_lo, e_lo)));
+      const __m128i hor_hi = _mm_abs_epi32(_mm_sub_epi32(d_hi, _mm_add_epi32(w_hi, e_hi)));
+      const __m128i di0_lo = _mm_abs_epi32(_mm_sub_epi32(d_lo, _mm_add_epi32(nw_lo, se_lo)));
+      const __m128i di0_hi = _mm_abs_epi32(_mm_sub_epi32(d_hi, _mm_add_epi32(nw_hi, se_hi)));
+      const __m128i di1_lo = _mm_abs_epi32(_mm_sub_epi32(d_lo, _mm_add_epi32(ne_lo, sw_lo)));
+      const __m128i di1_hi = _mm_abs_epi32(_mm_sub_epi32(d_hi, _mm_add_epi32(ne_hi, sw_hi)));
+
+      const __m128i v = _mm_hadd_epi32(ver_lo, ver_hi);   // pairwise horizontal sums
+      const __m128i h = _mm_hadd_epi32(hor_lo, hor_hi);
+      const __m128i di0 = _mm_hadd_epi32(di0_lo, di0_hi);
+      const __m128i di1 = _mm_hadd_epi32(di1_lo, di1_hi);
+      const __m128i all_hv = _mm_hadd_epi32(v, h);
+      const __m128i all_di = _mm_hadd_epi32(di0, di1);
+
+      const __m128i t_hv = _mm_blend_epi16(all_hv, prev_hv, 0xcc);   // elements 1 and 3 come from the previous 8-column step
+      const __m128i t_di = _mm_blend_epi16(all_di, prev_di, 0xcc);
+
+      const __m128i cmb0 = _mm_hadd_epi32(t_hv, t_di);
+      const __m128i cmb1 = _mm_hadd_epi32(all_hv, all_di);
+      _mm_storeu_si128((__m128i *) &colSums[i >> 1][j], cmb0);
+      _mm_storeu_si128((__m128i *) &colSums[i >> 1][j + 4], cmb1);
+
+      prev_hv = all_hv;
+      prev_di = all_di;
+    }
+  }
+
+  const __m128i zeros = _mm_setzero_si128();
+  for (int i = 0; i < (blk.height >> 1); i += 4)   // pass 2: each iteration classifies 8 sample rows (two bands of 4) and 8 columns
+  {
+    for (int j = 0; j < blk.width; j += 8)
+    {
+      __m128i x0l, x1l, x2l, x3l, x4l, x5l, x6l, x7l;
+      __m128i x0h, x1h, x2h, x3h, x4h, x5h, x6h, x7h;
+
+      const uint32_t z = (2 * i + blkDst.pos().y) & (vbCTUHeight - 1);   // CTU-relative start row of the upper 4-row band
+      const uint32_t z2 = (2 * i + 4 + blkDst.pos().y) & (vbCTUHeight - 1);   // CTU-relative start row of the lower 4-row band
+
+      x0l = (z == vbPos) ? zeros : _mm_lddqu_si128((__m128i *) &colSums[i + 0][j + 4]);   // first colSums row is dropped when the band starts at vbPos
+      x0h = (z == vbPos) ? zeros : _mm_lddqu_si128((__m128i *) &colSums[i + 0][j + 8]);
+      x1l = _mm_lddqu_si128((__m128i *) &colSums[i + 1][j + 4]);
+      x1h = _mm_lddqu_si128((__m128i *) &colSums[i + 1][j + 8]);
+      x2l = _mm_lddqu_si128((__m128i *) &colSums[i + 2][j + 4]);
+      x2h = _mm_lddqu_si128((__m128i *) &colSums[i + 2][j + 8]);
+      x3l = (z == vbPos - 4) ? zeros : _mm_lddqu_si128((__m128i *) &colSums[i + 3][j + 4]);   // last colSums row is dropped when the band starts at vbPos - 4
+      x3h = (z == vbPos - 4) ? zeros : _mm_lddqu_si128((__m128i *) &colSums[i + 3][j + 8]);
+
+      x4l = (z2 == vbPos) ? zeros : _mm_lddqu_si128((__m128i *) &colSums[i + 2][j + 4]);   // same four-row window for the lower band, two colSums rows further down
+      x4h = (z2 == vbPos) ? zeros : _mm_lddqu_si128((__m128i *) &colSums[i + 2][j + 8]);
+      x5l = _mm_lddqu_si128((__m128i *) &colSums[i + 3][j + 4]);
+      x5h = _mm_lddqu_si128((__m128i *) &colSums[i + 3][j + 8]);
+      x6l = _mm_lddqu_si128((__m128i *) &colSums[i + 4][j + 4]);
+      x6h = _mm_lddqu_si128((__m128i *) &colSums[i + 4][j + 8]);
+      x7l = (z2 == vbPos - 4) ? zeros : _mm_lddqu_si128((__m128i *) &colSums[i + 5][j + 4]);
+      x7h = (z2 == vbPos - 4) ? zeros : _mm_lddqu_si128((__m128i *) &colSums[i + 5][j + 8]);
+
+      x0l = _mm_add_epi32(x0l, x1l);   // sum the four colSums rows of each band
+      x2l = _mm_add_epi32(x2l, x3l);
+      x4l = _mm_add_epi32(x4l, x5l);
+      x6l = _mm_add_epi32(x6l, x7l);
+      x0h = _mm_add_epi32(x0h, x1h);
+      x2h = _mm_add_epi32(x2h, x3h);
+      x4h = _mm_add_epi32(x4h, x5h);
+      x6h = _mm_add_epi32(x6h, x7h);
+
+      x0l = _mm_add_epi32(x0l, x2l);
+      x4l = _mm_add_epi32(x4l, x6l);
+      x0h = _mm_add_epi32(x0h, x2h);
+      x4h = _mm_add_epi32(x4h, x6h);
+
+      x2l = _mm_unpacklo_epi32(x0l, x4l);   // transpose so that each of sumV/sumH/sumD0/sumD1 holds one value per 4x4 block
+      x2h = _mm_unpackhi_epi32(x0l, x4l);
+      x6l = _mm_unpacklo_epi32(x0h, x4h);
+      x6h = _mm_unpackhi_epi32(x0h, x4h);
+
+      __m128i sumV = _mm_unpacklo_epi32(x2l, x6l);
+      __m128i sumH = _mm_unpackhi_epi32(x2l, x6l);
+      __m128i sumD0 = _mm_unpacklo_epi32(x2h, x6h);
+      __m128i sumD1 = _mm_unpackhi_epi32(x2h, x6h);
+
+      __m128i tempAct = _mm_add_epi32(sumV, sumH);
+
+      const uint32_t scale = (z == vbPos - 4 || z == vbPos) ? 96 : 64;   // 96 instead of 64 for a band that dropped a colSums row above
+      const uint32_t scale2 = (z2 == vbPos - 4 || z2 == vbPos) ? 96 : 64;
+      __m128i activity = _mm_mullo_epi32(tempAct, _mm_unpacklo_epi64(_mm_set1_epi32(scale), _mm_set1_epi32(scale2)));   // elements 0,1 use scale, elements 2,3 use scale2
+      activity = _mm_srl_epi32(activity, _mm_cvtsi32_si128(shift));
+      activity = _mm_min_epi32(activity, _mm_set1_epi32(15));
+      __m128i classIdx = _mm_shuffle_epi8(_mm_setr_epi8(0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4), activity);   // 16-entry table lookup mapping activity 0..15 to 0..4
+
+      __m128i dirTempHVMinus1 = _mm_cmpgt_epi32(sumV, sumH);   // all-ones (-1) where sumV > sumH, else 0
+      __m128i hv1 = _mm_max_epi32(sumV, sumH);
+      __m128i hv0 = _mm_min_epi32(sumV, sumH);
+
+      __m128i dirTempDMinus1 = _mm_cmpgt_epi32(sumD0, sumD1);   // all-ones (-1) where sumD0 > sumD1, else 0
+      __m128i d1 = _mm_max_epi32(sumD0, sumD1);
+      __m128i d0 = _mm_min_epi32(sumD0, sumD1);
+
+      __m128i a = _mm_xor_si128(_mm_mullo_epi32(d1, hv0), _mm_set1_epi32(0x80000000));   // sign bit flipped so the signed compare below orders the products as unsigned values
+      __m128i b = _mm_xor_si128(_mm_mullo_epi32(hv1, d0), _mm_set1_epi32(0x80000000));
+      __m128i dirIdx = _mm_cmpgt_epi32(a, b);   // set where d1 * hv0 > hv1 * d0; selects the diagonal pair below
+      __m128i hvd1 = _mm_blendv_epi8(hv1, d1, dirIdx);
+      __m128i hvd0 = _mm_blendv_epi8(hv0, d0, dirIdx);
+
+      __m128i strength1 = _mm_cmpgt_epi32(hvd1, _mm_add_epi32(hvd0, hvd0));   // hvd1 > 2 * hvd0
+      __m128i strength2 = _mm_cmpgt_epi32(_mm_add_epi32(hvd1, hvd1), _mm_add_epi32(hvd0, _mm_slli_epi32(hvd0, 3)));   // 2 * hvd1 > 9 * hvd0
+      __m128i offset = _mm_and_si128(strength1, _mm_set1_epi32(5));
+      classIdx = _mm_add_epi32(classIdx, offset);   // +5 where strength1
+      classIdx = _mm_add_epi32(classIdx, _mm_and_si128(strength2, _mm_set1_epi32(5)));   // +5 where strength2
+      offset = _mm_andnot_si128(dirIdx, offset);
+      offset = _mm_add_epi32(offset, offset);
+      classIdx = _mm_add_epi32(classIdx, offset);   // +10 where strength1 is set and dirIdx is not
+
+      __m128i transposeIdx = _mm_set1_epi32(3);   // ends up as 3 - (sumV > sumH) - 2 * (sumD0 > sumD1), since the compare masks are 0 or -1
+      transposeIdx = _mm_add_epi32(transposeIdx, dirTempHVMinus1);
+      transposeIdx = _mm_add_epi32(transposeIdx, dirTempDMinus1);
+      transposeIdx = _mm_add_epi32(transposeIdx, dirTempDMinus1);
+
+      int yOffset = 2 * i + blkDst.pos().y;
+      int xOffset = j + blkDst.pos().x;
 
+      static_assert(sizeof(AlfClassifier) == 2, "ALFClassifier type must be 16 bits wide");
+      __m128i v;
+      v = _mm_unpacklo_epi8(classIdx, transposeIdx);   // byte pairs {classIdx, transposeIdx} for elements 0 and 1 (upper band)
+      v = _mm_shuffle_epi8(v, _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9));   // repeat each pair four times: eight 16-bit entries per row
+      _mm_storeu_si128((__m128i *) (classifier[yOffset] + xOffset), v);
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 1] + xOffset), v);
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 2] + xOffset), v);
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 3] + xOffset), v);
+      v = _mm_unpackhi_epi8(classIdx, transposeIdx);   // same for elements 2 and 3 (lower band)
+      v = _mm_shuffle_epi8(v, _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9));
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 4] + xOffset), v);
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 5] + xOffset), v);
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 6] + xOffset), v);
+      _mm_storeu_si128((__m128i *) (classifier[yOffset + 7] + xOffset), v);
+    }
+  }
+}
+
+#ifdef USE_AVX2
+static void simdFilter5x5Blk_HBD_AVX2(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
+  const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+  const Pel *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+  int vbPos)
+// 5x5 ALF for one chroma component with 256-bit SIMD: filters area blk of recSrc into area blkDst of recDst, eight 32-bit samples per vector. classifier and cs are not read here.
+{
+  CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2");
+  CHECK(!isChroma(compId), "ALF 5x5 filter is for chroma only");
+
+  const CPelBuf srcBuffer = recSrc.get(compId);
+  PelBuf        dstBuffer = recDst.get(compId);
+
+  const size_t srcStride = srcBuffer.stride;
+  const size_t dstStride = dstBuffer.stride;
+
+  constexpr int shift = AdaptiveLoopFilter::m_NUM_BITS - 1;
+  constexpr int round = 1 << (shift - 1);
+  const __m256i offset1 = _mm256_set1_epi32((1 << ((shift + 3) - 1)) - round);   // added on top of `round` to get the rounding term of the (shift + 3) right shift used next to vbPos
+
+  const size_t width = blk.width;
+  const size_t height = blk.height;
+
+  constexpr size_t step_x = 8;   // samples written per vector store
+  constexpr size_t step_y = 4;
+
+  CHECK(blk.y % step_y, "Wrong startHeight in filtering");
+  CHECK(blk.x % step_x, "Wrong startWidth in filtering");
+  CHECK(height % step_y, "Wrong endHeight in filtering");
+  CHECK(width % step_x, "Wrong endWidth in filtering");
+
+  const Pel *src = srcBuffer.buf + blk.y * srcStride + blk.x;
+  Pel *      dst = dstBuffer.buf + blkDst.y * dstStride + blkDst.x;
+
+  const __m256i offset = _mm256_set1_epi32(round);
+  const __m256i min = _mm256_set1_epi32(clpRng.min);
+  const __m256i max = _mm256_set1_epi32(clpRng.max);
+  const __m256i zeros = _mm256_setzero_si256();
+
+  __m128i params[2][3];   // [0][k]: coefficients 2k and 2k+1, [1][k]: clipping values 2k and 2k+1, each laid out as {p0, p1, p0, p1}
+  __m128i fs = _mm_lddqu_si128((__m128i *) filterSet);   // loads eight 16-bit coefficients; only the first six are used below
+  params[0][0] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(fs, 0x00));
+  params[0][1] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(fs, 0x55));
+  params[0][2] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(fs, 0xaa));
+  __m128i fcLo = _mm_lddqu_si128((__m128i *) fClipSet);   // first 16 bytes of the clipping table
+  __m128i fcHi = _mm_loadl_epi64((__m128i *) (fClipSet + 4));   // next 8 bytes of the clipping table
+  params[1][0] = _mm_shuffle_epi32(fcLo, 0x44);
+  params[1][1] = _mm_shuffle_epi32(fcLo, 0xee);
+  params[1][2] = _mm_shuffle_epi32(fcHi, 0x44);
+
+  for (size_t i = 0; i < height; i += step_y)
+  {
+    for (size_t j = 0; j < width; j += step_x)
+    {
+      for (size_t ii = 0; ii < step_y; ii++)
+      {
+        const Pel *img0, *img1, *img2, *img3, *img4;
+
+        img0 = src + j + ii * srcStride;   // current row; img1/img3 are rows +1/+2, img2/img4 are rows -1/-2
+        img1 = img0 + srcStride;
+        img2 = img0 - srcStride;
+        img3 = img1 + srcStride;
+        img4 = img2 - srcStride;
+
+        const int yVb = (blkDst.y + i + ii) & (vbCTUHeight - 1);   // row offset inside the CTU; the mask is a modulo because vbCTUHeight is a power of two
+        if (yVb < vbPos && (yVb >= vbPos - 2))   // above
+        {   // clamp the row pointers symmetrically so that no row at or below vbPos is read
+          img1 = (yVb == vbPos - 1) ? img0 : img1;
+          img3 = (yVb >= vbPos - 2) ? img1 : img3;
+
+          img2 = (yVb == vbPos - 1) ? img0 : img2;
+          img4 = (yVb >= vbPos - 2) ? img2 : img4;
+        }
+        else if (yVb >= vbPos && (yVb <= vbPos + 1))   // bottom
+        {   // same clamping mirrored, so that no row above vbPos is read
+          img2 = (yVb == vbPos) ? img0 : img2;
+          img4 = (yVb <= vbPos + 1) ? img2 : img4;
+
+          img1 = (yVb == vbPos) ? img0 : img1;
+          img3 = (yVb <= vbPos + 1) ? img1 : img3;
+        }
+        __m256i cur = _mm256_lddqu_si256((const __m256i *) img0);   // the eight centre samples
+        __m256i accum = offset;   // accumulator starts at the rounding term
+
+        auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3) {   // handles one coefficient pair; this i is the pair index and shadows the row-loop i; ptr0/ptr1 use p0, ptr2/ptr3 use p1
+          const __m256i val00 = _mm256_sub_epi32(_mm256_lddqu_si256((const __m256i *) ptr0), cur);   // neighbour minus centre
+          const __m256i val10 = _mm256_sub_epi32(_mm256_lddqu_si256((const __m256i *) ptr2), cur);
+          const __m256i val01 = _mm256_sub_epi32(_mm256_lddqu_si256((const __m256i *) ptr1), cur);
+          const __m256i val11 = _mm256_sub_epi32(_mm256_lddqu_si256((const __m256i *) ptr3), cur);
+          __m256i val01A = _mm256_unpacklo_epi32(val00, val10);   // interleave (per 128-bit half) so lanes alternate p0-tap / p1-tap, matching the {p0, p1, p0, p1} parameter layout
+          __m256i val01B = _mm256_unpackhi_epi32(val00, val10);
+          __m256i val01C = _mm256_unpacklo_epi32(val01, val11);
+          __m256i val01D = _mm256_unpackhi_epi32(val01, val11);
+
+          __m256i limit01A = _mm256_inserti128_si256(_mm256_castsi128_si256(params[1][i]), params[1][i], 1);   // same clipping pair in both 128-bit halves
+
+          val01A = _mm256_min_epi32(val01A, limit01A);   // clip each difference to at most +limit ...
+          val01B = _mm256_min_epi32(val01B, limit01A);
+          val01C = _mm256_min_epi32(val01C, limit01A);
+          val01D = _mm256_min_epi32(val01D, limit01A);
+
+          limit01A = _mm256_sub_epi32(zeros, limit01A);
+
+          val01A = _mm256_max_epi32(val01A, limit01A);   // ... and at least -limit
+          val01B = _mm256_max_epi32(val01B, limit01A);
+          val01C = _mm256_max_epi32(val01C, limit01A);
+          val01D = _mm256_max_epi32(val01D, limit01A);
+
+          val01A = _mm256_add_epi32(val01A, val01C);   // the two taps that share a coefficient are summed before the multiply
+          val01B = _mm256_add_epi32(val01B, val01D);
+
+          __m256i coeff01 = _mm256_inserti128_si256(_mm256_castsi128_si256(params[0][i]), params[0][i], 1);   // same coefficient pair in both 128-bit halves
+
+          val01A = _mm256_mullo_epi32(val01A, coeff01);
+          val01B = _mm256_mullo_epi32(val01B, coeff01);
+
+          accum = _mm256_add_epi32(accum, _mm256_hadd_epi32(val01A, val01B));   // hadd (per 128-bit half) adds the p0 and p1 products of each sample, giving one sum per sample in sample order
+        };
+
+        process2coeffs(0, img3 + 0, img4 + 0, img1 + 1, img2 - 1);
+        process2coeffs(1, img1 + 0, img2 + 0, img1 - 1, img2 + 1);
+        process2coeffs(2, img0 + 2, img0 - 2, img0 + 1, img0 - 1);
+
+        bool isNearVBabove = yVb < vbPos && (yVb >= vbPos - 1);   // true only for yVb == vbPos - 1
+        bool isNearVBbelow = yVb >= vbPos && (yVb <= vbPos);   // true only for yVb == vbPos
+        if (!(isNearVBabove || isNearVBbelow))
+        {
+          accum = _mm256_srai_epi32(accum, shift);
+        }
+        else
+        {   // the two rows adjacent to vbPos are shifted by three more bits
+          accum = _mm256_srai_epi32(_mm256_add_epi32(accum, offset1), shift + 3);
+        }
+        accum = _mm256_add_epi32(accum, cur);   // add the correction to the centre samples
+        accum = _mm256_min_epi32(max, _mm256_max_epi32(accum, min));   // clip to [clpRng.min, clpRng.max]
+
+        _mm256_storeu_si256((__m256i *) (dst + ii * dstStride + j), accum);   // unaligned store: nothing here guarantees that dst + ii * dstStride + j is 32-byte aligned
+      }
+    }
+
+    src += srcStride * step_y;
+    dst += dstStride * step_y;
+  }
+}
+#endif
+#else
 template<X86_VEXT vext>
 static void simdDeriveClassificationBlk(AlfClassifier **classifier, int **laplacian[NUM_DIRECTIONS],
                                         const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const int shift,
@@ -449,7 +965,7 @@ static void simdFilter5x5Blk(AlfClassifier **classifier, const PelUnitBuf &recDs
     dst += dstStride * STEP_Y;
   }
 }
-
+#endif
 constexpr uint16_t sh(int x)
 {
   return 0x0202 * (x & 7) + 0x0100 + 0x1010 * (x & 8);
@@ -473,7 +989,488 @@ static const uint16_t shuffleTab[4][2][8] = {
     { sh(1), sh(0), sh(2), sh(6), sh(12), sh(13), sh(14), sh(15) },
   },
 };
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+constexpr uint32_t shuffle32(int x)   // byte-shuffle control word for 32-bit element (x & 3): bytes 4*(x & 3) + {0, 1, 2, 3}, with 0x10 added to every byte when bit 2 of x is set and 0x20 when bit 3 is set
+{
+  return 0x03020100 + (x & 3) * 0x04040404 + ((x & 12) >> 2) * 0x10101010;
+}
+
+static const uint32_t shuffleTab32[4][3][4] = {   // byte-shuffle control words built with shuffle32(); the first index is used as transposeIdx, and the three rows of an entry together list 12 source positions 0..11
+  {   // entry 0: identity order 0..11
+    { shuffle32(0), shuffle32(1), shuffle32(2),  shuffle32(3)  },
+    { shuffle32(4), shuffle32(5), shuffle32(6),  shuffle32(7)  },
+    { shuffle32(8), shuffle32(9), shuffle32(10), shuffle32(11) },
+  },
+  {   // entry 1: permutation of 0..11
+    { shuffle32(9), shuffle32(4), shuffle32(10), shuffle32(8) },
+    { shuffle32(1), shuffle32(5), shuffle32(11), shuffle32(7) },
+    { shuffle32(3), shuffle32(0), shuffle32(2),  shuffle32(6) },
+  },
+  {   // entry 2: permutation of 0..11
+    { shuffle32(0), shuffle32(3), shuffle32(2),  shuffle32(1)  },
+    { shuffle32(8), shuffle32(7), shuffle32(6),  shuffle32(5)  },
+    { shuffle32(4), shuffle32(9), shuffle32(10), shuffle32(11) },
+  },
+  {   // entry 3: permutation of 0..11
+    { shuffle32(9), shuffle32(8), shuffle32(10), shuffle32(4) },
+    { shuffle32(3), shuffle32(7), shuffle32(11), shuffle32(5) },
+    { shuffle32(1), shuffle32(0), shuffle32(2),  shuffle32(6) },
+  },
+};
 
+static void simdFilter7x7Blk_HBD(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
+  const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+  const Pel *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+  int vbPos)
+{
+  CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2");
+  CHECK(isChroma(compId), "7x7 ALF filter is meant for luma only");
+
+  const CPelBuf srcBuffer = recSrc.get(compId);
+  PelBuf        dstBuffer = recDst.get(compId);
+
+  const size_t srcStride = srcBuffer.stride;
+  const size_t dstStride = dstBuffer.stride;
+
+  constexpr int shift = AdaptiveLoopFilter::m_NUM_BITS - 1;
+  constexpr int round = 1 << (shift - 1);
+
+  const size_t width = blk.width;
+  const size_t height = blk.height;
+
+  constexpr size_t step_x = 4;
+  constexpr size_t step_y = 4;
+
+  CHECK(blk.y % step_y, "Wrong startHeight in filtering");
+  CHECK(blk.x % step_x, "Wrong startWidth in filtering");
+  CHECK(height % step_y, "Wrong endHeight in filtering");
+  CHECK(width % step_x, "Wrong endWidth in filtering");
+
+  const Pel *src = srcBuffer.buf + blk.y * srcStride + blk.x;
+  Pel *      dst = dstBuffer.buf + blkDst.y * dstStride + blkDst.x;
+
+  const __m128i offset = _mm_set1_epi32(round);
+  const __m128i offset1 = _mm_set1_epi32((1 << ((shift + 3) - 1)) - round);
+  const __m128i min = _mm_set1_epi32(clpRng.min);
+  const __m128i max = _mm_set1_epi32(clpRng.max);
+  const __m128i zeros = _mm_setzero_si128();
+
+  const __m128i cmp1 = _mm_set1_epi8((char)0x0f);
+  const __m128i cmp2 = _mm_set1_epi8((char)0xf0);
+  const __m128i mask1 = _mm_set1_epi8((char)0x10);
+  const __m128i mask2 = _mm_set1_epi8((char)0x20);
+
+  for (size_t i = 0; i < height; i += step_y)  // + 4
+  {
+    const AlfClassifier *pClass = classifier[blkDst.y + i] + blkDst.x;
+
+    for (size_t j = 0; j < width; j += step_x)  // + 4
+    {
+      __m128i params[2][6];
+
+      const AlfClassifier &cl = pClass[j];
+
+      const int transposeIdx = cl.transposeIdx;
+      const int classIdx = cl.classIdx;
+
+      __m128i rawCoeff0, rawCoeff1;
+      rawCoeff0 = _mm_lddqu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF));
+      rawCoeff1 = _mm_loadl_epi64((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 8));
+
+      const __m128i s0 = _mm_lddqu_si128((const __m128i *) shuffleTab[transposeIdx][0]);
+      const __m128i s1 = _mm_xor_si128(s0, _mm_set1_epi8((char)0x80));
+      const __m128i s2 = _mm_lddqu_si128((const __m128i *) shuffleTab[transposeIdx][1]);
+      const __m128i s3 = _mm_xor_si128(s2, _mm_set1_epi8((char)0x80));
+
+      const __m128i rawCoeffLo = _mm_or_si128(_mm_shuffle_epi8(rawCoeff0, s0), _mm_shuffle_epi8(rawCoeff1, s1));
+      const __m128i rawCoeffHi = _mm_or_si128(_mm_shuffle_epi8(rawCoeff0, s2), _mm_shuffle_epi8(rawCoeff1, s3));
+
+      params[0][0] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(rawCoeffLo, 0x00));
+      params[0][1] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(rawCoeffLo, 0x55));
+      params[0][2] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(rawCoeffLo, 0xaa));
+      params[0][3] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(rawCoeffLo, 0xff));
+      params[0][4] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(rawCoeffHi, 0x00));
+      params[0][5] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(rawCoeffHi, 0x55));
+
+      __m128i rawClip0, rawClip1, rawClip2;
+      rawClip0 = _mm_lddqu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF));
+      rawClip1 = _mm_lddqu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 4));
+      rawClip2 = _mm_lddqu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 8));
+
+      __m128i mask;
+      __m128i s00, s01, s02, s10, s11, s12, s20, s21, s22;
+      __m128i src0 = _mm_lddqu_si128((const __m128i *) shuffleTab32[transposeIdx][0]);
+      mask = _mm_and_si128(_mm_cmpgt_epi8(src0, cmp1), cmp2);
+      s00 = _mm_or_si128(src0, mask);
+      s01 = _mm_xor_si128(src0, mask1);
+      mask = _mm_and_si128(_mm_cmpgt_epi8(s01, cmp1), cmp2);
+      s01 = _mm_or_si128(s01, mask);
+      s02 = _mm_xor_si128(src0, mask2);
+      mask = _mm_and_si128(_mm_cmpgt_epi8(s02, cmp1), cmp2);
+      s02 = _mm_or_si128(s02, mask);
+
+      __m128i src1 = _mm_lddqu_si128((const __m128i *) shuffleTab32[transposeIdx][1]);
+      mask = _mm_and_si128(_mm_cmpgt_epi8(src1, cmp1), cmp2);
+      s10 = _mm_or_si128(src1, mask);
+      s11 = _mm_xor_si128(src1, mask1);
+      mask = _mm_and_si128(_mm_cmpgt_epi8(s11, cmp1), cmp2);
+      s11 = _mm_or_si128(s11, mask);
+      s12 = _mm_xor_si128(src1, mask2);
+      mask = _mm_and_si128(_mm_cmpgt_epi8(s12, cmp1), cmp2);
+      s12 = _mm_or_si128(s12, mask);
+
+      __m128i src2 = _mm_lddqu_si128((const __m128i *) shuffleTab32[transposeIdx][2]);
+      mask = _mm_and_si128(_mm_cmpgt_epi8(src2, cmp1), cmp2);
+      s20 = _mm_or_si128(src2, mask);
+      s21 = _mm_xor_si128(src2, mask1);
+      mask = _mm_and_si128(_mm_cmpgt_epi8(s21, cmp1), cmp2);
+      s21 = _mm_or_si128(s21, mask);
+      s22 = _mm_xor_si128(src2, mask2);
+      mask = _mm_and_si128(_mm_cmpgt_epi8(s22, cmp1), cmp2);
+      s22 = _mm_or_si128(s22, mask);
+
+      const __m128i rawClipLo = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(rawClip0, s00), _mm_shuffle_epi8(rawClip1, s01)), _mm_shuffle_epi8(rawClip2, s02));
+      const __m128i rawClipMl = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(rawClip0, s10), _mm_shuffle_epi8(rawClip1, s11)), _mm_shuffle_epi8(rawClip2, s12));
+      const __m128i rawClipHi = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(rawClip0, s20), _mm_shuffle_epi8(rawClip1, s21)), _mm_shuffle_epi8(rawClip2, s22));
+
+      params[1][0] = _mm_shuffle_epi32(rawClipLo, 0x44);
+      params[1][1] = _mm_shuffle_epi32(rawClipLo, 0xee);
+      params[1][2] = _mm_shuffle_epi32(rawClipMl, 0x44);
+      params[1][3] = _mm_shuffle_epi32(rawClipMl, 0xee);
+      params[1][4] = _mm_shuffle_epi32(rawClipHi, 0x44);
+      params[1][5] = _mm_shuffle_epi32(rawClipHi, 0xee);
+
+      for (size_t ii = 0; ii < step_y; ii++)
+      {
+        const Pel *img0, *img1, *img2, *img3, *img4, *img5, *img6;
+
+        img0 = src + j + ii * srcStride;
+        img1 = img0 + srcStride;
+        img2 = img0 - srcStride;
+        img3 = img1 + srcStride;
+        img4 = img2 - srcStride;
+        img5 = img3 + srcStride;
+        img6 = img4 - srcStride;
+
+        const int yVb = (blkDst.y + i + ii) & (vbCTUHeight - 1);
+        if (yVb < vbPos && (yVb >= vbPos - 4))   // above
+        {
+          img1 = (yVb == vbPos - 1) ? img0 : img1;
+          img3 = (yVb >= vbPos - 2) ? img1 : img3;
+          img5 = (yVb >= vbPos - 3) ? img3 : img5;
+
+          img2 = (yVb == vbPos - 1) ? img0 : img2;
+          img4 = (yVb >= vbPos - 2) ? img2 : img4;
+          img6 = (yVb >= vbPos - 3) ? img4 : img6;
+        }
+        else if (yVb >= vbPos && (yVb <= vbPos + 3))   // bottom
+        {
+          img2 = (yVb == vbPos) ? img0 : img2;
+          img4 = (yVb <= vbPos + 1) ? img2 : img4;
+          img6 = (yVb <= vbPos + 2) ? img4 : img6;
+
+          img1 = (yVb == vbPos) ? img0 : img1;
+          img3 = (yVb <= vbPos + 1) ? img1 : img3;
+          img5 = (yVb <= vbPos + 2) ? img3 : img5;
+        }
+        __m128i cur = _mm_lddqu_si128((const __m128i *) img0);
+        __m128i accum = offset;
+
+        auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3) {
+          const __m128i val00 = _mm_sub_epi32(_mm_lddqu_si128((const __m128i *) ptr0), cur);
+          const __m128i val10 = _mm_sub_epi32(_mm_lddqu_si128((const __m128i *) ptr2), cur);
+          const __m128i val01 = _mm_sub_epi32(_mm_lddqu_si128((const __m128i *) ptr1), cur);
+          const __m128i val11 = _mm_sub_epi32(_mm_lddqu_si128((const __m128i *) ptr3), cur);
+
+          __m128i val01A = _mm_unpacklo_epi32(val00, val10);
+          __m128i val01B = _mm_unpackhi_epi32(val00, val10);
+          __m128i val01C = _mm_unpacklo_epi32(val01, val11);
+          __m128i val01D = _mm_unpackhi_epi32(val01, val11);
+
+          __m128i limit01 = params[1][i];
+
+          val01A = _mm_min_epi32(val01A, limit01);
+          val01B = _mm_min_epi32(val01B, limit01);
+          val01C = _mm_min_epi32(val01C, limit01);
+          val01D = _mm_min_epi32(val01D, limit01);
+
+          limit01 = _mm_sub_epi32(zeros, limit01);
+
+          val01A = _mm_max_epi32(val01A, limit01);
+          val01B = _mm_max_epi32(val01B, limit01);
+          val01C = _mm_max_epi32(val01C, limit01);
+          val01D = _mm_max_epi32(val01D, limit01);
+
+          val01A = _mm_add_epi32(val01A, val01C);
+          val01B = _mm_add_epi32(val01B, val01D);
+
+          const __m128i coeff01 = params[0][i];
+
+          val01A = _mm_mullo_epi32(val01A, coeff01);
+          val01B = _mm_mullo_epi32(val01B, coeff01);
+
+          accum = _mm_add_epi32(accum, _mm_hadd_epi32(val01A, val01B));
+        };
+
+
+        process2coeffs(0, img5 + 0, img6 + 0, img3 + 1, img4 - 1);
+        process2coeffs(1, img3 + 0, img4 + 0, img3 - 1, img4 + 1);
+        process2coeffs(2, img1 + 2, img2 - 2, img1 + 1, img2 - 1);
+        process2coeffs(3, img1 + 0, img2 + 0, img1 - 1, img2 + 1);
+        process2coeffs(4, img1 - 2, img2 + 2, img0 + 3, img0 - 3);
+        process2coeffs(5, img0 + 2, img0 - 2, img0 + 1, img0 - 1);
+
+
+        bool isNearVBabove = yVb < vbPos && (yVb >= vbPos - 1);
+        bool isNearVBbelow = yVb >= vbPos && (yVb <= vbPos);
+        if (!(isNearVBabove || isNearVBbelow))
+        {
+          accum = _mm_srai_epi32(accum, shift);
+        }
+        else
+        {
+          accum = _mm_srai_epi32(_mm_add_epi32(accum, offset1), shift + 3);
+        }
+        accum = _mm_add_epi32(accum, cur);
+        accum = _mm_min_epi32(max, _mm_max_epi32(accum, min));
+
+        _mm_storeu_si128((__m128i *) (dst + ii * dstStride + j), accum);
+      }
+    }
+
+    src += srcStride * step_y;
+    dst += dstStride * step_y;
+  }
+}
+
+#ifdef USE_AVX2
+static void simdFilter7x7Blk_HBD_AVX2(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
+  const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+  const Pel *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+  int vbPos)
+{
+  CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2");
+  CHECK(isChroma(compId), "7x7 ALF filter is meant for luma only");
+  // AVX2 7x7 ALF filter: filters area blk of recSrc into area blkDst of recDst, 8 samples x 4 rows per inner step.
+  const CPelBuf srcBuffer = recSrc.get(compId);
+  PelBuf        dstBuffer = recDst.get(compId);
+
+  const size_t srcStride = srcBuffer.stride;
+  const size_t dstStride = dstBuffer.stride;
+
+  constexpr int shift = AdaptiveLoopFilter::m_NUM_BITS - 1;
+  constexpr int round = 1 << (shift - 1);   // rounding term for the final right shift by 'shift'
+
+  const size_t width = blk.width;
+  const size_t height = blk.height;
+
+  constexpr size_t step_x = 8;   // samples per iteration: one 256-bit vector of 32-bit lanes
+  constexpr size_t step_y = 4;   // rows per iteration; classifier data is fetched once per group of rows
+
+  CHECK(blk.y % step_y, "Wrong startHeight in filtering");
+  CHECK(blk.x % step_x, "Wrong startWidth in filtering");
+  CHECK(height % step_y, "Wrong endHeight in filtering");
+  CHECK(width % step_x, "Wrong endWidth in filtering");
+
+  const Pel *src = srcBuffer.buf + blk.y * srcStride + blk.x;
+  Pel *      dst = dstBuffer.buf + blkDst.y * dstStride + blkDst.x;
+  // Rounding offsets and clipping bounds, broadcast to all eight 32-bit lanes.
+  const __m256i offset = _mm256_set1_epi32(round);
+  const __m256i offset1 = _mm256_set1_epi32((1 << ((shift + 3) - 1)) - round);   // extra rounding when shifting by shift + 3
+  const __m256i min = _mm256_set1_epi32(clpRng.min);
+  const __m256i max = _mm256_set1_epi32(clpRng.max);
+  const __m256i zeros = _mm256_setzero_si256();
+  // Byte constants used below to turn the shuffleTab32 entries into pshufb control masks.
+  const __m128i cmp1 = _mm_set1_epi8((char)0x0f);
+  const __m128i cmp2 = _mm_set1_epi8((char)0xf0);
+  const __m128i mask1 = _mm_set1_epi8((char)0x10);
+  const __m128i mask2 = _mm_set1_epi8((char)0x20);
+
+  for (size_t i = 0; i < height; i += step_y)  // + 4
+  {
+    const AlfClassifier *pClass = classifier[blkDst.y + i] + blkDst.x;
+
+    for (size_t j = 0; j < width; j += step_x)  // + 8
+    {
+      __m128i params[2][2][6];   // [half k][0 = coefficients, 1 = clipping limits][coefficient pair]
+      // k = 0 uses the classifier at column j (low 128-bit lane), k = 1 the one at column j + 4 (high lane).
+      for (int k = 0; k < 2; k++)
+      {
+        const AlfClassifier &cl = pClass[j + (k << 2)];
+        const int transposeIdx = cl.transposeIdx;
+        const int classIdx = cl.classIdx;
+
+        __m128i rawCoeff0, rawCoeff1;
+        rawCoeff0 = _mm_lddqu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF));
+        rawCoeff1 = _mm_loadl_epi64((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 8));
+        // Reorder the 16-bit coefficients for this transposeIdx using the shuffleTab byte-shuffle masks.
+        const __m128i s0 = _mm_lddqu_si128((const __m128i *) shuffleTab[transposeIdx][0]);
+        const __m128i s1 = _mm_xor_si128(s0, _mm_set1_epi8((char)0x80));
+        const __m128i s2 = _mm_lddqu_si128((const __m128i *) shuffleTab[transposeIdx][1]);
+        const __m128i s3 = _mm_xor_si128(s2, _mm_set1_epi8((char)0x80));
+
+        const __m128i rawCoeffLo = _mm_or_si128(_mm_shuffle_epi8(rawCoeff0, s0), _mm_shuffle_epi8(rawCoeff1, s1));
+        const __m128i rawCoeffHi = _mm_or_si128(_mm_shuffle_epi8(rawCoeff0, s2), _mm_shuffle_epi8(rawCoeff1, s3));
+        // Broadcast one 32-bit element (two 16-bit coefficients) and sign-extend: each register holds a pair twice.
+        params[k][0][0] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(rawCoeffLo, 0x00));
+        params[k][0][1] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(rawCoeffLo, 0x55));
+        params[k][0][2] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(rawCoeffLo, 0xaa));
+        params[k][0][3] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(rawCoeffLo, 0xff));
+        params[k][0][4] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(rawCoeffHi, 0x00));
+        params[k][0][5] = _mm_cvtepi16_epi32(_mm_shuffle_epi32(rawCoeffHi, 0x55));
+
+        __m128i rawClip0, rawClip1, rawClip2;
+        rawClip0 = _mm_lddqu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF));
+        rawClip1 = _mm_lddqu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 4));
+        rawClip2 = _mm_lddqu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 8));
+        // Per source register, byte indices above 0x0f get 0xf0 OR-ed in so pshufb yields zero; results are OR-ed below.
+        __m128i mask;
+        __m128i s00, s01, s02, s10, s11, s12, s20, s21, s22;
+        __m128i src0 = _mm_lddqu_si128((const __m128i *) shuffleTab32[transposeIdx][0]);
+        mask = _mm_and_si128(_mm_cmpgt_epi8(src0, cmp1), cmp2);
+        s00 = _mm_or_si128(src0, mask);
+        s01 = _mm_xor_si128(src0, mask1);
+        mask = _mm_and_si128(_mm_cmpgt_epi8(s01, cmp1), cmp2);
+        s01 = _mm_or_si128(s01, mask);
+        s02 = _mm_xor_si128(src0, mask2);
+        mask = _mm_and_si128(_mm_cmpgt_epi8(s02, cmp1), cmp2);
+        s02 = _mm_or_si128(s02, mask);
+
+        __m128i src1 = _mm_lddqu_si128((const __m128i *) shuffleTab32[transposeIdx][1]);
+        mask = _mm_and_si128(_mm_cmpgt_epi8(src1, cmp1), cmp2);
+        s10 = _mm_or_si128(src1, mask);
+        s11 = _mm_xor_si128(src1, mask1);
+        mask = _mm_and_si128(_mm_cmpgt_epi8(s11, cmp1), cmp2);
+        s11 = _mm_or_si128(s11, mask);
+        s12 = _mm_xor_si128(src1, mask2);
+        mask = _mm_and_si128(_mm_cmpgt_epi8(s12, cmp1), cmp2);
+        s12 = _mm_or_si128(s12, mask);
+
+        __m128i src2 = _mm_lddqu_si128((const __m128i *) shuffleTab32[transposeIdx][2]);
+        mask = _mm_and_si128(_mm_cmpgt_epi8(src2, cmp1), cmp2);
+        s20 = _mm_or_si128(src2, mask);
+        s21 = _mm_xor_si128(src2, mask1);
+        mask = _mm_and_si128(_mm_cmpgt_epi8(s21, cmp1), cmp2);
+        s21 = _mm_or_si128(s21, mask);
+        s22 = _mm_xor_si128(src2, mask2);
+        mask = _mm_and_si128(_mm_cmpgt_epi8(s22, cmp1), cmp2);
+        s22 = _mm_or_si128(s22, mask);
+
+        const __m128i rawClipLo = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(rawClip0, s00), _mm_shuffle_epi8(rawClip1, s01)), _mm_shuffle_epi8(rawClip2, s02));
+        const __m128i rawClipMl = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(rawClip0, s10), _mm_shuffle_epi8(rawClip1, s11)), _mm_shuffle_epi8(rawClip2, s12));
+        const __m128i rawClipHi = _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(rawClip0, s20), _mm_shuffle_epi8(rawClip1, s21)), _mm_shuffle_epi8(rawClip2, s22));
+        // 0x44 / 0xee replicate the low / high 64-bit half: one pair of 32-bit clipping limits, repeated twice.
+        params[k][1][0] = _mm_shuffle_epi32(rawClipLo, 0x44);
+        params[k][1][1] = _mm_shuffle_epi32(rawClipLo, 0xee);
+        params[k][1][2] = _mm_shuffle_epi32(rawClipMl, 0x44);
+        params[k][1][3] = _mm_shuffle_epi32(rawClipMl, 0xee);
+        params[k][1][4] = _mm_shuffle_epi32(rawClipHi, 0x44);
+        params[k][1][5] = _mm_shuffle_epi32(rawClipHi, 0xee);
+      }
+
+      for (size_t ii = 0; ii < step_y; ii++)
+      {
+        const Pel *img0, *img1, *img2, *img3, *img4, *img5, *img6;
+
+        img0 = src + j + ii * srcStride;
+        img1 = img0 + srcStride;
+        img2 = img0 - srcStride;
+        img3 = img1 + srcStride;
+        img4 = img2 - srcStride;
+        img5 = img3 + srcStride;
+        img6 = img4 - srcStride;
+        // Within 4 rows of the virtual boundary, neighbour rows across it are replaced (symmetrically) by nearer rows.
+        const int yVb = (blkDst.y + i + ii) & (vbCTUHeight - 1);
+        if (yVb < vbPos && (yVb >= vbPos - 4))   // above
+        {
+          img1 = (yVb == vbPos - 1) ? img0 : img1;
+          img3 = (yVb >= vbPos - 2) ? img1 : img3;
+          img5 = (yVb >= vbPos - 3) ? img3 : img5;
+
+          img2 = (yVb == vbPos - 1) ? img0 : img2;
+          img4 = (yVb >= vbPos - 2) ? img2 : img4;
+          img6 = (yVb >= vbPos - 3) ? img4 : img6;
+        }
+        else if (yVb >= vbPos && (yVb <= vbPos + 3))   // bottom
+        {
+          img2 = (yVb == vbPos) ? img0 : img2;
+          img4 = (yVb <= vbPos + 1) ? img2 : img4;
+          img6 = (yVb <= vbPos + 2) ? img4 : img6;
+
+          img1 = (yVb == vbPos) ? img0 : img1;
+          img3 = (yVb <= vbPos + 1) ? img1 : img3;
+          img5 = (yVb <= vbPos + 2) ? img3 : img5;
+        }
+        __m256i cur = _mm256_lddqu_si256((const __m256i *) img0);
+        __m256i accum = offset;
+        // Adds coefficient pair i to accum: ptr0/ptr1 share the first coefficient, ptr2/ptr3 the second.
+        auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3) {
+          const __m256i val00 = _mm256_sub_epi32(_mm256_lddqu_si256((const __m256i *) ptr0), cur);
+          const __m256i val10 = _mm256_sub_epi32(_mm256_lddqu_si256((const __m256i *) ptr2), cur);
+          const __m256i val01 = _mm256_sub_epi32(_mm256_lddqu_si256((const __m256i *) ptr1), cur);
+          const __m256i val11 = _mm256_sub_epi32(_mm256_lddqu_si256((const __m256i *) ptr3), cur);
+
+          __m256i val01A = _mm256_unpacklo_epi32(val00, val10);
+          __m256i val01B = _mm256_unpackhi_epi32(val00, val10);
+          __m256i val01C = _mm256_unpacklo_epi32(val01, val11);
+          __m256i val01D = _mm256_unpackhi_epi32(val01, val11);
+
+          __m256i limit01 = _mm256_inserti128_si256(_mm256_castsi128_si256(params[0][1][i]), params[1][1][i], 1);
+
+          val01A = _mm256_min_epi32(val01A, limit01);
+          val01B = _mm256_min_epi32(val01B, limit01);
+          val01C = _mm256_min_epi32(val01C, limit01);
+          val01D = _mm256_min_epi32(val01D, limit01);
+
+          limit01 = _mm256_sub_epi32(zeros, limit01);   // negated limit for the lower clipping bound
+
+          val01A = _mm256_max_epi32(val01A, limit01);
+          val01B = _mm256_max_epi32(val01B, limit01);
+          val01C = _mm256_max_epi32(val01C, limit01);
+          val01D = _mm256_max_epi32(val01D, limit01);
+
+          val01A = _mm256_add_epi32(val01A, val01C);
+          val01B = _mm256_add_epi32(val01B, val01D);
+
+          const __m256i coeff01 = _mm256_inserti128_si256(_mm256_castsi128_si256(params[0][0][i]), params[1][0][i], 1);
+
+          val01A = _mm256_mullo_epi32(val01A, coeff01);
+          val01B = _mm256_mullo_epi32(val01B, coeff01);
+
+          accum = _mm256_add_epi32(accum, _mm256_hadd_epi32(val01A, val01B));   // hadd sums the two products per sample
+        };
+
+        process2coeffs(0, img5 + 0, img6 + 0, img3 + 1, img4 - 1);
+        process2coeffs(1, img3 + 0, img4 + 0, img3 - 1, img4 + 1);
+        process2coeffs(2, img1 + 2, img2 - 2, img1 + 1, img2 - 1);
+        process2coeffs(3, img1 + 0, img2 + 0, img1 - 1, img2 + 1);
+        process2coeffs(4, img1 - 2, img2 + 2, img0 + 3, img0 - 3);
+        process2coeffs(5, img0 + 2, img0 - 2, img0 + 1, img0 - 1);
+
+        // The two rows directly adjacent to the virtual boundary use a larger shift (shift + 3).
+        bool isNearVBabove = yVb < vbPos && (yVb >= vbPos - 1);
+        bool isNearVBbelow = yVb >= vbPos && (yVb <= vbPos);
+        if (!(isNearVBabove || isNearVBbelow))
+        {
+          accum = _mm256_srai_epi32(accum, shift);
+        }
+        else
+        {
+          accum = _mm256_srai_epi32(_mm256_add_epi32(accum, offset1), shift + 3);
+        }
+        accum = _mm256_add_epi32(accum, cur);
+        accum = _mm256_min_epi32(max, _mm256_max_epi32(accum, min));
+        // Unaligned store: nothing here guarantees that dst + ii * dstStride + j is 32-byte aligned.
+        _mm256_storeu_si256((__m256i *) (dst + ii * dstStride + j), accum);
+      }
+    }
+
+    src += srcStride * step_y;
+    dst += dstStride * step_y;
+  }
+}
+#endif
+#else
 template<X86_VEXT vext>
 static void simdFilter7x7Blk(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
   const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
@@ -672,13 +1669,29 @@ static void simdFilter7x7Blk(AlfClassifier **classifier, const PelUnitBuf &recDs
     dst += dstStride * STEP_Y;
   }
 }
-
+#endif
 template <X86_VEXT vext>
 void AdaptiveLoopFilter::_initAdaptiveLoopFilterX86()
 {
+#if RExt__HIGH_BIT_DEPTH_SUPPORT   // high-bit-depth build: install the *_HBD kernels instead of the templated ones
+  m_deriveClassificationBlk = simdDeriveClassificationBlk_HBD;
+#ifdef USE_AVX2
+  if (vext >= AVX2)   // AVX2 filter kernels only when the requested extension level is at least AVX2
+  {
+    m_filter5x5Blk = simdFilter5x5Blk_HBD_AVX2;
+    m_filter7x7Blk = simdFilter7x7Blk_HBD_AVX2;
+  }
+  else
+#endif
+  {   // non-AVX2 HBD kernels (also the only choice when USE_AVX2 is not defined)
+    m_filter5x5Blk = simdFilter5x5Blk_HBD;
+    m_filter7x7Blk = simdFilter7x7Blk_HBD;
+  }
+#else
   m_deriveClassificationBlk = simdDeriveClassificationBlk<vext>;
   m_filter5x5Blk = simdFilter5x5Blk<vext>;
   m_filter7x7Blk = simdFilter7x7Blk<vext>;
+#endif   // RExt__HIGH_BIT_DEPTH_SUPPORT
 }
 
 template void AdaptiveLoopFilter::_initAdaptiveLoopFilterX86<SIMDX86>();
diff --git a/source/Lib/CommonLib/x86/AffineGradientSearchX86.h b/source/Lib/CommonLib/x86/AffineGradientSearchX86.h
index bc8676e258c6090ce4577931d3aa1ac4e451c729..f6a3d95d7caea8e1c4a11f14f247f139b5315507 100644
--- a/source/Lib/CommonLib/x86/AffineGradientSearchX86.h
+++ b/source/Lib/CommonLib/x86/AffineGradientSearchX86.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -306,14 +306,468 @@ static void simdEqualCoeffComputer( Pel *pResidue, int residueStride, int **ppDe
     idx2 -= (width);
   }
 }
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+template<X86_VEXT vext>
+static void simdHorizontalSobelFilter_HBD_SIMD(Pel *const pPred, const int predStride, int *const pDerivate, const int derivateBufStride, const int width, const int height)
+{
+  __m128i pred[4];
+  __m128i pred2x[2];
+  __m128i intermediates[4];
+  __m128i derivate[2];
+
+  assert(!(height % 2));
+  assert(!(width % 4));
+  /* Horizontal gradient: pDerivate(x,y) = S(x+1,y) - S(x-1,y), with S(x,y) = pPred(x,y-1) + 2*pPred(x,y) + pPred(x,y+1) */
+  /* Derivatives of the rows and columns at the boundary are done at the end of this function */
+  /* The value of col and row indicate the columns and rows for which the derivatives have already been computed */
+
+  int col = 1;   // first interior column; column 0 is filled in at the end
+#if USE_AVX2
+  if (vext >= AVX2)
+  {
+    __m256i pred256[4];
+    __m256i pred2x256[2];
+    __m256i intermediates256[4];
+    __m256i derivate256[2];
+
+    for (; (col + 6) < width; col += 6)   // 8 samples loaded per row -> 6 derivatives per row
+    {
+      pred256[0] = _mm256_lddqu_si256((__m256i *)&pPred[col - 1]);
+      pred256[1] = _mm256_lddqu_si256((__m256i *)&pPred[predStride + col - 1]);
+
+      for (int row = 1; row < (height - 1); row += 2)   // two output rows per iteration
+      {
+        pred256[2] = _mm256_lddqu_si256((__m256i *)&pPred[(row + 1) * predStride + col - 1]);
+        pred256[3] = _mm256_lddqu_si256((__m256i *)&pPred[(row + 2) * predStride + col - 1]);
+
+        pred2x256[0] = _mm256_slli_epi32(pred256[1], 1);
+        pred2x256[1] = _mm256_slli_epi32(pred256[2], 1);
+
+        intermediates256[0] = _mm256_add_epi32(pred2x256[0], pred256[0]);
+        intermediates256[2] = _mm256_add_epi32(pred2x256[1], pred256[1]);
+
+        intermediates256[0] = _mm256_add_epi32(intermediates256[0], pred256[2]);
+        intermediates256[2] = _mm256_add_epi32(intermediates256[2], pred256[3]);
+
+        pred256[0] = pred256[2];   // slide the row window down by two rows
+        pred256[1] = pred256[3];
+
+        intermediates256[1] = _mm256_permute4x64_epi64(intermediates256[0], 0xf9);   // shift down by one 64-bit element (two columns)
+        intermediates256[3] = _mm256_permute4x64_epi64(intermediates256[2], 0xf9);
+
+        derivate256[0] = _mm256_sub_epi32(intermediates256[1], intermediates256[0]);
+        derivate256[1] = _mm256_sub_epi32(intermediates256[3], intermediates256[2]);
+
+        _mm_storeu_si128((__m128i *)&pDerivate[col + row * derivateBufStride], _mm256_castsi256_si128(derivate256[0]));   // derivatives 0..3
+        _mm_storel_epi64((__m128i *)&pDerivate[col + 4 + row * derivateBufStride], _mm256_castsi256_si128(_mm256_permute4x64_epi64(derivate256[0], 0xaa)));   // derivatives 4..5
+
+        _mm_storeu_si128((__m128i *)&pDerivate[col + (row + 1) * derivateBufStride], _mm256_castsi256_si128(derivate256[1]));
+        _mm_storel_epi64((__m128i *)&pDerivate[col + 4 + (row + 1) * derivateBufStride], _mm256_castsi256_si128(_mm256_permute4x64_epi64(derivate256[1], 0xaa)));
+      }
+    }
+  }
+#endif
+
+  for (; (col + 2) < width; col += 2)   // 4 samples loaded per row -> 2 derivatives per row
+  {
+    pred[0] = _mm_lddqu_si128((__m128i *)&pPred[col - 1]);
+    pred[1] = _mm_lddqu_si128((__m128i *)&pPred[predStride + col - 1]);
+
+    for (int row = 1; row < (height - 1); row += 2)   // two output rows per iteration
+    {
+      pred[2] = _mm_lddqu_si128((__m128i *)&pPred[(row + 1) * predStride + col - 1]);
+      pred[3] = _mm_lddqu_si128((__m128i *)&pPred[(row + 2) * predStride + col - 1]);
+
+      pred2x[0] = _mm_slli_epi32(pred[1], 1);
+      pred2x[1] = _mm_slli_epi32(pred[2], 1);
+
+      intermediates[0] = _mm_add_epi32(pred2x[0], pred[0]);
+      intermediates[2] = _mm_add_epi32(pred2x[1], pred[1]);
+
+      intermediates[0] = _mm_add_epi32(intermediates[0], pred[2]);
+      intermediates[2] = _mm_add_epi32(intermediates[2], pred[3]);
+
+      pred[0] = pred[2];   // slide the row window down by two rows
+      pred[1] = pred[3];
+
+      intermediates[1] = _mm_srli_si128(intermediates[0], 8);   // shift down by 8 bytes (two columns)
+      intermediates[3] = _mm_srli_si128(intermediates[2], 8);
+
+      derivate[0] = _mm_sub_epi32(intermediates[1], intermediates[0]);
+      derivate[1] = _mm_sub_epi32(intermediates[3], intermediates[2]);
+
+      _mm_storel_epi64((__m128i *)&pDerivate[col + row * derivateBufStride], derivate[0]);   // only the low two derivatives are stored
+      _mm_storel_epi64((__m128i *)&pDerivate[col + (row + 1) * derivateBufStride], derivate[1]);
+    }
+  }
+  // Left and right border columns copy their nearest computed neighbour.
+  for (int j = 1; j < (height - 1); j++)
+  {
+    pDerivate[j * derivateBufStride] = pDerivate[j * derivateBufStride + 1];
+    pDerivate[j * derivateBufStride + (width - 1)] = pDerivate[j * derivateBufStride + (width - 2)];
+  }
+  // Top and bottom border rows copy the adjacent row.
+  memcpy(pDerivate, pDerivate + derivateBufStride, width * sizeof(pDerivate[0]));
+  memcpy(pDerivate + (height - 1) * derivateBufStride, pDerivate + (height - 2) * derivateBufStride, width * sizeof(pDerivate[0])
+  );
+}
+
+template<X86_VEXT vext>
+static void simdVerticalSobelFilter_HBD_SIMD(Pel *const pPred, const int predStride, int *const pDerivate, const int derivateBufStride, const int width, const int height)
+{
+  __m128i pred[4];
+  __m128i intermediates[6];
+  __m128i derivate[2];
+  /* Vertical gradient: pDerivate(x,y) = D(x-1,y) + 2*D(x,y) + D(x+1,y), with D(x,y) = pPred(x,y+1) - pPred(x,y-1) */
+  assert(!(height % 2));
+  assert(!(width % 4));
+
+  int col = 1;   // first interior column; column 0 is filled in at the end
+#if USE_AVX2
+  if (vext >= AVX2)
+  {
+    __m256i pred256[4];
+    __m256i intermediates256[6];
+    __m256i derivate256[2];
+    __m256i shuffle256 = _mm256_set_epi32(0x00000007, 0x00000007, 0x00000006, 0x00000005, 0x00000004, 0x00000003, 0x00000002, 0x00000001);   // element e takes element e + 1
+
+    for (; (col + 6) < width; col += 6)   // 8 samples loaded per row -> 6 derivatives per row
+    {
+      pred256[0] = _mm256_lddqu_si256((__m256i *)&pPred[col - 1]);
+      pred256[1] = _mm256_lddqu_si256((__m256i *)&pPred[predStride + col - 1]);
+
+      for (int row = 1; row < (height - 1); row += 2)   // two output rows per iteration
+      {
+        pred256[2] = _mm256_lddqu_si256((__m256i *)&pPred[(row + 1) * predStride + col - 1]);
+        pred256[3] = _mm256_lddqu_si256((__m256i *)&pPred[(row + 2) * predStride + col - 1]);
+
+        intermediates256[0] = _mm256_sub_epi32(pred256[2], pred256[0]);
+        intermediates256[3] = _mm256_sub_epi32(pred256[3], pred256[1]);
+
+        pred256[0] = pred256[2];   // slide the row window down by two rows
+        pred256[1] = pred256[3];
+
+        intermediates256[1] = _mm256_permutevar8x32_epi32(intermediates256[0], shuffle256);   // shifted by one column
+        intermediates256[4] = _mm256_permutevar8x32_epi32(intermediates256[3], shuffle256);
+        intermediates256[2] = _mm256_permute4x64_epi64(intermediates256[0], 0xf9);   // shifted by two columns
+        intermediates256[5] = _mm256_permute4x64_epi64(intermediates256[3], 0xf9);
+
+        intermediates256[1] = _mm256_slli_epi32(intermediates256[1], 1);   // centre column is weighted by 2
+        intermediates256[4] = _mm256_slli_epi32(intermediates256[4], 1);
+
+        intermediates256[0] = _mm256_add_epi32(intermediates256[0], intermediates256[2]);
+        intermediates256[3] = _mm256_add_epi32(intermediates256[3], intermediates256[5]);
+
+        derivate256[0] = _mm256_add_epi32(intermediates256[0], intermediates256[1]);
+        derivate256[1] = _mm256_add_epi32(intermediates256[3], intermediates256[4]);
+
+        _mm_storeu_si128((__m128i *)&pDerivate[col + row * derivateBufStride], _mm256_castsi256_si128(derivate256[0]));   // derivatives 0..3
+        _mm_storel_epi64((__m128i *)&pDerivate[col + 4 + row * derivateBufStride], _mm256_castsi256_si128(_mm256_permute4x64_epi64(derivate256[0], 0xaa)));   // derivatives 4..5
+
+        _mm_storeu_si128((__m128i *)&pDerivate[col + (row + 1) * derivateBufStride], _mm256_castsi256_si128(derivate256[1]));
+        _mm_storel_epi64((__m128i *)&pDerivate[col + 4 + (row + 1) * derivateBufStride], _mm256_castsi256_si128(_mm256_permute4x64_epi64(derivate256[1], 0xaa)));
+      }
+    }
+  }
+#endif
+
+  /* Derivatives of the rows and columns at the boundary are done at the end of this function */
+  /* The value of col and row indicate the columns and rows for which the derivatives have already been computed */
+  for (; (col + 2) < width; col += 2)   // 4 samples loaded per row -> 2 derivatives per row
+  {
+    pred[0] = _mm_lddqu_si128((__m128i *)&pPred[col - 1]);
+    pred[1] = _mm_lddqu_si128((__m128i *)&pPred[predStride + col - 1]);
+
+    for (int row = 1; row < (height - 1); row += 2)   // two output rows per iteration
+    {
+      pred[2] = _mm_lddqu_si128((__m128i *)&pPred[(row + 1) * predStride + col - 1]);
+      pred[3] = _mm_lddqu_si128((__m128i *)&pPred[(row + 2) * predStride + col - 1]);
+
+      intermediates[0] = _mm_sub_epi32(pred[2], pred[0]);
+      intermediates[3] = _mm_sub_epi32(pred[3], pred[1]);
+
+      pred[0] = pred[2];   // slide the row window down by two rows
+      pred[1] = pred[3];
+
+      intermediates[1] = _mm_srli_si128(intermediates[0], 4);   // shifted by one column
+      intermediates[4] = _mm_srli_si128(intermediates[3], 4);
+      intermediates[2] = _mm_srli_si128(intermediates[0], 8);   // shifted by two columns
+      intermediates[5] = _mm_srli_si128(intermediates[3], 8);
+
+      intermediates[1] = _mm_slli_epi32(intermediates[1], 1);   // centre column is weighted by 2
+      intermediates[4] = _mm_slli_epi32(intermediates[4], 1);
+
+      intermediates[0] = _mm_add_epi32(intermediates[0], intermediates[2]);
+      intermediates[3] = _mm_add_epi32(intermediates[3], intermediates[5]);
+
+      derivate[0] = _mm_add_epi32(intermediates[0], intermediates[1]);
+      derivate[1] = _mm_add_epi32(intermediates[3], intermediates[4]);
+
+      _mm_storel_epi64((__m128i *)&pDerivate[col + row * derivateBufStride], derivate[0]);   // only the low two derivatives are stored
+      _mm_storel_epi64((__m128i *)&pDerivate[col + (row + 1) * derivateBufStride], derivate[1]);
+    }
+  }
+  // Left and right border columns copy their nearest computed neighbour.
+  for (int j = 1; j < (height - 1); j++)
+  {
+    pDerivate[j * derivateBufStride] = pDerivate[j * derivateBufStride + 1];
+    pDerivate[j * derivateBufStride + (width - 1)] = pDerivate[j * derivateBufStride + (width - 2)];
+  }
+  // Top and bottom border rows copy the adjacent row.
+  memcpy(pDerivate, pDerivate + derivateBufStride, width * sizeof(pDerivate[0]));
+  memcpy(pDerivate + (height - 1) * derivateBufStride, pDerivate + (height - 2) * derivateBufStride, width * sizeof(pDerivate[0]));
+}
+
+#define CALC_EQUAL_COEFF_16PXLS(x1,x2,y1,y2,tmp0,tmp1,tmp2,tmp3,inter0,inter1,inter2,inter3,loadLocation)         \
+{ /* low 64 bits of inter3 = 64-bit value at loadLocation + sum over the products computed below */              \
+inter0 = _mm256_mul_epi32(x1, y1);           /* 64-bit products of the even 32-bit elements */                    \
+inter1 = _mm256_mul_epi32(tmp0, tmp2);       /* callers pass tmp* shifted by one element: odd elements */         \
+inter2 = _mm256_mul_epi32(x2, y2);           /* same for the second operand pair */                               \
+inter3 = _mm256_mul_epi32(tmp1, tmp3);                                                                            \
+inter2 = _mm256_add_epi64(inter0, inter2);                                                                        \
+inter3 = _mm256_add_epi64(inter1, inter3);                                                                        \
+inter0 = _mm256_castsi128_si256(_mm_loadl_epi64(loadLocation));   /* running 64-bit total in the low element */   \
+inter3 = _mm256_add_epi64(inter2, inter3);                                                                        \
+inter1 = _mm256_permute4x64_epi64(inter3, 0x4e);   /* swap 128-bit halves */                                      \
+inter3 = _mm256_add_epi64(inter1, inter3);                                                                        \
+inter1 = _mm256_permute4x64_epi64(inter3, 0xb1);   /* swap adjacent 64-bit elements */                            \
+inter3 = _mm256_add_epi64(inter1, inter3);         /* every element now holds the full horizontal sum */          \
+inter3 = _mm256_add_epi64(inter0, inter3);         /* add the previous total; use only the low 64 bits */         \
+}
+
+template<X86_VEXT vext>
+static void simdEqualCoeffComputer_HBD_SIMD(Pel *pResidue, int residueStride, int **ppDerivate, int derivateBufStride, int64_t(*pEqualCoeff)[7], int width, int height, bool b6Param)
+{
+  int n = b6Param ? 6 : 4;
+  CHECK((width & 8), "width of affine block should be multiple of 8");
+
+#if USE_AVX2
+  if (vext >= AVX2)
+  {
+    int idx1 = -2 * derivateBufStride - 8;
+    int idx2 = -derivateBufStride - 8;
+
+    __m256i tmp[4];
+    __m256i intermediate[4];
+    __m256i residue[2];
+    __m256i coef[12];
+
+    // Add directly to indexes to get new index
+    __m256i four = _mm256_set1_epi32(4);
+    __m256i eight = _mm256_set1_epi32(8);
+    __m256i shuffle = _mm256_set_epi32(0x00000007, 0x00000007, 0x00000006, 0x00000005, 0x00000004, 0x00000003, 0x00000002, 0x00000001);
+    __m256i indxJ = _mm256_set1_epi32(-2);
+
+    for (int j = 0; j < height; j += 2)
+    {
+      if (!(j & 3))
+        indxJ = _mm256_add_epi32(indxJ, four);
+      __m256i indxK = _mm256_set_epi32(-2, -2, -2, -2, -6, -6, -6, -6);
+      idx1 += (derivateBufStride << 1);
+      idx2 += (derivateBufStride << 1);
+
+      for (int k = 0; k < width; k += 8)
+      {
+        idx1 += 8;
+        idx2 += 8;
+        indxK = _mm256_add_epi32(indxK, eight);
+
+        if (b6Param)
+        {
+          // coef[0-5] for iC[0-5] of 1st row of pixels
+          coef[0] = _mm256_lddqu_si256((__m256i *)&ppDerivate[0][idx1]);
+          coef[2] = _mm256_lddqu_si256((__m256i *)&ppDerivate[1][idx1]);
+          coef[1] = _mm256_mullo_epi32(indxK, coef[0]);
+          coef[3] = _mm256_mullo_epi32(indxK, coef[2]);
+          coef[4] = _mm256_mullo_epi32(indxJ, coef[0]);
+          coef[5] = _mm256_mullo_epi32(indxJ, coef[2]);
+
+          // coef[6-11] for iC[0-5] of 2nd row of pixels
+          coef[6] = _mm256_lddqu_si256((__m256i *)&ppDerivate[0][idx2]);
+          coef[8] = _mm256_lddqu_si256((__m256i *)&ppDerivate[1][idx2]);
+          coef[7] = _mm256_mullo_epi32(indxK, coef[6]);
+          coef[9] = _mm256_mullo_epi32(indxK, coef[8]);
+          coef[10] = _mm256_mullo_epi32(indxJ, coef[6]);
+          coef[11] = _mm256_mullo_epi32(indxJ, coef[8]);
+        }
+        else
+        {
+          // coef[0-3] for iC[0-3] of 1st row of pixels
+          coef[0] = _mm256_lddqu_si256((__m256i *)&ppDerivate[0][idx1]);
+          coef[2] = _mm256_lddqu_si256((__m256i *)&ppDerivate[1][idx1]);
+          coef[1] = _mm256_mullo_epi32(indxK, coef[0]);
+          coef[3] = _mm256_mullo_epi32(indxJ, coef[0]);
+          tmp[0] = _mm256_mullo_epi32(indxJ, coef[2]);
+          tmp[1] = _mm256_mullo_epi32(indxK, coef[2]);
+          coef[1] = _mm256_add_epi32(coef[1], tmp[0]);
+          coef[3] = _mm256_sub_epi32(coef[3], tmp[1]);
+
+          // coef[4-7] for iC[0-3] of 1st row of pixels
+          coef[4] = _mm256_lddqu_si256((__m256i *)&ppDerivate[0][idx2]);
+          coef[6] = _mm256_lddqu_si256((__m256i *)&ppDerivate[1][idx2]);
+          coef[5] = _mm256_mullo_epi32(indxK, coef[4]);
+          coef[7] = _mm256_mullo_epi32(indxJ, coef[4]);
+          tmp[2] = _mm256_mullo_epi32(indxJ, coef[6]);
+          tmp[3] = _mm256_mullo_epi32(indxK, coef[6]);
+          coef[5] = _mm256_add_epi32(coef[5], tmp[2]);
+          coef[7] = _mm256_sub_epi32(coef[7], tmp[3]);
+        }
+
+        // Residue
+        residue[0] = _mm256_lddqu_si256((__m256i *)&pResidue[idx1]);
+        residue[1] = _mm256_lddqu_si256((__m256i *)&pResidue[idx2]);
+        residue[0] = _mm256_slli_epi32(residue[0], 3);
+        residue[1] = _mm256_slli_epi32(residue[1], 3);
+
+        // Calculation of coefficient matrix
+        for (int col = 0; col < n; col++)
+        {
+          tmp[0] = _mm256_permutevar8x32_epi32(coef[0 + col], shuffle);
+          tmp[1] = _mm256_permutevar8x32_epi32(coef[n + col], shuffle);
+          CALC_EQUAL_COEFF_16PXLS(coef[0 + col], coef[n + col], coef[0 + col], coef[n + col], tmp[0], tmp[1], tmp[0], tmp[1], intermediate[0], intermediate[1], intermediate[2], intermediate[3], (const __m128i*)&pEqualCoeff[col + 1][col]);
+          _mm_storel_epi64((__m128i*)&pEqualCoeff[col + 1][col], _mm256_castsi256_si128(intermediate[3]));
+
+          for (int row = col + 1; row < n; row++)
+          {
+            tmp[2] = _mm256_permutevar8x32_epi32(coef[0 + row], shuffle);
+            tmp[3] = _mm256_permutevar8x32_epi32(coef[n + row], shuffle);
+            CALC_EQUAL_COEFF_16PXLS(coef[0 + col], coef[n + col], coef[0 + row], coef[n + row], tmp[0], tmp[1], tmp[2], tmp[3], intermediate[0], intermediate[1], intermediate[2], intermediate[3], (const __m128i*)&pEqualCoeff[col + 1][row]);
+            _mm_storel_epi64((__m128i*)&pEqualCoeff[col + 1][row], _mm256_castsi256_si128(intermediate[3]));
+            _mm_storel_epi64((__m128i*)&pEqualCoeff[row + 1][col], _mm256_castsi256_si128(intermediate[3]));
+          }
+
+          tmp[2] = _mm256_permutevar8x32_epi32(residue[0], shuffle);
+          tmp[3] = _mm256_permutevar8x32_epi32(residue[1], shuffle);
+          CALC_EQUAL_COEFF_16PXLS(coef[0 + col], coef[n + col], residue[0], residue[1], tmp[0], tmp[1], tmp[2], tmp[3], intermediate[0], intermediate[1], intermediate[2], intermediate[3], (const __m128i*)&pEqualCoeff[col + 1][n]);
+          _mm_storel_epi64((__m128i*)&pEqualCoeff[col + 1][n], _mm256_castsi256_si128(intermediate[3]));
+        }
+      }
+
+      idx1 -= (width);
+      idx2 -= (width);
+    }
+  }
+  else
+#endif
+  {
+    int idx1 = -2 * derivateBufStride - 4;
+    int idx2 = -derivateBufStride - 4;
+
+    __m128i four;
+    __m128i tmp[4];
+    __m128i intermediate[4];
+    __m128i indxK, indxJ;
+    __m128i residue[2];
+    __m128i coef[12];
 
+    // Add directly to indexes to get new index
+    four = _mm_set1_epi32(4);
+    indxJ = _mm_set1_epi32(-2);
+
+    for (int j = 0; j < height; j += 2)
+    {
+      if (!(j & 3))
+        indxJ = _mm_add_epi32(indxJ, four);
+      indxK = _mm_set1_epi32(-2);
+      idx1 += (derivateBufStride << 1);
+      idx2 += (derivateBufStride << 1);
+
+      for (int k = 0; k < width; k += 4)
+      {
+        idx1 += 4;
+        idx2 += 4;
+        indxK = _mm_add_epi32(indxK, four);
+
+        if (b6Param)
+        {
+          // coef[0-5] for iC[0-5] of 1st row of pixels
+          coef[0] = _mm_lddqu_si128((const __m128i*)&ppDerivate[0][idx1]);
+          coef[2] = _mm_lddqu_si128((const __m128i*)&ppDerivate[1][idx1]);
+          coef[1] = _mm_mullo_epi32(indxK, coef[0]);
+          coef[3] = _mm_mullo_epi32(indxK, coef[2]);
+          coef[4] = _mm_mullo_epi32(indxJ, coef[0]);
+          coef[5] = _mm_mullo_epi32(indxJ, coef[2]);
+
+          // coef[6-11] for iC[0-5] of 2nd row of pixels
+          coef[6] = _mm_lddqu_si128((const __m128i*)&ppDerivate[0][idx2]);
+          coef[8] = _mm_lddqu_si128((const __m128i*)&ppDerivate[1][idx2]);
+          coef[7] = _mm_mullo_epi32(indxK, coef[6]);
+          coef[9] = _mm_mullo_epi32(indxK, coef[8]);
+          coef[10] = _mm_mullo_epi32(indxJ, coef[6]);
+          coef[11] = _mm_mullo_epi32(indxJ, coef[8]);
+        }
+        else
+        {
+          // coef[0-3] for iC[0-3] of 1st row of pixels
+          coef[0] = _mm_lddqu_si128((const __m128i*)&ppDerivate[0][idx1]);
+          coef[2] = _mm_lddqu_si128((const __m128i*)&ppDerivate[1][idx1]);
+          coef[1] = _mm_mullo_epi32(indxK, coef[0]);
+          coef[3] = _mm_mullo_epi32(indxJ, coef[0]);
+          tmp[0] = _mm_mullo_epi32(indxJ, coef[2]);
+          tmp[1] = _mm_mullo_epi32(indxK, coef[2]);
+          coef[1] = _mm_add_epi32(coef[1], tmp[0]);
+          coef[3] = _mm_sub_epi32(coef[3], tmp[1]);
+
+          // coef[4-7] for iC[0-3] of 2nd row of pixels
+          coef[4] = _mm_lddqu_si128((const __m128i*)&ppDerivate[0][idx2]);
+          coef[6] = _mm_lddqu_si128((const __m128i*)&ppDerivate[1][idx2]);
+          coef[5] = _mm_mullo_epi32(indxK, coef[4]);
+          coef[7] = _mm_mullo_epi32(indxJ, coef[4]);
+          tmp[2] = _mm_mullo_epi32(indxJ, coef[6]);
+          tmp[3] = _mm_mullo_epi32(indxK, coef[6]);
+          coef[5] = _mm_add_epi32(coef[5], tmp[2]);
+          coef[7] = _mm_sub_epi32(coef[7], tmp[3]);
+        }
+
+        // Residue
+        residue[0] = _mm_lddqu_si128((__m128i *)&pResidue[idx1]);
+        residue[1] = _mm_lddqu_si128((__m128i *)&pResidue[idx2]);
+        residue[0] = _mm_slli_epi32(residue[0], 3);
+        residue[1] = _mm_slli_epi32(residue[1], 3);
+
+        // Calculation of coefficient matrix
+        for (int col = 0; col < n; col++)
+        {
+          tmp[0] = _mm_srli_si128(coef[0 + col], 4);
+          tmp[1] = _mm_srli_si128(coef[n + col], 4);
+          CALC_EQUAL_COEFF_8PXLS(coef[0 + col], coef[n + col], coef[0 + col], coef[n + col], tmp[0], tmp[1], tmp[0], tmp[1], intermediate[0], intermediate[1], intermediate[2], intermediate[3], (const __m128i*)&pEqualCoeff[col + 1][col]);
+          _mm_storel_epi64((__m128i*)&pEqualCoeff[col + 1][col], intermediate[3]);
+
+          for (int row = col + 1; row < n; row++)
+          {
+            tmp[2] = _mm_srli_si128(coef[0 + row], 4);
+            tmp[3] = _mm_srli_si128(coef[n + row], 4);
+            CALC_EQUAL_COEFF_8PXLS(coef[0 + col], coef[n + col], coef[0 + row], coef[n + row], tmp[0], tmp[1], tmp[2], tmp[3], intermediate[0], intermediate[1], intermediate[2], intermediate[3], (const __m128i*)&pEqualCoeff[col + 1][row]);
+            _mm_storel_epi64((__m128i*)&pEqualCoeff[col + 1][row], intermediate[3]);
+            _mm_storel_epi64((__m128i*)&pEqualCoeff[row + 1][col], intermediate[3]);
+          }
+
+          tmp[2] = _mm_srli_si128(residue[0], 4);
+          tmp[3] = _mm_srli_si128(residue[1], 4);
+          CALC_EQUAL_COEFF_8PXLS(coef[0 + col], coef[n + col], residue[0], residue[1], tmp[0], tmp[1], tmp[2], tmp[3], intermediate[0], intermediate[1], intermediate[2], intermediate[3], (const __m128i*)&pEqualCoeff[col + 1][n]);
+          _mm_storel_epi64((__m128i*)&pEqualCoeff[col + 1][n], intermediate[3]);
+        }
+      }
+
+      idx1 -= (width);
+      idx2 -= (width);
+    }
+  }
+}
+#endif
 
 template <X86_VEXT vext>
 void AffineGradientSearch::_initAffineGradientSearchX86()
 {
+#if RExt__HIGH_BIT_DEPTH_SUPPORT  // high-bit-depth build: install the *_HBD_SIMD kernels
+  m_HorizontalSobelFilter = simdHorizontalSobelFilter_HBD_SIMD<vext>;
+  m_VerticalSobelFilter = simdVerticalSobelFilter_HBD_SIMD<vext>;
+  m_EqualCoeffComputer = simdEqualCoeffComputer_HBD_SIMD<vext>;
+#else  // otherwise install the original SIMD kernels
   m_HorizontalSobelFilter = simdHorizontalSobelFilter<vext>;
   m_VerticalSobelFilter = simdVerticalSobelFilter<vext>;
   m_EqualCoeffComputer = simdEqualCoeffComputer<vext>;
+#endif  // RExt__HIGH_BIT_DEPTH_SUPPORT
 }
 
 template void AffineGradientSearch::_initAffineGradientSearchX86<SIMDX86>();
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
index c763a2977ca6fb745ea905d289bed80fba69787b..9c49b1c56aa34e2572946db5736fbf503679ca24 100644
--- a/source/Lib/CommonLib/x86/BufferX86.h
+++ b/source/Lib/CommonLib/x86/BufferX86.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -224,7 +224,7 @@ void paddingSimd(Pel *dst, int stride, int width, int height, int padSize)
   }
   else
   {
-    CHECK(false, "padding size must be 1 or 2");
+    THROW("padding size must be 1 or 2");
   }
 }
 
@@ -477,7 +477,438 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride,
 #endif
   }
 }
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+template< X86_VEXT vext, bool PAD = true >
+void gradFilterHBD_SIMD(Pel* src, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth)
+{  // For every interior sample: gradX = (right >> 6) - (left >> 6), gradY = (bottom >> 6) - (top >> 6); bitDepth is not used.
+  Pel* srcTmp = src + srcStride + 1;  // start one row down and one column right of the buffer origin
+  Pel* gradXTmp = gradX + gradStride + 1;
+  Pel* gradYTmp = gradY + gradStride + 1;
+
+  int widthInside = width - 2 * BIO_EXTEND_SIZE;  // interior size: the block without BIO_EXTEND_SIZE samples on each side
+  int heightInside = height - 2 * BIO_EXTEND_SIZE;
+  int shift1 = 6;  // right shift applied to each neighbour before the subtraction
+  assert((widthInside & 3) == 0);  // interior width must be a multiple of 4
+
+#ifdef USE_AVX2
+  if (vext >= AVX2)
+  {
+    for (int y = 0; y < heightInside; y++)
+    {
+      for (int x = 0; x < widthInside; x += 8)  // NOTE(review): 8 samples per step but only a multiple of 4 is asserted - confirm callers never pass widthInside % 8 == 4
+      {
+        __m256i mmPixTop = _mm256_srai_epi32(_mm256_lddqu_si256((__m256i*) (srcTmp + x - srcStride)), shift1);
+        __m256i mmPixBottom = _mm256_srai_epi32(_mm256_lddqu_si256((__m256i*) (srcTmp + x + srcStride)), shift1);
+        __m256i mmPixLeft = _mm256_srai_epi32(_mm256_lddqu_si256((__m256i*) (srcTmp + x - 1)), shift1);
+        __m256i mmPixRight = _mm256_srai_epi32(_mm256_lddqu_si256((__m256i*) (srcTmp + x + 1)), shift1);
+
+        __m256i mmGradVer = _mm256_sub_epi32(mmPixBottom, mmPixTop);  // vertical gradient: bottom - top
+        __m256i mmGradHor = _mm256_sub_epi32(mmPixRight, mmPixLeft);  // horizontal gradient: right - left
+
+        _mm256_storeu_si256((__m256i *) (gradYTmp + x), mmGradVer);
+        _mm256_storeu_si256((__m256i *) (gradXTmp + x), mmGradHor);
+      }
+      gradXTmp += gradStride;
+      gradYTmp += gradStride;
+      srcTmp += srcStride;
+    }
+  }
+  else
+#endif  // USE_AVX2
+  {
+    __m128i mmShift1 = _mm_cvtsi32_si128(shift1);  // shift count held in a vector register, as _mm_sra_epi32 requires
+    for (int y = 0; y < heightInside; y++)
+    {
+      for (int x = 0; x < widthInside; x += 4)  // 4 samples per step, matching the assert above
+      {
+        __m128i mmPixTop = _mm_sra_epi32(_mm_lddqu_si128((__m128i*) (srcTmp + x - srcStride)), mmShift1);
+        __m128i mmPixBottom = _mm_sra_epi32(_mm_lddqu_si128((__m128i*) (srcTmp + x + srcStride)), mmShift1);
+        __m128i mmPixLeft = _mm_sra_epi32(_mm_lddqu_si128((__m128i*) (srcTmp + x - 1)), mmShift1);
+        __m128i mmPixRight = _mm_sra_epi32(_mm_lddqu_si128((__m128i*) (srcTmp + x + 1)), mmShift1);
+
+        __m128i mmGradVer = _mm_sub_epi32(mmPixBottom, mmPixTop);  // vertical gradient: bottom - top
+        __m128i mmGradHor = _mm_sub_epi32(mmPixRight, mmPixLeft);  // horizontal gradient: right - left
+
+        _mm_storeu_si128((__m128i *) (gradYTmp + x), mmGradVer);
+        _mm_storeu_si128((__m128i *) (gradXTmp + x), mmGradHor);
+      }
+      gradXTmp += gradStride;
+      gradYTmp += gradStride;
+      srcTmp += srcStride;
+    }
+  }
+
+  if (PAD)  // template parameter: replicate the outermost interior gradients into the surrounding one-sample border
+  {
+    gradXTmp = gradX + gradStride + 1;
+    gradYTmp = gradY + gradStride + 1;
+    for (int y = 0; y < heightInside; y++)
+    {
+      gradXTmp[-1] = gradXTmp[0];  // left border column copies the first interior column
+      gradXTmp[widthInside] = gradXTmp[widthInside - 1];  // right border column copies the last interior column
+      gradXTmp += gradStride;
+
+      gradYTmp[-1] = gradYTmp[0];
+      gradYTmp[widthInside] = gradYTmp[widthInside - 1];
+      gradYTmp += gradStride;
+    }
+
+    gradXTmp = gradX + gradStride;  // first interior row, starting at its left border column
+    gradYTmp = gradY + gradStride;
+    ::memcpy(gradXTmp - gradStride, gradXTmp, sizeof(Pel)*(width));  // top border row <- first interior row
+    ::memcpy(gradXTmp + heightInside * gradStride, gradXTmp + (heightInside - 1)*gradStride, sizeof(Pel)*(width));  // bottom border row <- last interior row
+    ::memcpy(gradYTmp - gradStride, gradYTmp, sizeof(Pel)*(width));
+    ::memcpy(gradYTmp + heightInside * gradStride, gradYTmp + (heightInside - 1)*gradStride, sizeof(Pel)*(width));
+  }
+}
+
+template< X86_VEXT vext >
+void calcBIOSumsHBD_SIMD(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel* gradX1, Pel* gradY0, Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX)
+{  // Accumulates the five sums written to the output pointers over a 6x6 window (6 rows, 6 samples per row); xu, yu and bitDepth are not used.
+  int shift4 = 4;  // right shift applied to both source samples before they are subtracted
+  int shift5 = 1;  // right shift applied to the summed gradients
+
+#ifdef USE_AVX2
+  if (vext >= AVX2)
+  {
+    __m256i sumAbsGXTmp = _mm256_setzero_si256();
+    __m256i sumDIXTmp = _mm256_setzero_si256();
+    __m256i sumAbsGYTmp = _mm256_setzero_si256();
+    __m256i sumDIYTmp = _mm256_setzero_si256();
+    __m256i sumSignGyGxTmp = _mm256_setzero_si256();
+
+    for (int y = 0; y < 6; y++)
+    {
+      auto load6values = [](const Pel *ptr)  // samples 0..3 in the low 128 bits, samples 4..5 in the high 128 bits, last two lanes zero
+      {
+        __m256i a = _mm256_castsi128_si256(_mm_lddqu_si128((__m128i *) ptr));
+        __m128i b = _mm_loadl_epi64((__m128i *)(ptr + 4));
+        return _mm256_inserti128_si256(a, b, 1);
+      };
+
+      __m256i shiftSrcY0Tmp = _mm256_srai_epi32(load6values(srcY0Tmp), shift4);  // srcY0Tmp[x] >> shift4
+      __m256i shiftSrcY1Tmp = _mm256_srai_epi32(load6values(srcY1Tmp), shift4);  // srcY1Tmp[x] >> shift4
+      __m256i loadGradX0 = load6values(gradX0);  // gradX0[x]
+      __m256i loadGradX1 = load6values(gradX1);  // gradX1[x]
+      __m256i loadGradY0 = load6values(gradY0);  // gradY0[x]
+      __m256i loadGradY1 = load6values(gradY1);  // gradY1[x]
+
+      __m256i subTemp1 = _mm256_sub_epi32(shiftSrcY1Tmp, shiftSrcY0Tmp);  // (srcY1Tmp[x] >> shift4) - (srcY0Tmp[x] >> shift4)
+      __m256i packTempX = _mm256_srai_epi32(_mm256_add_epi32(loadGradX0, loadGradX1), shift5);  // (gradX0[x] + gradX1[x]) >> shift5
+      __m256i packTempY = _mm256_srai_epi32(_mm256_add_epi32(loadGradY0, loadGradY1), shift5);  // (gradY0[x] + gradY1[x]) >> shift5
+      __m256i gX = _mm256_abs_epi32(packTempX);  // abs(tmpGX)
+      __m256i gY = _mm256_abs_epi32(packTempY);  // abs(tmpGY)
+      __m256i dIX = _mm256_sign_epi32(subTemp1, packTempX);  // (tmpGX < 0 ? -tmpDI : (tmpGX == 0 ? 0 : tmpDI))
+      __m256i dIY = _mm256_sign_epi32(subTemp1, packTempY);  // (tmpGY < 0 ? -tmpDI : (tmpGY == 0 ? 0 : tmpDI))
+      __m256i signGY_GX = _mm256_sign_epi32(packTempX, packTempY);  // (tmpGY < 0 ? -tmpGX : (tmpGY == 0 ? 0 : tmpGX))
+
+      sumAbsGXTmp = _mm256_add_epi32(sumAbsGXTmp, gX);
+      sumDIXTmp = _mm256_add_epi32(sumDIXTmp, dIX);
+      sumAbsGYTmp = _mm256_add_epi32(sumAbsGYTmp, gY);
+      sumDIYTmp = _mm256_add_epi32(sumDIYTmp, dIY);
+      sumSignGyGxTmp = _mm256_add_epi32(sumSignGyGxTmp, signGY_GX);
+
+      srcY0Tmp += src0Stride;
+      srcY1Tmp += src1Stride;
+      gradX0 += widthG;
+      gradX1 += widthG;
+      gradY0 += widthG;
+      gradY1 += widthG;
+    }
+
+    __m256i mm_gx_gy_l = _mm256_unpacklo_epi32(sumAbsGXTmp, sumAbsGYTmp);  // interleave so each 128-bit group ends up as (gx, gy, dIx, dIy)
+    __m256i mm_gx_gy_h = _mm256_unpackhi_epi32(sumAbsGXTmp, sumAbsGYTmp);
+    __m256i mm_dIx_dIy_l = _mm256_unpacklo_epi32(sumDIXTmp, sumDIYTmp);
+    __m256i mm_dIx_dIy_h = _mm256_unpackhi_epi32(sumDIXTmp, sumDIYTmp);
+    __m256i c1 = _mm256_unpacklo_epi64(mm_gx_gy_l, mm_dIx_dIy_l);
+    __m256i c2 = _mm256_unpackhi_epi64(mm_gx_gy_l, mm_dIx_dIy_l);
+    __m256i c3 = _mm256_unpacklo_epi64(mm_gx_gy_h, mm_dIx_dIy_h);
+    __m256i c4 = _mm256_unpackhi_epi64(mm_gx_gy_h, mm_dIx_dIy_h);
+
+    c1 = _mm256_add_epi32(c1, c2);
+    c1 = _mm256_add_epi32(c1, c3);
+    c1 = _mm256_add_epi32(c1, c4);
+    c1 = _mm256_add_epi32(c1, _mm256_permute4x64_epi64(c1, 0xee));  // fold the upper 128 bits onto the lower 128 bits
+    *sumAbsGX = _mm_cvtsi128_si32(_mm256_castsi256_si128(c1));
+    *sumAbsGY = _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_shuffle_epi32(c1, 0x55)));
+    *sumDIX = _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_shuffle_epi32(c1, 0xaa)));
+    *sumDIY = _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_shuffle_epi32(c1, 0xff)));
+
+    sumSignGyGxTmp = _mm256_add_epi32(sumSignGyGxTmp, _mm256_permute4x64_epi64(sumSignGyGxTmp, 0x4e));   // 01001110
+    sumSignGyGxTmp = _mm256_add_epi32(sumSignGyGxTmp, _mm256_permute4x64_epi64(sumSignGyGxTmp, 0xb1));   // 10110001
+    sumSignGyGxTmp = _mm256_add_epi32(sumSignGyGxTmp, _mm256_shuffle_epi32(sumSignGyGxTmp, 0x55));  // add lane 1 so lane 0 holds the full sum
+    *sumSignGY_GX = _mm_cvtsi128_si32(_mm256_castsi256_si128(sumSignGyGxTmp));
+  }
+  else
+#endif  // USE_AVX2
+  {
+    __m128i sumAbsGXTmp = _mm_setzero_si128();
+    __m128i sumDIXTmp = _mm_setzero_si128();
+    __m128i sumAbsGYTmp = _mm_setzero_si128();
+    __m128i sumDIYTmp = _mm_setzero_si128();
+    __m128i sumSignGyGxTmp = _mm_setzero_si128();
 
+    for (int y = 0; y < 6; y++)
+    {
+      // the first 4 samples
+      __m128i shiftSrcY0Tmp = _mm_srai_epi32(_mm_lddqu_si128((__m128i*)srcY0Tmp), shift4);  // srcY0Tmp[x] >> shift4
+      __m128i shiftSrcY1Tmp = _mm_srai_epi32(_mm_lddqu_si128((__m128i*)srcY1Tmp), shift4);  // srcY1Tmp[x] >> shift4
+      __m128i loadGradX0 = _mm_lddqu_si128((__m128i*)gradX0);  // gradX0[x]
+      __m128i loadGradX1 = _mm_lddqu_si128((__m128i*)gradX1);  // gradX1[x]
+      __m128i loadGradY0 = _mm_lddqu_si128((__m128i*)gradY0);  // gradY0[x]
+      __m128i loadGradY1 = _mm_lddqu_si128((__m128i*)gradY1);  // gradY1[x]
+
+      __m128i subTemp1 = _mm_sub_epi32(shiftSrcY1Tmp, shiftSrcY0Tmp);  // (srcY1Tmp[x] >> shift4) - (srcY0Tmp[x] >> shift4)
+      __m128i packTempX = _mm_srai_epi32(_mm_add_epi32(loadGradX0, loadGradX1), shift5);  // (gradX0[x] + gradX1[x]) >> shift5
+      __m128i packTempY = _mm_srai_epi32(_mm_add_epi32(loadGradY0, loadGradY1), shift5);  // (gradY0[x] + gradY1[x]) >> shift5
+      __m128i gX = _mm_abs_epi32(packTempX);  // abs(tmpGX)
+      __m128i gY = _mm_abs_epi32(packTempY);  // abs(tmpGY)
+      __m128i dIX = _mm_sign_epi32(subTemp1, packTempX);  // (tmpGX < 0 ? -tmpDI : (tmpGX == 0 ? 0 : tmpDI))
+      __m128i dIY = _mm_sign_epi32(subTemp1, packTempY);  // (tmpGY < 0 ? -tmpDI : (tmpGY == 0 ? 0 : tmpDI))
+      __m128i signGY_GX = _mm_sign_epi32(packTempX, packTempY);  // (tmpGY < 0 ? -tmpGX : (tmpGY == 0 ? 0 : tmpGX))
+
+      sumAbsGXTmp = _mm_add_epi32(sumAbsGXTmp, gX);
+      sumDIXTmp = _mm_add_epi32(sumDIXTmp, dIX);
+      sumAbsGYTmp = _mm_add_epi32(sumAbsGYTmp, gY);
+      sumDIYTmp = _mm_add_epi32(sumDIYTmp, dIY);
+      sumSignGyGxTmp = _mm_add_epi32(sumSignGyGxTmp, signGY_GX);
+
+      // the following two samples: 64-bit vector loads (no type-punned scalar read); the zeroed upper two lanes add nothing to the sums
+      shiftSrcY0Tmp = _mm_srai_epi32(_mm_loadl_epi64((const __m128i*)(srcY0Tmp + 4)), shift4);  // srcY0Tmp[x] >> shift4
+      shiftSrcY1Tmp = _mm_srai_epi32(_mm_loadl_epi64((const __m128i*)(srcY1Tmp + 4)), shift4);  // srcY1Tmp[x] >> shift4
+      loadGradX0 = _mm_loadl_epi64((const __m128i*)(gradX0 + 4));  // gradX0[x]
+      loadGradX1 = _mm_loadl_epi64((const __m128i*)(gradX1 + 4));  // gradX1[x]
+      loadGradY0 = _mm_loadl_epi64((const __m128i*)(gradY0 + 4));  // gradY0[x]
+      loadGradY1 = _mm_loadl_epi64((const __m128i*)(gradY1 + 4));  // gradY1[x]
+
+      subTemp1 = _mm_sub_epi32(shiftSrcY1Tmp, shiftSrcY0Tmp);  // (srcY1Tmp[x] >> shift4) - (srcY0Tmp[x] >> shift4)
+      packTempX = _mm_srai_epi32(_mm_add_epi32(loadGradX0, loadGradX1), shift5);  // (gradX0[x] + gradX1[x]) >> shift5
+      packTempY = _mm_srai_epi32(_mm_add_epi32(loadGradY0, loadGradY1), shift5);  // (gradY0[x] + gradY1[x]) >> shift5
+      gX = _mm_abs_epi32(packTempX);  // abs(tmpGX)
+      gY = _mm_abs_epi32(packTempY);  // abs(tmpGY)
+      dIX = _mm_sign_epi32(subTemp1, packTempX);  // (tmpGX < 0 ? -tmpDI : (tmpGX == 0 ? 0 : tmpDI))
+      dIY = _mm_sign_epi32(subTemp1, packTempY);  // (tmpGY < 0 ? -tmpDI : (tmpGY == 0 ? 0 : tmpDI))
+      signGY_GX = _mm_sign_epi32(packTempX, packTempY);  // (tmpGY < 0 ? -tmpGX : (tmpGY == 0 ? 0 : tmpGX))
+
+      sumAbsGXTmp = _mm_add_epi32(sumAbsGXTmp, gX);
+      sumDIXTmp = _mm_add_epi32(sumDIXTmp, dIX);
+      sumAbsGYTmp = _mm_add_epi32(sumAbsGYTmp, gY);
+      sumDIYTmp = _mm_add_epi32(sumDIYTmp, dIY);
+      sumSignGyGxTmp = _mm_add_epi32(sumSignGyGxTmp, signGY_GX);
+
+      srcY0Tmp += src0Stride;
+      srcY1Tmp += src1Stride;
+      gradX0 += widthG;
+      gradX1 += widthG;
+      gradY0 += widthG;
+      gradY1 += widthG;
+    }
+
+    __m128i a12 = _mm_unpacklo_epi32(sumAbsGXTmp, sumAbsGYTmp);  // interleave so each of c1..c4 ends up as (gx, gy, dIx, dIy)
+    __m128i a3 = _mm_unpackhi_epi32(sumAbsGXTmp, sumAbsGYTmp);
+    __m128i b12 = _mm_unpacklo_epi32(sumDIXTmp, sumDIYTmp);
+    __m128i b3 = _mm_unpackhi_epi32(sumDIXTmp, sumDIYTmp);
+    __m128i c1 = _mm_unpacklo_epi64(a12, b12);
+    __m128i c2 = _mm_unpackhi_epi64(a12, b12);
+    __m128i c3 = _mm_unpacklo_epi64(a3, b3);
+    __m128i c4 = _mm_unpackhi_epi64(a3, b3);
+
+    c1 = _mm_add_epi32(c1, c2);
+    c1 = _mm_add_epi32(c1, c3);
+    c1 = _mm_add_epi32(c1, c4);
+
+    *sumAbsGX = _mm_cvtsi128_si32(c1);
+    *sumAbsGY = _mm_cvtsi128_si32(_mm_shuffle_epi32(c1, 0x55));
+    *sumDIX = _mm_cvtsi128_si32(_mm_shuffle_epi32(c1, 0xaa));
+    *sumDIY = _mm_cvtsi128_si32(_mm_shuffle_epi32(c1, 0xff));
+
+    sumSignGyGxTmp = _mm_add_epi32(sumSignGyGxTmp, _mm_shuffle_epi32(sumSignGyGxTmp, 0x4e));   // 01001110
+    sumSignGyGxTmp = _mm_add_epi32(sumSignGyGxTmp, _mm_shuffle_epi32(sumSignGyGxTmp, 0xb1));   // 10110001
+    *sumSignGY_GX = _mm_cvtsi128_si32(sumSignGyGxTmp);
+  }
+}
+
+template< X86_VEXT vext >
+void addBIOAvg4HBD_SIMD(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
+{  // dst = clip((src0 + src1 + tmpx * (gradX0 - gradX1) + tmpy * (gradY0 - gradY1) + offset) >> shift) to [clpRng.min, clpRng.max], 4 samples per step.
+#ifdef USE_AVX2
+  if (vext >= AVX2)
+  {
+    __m256i mm_tmpx = _mm256_set1_epi32(tmpx);
+    __m256i mm_tmpy = _mm256_set1_epi32(tmpy);
+    __m256i mm_offset = _mm256_set1_epi32(offset);
+    __m256i vibdimin = _mm256_set1_epi32(clpRng.min);
+    __m256i vibdimax = _mm256_set1_epi32(clpRng.max);
+
+    int src0Stride2 = (src0Stride << 1);  // doubled strides: this path advances two rows per outer iteration
+    int src1Stride2 = (src1Stride << 1);
+    int dstStride2 = (dstStride << 1);
+    int gradStride2 = (gradStride << 1);
+
+    for (int y = 0; y < height; y += 2)  // rows y and y + 1 are handled together
+    {
+      for (int x = 0; x < width; x += 4)
+      {
+        __m256i mm_gradX0 = _mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)(gradX0 + x)));  // low 128 bits: row y
+        mm_gradX0 = _mm256_inserti128_si256(mm_gradX0, _mm_lddqu_si128((__m128i *)(gradX0 + x + gradStride)), 1);  // high 128 bits: row y + 1
+        __m256i mm_gradX1 = _mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)(gradX1 + x)));
+        mm_gradX1 = _mm256_inserti128_si256(mm_gradX1, _mm_lddqu_si128((__m128i *)(gradX1 + x + gradStride)), 1);
+        __m256i mm_gradY0 = _mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)(gradY0 + x)));
+        mm_gradY0 = _mm256_inserti128_si256(mm_gradY0, _mm_lddqu_si128((__m128i *)(gradY0 + x + gradStride)), 1);
+        __m256i mm_gradY1 = _mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)(gradY1 + x)));
+        mm_gradY1 = _mm256_inserti128_si256(mm_gradY1, _mm_lddqu_si128((__m128i *)(gradY1 + x + gradStride)), 1);
+
+        __m256i mm_gradX = _mm256_sub_epi32(mm_gradX0, mm_gradX1);
+        __m256i mm_gradY = _mm256_sub_epi32(mm_gradY0, mm_gradY1);
+        __m256i mm_sum = _mm256_add_epi32(_mm256_mullo_epi32(mm_gradX, mm_tmpx), _mm256_mullo_epi32(mm_gradY, mm_tmpy));  // tmpx * gradX + tmpy * gradY
+
+        __m256i mm_src0 = _mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)(src0 + x)));
+        mm_src0 = _mm256_inserti128_si256(mm_src0, _mm_lddqu_si128((__m128i *)(src0 + x + src0Stride)), 1);
+        __m256i mm_src1 = _mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)(src1 + x)));
+        mm_src1 = _mm256_inserti128_si256(mm_src1, _mm_lddqu_si128((__m128i *)(src1 + x + src1Stride)), 1);
+        __m256i mm_src = _mm256_add_epi32(mm_src0, mm_src1);
+
+        mm_sum = _mm256_add_epi32(mm_sum, mm_src);
+        mm_sum = _mm256_srai_epi32(_mm256_add_epi32(mm_sum, mm_offset), shift);  // add offset, then arithmetic shift right
+        mm_sum = _mm256_min_epi32(_mm256_max_epi32(mm_sum, vibdimin), vibdimax);  // clip to [clpRng.min, clpRng.max]
+
+        _mm_storeu_si128((__m128i *) (dst + x), _mm256_castsi256_si128(mm_sum));  // row y from the low 128 bits
+        _mm_storeu_si128((__m128i *) (dst + x + dstStride), _mm256_castsi256_si128(_mm256_permute4x64_epi64(mm_sum, 0xee)));  // row y + 1 from the high 128 bits
+      }
+      dst += dstStride2;     src0 += src0Stride2;   src1 += src1Stride2;
+      gradX0 += gradStride2; gradX1 += gradStride2; gradY0 += gradStride2; gradY1 += gradStride2;
+    }
+  }
+  else
+#endif  // USE_AVX2
+  {
+    __m128i mm_tmpx = _mm_set1_epi32(tmpx);
+    __m128i mm_tmpy = _mm_set1_epi32(tmpy);
+    __m128i mm_offset = _mm_set1_epi32(offset);
+    __m128i vibdimin = _mm_set1_epi32(clpRng.min);
+    __m128i vibdimax = _mm_set1_epi32(clpRng.max);
+
+    for (int y = 0; y < height; y++)  // one row per outer iteration on this path
+    {
+      for (int x = 0; x < width; x += 4)
+      {
+        __m128i mm_gradX = _mm_sub_epi32(_mm_lddqu_si128((__m128i *)(gradX0 + x)), _mm_lddqu_si128((__m128i *) (gradX1 + x)));
+        __m128i mm_gradY = _mm_sub_epi32(_mm_lddqu_si128((__m128i *)(gradY0 + x)), _mm_lddqu_si128((__m128i *) (gradY1 + x)));
+        __m128i mm_sum = _mm_add_epi32(_mm_mullo_epi32(mm_gradX, mm_tmpx), _mm_mullo_epi32(mm_gradY, mm_tmpy));  // tmpx * gradX + tmpy * gradY
+        __m128i mm_src = _mm_add_epi32(_mm_lddqu_si128((__m128i *)(src0 + x)), _mm_lddqu_si128((__m128i *)(src1 + x)));
+        mm_sum = _mm_add_epi32(mm_sum, mm_src);
+        mm_sum = _mm_srai_epi32(_mm_add_epi32(mm_sum, mm_offset), shift);  // add offset, then arithmetic shift right
+        mm_sum = _mm_min_epi32(_mm_max_epi32(mm_sum, vibdimin), vibdimax);  // clip to [clpRng.min, clpRng.max]
+        _mm_storeu_si128((__m128i *) (dst + x), mm_sum);
+      }
+      dst += dstStride;     src0 += src0Stride;   src1 += src1Stride;
+      gradX0 += gradStride; gradX1 += gradStride; gradY0 += gradStride; gradY1 += gradStride;
+    }
+  }
+}
+
+template< X86_VEXT vext >
+void applyPROFHBD_SIMD(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng)
+{  // dst = src + clamp(dMvX * gradX + dMvY * gradY, -dILimit, dILimit - 1); when bi is false the result is also offset, shifted and clipped.
+  CHECKD((width & 3), "block width error!");  // width must be a multiple of 4
+  const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13);  // bound for the refinement term, at least 1 << 13
+
+#ifdef USE_AVX2
+  if (vext >= AVX2)
+  {
+    __m256i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_src;
+    __m256i mm_offset = _mm256_set1_epi32(offset);
+    __m256i vibdimin = _mm256_set1_epi32(clpRng.min);
+    __m256i vibdimax = _mm256_set1_epi32(clpRng.max);
+    __m256i mm_dimin = _mm256_set1_epi32(-dILimit);
+    __m256i mm_dimax = _mm256_set1_epi32(dILimit - 1);
+
+    for (int h = 0; h < height; h += 2)  // rows h and h + 1 are handled together
+    {
+      const int* vX = dMvX;
+      const int* vY = dMvY;
+      const Pel* gX = gradX;
+      const Pel* gY = gradY;
+      const Pel* src = srcPel;
+      Pel*       dst = dstPel;
+
+      for (int w = 0; w < width; w += 4)
+      {
+        mm_dmvx = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)vX)), _mm_lddqu_si128((__m128i *)(vX + dMvStride)), 1);  // low half: row h, high half: row h + 1
+        mm_dmvy = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)vY)), _mm_lddqu_si128((__m128i *)(vY + dMvStride)), 1);
+        mm_gradx = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)gX)), _mm_lddqu_si128((__m128i *)(gX + gradStride)), 1);
+        mm_grady = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)gY)), _mm_lddqu_si128((__m128i *)(gY + gradStride)), 1);
+        mm_src = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)src)), _mm_lddqu_si128((__m128i *)(src + srcStride)), 1);
+
+        mm_dI = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady));  // dMvX * gradX + dMvY * gradY
+        mm_dI = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI));  // clamp to [-dILimit, dILimit - 1]
+        mm_dI = _mm256_add_epi32(mm_src, mm_dI);
+
+        if (!bi)  // only when bi is false: apply offset and shift, then clip to [clpRng.min, clpRng.max]
+        {
+          mm_dI = _mm256_srai_epi32(_mm256_add_epi32(mm_dI, mm_offset), shiftNum);
+          mm_dI = _mm256_min_epi32(vibdimax, _mm256_max_epi32(vibdimin, mm_dI));
+        }
+
+        _mm_storeu_si128((__m128i *)dst, _mm256_castsi256_si128(mm_dI));  // row h from the low 128 bits
+        _mm_storeu_si128((__m128i *)(dst + dstStride), _mm256_castsi256_si128(_mm256_permute4x64_epi64(mm_dI, 0xee)));  // row h + 1 from the high 128 bits
+        vX += 4; vY += 4; gX += 4; gY += 4; src += 4; dst += 4;
+      }
+      dMvX += (dMvStride << 1);
+      dMvY += (dMvStride << 1);
+      gradX += (gradStride << 1);
+      gradY += (gradStride << 1);
+      srcPel += (srcStride << 1);
+      dstPel += (dstStride << 1);
+    }
+  }
+  else
+#endif  // USE_AVX2
+  {
+    __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI;
+    __m128i mm_offset = _mm_set1_epi32(offset);
+    __m128i vibdimin = _mm_set1_epi32(clpRng.min);
+    __m128i vibdimax = _mm_set1_epi32(clpRng.max);
+    __m128i mm_dimin = _mm_set1_epi32(-dILimit);
+    __m128i mm_dimax = _mm_set1_epi32(dILimit - 1);
+
+    for (int h = 0; h < height; h++)  // one row per outer iteration on this path
+    {
+      const int* vX = dMvX;
+      const int* vY = dMvY;
+      const Pel* gX = gradX;
+      const Pel* gY = gradY;
+      const Pel* src = srcPel;
+      Pel*       dst = dstPel;
+
+      for (int w = 0; w < width; w += 4)
+      {
+        mm_dmvx = _mm_lddqu_si128((__m128i *)vX);
+        mm_dmvy = _mm_lddqu_si128((__m128i *)vY);
+        mm_gradx = _mm_lddqu_si128((__m128i*)gX);
+        mm_grady = _mm_lddqu_si128((__m128i*)gY);
+        mm_dI = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady));  // dMvX * gradX + dMvY * gradY
+        mm_dI = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI));  // clamp to [-dILimit, dILimit - 1]
+        mm_dI = _mm_add_epi32(_mm_lddqu_si128((__m128i *)src), mm_dI);
+        if (!bi)  // only when bi is false: apply offset and shift, then clip to [clpRng.min, clpRng.max]
+        {
+          mm_dI = _mm_srai_epi32(_mm_add_epi32(mm_dI, mm_offset), shiftNum);
+          mm_dI = _mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, mm_dI));
+        }
+
+        _mm_storeu_si128((__m128i *)dst, mm_dI);
+        vX += 4; vY += 4; gX += 4; gY += 4; src += 4; dst += 4;
+      }
+      dMvX += dMvStride;
+      dMvY += dMvStride;
+      gradX += gradStride;
+      gradY += gradStride;
+      srcPel += srcStride;
+      dstPel += dstStride;
+    }
+  }
+}
+#endif
 
 template< X86_VEXT vext >
 void roundIntVector_SIMD(int* v, int size, unsigned int nShift, const int dmvLimit)
@@ -1002,7 +1433,286 @@ void linTf_SSE( const Pel* src, int srcStride, Pel *dst, int dstStride, int widt
     }
   }
 }
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+// Bi-prediction average on 32-bit Pel samples: dst = clip((src0 + src1 + offset) >> shift, clpRng.min, clpRng.max).
+template< X86_VEXT vext, int W >
+void addAvg_HBD_SIMD(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng)
+{
+  CHECK((width & 3), "the function only supports width multiple of 4");
 
+  const __m128i round128 = _mm_set1_epi32(offset);
+  const __m128i lower128 = _mm_set1_epi32(clpRng.min);
+  const __m128i upper128 = _mm_set1_epi32(clpRng.max);
+#ifdef USE_AVX2
+  const __m256i round256 = _mm256_set1_epi32(offset);
+  const __m256i lower256 = _mm256_set1_epi32(clpRng.min);
+  const __m256i upper256 = _mm256_set1_epi32(clpRng.max);
+  const int     width8   = (width >> 3) << 3;   // columns handled eight at a time
+#endif
+
+  for (int y = 0; y < height; y++)
+  {
+    int x = 0;
+#ifdef USE_AVX2
+    if (vext >= AVX2)
+    {
+      while (x < width8)
+      {
+        const __m256i a = _mm256_lddqu_si256((const __m256i *)&src0[x]);
+        const __m256i b = _mm256_lddqu_si256((const __m256i *)&src1[x]);
+        __m256i acc = _mm256_add_epi32(_mm256_add_epi32(a, b), round256);
+        acc = _mm256_srai_epi32(acc, shift);
+        acc = _mm256_max_epi32(lower256, acc);
+        acc = _mm256_min_epi32(upper256, acc);
+        _mm256_storeu_si256((__m256i *)&dst[x], acc);
+        x += 8;
+      }
+    }
+#endif
+    // Remaining columns (all of them when the AVX2 loop did not run), four at a time.
+    while (x < width)
+    {
+      const __m128i a = _mm_lddqu_si128((const __m128i *)&src0[x]);
+      const __m128i b = _mm_lddqu_si128((const __m128i *)&src1[x]);
+      __m128i acc = _mm_add_epi32(_mm_add_epi32(a, b), round128);
+      acc = _mm_srai_epi32(acc, shift);
+      acc = _mm_max_epi32(lower128, acc);
+      acc = _mm_min_epi32(upper128, acc);
+      _mm_storeu_si128((__m128i *)&dst[x], acc);
+      x += 4;
+    }
+    src0 += src0Stride;
+    src1 += src1Stride;
+    dst += dstStride;
+  }
+}
+
+// Reconstruction on 32-bit Pel samples: dst = clip(src0 + src1, clpRng.min, clpRng.max).
+template< X86_VEXT vext, int W >
+void reco_HBD_SIMD(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height, const ClpRng& clpRng)
+{
+  CHECK((width & 3), "the function only supports width multiple of 4");
+
+  const __m128i lower128 = _mm_set1_epi32(clpRng.min);
+  const __m128i upper128 = _mm_set1_epi32(clpRng.max);
+#ifdef USE_AVX2
+  const __m256i lower256 = _mm256_set1_epi32(clpRng.min);
+  const __m256i upper256 = _mm256_set1_epi32(clpRng.max);
+  const int     width8   = (width >> 3) << 3;   // columns handled eight at a time
+#endif
+
+  for (int y = 0; y < height; y++)
+  {
+    int x = 0;
+#ifdef USE_AVX2
+    if (vext >= AVX2)
+    {
+      while (x < width8)
+      {
+        const __m256i a = _mm256_lddqu_si256((const __m256i *)&src0[x]);
+        const __m256i b = _mm256_lddqu_si256((const __m256i *)&src1[x]);
+        __m256i sum = _mm256_add_epi32(b, a);
+        sum = _mm256_min_epi32(upper256, _mm256_max_epi32(lower256, sum));
+        _mm256_storeu_si256((__m256i *)&dst[x], sum);
+        x += 8;
+      }
+    }
+#endif
+    // Remaining columns (all of them when the AVX2 loop did not run), four at a time.
+    while (x < width)
+    {
+      const __m128i a = _mm_lddqu_si128((const __m128i *)&src0[x]);
+      const __m128i b = _mm_lddqu_si128((const __m128i *)&src1[x]);
+      __m128i sum = _mm_add_epi32(b, a);
+      sum = _mm_min_epi32(upper128, _mm_max_epi32(lower128, sum));
+      _mm_storeu_si128((__m128i *)&dst[x], sum);
+      x += 4;
+    }
+
+    src0 += src0Stride;
+    src1 += src1Stride;
+    dst += dstStride;
+  }
+}
+
+#if ENABLE_SIMD_OPT_BCW
+// In-place high-frequency removal on 32-bit Pel samples: src0 = 2 * src0 - src1 (width must be a multiple of 4).
+// All loads and stores are unaligned, so no particular alignment of src0/src1 or of the strides is required.
+template< X86_VEXT vext, int W >
+void removeHighFreq_HBD_SIMD(Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height)
+{
+  CHECK((width & 3), "the function only supports width multiple of 4");
+  for (int row = 0; row < height; row++)
+  {
+    int col = 0;
+#ifdef USE_AVX2
+    if (vext >= AVX2)
+    {
+      for (; col < ((width >> 3) << 3); col += 8)
+      {
+        __m256i mm256_vsrc0 = _mm256_lddqu_si256((const __m256i *)&src0[col]);
+        __m256i mm256_vsrc1 = _mm256_lddqu_si256((const __m256i *)&src1[col]);
+        // (src0 << 1) - src1 on eight 32-bit lanes
+        mm256_vsrc0 = _mm256_sub_epi32(_mm256_slli_epi32(mm256_vsrc0, 1), mm256_vsrc1);
+        _mm256_storeu_si256((__m256i *)&src0[col], mm256_vsrc0);
+      }
+    }
+#endif
+    for (; col < width; col += 4)
+    {
+      __m128i vsrc0 = _mm_lddqu_si128((const __m128i *)&src0[col]);
+      __m128i vsrc1 = _mm_lddqu_si128((const __m128i *)&src1[col]);
+      vsrc0 = _mm_sub_epi32(_mm_slli_epi32(vsrc0, 1), vsrc1);
+      // Unaligned store: &src0[col] is not guaranteed to be 16-byte aligned (an aligned store would fault).
+      _mm_storeu_si128((__m128i *)&src0[col], vsrc0);
+    }
+    src0 += src0Stride;
+    src1 += src1Stride;
+  }
+}
+
+// In-place weighted update on 32-bit Pel samples using 64-bit intermediates:
+//   src0 = (src0 * weight0 - src1 * weight1 + offset) >> shift.  bcwWeight is used as a divisor and must be non-zero.
+template< X86_VEXT vext, int W >
+void removeWeightHighFreq_HBD_SIMD(Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int bcwWeight)
+{
+  CHECK((width & 3), "the function only supports width multiple of 4");
+
+  // 2^16 / bcwWeight with a half-weight rounding term added to the numerator
+  int normalizer = ((1 << 16) + (bcwWeight > 0 ? (bcwWeight >> 1) : -(bcwWeight >> 1))) / bcwWeight;
+  int weight0 = normalizer << g_BcwLog2WeightBase;
+  int weight1 = (g_BcwWeightBase - bcwWeight)*normalizer;
+  Intermediate_Int offset = Intermediate_Int(1) << (shift - 1);
+
+#ifdef USE_AVX2
+  if (vext >= AVX2)
+  {
+    __m256i voffset = _mm256_set1_epi64x(offset);
+    __m256i vw0 = _mm256_set1_epi32(weight0);
+    __m256i vw1 = _mm256_set1_epi32(weight1);
+
+    __m256i vdst, vsrc;
+    for (int row = 0; row < height; row++)
+    {
+      for (int col = 0; col < width; col += 4)
+      {
+        // Low half: samples col, col+1 (its upper two lanes are unused). High half: 64-bit load of exactly col+2, col+3, so nothing past col+3 is read.
+        __m256i vsrc0 = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)&src0[col])), _mm_loadl_epi64((__m128i *)&src0[col + 2]), 1);
+        __m256i vsrc1 = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_lddqu_si128((__m128i *)&src1[col])), _mm_loadl_epi64((__m128i *)&src1[col + 2]), 1);
+        // 0x50 copies lanes 0 and 1 of each half into the even 32-bit positions that _mm256_mul_epi32 multiplies
+        vsrc0 = _mm256_shuffle_epi32(vsrc0, 0x50);
+        vsrc1 = _mm256_shuffle_epi32(vsrc1, 0x50);
+        vdst = _mm256_mul_epi32(vsrc0, vw0);
+        vsrc = _mm256_mul_epi32(vsrc1, vw1);
+        vdst = _mm256_add_epi64(_mm256_sub_epi64(vdst, vsrc), voffset);
+        *(src0 + col) = (Pel)(_mm256_extract_epi64(vdst, 0) >> shift);
+        *(src0 + col + 1) = (Pel)(_mm256_extract_epi64(vdst, 1) >> shift);
+        *(src0 + col + 2) = (Pel)(_mm256_extract_epi64(vdst, 2) >> shift);
+        *(src0 + col + 3) = (Pel)(_mm256_extract_epi64(vdst, 3) >> shift);
+      }
+      src0 += src0Stride;
+      src1 += src1Stride;
+    }
+  }
+  else
+#endif
+  {
+    __m128i voffset = _mm_set_epi64x(offset, offset);
+    __m128i vw0 = _mm_set1_epi32(weight0);
+    __m128i vw1 = _mm_set1_epi32(weight1);
+
+    __m128i vdst, vsrc;
+    for (int row = 0; row < height; row++)
+    {
+      for (int col = 0; col < width; col += 4)
+      {
+        __m128i vsrc0 = _mm_lddqu_si128((__m128i *)&src0[col]);
+        __m128i vsrc1 = _mm_lddqu_si128((__m128i *)&src1[col]);
+        // _mm_mul_epi32 multiplies the even 32-bit lanes: samples col and col+2
+        vdst = _mm_mul_epi32(vsrc0, vw0);
+        vsrc = _mm_mul_epi32(vsrc1, vw1);
+        vdst = _mm_add_epi64(_mm_sub_epi64(vdst, vsrc), voffset);
+        *(src0 + col) = (Pel)(_mm_extract_epi64(vdst, 0) >> shift);
+        *(src0 + col + 2) = (Pel)(_mm_extract_epi64(vdst, 1) >> shift);
+        // shift by one 32-bit lane so the odd samples (col+1, col+3) land in the even positions
+        vsrc0 = _mm_srli_si128(vsrc0, 4);
+        vsrc1 = _mm_srli_si128(vsrc1, 4);
+        vdst = _mm_mul_epi32(vsrc0, vw0);
+        vsrc = _mm_mul_epi32(vsrc1, vw1);
+        vdst = _mm_add_epi64(_mm_sub_epi64(vdst, vsrc), voffset);
+        *(src0 + col + 1) = (Pel)(_mm_extract_epi64(vdst, 0) >> shift);
+        *(src0 + col + 3) = (Pel)(_mm_extract_epi64(vdst, 1) >> shift);
+      }
+      src0 += src0Stride;
+      src1 += src1Stride;
+    }
+  }
+}
+#endif
+
+// Optional clamp of 32-bit lanes to [vbdmin, vbdmax]; the <false, T> specializations leave the register untouched.
+template<bool clip, typename T> static inline void do_clip_hbd(T& vreg, T& vbdmin, T& vbdmax);
+template<> inline void do_clip_hbd<false, __m128i>(__m128i&, __m128i&, __m128i&) {}
+template<> inline void do_clip_hbd<true, __m128i>(__m128i& val, __m128i& lo, __m128i& hi) { val = _mm_min_epi32(hi, _mm_max_epi32(lo, val)); }
+#ifdef USE_AVX2
+// 256-bit counterparts of the two specializations above
+template<> inline void do_clip_hbd<false, __m256i>(__m256i&, __m256i&, __m256i&) {}
+template<> inline void do_clip_hbd<true, __m256i>(__m256i& val, __m256i& lo, __m256i& hi) { val = _mm256_min_epi32(hi, _mm256_max_epi32(lo, val)); }
+#endif
+
+// Per-sample linear transform on 32-bit Pel data. Stages are applied in the order do_mult -> do_shift -> do_add ->
+// do_clip_hbd; do_mult/do_shift/do_add are defined elsewhere (presumably no-ops when their flag is false - confirm).
+template<X86_VEXT vext, int W, bool doAdd, bool mult, bool doShift, bool shiftR, bool clip>
+void linTf_HBD_SIMD(const Pel* src, int srcStride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng)
+{
+  CHECK((width & 3), "the function only supports width multiple of 4");
+
+  // Kept non-const: do_clip_hbd takes its operands by non-const reference.
+  __m128i lower128 = _mm_set1_epi32(clpRng.min);
+  __m128i upper128 = _mm_set1_epi32(clpRng.max);
+  __m128i add128 = _mm_set1_epi32(offset);
+  __m128i mul128 = _mm_set1_epi32(scale);
+#ifdef USE_AVX2
+  __m256i lower256 = _mm256_set1_epi32(clpRng.min);
+  __m256i upper256 = _mm256_set1_epi32(clpRng.max);
+  __m256i add256 = _mm256_set1_epi32(offset);
+  __m256i mul256 = _mm256_set1_epi32(scale);
+  const int width8 = (width >> 3) << 3;   // columns handled eight at a time
+#endif
+
+  for (int y = 0; y < height; y++)
+  {
+    int x = 0;
+#ifdef USE_AVX2
+    if (vext >= AVX2)
+    {
+      // Eight samples per iteration.
+      for (; x < width8; x += 8)
+      {
+        __m256i v = _mm256_lddqu_si256((const __m256i *)&src[x]);
+        do_mult<mult, __m256i>(v, mul256);
+        do_shift<doShift, shiftR, __m256i>(v, shift);
+        do_add<doAdd, __m256i>(v, add256);
+        do_clip_hbd<clip, __m256i>(v, lower256, upper256);
+        _mm256_storeu_si256((__m256i *)&dst[x], v);
+      }
+    }
+#endif
+    // Remaining columns (all of them when the AVX2 loop did not run), four at a time.
+    for (; x < width; x += 4)
+    {
+      __m128i v = _mm_lddqu_si128((const __m128i *)&src[x]);
+      do_mult<mult, __m128i>(v, mul128);
+      do_shift<doShift, shiftR, __m128i>(v, shift);
+      do_add<doAdd, __m128i>(v, add128);
+      do_clip_hbd<clip, __m128i>(v, lower128, upper128);
+      _mm_storeu_si128((__m128i *)&dst[x], v);
+    }
+
+    src += srcStride;
+    dst += dstStride;
+  }
+}
+#endif
 template<X86_VEXT vext, int W>
 void linTf_SSE_entry( const Pel* src, int srcStride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool clip )
 {
@@ -1010,6 +1720,40 @@ void linTf_SSE_entry( const Pel* src, int srcStride, Pel *dst, int dstStride, in
 
   switch( fn )
   {
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+  case  0: linTf_HBD_SIMD<vext, W, true, true, true, true, true >(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case  1: linTf_HBD_SIMD<vext, W, true, true, true, true, false>(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case  2: linTf_HBD_SIMD<vext, W, true, true, true, false, true >(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case  3: linTf_HBD_SIMD<vext, W, true, true, true, false, false>(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case  4: linTf_HBD_SIMD<vext, W, true, true, false, true, true >(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case  5: linTf_HBD_SIMD<vext, W, true, true, false, true, false>(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case  6: linTf_HBD_SIMD<vext, W, true, true, false, false, true >(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case  7: linTf_HBD_SIMD<vext, W, true, true, false, false, false>(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case  8: linTf_HBD_SIMD<vext, W, true, false, true, true, true >(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case  9: linTf_HBD_SIMD<vext, W, true, false, true, true, false>(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case 10: linTf_HBD_SIMD<vext, W, true, false, true, false, true >(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case 11: linTf_HBD_SIMD<vext, W, true, false, true, false, false>(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case 12: linTf_HBD_SIMD<vext, W, true, false, false, true, true >(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case 13: linTf_HBD_SIMD<vext, W, true, false, false, true, false>(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case 14: linTf_HBD_SIMD<vext, W, true, false, false, false, true >(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case 15: linTf_HBD_SIMD<vext, W, true, false, false, false, false>(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case 16: linTf_HBD_SIMD<vext, W, false, true, true, true, true >(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case 17: linTf_HBD_SIMD<vext, W, false, true, true, true, false>(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case 18: linTf_HBD_SIMD<vext, W, false, true, true, false, true >(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case 19: linTf_HBD_SIMD<vext, W, false, true, true, false, false>(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case 20: linTf_HBD_SIMD<vext, W, false, true, false, true, true >(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case 21: linTf_HBD_SIMD<vext, W, false, true, false, true, false>(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case 22: linTf_HBD_SIMD<vext, W, false, true, false, false, true >(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case 23: linTf_HBD_SIMD<vext, W, false, true, false, false, false>(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case 24: linTf_HBD_SIMD<vext, W, false, false, true, true, true >(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case 25: linTf_HBD_SIMD<vext, W, false, false, true, true, false>(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case 26: linTf_HBD_SIMD<vext, W, false, false, true, false, true >(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case 27: linTf_HBD_SIMD<vext, W, false, false, true, false, false>(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case 28: linTf_HBD_SIMD<vext, W, false, false, false, true, true >(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case 29: linTf_HBD_SIMD<vext, W, false, false, false, true, false>(src, srcStride, dst, dstStride, width, height, scale, shift, offset, clpRng); break;
+  case 30: linTf_HBD_SIMD<vext, W, false, false, false, false, true >(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+  case 31: linTf_HBD_SIMD<vext, W, false, false, false, false, false>(src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng); break;
+#else
   case  0: linTf_SSE<vext, W, true,  true,  true,  true,  true >( src, srcStride, dst, dstStride, width, height, scale,  shift, offset, clpRng ); break;
   case  1: linTf_SSE<vext, W, true,  true,  true,  true,  false>( src, srcStride, dst, dstStride, width, height, scale,  shift, offset, clpRng ); break;
   case  2: linTf_SSE<vext, W, true,  true,  true,  false, true >( src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng ); break;
@@ -1042,6 +1786,7 @@ void linTf_SSE_entry( const Pel* src, int srcStride, Pel *dst, int dstStride, in
   case 29: linTf_SSE<vext, W, false, false, false, true,  false>( src, srcStride, dst, dstStride, width, height, scale,  shift, offset, clpRng ); break;
   case 30: linTf_SSE<vext, W, false, false, false, false, true >( src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng ); break;
   case 31: linTf_SSE<vext, W, false, false, false, false, false>( src, srcStride, dst, dstStride, width, height, scale, -shift, offset, clpRng ); break;
+#endif
   default:
     THROW( "Unknown parametrization of the linear transformation" );
     break;
@@ -1051,6 +1796,29 @@ void linTf_SSE_entry( const Pel* src, int srcStride, Pel *dst, int dstStride, in
 template<X86_VEXT vext>
 void PelBufferOps::_initPelBufOpsX86()
 {
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+  addAvg8 = addAvg_HBD_SIMD<vext, 8>;
+  addAvg4 = addAvg_HBD_SIMD<vext, 4>;
+
+  addBIOAvg4 = addBIOAvg4HBD_SIMD<vext>;
+  bioGradFilter = gradFilterHBD_SIMD<vext>;
+  calcBIOSums = calcBIOSumsHBD_SIMD<vext>;
+
+  reco8 = reco_HBD_SIMD<vext, 8>;
+  reco4 = reco_HBD_SIMD<vext, 4>;
+
+  linTf8 = linTf_SSE_entry<vext, 8>;
+  linTf4 = linTf_SSE_entry<vext, 4>;
+#if ENABLE_SIMD_OPT_BCW
+  removeWeightHighFreq8 = removeWeightHighFreq_HBD_SIMD<vext, 8>;
+  removeWeightHighFreq4 = removeWeightHighFreq_HBD_SIMD<vext, 4>;
+  removeHighFreq8 = removeHighFreq_HBD_SIMD<vext, 8>;
+  removeHighFreq4 = removeHighFreq_HBD_SIMD<vext, 4>;
+#endif
+
+  profGradFilter = gradFilterHBD_SIMD<vext, false>;
+  applyPROF = applyPROFHBD_SIMD<vext>;
+#else
   addAvg8 = addAvg_SSE<vext, 8>;
   addAvg4 = addAvg_SSE<vext, 4>;
 
@@ -1073,6 +1841,7 @@ void PelBufferOps::_initPelBufOpsX86()
 #endif
   profGradFilter = gradFilter_SSE<vext, false>;
   applyPROF      = applyPROF_SSE<vext>;
+#endif
   roundIntVector = roundIntVector_SIMD<vext>;
 }
 
diff --git a/source/Lib/CommonLib/x86/CommonDefX86.cpp b/source/Lib/CommonLib/x86/CommonDefX86.cpp
index 448b627bb7ed2e01a03ef24c94fa66dc0347f904..3c9a1948155d249412bfbeaa11c5899a0386f561 100644
--- a/source/Lib/CommonLib/x86/CommonDefX86.cpp
+++ b/source/Lib/CommonLib/x86/CommonDefX86.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/x86/CommonDefX86.h b/source/Lib/CommonLib/x86/CommonDefX86.h
index 29f90397f8521217176ec94724687d50382e18dc..b00469dbd0f41f3d45f47616c3199aaad57f91c0 100644
--- a/source/Lib/CommonLib/x86/CommonDefX86.h
+++ b/source/Lib/CommonLib/x86/CommonDefX86.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/x86/IbcHashMapX86.h b/source/Lib/CommonLib/x86/IbcHashMapX86.h
index 2d0ce5f033d844e8f8865fad0e431ec399c2621d..0d8eb6669511e5c7fca45dc0d49c2b8728bd25ed 100644
--- a/source/Lib/CommonLib/x86/IbcHashMapX86.h
+++ b/source/Lib/CommonLib/x86/IbcHashMapX86.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/x86/InitX86.cpp b/source/Lib/CommonLib/x86/InitX86.cpp
index 458839510c00487d6f61fc80ba13d15197a09de5..5908746d2f76b42902fd90b2dc720e0072dadcf7 100644
--- a/source/Lib/CommonLib/x86/InitX86.cpp
+++ b/source/Lib/CommonLib/x86/InitX86.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
index 3e52ef6fb8660426304fe7ba79a9b894a6a25aa5..336734f998b69da87c59d26dc87eb4bec2e6456c 100644
--- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h
+++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -621,7 +621,7 @@ template<X86_VEXT vext, int N, bool shiftBack>
 static void simdInterpolateVerM8( const int16_t *src, int srcStride, int16_t *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng, int16_t const *coeff )
 {
 //   src -= ( N / 2 - 1 ) * srcStride;
-  const Pel *srcOrig = src;
+  const int16_t *srcOrig = src;
   int16_t *dstOrig = dst;
 
   __m128i vcoeff[N / 2], vsrc[N];
@@ -1155,184 +1155,1229 @@ static void simdInterpolateN2_10BIT_M4(const int16_t* src, int srcStride, int16_
     dst += dstStride;
   }
 }
-
-template<X86_VEXT vext, int N, bool isVertical, bool isFirst, bool isLast>
-static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR)
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+// Horizontal N-tap filter on 32-bit Pel samples (128-bit path): dst = (sum_k src[x + k] * coeff[k] + offset) >> shift, clipped to clpRng when shiftBack is set; eight outputs per step.
+template<X86_VEXT vext, int N, bool shiftBack>
+static void simdInterpolateHorM8_HBD(const Pel* src, int srcStride, Pel *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng, Pel const *coeff)
 {
-  int row, col;
-
-  Pel c[8];
-  c[0] = coeff[0];
-  c[1] = coeff[1];
-  if( N >= 4 )
-  {
-    c[2] = coeff[2];
-    c[3] = coeff[3];
-  }
-  if( N >= 6 )
+  const int filterSpan = (N - 1);
+  // Prefetch the next row. The cast to char* comes after the pointer arithmetic so offsets are in Pel units, not bytes.
+  _mm_prefetch((const char*)(src + srcStride), _MM_HINT_T0);
+  _mm_prefetch((const char*)(src + (width >> 1) + srcStride), _MM_HINT_T0);
+  _mm_prefetch((const char*)(src + width + filterSpan + srcStride), _MM_HINT_T0);
+  __m128i voffset = _mm_set1_epi32(offset);
+  __m128i vibdimin = _mm_set1_epi32(clpRng.min);
+  __m128i vibdimax = _mm_set1_epi32(clpRng.max);
+  __m128i vcoeffh0, vcoeffh1;
+  __m128i vsrc0, vsrc1;
+  __m128i vsuma, vsumb;
+  vcoeffh0 = _mm_lddqu_si128((__m128i const *)coeff);
+  if (N == 8)
   {
-    c[4] = coeff[4];
-    c[5] = coeff[5];
+    vcoeffh1 = _mm_lddqu_si128((__m128i const *)(coeff + 4));
   }
-  if( N == 8 )
+
+  for (int row = 0; row < height; row++)
   {
-    c[6] = coeff[6];
-    c[7] = coeff[7];
+    _mm_prefetch((const char*)(src + 2 * srcStride), _MM_HINT_T0);
+    _mm_prefetch((const char*)(src + (width >> 1) + 2 * srcStride), _MM_HINT_T0);
+    _mm_prefetch((const char*)(src + width + filterSpan + 2 * srcStride), _MM_HINT_T0);
+
+    for (int col = 0; col < width; col += 8)
+    {
+      __m128i vtmp[4];
+      for (int i = 0; i < 8; i += 2)
+      {
+        if (N == 8)
+        {
+          __m128i vsrc00 = _mm_lddqu_si128((__m128i const *)&src[col + i]);
+          __m128i vsrc01 = _mm_lddqu_si128((__m128i const *)&src[col + i + 4]);
+          vsrc0 = _mm_add_epi32(_mm_mullo_epi32(vsrc00, vcoeffh0), _mm_mullo_epi32(vsrc01, vcoeffh1));
+
+          __m128i vsrc10 = _mm_lddqu_si128((__m128i const *)&src[col + i + 1]);
+          __m128i vsrc11 = _mm_lddqu_si128((__m128i const *)&src[col + i + 5]);
+          vsrc1 = _mm_add_epi32(_mm_mullo_epi32(vsrc10, vcoeffh0), _mm_mullo_epi32(vsrc11, vcoeffh1));
+        }
+        else
+        {
+          vsrc0 = _mm_mullo_epi32(_mm_lddqu_si128((__m128i const *)&src[col + i]), vcoeffh0);
+          vsrc1 = _mm_mullo_epi32(_mm_lddqu_si128((__m128i const *)&src[col + i + 1]), vcoeffh0);
+        }
+        // vsrc0/vsrc1 hold the per-tap products for outputs col+i and col+i+1; hadd folds each to two partial sums
+        vtmp[i / 2] = _mm_hadd_epi32(vsrc0, vsrc1);
+      }
+      // second hadd completes the sums: vsuma = outputs col..col+3, vsumb = outputs col+4..col+7
+      vsuma = _mm_hadd_epi32(vtmp[0], vtmp[1]);
+      vsumb = _mm_hadd_epi32(vtmp[2], vtmp[3]);
+
+      vsuma = _mm_add_epi32(vsuma, voffset);
+      vsumb = _mm_add_epi32(vsumb, voffset);
+
+      vsuma = _mm_srai_epi32(vsuma, shift);
+      vsumb = _mm_srai_epi32(vsumb, shift);
+
+      if (shiftBack)
+      {
+        vsuma = _mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, vsuma));
+        vsumb = _mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, vsumb));
+      }
+      _mm_storeu_si128((__m128i *)&dst[col], vsuma);
+      _mm_storeu_si128((__m128i *)&dst[col + 4], vsumb);
+    }
+    src += srcStride;
+    dst += dstStride;
   }
+}
 
-  int cStride = ( isVertical ) ? srcStride : 1;
-  src -= ( N/2 - 1 ) * cStride;
+template<X86_VEXT vext, int N, bool shiftBack> // AVX2 horizontal N-tap filter on 32-bit Pel samples, eight outputs per step
+static void simdInterpolateHorM8_HBD_AVX2(const Pel* src, int srcStride, Pel *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng, Pel const *coeff)
+{
+#ifdef USE_AVX2 // without USE_AVX2 the whole body is compiled out and the function does nothing
+  const int filterSpan = (N - 1);
+  _mm_prefetch((const char*)(src + srcStride), _MM_HINT_T0);
+  _mm_prefetch((const char*)(src + (width >> 1) + srcStride), _MM_HINT_T0);
+  _mm_prefetch((const char*)(src + width + filterSpan + srcStride), _MM_HINT_T0);
 
-  int offset;
-  int headRoom = IF_INTERNAL_FRAC_BITS(clpRng.bd);
-  int shift    = IF_FILTER_PREC;
-  // with the current settings (IF_INTERNAL_PREC = 14 and IF_FILTER_PREC = 6), though headroom can be
-  // negative for bit depths greater than 14, shift will remain non-negative for bit depths of 8->20
-  CHECK( shift < 0, "Negative shift" );
+  __m256i voffset = _mm256_set1_epi32(offset);
+  __m256i vibdimin = _mm256_set1_epi32(clpRng.min);
+  __m256i vibdimax = _mm256_set1_epi32(clpRng.max);
 
+  __m256i vcoeff[N];
+  for (int i = 0; i < N; i++)
+  {
+    vcoeff[i] = _mm256_set1_epi32(coeff[i]); // broadcast tap i to all eight lanes
+  }
 
-  if( isLast )
+  for (int row = 0; row < height; row++)
   {
-    shift += ( isFirst ) ? 0 : headRoom;
-    offset = 1 << ( shift - 1 );
-    offset += ( isFirst ) ? 0 : IF_INTERNAL_OFFS << IF_FILTER_PREC;
+    _mm_prefetch((const char*)(src + 2 * srcStride), _MM_HINT_T0);
+    _mm_prefetch((const char*)(src + (width >> 1) + 2 * srcStride), _MM_HINT_T0);
+    _mm_prefetch((const char*)(src + width + filterSpan + 2 * srcStride), _MM_HINT_T0);
+    // Each step writes eight samples, so width is presumably a multiple of 8 - confirm at the call site.
+    for (int col = 0; col < width; col += 8)
+    {
+      __m256i vsum = _mm256_setzero_si256();
+      for (int i = 0; i < N; i++)
+      {
+        __m256i vsrc = _mm256_lddqu_si256((__m256i *)&src[col + i]); // samples col+i .. col+i+7 (unaligned)
+        vsum = _mm256_add_epi32(vsum, _mm256_mullo_epi32(vsrc, vcoeff[i]));
+      }
+      vsum = _mm256_add_epi32(vsum, voffset);
+      vsum = _mm256_srai_epi32(vsum, shift);
+      if (shiftBack) // clamp to [clpRng.min, clpRng.max]
+      {
+        vsum = _mm256_min_epi32(vibdimax, _mm256_max_epi32(vibdimin, vsum));
+      }
+      _mm256_storeu_si256((__m256i *)&dst[col], vsum);
+    }
+    src += srcStride;
+    dst += dstStride;
   }
-  else
+#endif
+}
+
+// Vertical N-tap filter on 32-bit Pel samples (128-bit path), processed in strips of eight columns with a sliding window of N source rows.
+template<X86_VEXT vext, int N, bool shiftBack>
+static void simdInterpolateVerM8_HBD(const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng, Pel const *coeff)
+{
+  const Pel *srcOrig = src;
+  Pel *dstOrig = dst;
+  __m128i vcoeff[N], vsrc0[N], vsrc1[N];
+  __m128i vzero = _mm_setzero_si128();
+  __m128i voffset = _mm_set1_epi32(offset);
+  __m128i vibdimin = _mm_set1_epi32(clpRng.min);
+  __m128i vibdimax = _mm_set1_epi32(clpRng.max);
+
+  __m128i vsuma, vsumb;
+  for (int i = 0; i < N; i++)
   {
-    shift -= ( isFirst ) ? headRoom : 0;
-    offset = ( isFirst ) ? -IF_INTERNAL_OFFS << shift : 0;
+    vcoeff[i] = _mm_set1_epi32(coeff[i]);
   }
 
-  if (biMCForDMVR)
+  for (int col = 0; col < width; col += 8)
   {
-    if( isFirst )
+    for (int i = 0; i < N - 1; i++)
     {
-      shift = IF_FILTER_PREC_BILINEAR - (IF_INTERNAL_PREC_BILINEAR - clpRng.bd);
-      offset = 1 << (shift - 1);
+      vsrc0[i] = _mm_lddqu_si128((__m128i const *)&src[col + i * srcStride]);
+      vsrc1[i] = _mm_lddqu_si128((__m128i const *)&src[col + 4 + i * srcStride]);
     }
-    else
+    // vsrc0 holds columns col..col+3 and vsrc1 columns col+4..col+7; only row N-1 is loaded per output row
+    for (int row = 0; row < height; row++)
     {
-      shift = 4;
-      offset = 1 << (shift - 1);
+      vsrc0[N - 1] = _mm_lddqu_si128((__m128i const *)&src[col + (N - 1) * srcStride]);
+      vsrc1[N - 1] = _mm_lddqu_si128((__m128i const *)&src[col + 4 + (N - 1) * srcStride]);
+
+      vsuma = vsumb = vzero;
+      for (int i = 0; i < N; i++)
+      {
+        vsuma = _mm_add_epi32(vsuma, _mm_mullo_epi32(vsrc0[i], vcoeff[i]));
+        vsumb = _mm_add_epi32(vsumb, _mm_mullo_epi32(vsrc1[i], vcoeff[i]));
+      }
+      // slide the row window up by one for the next output row
+      for (int i = 0; i < N - 1; i++)
+      {
+        vsrc0[i] = vsrc0[i + 1];
+        vsrc1[i] = vsrc1[i + 1];
+      }
+
+      vsuma = _mm_add_epi32(vsuma, voffset);
+      vsumb = _mm_add_epi32(vsumb, voffset);
+
+      vsuma = _mm_srai_epi32(vsuma, shift);
+      vsumb = _mm_srai_epi32(vsumb, shift);
+
+      if (shiftBack) //clip
+      {
+        vsuma = _mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, vsuma));
+        vsumb = _mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, vsumb));
+      }
+
+      _mm_storeu_si128((__m128i *)&dst[col], vsuma);
+      _mm_storeu_si128((__m128i *)&dst[col + 4], vsumb);
+
+      src += srcStride;
+      dst += dstStride;
     }
+    src = srcOrig; // rewind to the top row for the next eight-column strip
+    dst = dstOrig;
+  }
+}
+
+// AVX2 vertical N-tap filter on 32-bit Pel samples, processed in strips of eight columns with a sliding window of N source rows.
+template<X86_VEXT vext, int N, bool shiftBack>
+static void simdInterpolateVerM8_HBD_AVX2(const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng, Pel const *coeff)
+{
+#ifdef USE_AVX2 // without USE_AVX2 the whole body is compiled out and the function does nothing
+  __m256i voffset = _mm256_set1_epi32(offset);
+  __m256i vibdimin = _mm256_set1_epi32(clpRng.min);
+  __m256i vibdimax = _mm256_set1_epi32(clpRng.max);
+  __m256i vsrc[N], vcoeff[N];
+  for (int i = 0; i < N; i++)
+  {
+    vcoeff[i] = _mm256_set1_epi32(coeff[i]);
   }
+
+  const Pel *srcOrig = src;
+  Pel *dstOrig = dst;
+
+  for (int col = 0; col < width; col += 8)
   {
-    if( N == 8 && !( width & 0x07 ) )
+    for (int i = 0; i < N - 1; i++)
     {
-      if( !isVertical )
+      vsrc[i] = _mm256_loadu_si256((const __m256i *)&src[col + i * srcStride]);
+    }
+    // rows 0..N-2 are preloaded above; only row N-1 is loaded per output row
+    for (int row = 0; row < height; row++)
+    {
+      vsrc[N - 1] = _mm256_loadu_si256((const __m256i *)&src[col + (N - 1) * srcStride]);
+
+      __m256i vsum = _mm256_setzero_si256();
+      for (int i = 0; i < N; i++)
       {
-        if( vext>= AVX2 )
-          simdInterpolateHorM8_AVX2<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
-        else
-          simdInterpolateHorM8<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+        vsum = _mm256_add_epi32(vsum, _mm256_mullo_epi32(vsrc[i], vcoeff[i]));
       }
-      else
+      // slide the row window up by one for the next output row
+      for (int i = 0; i < N - 1; i++)
       {
-        if( vext>= AVX2 )
-          simdInterpolateVerM8_AVX2<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
-        else
-          simdInterpolateVerM8<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+        vsrc[i] = vsrc[i + 1];
       }
-      return;
-    }
-    else if( N == 8 && !( width & 0x03 ) )
-    {
-      if( !isVertical )
+
+      vsum = _mm256_add_epi32(vsum, voffset);
+      vsum = _mm256_srai_epi32(vsum, shift);
+
+      if (shiftBack)
       {
-        simdInterpolateHorM4<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+        vsum = _mm256_min_epi32(vibdimax, _mm256_max_epi32(vibdimin, vsum));
       }
-      else
-        simdInterpolateVerM4<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
-      return;
+      _mm256_storeu_si256((__m256i *)&dst[col], vsum);
+
+      src += srcStride;
+      dst += dstStride;
     }
-    else if( N == 4 && !( width & 0x03 ) )
+    src = srcOrig; // rewind to the top row for the next eight-column strip
+    dst = dstOrig;
+  }
+#endif
+}
+
+template<X86_VEXT vext, int N, bool shiftBack> // Horizontal filter, 4 outputs per step in 32-bit lanes; N == 8 uses 8 taps, any other N is handled as 4 taps.
+static void simdInterpolateHorM4_HBD(const Pel* src, int srcStride, Pel *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng, Pel const *coeff)
+{ // dst[col] = (sum_k src[col + k] * coeff[k] + offset) >> shift, clamped to [clpRng.min, clpRng.max] only when shiftBack is set.
+  __m128i voffset = _mm_set1_epi32(offset);
+  __m128i vibdimin = _mm_set1_epi32(clpRng.min);
+  __m128i vibdimax = _mm_set1_epi32(clpRng.max);
+  __m128i vcoeffh0, vcoeffh1, vsum;
+  vcoeffh0 = _mm_lddqu_si128((__m128i const *)coeff); // 16 bytes of coefficients; presumably coeff[0..3] with a 32-bit Pel - TODO confirm
+  if (N == 8)
+  {
+    vcoeffh1 = _mm_lddqu_si128((__m128i const *)(coeff + 4)); // second half of the taps; left unset (and unused) for other N
+  }
+
+  for (int row = 0; row < height; row++)
+  {
+    for (int col = 0; col < width; col += 4)
     {
-      if( !isVertical )
+      if (N == 8)
       {
-        if( ( width % 8 ) == 0 )
+        __m128i vtmp[2];
+        for (int i = 0; i < 4; i += 2) // each pass handles output columns col + i and col + i + 1
         {
-          if( vext>= AVX2 )
-            simdInterpolateHorM8_AVX2<vext, 4, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
-          else
-            simdInterpolateHorM8<vext, 4, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+          __m128i vsrc00 = _mm_lddqu_si128((__m128i const *)&src[col + i]);
+          __m128i vsrc01 = _mm_lddqu_si128((__m128i const *)&src[col + i + 4]);
+          __m128i vsrc10 = _mm_lddqu_si128((__m128i const *)&src[col + i + 1]);
+          __m128i vsrc11 = _mm_lddqu_si128((__m128i const *)&src[col + i + 5]);
+
+          __m128i vsrc0 = _mm_add_epi32(_mm_mullo_epi32(vsrc00, vcoeffh0), _mm_mullo_epi32(vsrc01, vcoeffh1)); // four partial sums for column col + i
+          __m128i vsrc1 = _mm_add_epi32(_mm_mullo_epi32(vsrc10, vcoeffh0), _mm_mullo_epi32(vsrc11, vcoeffh1)); // four partial sums for column col + i + 1
+          vtmp[i / 2] = _mm_hadd_epi32(vsrc0, vsrc1); // pairwise add: two partial sums left per column
         }
-        else
-          simdInterpolateHorM4<vext, 4, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+        vsum = _mm_hadd_epi32(vtmp[0], vtmp[1]); // second pairwise add: one full sum per output column
       }
       else
-        simdInterpolateVerM4<vext, 4, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
-      return;
-    }
-    else if (biMCForDMVR)
-    {
-      if (N == 2 && !(width & 0x03))
       {
-        if (clpRng.bd <= 10)
-        {
-        simdInterpolateN2_10BIT_M4<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c);
-        }
-        else
-        {
-          simdInterpolateN2_HIGHBIT_M4<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c);
-        }
-        return;
+        __m128i vsrc0 = _mm_lddqu_si128((__m128i const *)&src[col]); // one 4-sample window per output column
+        __m128i vsrc1 = _mm_lddqu_si128((__m128i const *)&src[col + 1]);
+        __m128i vsrc2 = _mm_lddqu_si128((__m128i const *)&src[col + 2]);
+        __m128i vsrc3 = _mm_lddqu_si128((__m128i const *)&src[col + 3]);
+
+        vsrc0 = _mm_mullo_epi32(vsrc0, vcoeffh0);
+        vsrc1 = _mm_mullo_epi32(vsrc1, vcoeffh0);
+        vsrc2 = _mm_mullo_epi32(vsrc2, vcoeffh0);
+        vsrc3 = _mm_mullo_epi32(vsrc3, vcoeffh0);
+
+        __m128i vsrca = _mm_hadd_epi32(vsrc0, vsrc1);
+        __m128i vsrcb = _mm_hadd_epi32(vsrc2, vsrc3);
+        vsum = _mm_hadd_epi32(vsrca, vsrcb); // lane j now holds the full sum for column col + j
       }
+
+      vsum = _mm_add_epi32(vsum, voffset);
+      vsum = _mm_srai_epi32(vsum, shift); // arithmetic shift keeps the sign of negative sums
+      if (shiftBack)
+      {
+        vsum = _mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, vsum)); // clamp to the clipping range
+      }
+      _mm_storeu_si128((__m128i *)&dst[col], vsum);
     }
-    else if( N == 2 && !( width & 0x07 ) )
-    {
-      simdInterpolateN2_M8<vext, isLast>( src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c );
-      return;
-    }
-    else if( N == 2 && !( width & 0x03 ) )
-    {
-      simdInterpolateN2_M4<vext, isLast>( src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c );
-      return;
-    }
+    src += srcStride; // advance both buffers to the next row
+    dst += dstStride;
   }
+}
 
-  for( row = 0; row < height; row++ )
+template<X86_VEXT vext, int N, bool shiftBack> // Vertical N-tap filter, 4 columns (32-bit lanes) per step, processed strip by strip.
+static void simdInterpolateVerM4_HBD(const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height, int shift, int offset, const ClpRng& clpRng, Pel const *coeff)
+{ // dst = (sum_i row[i] * coeff[i] + offset) >> shift, clamped to [clpRng.min, clpRng.max] only when shiftBack is set.
+  const Pel *srcOrig = src; // remembered so each 4-column strip can restart from the first row
+  Pel *dstOrig = dst;
+
+  __m128i vsum, vcoeff[N], vsrc[N];
+  __m128i vzero = _mm_setzero_si128();
+  __m128i voffset = _mm_set1_epi32(offset);
+  __m128i vibdimin = _mm_set1_epi32(clpRng.min);
+  __m128i vibdimax = _mm_set1_epi32(clpRng.max);
+  for (int i = 0; i < N; i++)
   {
-    for( col = 0; col < width; col++ )
+    vcoeff[i] = _mm_set1_epi32(coeff[i]); // broadcast tap i to all four lanes
+  }
+
+  for (int col = 0; col < width; col += 4)
+  {
+    for (int i = 0; i < N - 1; i++)
     {
-      int sum;
+      vsrc[i] = _mm_lddqu_si128((__m128i const *)&src[col + i * srcStride]); // preload the first N - 1 rows of the window
+    }
 
-      sum  = src[col + 0 * cStride] * c[0];
-      sum += src[col + 1 * cStride] * c[1];
-      if( N >= 4 )
-      {
-        sum += src[col + 2 * cStride] * c[2];
-        sum += src[col + 3 * cStride] * c[3];
-      }
-      if( N >= 6 )
+    for (int row = 0; row < height; row++)
+    {
+      vsrc[N - 1] = _mm_lddqu_si128((__m128i const *)&src[col + (N - 1) * srcStride]); // only the newest row is loaded per output row
+
+      vsum = vzero;
+      for (int i = 0; i < N; i++)
       {
-        sum += src[col + 4 * cStride] * c[4];
-        sum += src[col + 5 * cStride] * c[5];
+        vsum = _mm_add_epi32(vsum, _mm_mullo_epi32(vsrc[i], vcoeff[i]));
       }
-      if( N == 8 )
+
+      vsum = _mm_add_epi32(vsum, voffset);
+      vsum = _mm_srai_epi32(vsum, shift); // arithmetic shift keeps the sign of negative sums
+
+      if (shiftBack)
       {
-        sum += src[col + 6 * cStride] * c[6];
-        sum += src[col + 7 * cStride] * c[7];
+        vsum = _mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, vsum)); // clamp to the clipping range
       }
 
-      Pel val = ( sum + offset ) >> shift;
-      if( isLast )
+      _mm_storeu_si128((__m128i *)&dst[col], vsum);
+
+      for (int i = 0; i < N - 1; i++)
       {
-        val = ClipPel( val, clpRng );
+        vsrc[i] = vsrc[i + 1]; // slide the window down by one row
       }
-      dst[col] = val;
-    }
 
-    src += srcStride;
-    dst += dstStride;
+      src += srcStride;
+      dst += dstStride;
+    }
+    src = srcOrig; // rewind to the first row for the next 4-column strip
+    dst = dstOrig;
   }
 }
 
-template< X86_VEXT vext >
-void xWeightedGeoBlk_SSE(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const uint8_t splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1)
+template<X86_VEXT vext, bool isLast> // Two-tap filter, 8 outputs per step handled as two 4-lane (32-bit) halves.
+static void simdInterpolateN2_M8_HBD(const Pel* src, int srcStride, Pel *dst, int dstStride, int cStride, int width, int height, int shift, int offset, const ClpRng& clpRng, Pel const *c)
 {
-  Pel* dst = predDst.get(compIdx).buf;
-  Pel* src0 = predSrc0.get(compIdx).buf;
-  Pel* src1 = predSrc1.get(compIdx).buf;
+  int row, col;
+  __m128i mmOffset = _mm_set1_epi32(offset);
+  __m128i mmMin = _mm_set1_epi32(clpRng.min);
+  __m128i mmMax = _mm_set1_epi32(clpRng.max);
+  __m128i mmCoeff[2];
+  for (int n = 0; n < 2; n++)
+  {
+    mmCoeff[n] = _mm_set1_epi32(c[n]); // broadcast tap n to all four lanes
+  }
+
+  for (row = 0; row < height; row++)
+  {
+    for (col = 0; col < width; col += 8)
+    {
+      const Pel* src_tmp = src;
+      __m128i vsuma = _mm_setzero_si128(); // accumulator for columns col .. col + 3
+      __m128i vsumb = _mm_setzero_si128(); // accumulator for columns col + 4 .. col + 7
+
+      for (int i = 0; i < 2; i++)
+      {
+        __m128i vsrc0 = _mm_lddqu_si128((__m128i *)&src_tmp[col]);
+        __m128i vsrc1 = _mm_lddqu_si128((__m128i *)&src_tmp[col + 4]);
+        vsuma = _mm_add_epi32(vsuma, _mm_mullo_epi32(vsrc0, mmCoeff[i]));
+        vsumb = _mm_add_epi32(vsumb, _mm_mullo_epi32(vsrc1, mmCoeff[i]));
+        src_tmp += cStride; // the second tap is cStride elements after the first
+      }
+
+      vsuma = _mm_srai_epi32(_mm_add_epi32(vsuma, mmOffset), shift); // (sum + offset) >> shift, arithmetic
+      vsumb = _mm_srai_epi32(_mm_add_epi32(vsumb, mmOffset), shift);
+      if (isLast)
+      {
+        vsuma = _mm_min_epi32(mmMax, _mm_max_epi32(mmMin, vsuma)); // clamp to [clpRng.min, clpRng.max]
+        vsumb = _mm_min_epi32(mmMax, _mm_max_epi32(mmMin, vsumb));
+      }
+
+      _mm_storeu_si128((__m128i *)&dst[col], vsuma);
+      _mm_storeu_si128((__m128i *)&dst[col + 4], vsumb);
+    }
+    src += srcStride; // advance both buffers to the next row
+    dst += dstStride;
+  }
+}
+
+template<X86_VEXT vext, bool isLast>
+static void simdInterpolateN2_M4_HBD(const Pel* src, int srcStride, Pel *dst, int dstStride, int cStride, int width, int height, int shift, int offset, const ClpRng& clpRng, Pel const *c)
+{
+  // Two-tap filter, four 32-bit lanes per step. The taps are read cStride elements apart;
+  // each output is (c[0] * tap0 + c[1] * tap1 + offset) >> shift, clamped to clpRng when isLast.
+  const __m128i vRound = _mm_set1_epi32(offset);
+  const __m128i vLow   = _mm_set1_epi32(clpRng.min);
+  const __m128i vHigh  = _mm_set1_epi32(clpRng.max);
+  const __m128i vTap[2] = { _mm_set1_epi32(c[0]), _mm_set1_epi32(c[1]) };
+
+  for (int y = 0; y < height; y++)
+  {
+    for (int x = 0; x < width; x += 4)
+    {
+      // Accumulate both taps for the four columns starting at x.
+      const Pel* tapPtr = src + x;
+      __m128i acc = _mm_setzero_si128();
+      for (int t = 0; t < 2; t++, tapPtr += cStride)
+      {
+        const __m128i samples = _mm_lddqu_si128((__m128i *)tapPtr);
+        acc = _mm_add_epi32(acc, _mm_mullo_epi32(samples, vTap[t]));
+      }
+
+      // Add the rounding offset, then scale back down with an arithmetic shift.
+      acc = _mm_add_epi32(acc, vRound);
+      acc = _mm_srai_epi32(acc, shift);
+
+      if (isLast)
+      {
+        // Keep the result inside [clpRng.min, clpRng.max].
+        acc = _mm_max_epi32(vLow, acc);
+        acc = _mm_min_epi32(vHigh, acc);
+      }
+      _mm_storeu_si128((__m128i *)(dst + x), acc);
+    }
+    src += srcStride;
+    dst += dstStride;
+  }
+}
+
+template<X86_VEXT vext, bool isLast>
+static void simdInterpolateN2_HBD_M4(const Pel* src, int srcStride, Pel *dst, int dstStride, int cStride, int width, int height, int shift, int offset, const ClpRng& clpRng, Pel const *c)
+{
+  // Two-tap filter without clipping, four 32-bit lanes per step. The CHECKs below reject
+  // isLast instantiations and widths that are not a multiple of four.
+  CHECK(isLast, "Not Supported");
+  CHECK(width % 4 != 0, "Not Supported");
+
+  const __m128i vRound = _mm_set1_epi32(offset);
+  const __m128i vTap0  = _mm_set1_epi32(c[0]);
+  const __m128i vTap1  = _mm_set1_epi32(c[1]);
+
+  const Pel* srcRow = src;
+  Pel*       dstRow = dst;
+  for (int y = 0; y < height; y++)
+  {
+    for (int x = 0; x < width; x += 4)
+    {
+      // The second tap sits cStride elements after the first.
+      const __m128i first  = _mm_lddqu_si128((__m128i *)(srcRow + x));
+      const __m128i second = _mm_lddqu_si128((__m128i *)(srcRow + x + cStride));
+
+      __m128i acc = _mm_mullo_epi32(first, vTap0);
+      acc = _mm_add_epi32(acc, _mm_mullo_epi32(second, vTap1));
+      acc = _mm_add_epi32(acc, vRound);
+      _mm_storeu_si128((__m128i *)(dstRow + x), _mm_srai_epi32(acc, shift));
+    }
+
+    srcRow += srcStride;
+    dstRow += dstStride;
+  }
+}
+
+template<X86_VEXT vext, bool isLast>
+static void simdInterpolateN2_HBD_M4_AVX2(const Pel* src, int srcStride, Pel *dst, int dstStride, int cStride, int width, int height, int shift, int offset, const ClpRng& clpRng, Pel const *c)
+{
+#ifdef USE_AVX2
+  // Two-tap filter without clipping that handles two rows per pass: row y occupies the low
+  // 128-bit half of each register and row y + 1 the high half.
+  CHECK(isLast, "Not Supported");
+  CHECK(width % 4 != 0, "Not Supported");
+
+  const __m256i vRound  = _mm256_set1_epi32(offset);
+  const __m256i vTap[2] = { _mm256_set1_epi32(c[0]), _mm256_set1_epi32(c[1]) };
+
+  const int srcRowPair = (srcStride << 1);
+  const int dstRowPair = (dstStride << 1);
+
+  for (int y = 0; y < height; y += 2)
+  {
+    for (int x = 0; x < width; x += 4)
+    {
+      const Pel* tapPtr = src + x;
+      __m256i acc = _mm256_setzero_si256();
+      for (int t = 0; t < 2; t++, tapPtr += cStride)
+      {
+        const __m128i thisRow = _mm_lddqu_si128((__m128i *)tapPtr);
+        const __m128i nextRow = _mm_lddqu_si128((__m128i *)(tapPtr + srcStride));
+        const __m256i rowPair = _mm256_inserti128_si256(_mm256_castsi128_si256(thisRow), nextRow, 1);
+        acc = _mm256_add_epi32(acc, _mm256_mullo_epi32(rowPair, vTap[t]));
+      }
+      acc = _mm256_srai_epi32(_mm256_add_epi32(acc, vRound), shift);
+
+      // 0xee moves the high 128 bits down so the second row can be written with a 128-bit store.
+      const __m256i highHalf = _mm256_permute4x64_epi64(acc, 0xee);
+      _mm_storeu_si128((__m128i *)(dst + x), _mm256_castsi256_si128(acc));
+      _mm_storeu_si128((__m128i *)(dst + x + dstStride), _mm256_castsi256_si128(highHalf));
+    }
+    src += srcRowPair;
+    dst += dstRowPair;
+  }
+#endif
+}
+
+template<X86_VEXT vext, bool isFirst, bool isLast> // Block copy whose sample conversion is selected at compile time by isFirst/isLast.
+static void simdFilterCopy_HBD(const ClpRng& clpRng, const Pel* src, int srcStride, Pel* dst, int dstStride, int width, int height, bool biMCForDMVR)
+{
+  int row;
+
+  if (isFirst == isLast) // no conversion: plain row-by-row copy
+  {
+    for (row = 0; row < height; row++)
+    {
+      memcpy(&dst[0], &src[0], width * sizeof(Pel));
+      src += srcStride;
+      dst += dstStride;
+    }
+  }
+  else if (isFirst) // isFirst && !isLast: dst = (src << shift) - IF_INTERNAL_OFFS, or a plain rescale when biMCForDMVR
+  {
+    if (width & 1) // odd widths are not vectorised here; defer to InterpolationFilter::filterCopy
+    {
+      InterpolationFilter::filterCopy<isFirst, isLast>(clpRng, src, srcStride, dst, dstStride, width, height,
+                                                       biMCForDMVR);
+      return;
+    }
+
+    if (biMCForDMVR)
+    {
+      int shift10BitOut = (clpRng.bd - IF_INTERNAL_PREC_BILINEAR);
+      if (shift10BitOut <= 0) // bit depth does not exceed IF_INTERNAL_PREC_BILINEAR: scale up by a left shift
+      {
+        const __m128i shift = _mm_cvtsi32_si128(-shift10BitOut); // shift count passed in a vector register
+        for (row = 0; row < height; row++)
+        {
+          int col = 0;
+#ifdef USE_AVX2
+          if (vext >= AVX2)
+          {
+            for (; col < ((width >> 3) << 3); col += 8) // 8 samples per step
+            {
+              __m256i val = _mm256_lddqu_si256((__m256i *) &src[col]);
+              val         = _mm256_sll_epi32(val, shift);
+              _mm256_storeu_si256((__m256i *) &dst[col], val);
+            }
+          }
+#endif
+
+          for (; col < ((width >> 2) << 2); col += 4) // 4 samples per step; continues where the previous loop stopped
+          {
+            __m128i val = _mm_lddqu_si128((__m128i *) &src[col]);
+            val         = _mm_sll_epi32(val, shift);
+            _mm_storeu_si128((__m128i *) &dst[col], val);
+          }
+
+          for (; col < width; col += 2) // remaining pair, moved with 64-bit load/store
+          {
+            __m128i val = _mm_loadl_epi64((__m128i *) &src[col]);
+            val         = _mm_sll_epi32(val, shift);
+            _mm_storel_epi64((__m128i *) &dst[col], val);
+          }
+          src += srcStride;
+          dst += dstStride;
+        }
+      }
+      else // bit depth exceeds IF_INTERNAL_PREC_BILINEAR: rounded right shift
+      {
+        int offset = (1 << (shift10BitOut - 1)); // rounding term: half of the divisor
+        for (row = 0; row < height; row++)
+        {
+          int col = 0;
+#ifdef USE_AVX2
+          if (vext >= AVX2)
+          {
+            __m256i mm256_offset = _mm256_set1_epi32(offset);
+            for (; col < ((width >> 3) << 3); col += 8) // 8 samples per step
+            {
+              __m256i vsrc = _mm256_lddqu_si256((__m256i *) &src[col]);
+              vsrc         = _mm256_srai_epi32(_mm256_add_epi32(vsrc, mm256_offset), shift10BitOut);
+              _mm256_storeu_si256((__m256i *) &dst[col], vsrc);
+            }
+          }
+#endif
+
+          __m128i mm128_offset = _mm_set1_epi32(offset);
+          for (; col < ((width >> 2) << 2); col += 4) // 4 samples per step
+          {
+            __m128i vsrc = _mm_lddqu_si128((__m128i *) &src[col]);
+            vsrc         = _mm_srai_epi32(_mm_add_epi32(vsrc, mm128_offset), shift10BitOut);
+            _mm_storeu_si128((__m128i *) &dst[col], vsrc);
+          }
+
+          for (; col < width; col += 2) // remaining pair
+          {
+            __m128i vsrc = _mm_loadl_epi64((__m128i *) &src[col]);
+            vsrc         = _mm_srai_epi32(_mm_add_epi32(vsrc, mm128_offset), shift10BitOut);
+            _mm_storel_epi64((__m128i *) &dst[col], vsrc);
+          }
+          src += srcStride;
+          dst += dstStride;
+        }
+      }
+    }
+    else
+    {
+      const int shift = IF_INTERNAL_FRAC_BITS(clpRng.bd);
+      for (row = 0; row < height; row++)
+      {
+        int col = 0;
+#ifdef USE_AVX2
+        if (vext >= AVX2)
+        {
+          __m256i mm256_offset = _mm256_set1_epi32(IF_INTERNAL_OFFS);
+          for (; col < ((width >> 3) << 3); col += 8) // 8 samples per step
+          {
+            __m256i vsrc = _mm256_lddqu_si256((__m256i *)&src[col]);
+            vsrc = _mm256_sub_epi32(_mm256_slli_epi32(vsrc, shift), mm256_offset); // (src << shift) - IF_INTERNAL_OFFS
+            _mm256_storeu_si256((__m256i *)&dst[col], vsrc);
+          }
+        }
+#endif
+
+        __m128i mm128_offset = _mm_set1_epi32(IF_INTERNAL_OFFS);
+        for (; col < ((width >> 2) << 2); col += 4) // 4 samples per step
+        {
+          __m128i vsrc = _mm_lddqu_si128((__m128i *)&src[col]);
+          vsrc = _mm_sub_epi32(_mm_slli_epi32(vsrc, shift), mm128_offset);
+          _mm_storeu_si128((__m128i *)&dst[col], vsrc);
+        }
+
+        for (; col < width; col += 2) // remaining pair
+        {
+          __m128i vsrc = _mm_loadl_epi64((__m128i *)&src[col]);
+          vsrc = _mm_sub_epi32(_mm_slli_epi32(vsrc, shift), mm128_offset);
+          _mm_storel_epi64((__m128i *)&dst[col], vsrc);
+        }
+        src += srcStride;
+        dst += dstStride;
+      }
+    }
+  }
+  else // !isFirst && isLast: dst = clamp(rounded shift of (src + IF_INTERNAL_OFFS)) to [clpRng.min, clpRng.max]
+  {
+    if (width & 1) // odd widths are not vectorised here; defer to InterpolationFilter::filterCopy
+    {
+      InterpolationFilter::filterCopy<isFirst, isLast>(clpRng, src, srcStride, dst, dstStride, width, height,
+                                                       biMCForDMVR);
+      return;
+    }
+
+    CHECK(biMCForDMVR, "isLast must be false when biMCForDMVR is true");
+    const int shift = IF_INTERNAL_FRAC_BITS(clpRng.bd);
+    for (row = 0; row < height; row++)
+    {
+      int col = 0;
+#ifdef USE_AVX2
+      if (vext >= AVX2)
+      {
+        __m256i mm256_offset = _mm256_set1_epi32(IF_INTERNAL_OFFS);
+        __m256i mm256_min    = _mm256_set1_epi32(clpRng.min);
+        __m256i mm256_max    = _mm256_set1_epi32(clpRng.max);
+        for (; col < ((width >> 3) << 3); col += 8) // 8 samples per step
+        {
+          __m256i vsrc = _mm256_lddqu_si256((__m256i *) &src[col]);
+          vsrc         = _mm256_add_epi32(vsrc, mm256_offset);
+          if (shift <= 0) // non-positive shift means scaling up instead of down
+          {
+            vsrc = _mm256_slli_epi32(vsrc, (-shift));
+          }
+          else
+          {
+            vsrc = _mm256_srai_epi32(_mm256_add_epi32(vsrc, _mm256_set1_epi32(1 << (shift - 1))), shift); // round to nearest
+          }
+          vsrc = _mm256_min_epi32(mm256_max, _mm256_max_epi32(mm256_min, vsrc));
+
+          _mm256_storeu_si256((__m256i *) &dst[col], vsrc);
+        }
+      }
+#endif
+
+      __m128i mm128_offset = _mm_set1_epi32(IF_INTERNAL_OFFS);
+      __m128i mm128_min    = _mm_set1_epi32(clpRng.min);
+      __m128i mm128_max    = _mm_set1_epi32(clpRng.max);
+      for (; col < ((width >> 2) << 2); col += 4) // 4 samples per step
+      {
+        __m128i vsrc = _mm_lddqu_si128((__m128i *) &src[col]);
+        vsrc         = _mm_add_epi32(vsrc, mm128_offset);
+        if (shift <= 0)
+        {
+          vsrc = _mm_slli_epi32(vsrc, (-shift));
+        }
+        else
+        {
+          vsrc = _mm_srai_epi32(_mm_add_epi32(vsrc, _mm_set1_epi32(1 << (shift - 1))), shift);
+        }
+        vsrc = _mm_min_epi32(mm128_max, _mm_max_epi32(mm128_min, vsrc));
+
+        _mm_storeu_si128((__m128i *) &dst[col], vsrc);
+      }
+
+      for (; col < width; col += 2) // remaining pair, moved with 64-bit load/store
+      {
+        __m128i vsrc = _mm_loadl_epi64((__m128i *) &src[col]);
+        vsrc         = _mm_add_epi32(vsrc, mm128_offset);
+        if (shift <= 0)
+        {
+          vsrc = _mm_slli_epi32(vsrc, (-shift));
+        }
+        else
+        {
+          vsrc = _mm_srai_epi32(_mm_add_epi32(vsrc, _mm_set1_epi32(1 << (shift - 1))), shift);
+        }
+        vsrc = _mm_min_epi32(mm128_max, _mm_max_epi32(mm128_min, vsrc));
+
+        _mm_storel_epi64((__m128i *) &dst[col], vsrc);
+      }
+
+      src += srcStride;
+      dst += dstStride;
+    }
+  }
+}
+
+template< X86_VEXT vext > // Weighted blend of two predictions for one colour component, using 32-bit lanes.
+void xWeightedGeoBlk_HBD_SIMD(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const uint8_t splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1)
+{ // dst = clamp((src0 * w + src1 * (8 - w) + offsetWeighted) >> shiftWeighted), with w read from g_globalGeoWeights.
+  Pel* dst = predDst.get(compIdx).buf;
+  Pel* src0 = predSrc0.get(compIdx).buf;
+  Pel* src1 = predSrc1.get(compIdx).buf;
+  int32_t strideDst = predDst.get(compIdx).stride;
+  int32_t strideSrc0 = predSrc0.get(compIdx).stride;
+  int32_t strideSrc1 = predSrc1.get(compIdx).stride;
+
+  const char    log2WeightBase = 3; // the two weights of a sample sum to 8 == 1 << 3
+  const ClpRng  clpRng = pu.cu->slice->clpRngs().comp[compIdx];
+  const int32_t shiftWeighted = IF_INTERNAL_FRAC_BITS(clpRng.bd) + log2WeightBase;
+  const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
+
+  int16_t wIdx = floorLog2(pu.lwidth()) - GEO_MIN_CU_LOG2;
+  int16_t hIdx = floorLog2(pu.lheight()) - GEO_MIN_CU_LOG2;
+  int16_t angle = g_GeoParams[splitDir][0];
+  int16_t stepY = 0;
+  int16_t* weight = nullptr;
+  if (g_angle2mirror[angle] == 2) // mirror mode 2: start from the mirrored row and walk the mask upwards
+  {
+    stepY = -GEO_WEIGHT_MASK_SIZE;
+    weight = &g_globalGeoWeights[g_angle2mask[angle]][(GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][1]) * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]];
+  }
+  else if (g_angle2mirror[angle] == 1) // mirror mode 1: start from the mirrored column; rows are then read right to left
+  {
+    stepY = GEO_WEIGHT_MASK_SIZE;
+    weight = &g_globalGeoWeights[g_angle2mask[angle]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[splitDir][hIdx][wIdx][0])];
+  }
+  else
+  {
+    stepY = GEO_WEIGHT_MASK_SIZE;
+    weight = &g_globalGeoWeights[g_angle2mask[angle]][g_weightOffset[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE + g_weightOffset[splitDir][hIdx][wIdx][0]];
+  }
+
+  const __m128i mmEight = _mm_set1_epi16(8);
+  const __m128i mmOffset = _mm_set1_epi32(offsetWeighted);
+  const __m128i mmShift = _mm_cvtsi32_si128(shiftWeighted);
+  const __m128i mmMin = _mm_set1_epi32(clpRng.min);
+  const __m128i mmMax = _mm_set1_epi32(clpRng.max);
+
+  if (compIdx != COMPONENT_Y && pu.chromaFormat == CHROMA_420)
+    stepY *= 2; // 4:2:0 chroma uses every second mask row; multiply, since stepY may be negative
+
+  if (width == 4)
+  {
+    // this case only occurs for chroma blocks
+    for (int y = 0; y < height; y++)
+    {
+      __m128i s0 = _mm_lddqu_si128((__m128i *) (src0));
+      __m128i s1 = _mm_lddqu_si128((__m128i *) (src1));
+      __m128i w0;
+      if (g_angle2mirror[angle] == 1)
+      {
+        w0 = _mm_loadu_si128((__m128i *) (weight - (8 - 1))); // the 8 weights ending at weight[0]
+        const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+        w0 = _mm_shuffle_epi8(w0, shuffle_mask); // reverse the order of the eight 16-bit weights
+      }
+      else
+      {
+        w0 = _mm_loadu_si128((__m128i *) (weight));
+      }
+      w0 = _mm_shuffle_epi8(w0, _mm_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0)); // keep every second weight in the low four lanes
+      __m128i w1 = _mm_sub_epi16(mmEight, w0);
+
+      w0 = _mm_cvtepi16_epi32(w0); // widen to 32 bits to match the sample lanes
+      w1 = _mm_cvtepi16_epi32(w1);
+
+      s0 = _mm_add_epi32(_mm_mullo_epi32(s0, w0), _mm_mullo_epi32(s1, w1));
+      s0 = _mm_sra_epi32(_mm_add_epi32(s0, mmOffset), mmShift);
+      s0 = _mm_min_epi32(mmMax, _mm_max_epi32(s0, mmMin));
+
+      _mm_storeu_si128((__m128i *)dst, s0);
+
+      dst += strideDst;
+      src0 += strideSrc0;
+      src1 += strideSrc1;
+      weight += stepY;
+    }
+  }
+#ifdef USE_AVX2
+  else if ((vext >= AVX2) && (width >= 16))
+  {
+    const __m256i mmEightAVX2 = _mm256_set1_epi16(8);
+    const __m256i mmOffsetAVX2 = _mm256_set1_epi32(offsetWeighted);
+    const __m256i mmMinAVX2 = _mm256_set1_epi32(clpRng.min);
+    const __m256i mmMaxAVX2 = _mm256_set1_epi32(clpRng.max);
+    for (int y = 0; y < height; y++)
+    {
+      for (int x = 0; x < width; x += 16) // 16 samples per step, held as two 8-lane registers per source
+      {
+        __m256i s00 = _mm256_lddqu_si256((__m256i *) (src0 + x));
+        __m256i s01 = _mm256_lddqu_si256((__m256i *) (src0 + x + 8));
+        __m256i s10 = _mm256_lddqu_si256((__m256i *) (src1 + x));
+        __m256i s11 = _mm256_lddqu_si256((__m256i *) (src1 + x + 8));
+
+        __m256i w0; // assigned in every branch below
+        if (compIdx != COMPONENT_Y && pu.chromaFormat != CHROMA_444)
+        {
+          const __m256i mask = _mm256_set_epi16(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); // keeps even 16-bit lanes, zeroes odd ones
+          __m256i w0p0, w0p1;
+          if (g_angle2mirror[angle] == 1)
+          {
+            w0p0 = _mm256_lddqu_si256((__m256i *) (weight - (x << 1) - (16 - 1))); // first sub-sample the required weights.
+            w0p1 = _mm256_lddqu_si256((__m256i *) (weight - (x << 1) - 16 - (16 - 1)));
+            const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+            w0p0 = _mm256_shuffle_epi8(w0p0, shuffle_mask); // reverse within each 128-bit half ...
+            w0p0 = _mm256_permute4x64_epi64(w0p0, _MM_SHUFFLE(1, 0, 3, 2)); // ... then swap the halves to complete the reversal
+            w0p1 = _mm256_shuffle_epi8(w0p1, shuffle_mask);
+            w0p1 = _mm256_permute4x64_epi64(w0p1, _MM_SHUFFLE(1, 0, 3, 2));
+          }
+          else
+          {
+            w0p0 = _mm256_lddqu_si256((__m256i *) (weight + (x << 1))); // first sub-sample the required weights.
+            w0p1 = _mm256_lddqu_si256((__m256i *) (weight + (x << 1) + 16));
+          }
+          w0p0 = _mm256_mullo_epi16(w0p0, mask);
+          w0p1 = _mm256_mullo_epi16(w0p1, mask);
+          w0 = _mm256_packs_epi32(w0p0, w0p1); // each 32-bit lane now holds one kept weight; narrow it to 16 bits
+          w0 = _mm256_permute4x64_epi64(w0, _MM_SHUFFLE(3, 1, 2, 0)); // undo the per-128-bit interleave of the pack
+        }
+        else
+        {
+          if (g_angle2mirror[angle] == 1)
+          {
+            w0 = _mm256_lddqu_si256((__m256i *) (weight - x - (16 - 1)));
+            const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+            w0 = _mm256_shuffle_epi8(w0, shuffle_mask);
+            w0 = _mm256_permute4x64_epi64(w0, _MM_SHUFFLE(1, 0, 3, 2));
+          }
+          else
+          {
+            w0 = _mm256_lddqu_si256((__m256i *) (weight + x));
+          }
+        }
+        __m256i w1 = _mm256_sub_epi16(mmEightAVX2, w0);
+
+        __m256i w00 = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(w0)); // weights 0..7 widened to 32 bits
+        __m256i w01 = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(_mm256_permute4x64_epi64(w0, 0xee))); // weights 8..15
+        __m256i w10 = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(w1));
+        __m256i w11 = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(_mm256_permute4x64_epi64(w1, 0xee)));
+
+        __m256i s0 = _mm256_add_epi32(_mm256_mullo_epi32(s00, w00), _mm256_mullo_epi32(s10, w10));
+        __m256i s1 = _mm256_add_epi32(_mm256_mullo_epi32(s01, w01), _mm256_mullo_epi32(s11, w11));
+
+        s0 = _mm256_sra_epi32(_mm256_add_epi32(s0, mmOffsetAVX2), mmShift);
+        s1 = _mm256_sra_epi32(_mm256_add_epi32(s1, mmOffsetAVX2), mmShift);
+
+        s0 = _mm256_min_epi32(mmMaxAVX2, _mm256_max_epi32(s0, mmMinAVX2));
+        s1 = _mm256_min_epi32(mmMaxAVX2, _mm256_max_epi32(s1, mmMinAVX2));
+
+        _mm256_storeu_si256((__m256i *) (dst + x), s0);
+        _mm256_storeu_si256((__m256i *) (dst + x + 8), s1);
+      }
+
+      dst += strideDst;
+      src0 += strideSrc0;
+      src1 += strideSrc1;
+      weight += stepY;
+    }
+  }
+#endif
+  else
+  {
+    for (int y = 0; y < height; y++)
+    {
+      for (int x = 0; x < width; x += 8) // 8 samples per step, held as two 4-lane registers per source
+      {
+        __m128i s00 = _mm_lddqu_si128((__m128i *) (src0 + x));
+        __m128i s01 = _mm_lddqu_si128((__m128i *) (src0 + x + 4));
+        __m128i s10 = _mm_lddqu_si128((__m128i *) (src1 + x));
+        __m128i s11 = _mm_lddqu_si128((__m128i *) (src1 + x + 4));
+        __m128i w0;
+        if (compIdx != COMPONENT_Y && pu.chromaFormat != CHROMA_444)
+        {
+          const __m128i mask = _mm_set_epi16(0, 1, 0, 1, 0, 1, 0, 1); // keeps even 16-bit lanes, zeroes odd ones
+          __m128i w0p0, w0p1;
+          if (g_angle2mirror[angle] == 1)
+          {
+            w0p0 = _mm_lddqu_si128((__m128i *) (weight - (x << 1) - (8 - 1))); // first sub-sample the required weights.
+            w0p1 = _mm_lddqu_si128((__m128i *) (weight - (x << 1) - 8 - (8 - 1)));
+            const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+            w0p0 = _mm_shuffle_epi8(w0p0, shuffle_mask);
+            w0p1 = _mm_shuffle_epi8(w0p1, shuffle_mask);
+          }
+          else
+          {
+            w0p0 = _mm_lddqu_si128((__m128i *) (weight + (x << 1))); // first sub-sample the required weights.
+            w0p1 = _mm_lddqu_si128((__m128i *) (weight + (x << 1) + 8));
+          }
+          w0p0 = _mm_mullo_epi16(w0p0, mask);
+          w0p1 = _mm_mullo_epi16(w0p1, mask);
+          w0 = _mm_packs_epi32(w0p0, w0p1); // each 32-bit lane holds one kept weight; narrow it to 16 bits
+        }
+        else
+        {
+          if (g_angle2mirror[angle] == 1)
+          {
+            w0 = _mm_lddqu_si128((__m128i *) (weight - x - (8 - 1)));  // 16b
+            const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+            w0 = _mm_shuffle_epi8(w0, shuffle_mask);
+          }
+          else
+          {
+            w0 = _mm_lddqu_si128((__m128i *) (weight + x));
+          }
+        }
+        __m128i w1 = _mm_sub_epi16(mmEight, w0);
+
+        __m128i w00 = _mm_cvtepi16_epi32(w0); // weights 0..3 widened to 32 bits
+        __m128i w01 = _mm_cvtepi16_epi32(_mm_shuffle_epi32(w0, 0xee)); // weights 4..7
+        __m128i w10 = _mm_cvtepi16_epi32(w1);
+        __m128i w11 = _mm_cvtepi16_epi32(_mm_shuffle_epi32(w1, 0xee));
+
+        __m128i s0 = _mm_add_epi32(_mm_mullo_epi32(s00, w00), _mm_mullo_epi32(s10, w10));
+        __m128i s1 = _mm_add_epi32(_mm_mullo_epi32(s01, w01), _mm_mullo_epi32(s11, w11));
+
+        s0 = _mm_sra_epi32(_mm_add_epi32(s0, mmOffset), mmShift);
+        s1 = _mm_sra_epi32(_mm_add_epi32(s1, mmOffset), mmShift);
+
+        s0 = _mm_min_epi32(mmMax, _mm_max_epi32(s0, mmMin));
+        s1 = _mm_min_epi32(mmMax, _mm_max_epi32(s1, mmMin));
+
+        _mm_storeu_si128((__m128i *) (dst + x), s0);
+        _mm_storeu_si128((__m128i *) (dst + x + 4), s1);
+      }
+      dst += strideDst;
+      src0 += strideSrc0;
+      src1 += strideSrc1;
+      weight += stepY;
+    }
+  }
+}
+#endif
+template<X86_VEXT vext, int N, bool isVertical, bool isFirst, bool isLast>
+static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMCForDMVR)
+{
+  int row, col;
+
+  Pel c[8];
+  c[0] = coeff[0];
+  c[1] = coeff[1];
+  if( N >= 4 )
+  {
+    c[2] = coeff[2];
+    c[3] = coeff[3];
+  }
+  if( N >= 6 )
+  {
+    c[4] = coeff[4];
+    c[5] = coeff[5];
+  }
+  if( N == 8 )
+  {
+    c[6] = coeff[6];
+    c[7] = coeff[7];
+  }
+
+  int cStride = ( isVertical ) ? srcStride : 1;
+  src -= ( N/2 - 1 ) * cStride;
+
+  int offset;
+  int headRoom = IF_INTERNAL_FRAC_BITS(clpRng.bd);
+  int shift    = IF_FILTER_PREC;
+  // with the current settings (IF_INTERNAL_PREC = 14 and IF_FILTER_PREC = 6), though headroom can be
+  // negative for bit depths greater than 14, shift will remain non-negative for bit depths of 8->20
+  CHECK( shift < 0, "Negative shift" );
+
+
+  if( isLast )
+  {
+    shift += ( isFirst ) ? 0 : headRoom;
+    offset = 1 << ( shift - 1 );
+    offset += ( isFirst ) ? 0 : IF_INTERNAL_OFFS << IF_FILTER_PREC;
+  }
+  else
+  {
+    shift -= ( isFirst ) ? headRoom : 0;
+    offset = ( isFirst ) ? -IF_INTERNAL_OFFS << shift : 0;
+  }
+
+  if (biMCForDMVR)
+  {
+    if( isFirst )
+    {
+      shift = IF_FILTER_PREC_BILINEAR - (IF_INTERNAL_PREC_BILINEAR - clpRng.bd);
+      offset = 1 << (shift - 1);
+    }
+    else
+    {
+      shift = 4;
+      offset = 1 << (shift - 1);
+    }
+  }
+  {
+    if( N == 8 && !( width & 0x07 ) )
+    {
+      if( !isVertical )
+      {
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+        if (vext >= AVX2)
+        {
+          simdInterpolateHorM8_HBD_AVX2<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+        }
+        else
+        {
+          simdInterpolateHorM8_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+        }
+#else
+        if( vext>= AVX2 )
+          simdInterpolateHorM8_AVX2<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+        else
+          simdInterpolateHorM8<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+#endif
+      }
+      else
+      {
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+        if (vext >= AVX2)
+        {
+          simdInterpolateVerM8_HBD_AVX2<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+        }
+        else
+        {
+          simdInterpolateVerM8_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+        }
+#else
+        if( vext>= AVX2 )
+          simdInterpolateVerM8_AVX2<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+        else
+          simdInterpolateVerM8<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+#endif
+      }
+      return;
+    }
+    else if( N == 8 && !( width & 0x03 ) )
+    {
+      if( !isVertical )
+      {
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+        simdInterpolateHorM4_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+#else
+        simdInterpolateHorM4<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+#endif
+      }
+      else
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+        simdInterpolateVerM4_HBD<vext, 8, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+#else
+        simdInterpolateVerM4<vext, 8, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+#endif
+      return;
+    }
+    else if( N == 4 && !( width & 0x03 ) )
+    {
+      if( !isVertical )
+      {
+        if( ( width % 8 ) == 0 )
+        {
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+          if (vext >= AVX2)
+          {
+            simdInterpolateHorM8_HBD_AVX2<vext, 4, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+          }
+          else
+          {
+            simdInterpolateHorM8_HBD<vext, 4, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+          }
+#else
+          if( vext>= AVX2 )
+            simdInterpolateHorM8_AVX2<vext, 4, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+          else
+            simdInterpolateHorM8<vext, 4, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+#endif
+        }
+        else
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+          simdInterpolateHorM4_HBD<vext, 4, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+#else
+          simdInterpolateHorM4<vext, 4, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+#endif
+      }
+      else
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+        simdInterpolateVerM4_HBD<vext, 4, isLast>(src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c);
+#else
+        simdInterpolateVerM4<vext, 4, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
+#endif
+      return;
+    }
+    else if (biMCForDMVR)
+    {
+      if (N == 2 && !(width & 0x03))
+      {
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+        if (vext >= AVX2)
+        {
+          simdInterpolateN2_HBD_M4_AVX2<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c);
+        }
+        else
+        {
+          simdInterpolateN2_HBD_M4<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c);
+        }
+#else
+        if (clpRng.bd <= 10)
+        {
+        simdInterpolateN2_10BIT_M4<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c);
+        }
+        else
+        {
+          simdInterpolateN2_HIGHBIT_M4<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c);
+        }
+#endif
+        return;
+      }
+    }
+    else if( N == 2 && !( width & 0x07 ) )
+    {
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+    simdInterpolateN2_M8_HBD<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c);
+#else
+      simdInterpolateN2_M8<vext, isLast>( src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c );
+#endif
+      return;
+    }
+    else if( N == 2 && !( width & 0x03 ) )
+    {
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+      simdInterpolateN2_M4_HBD<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c);
+#else
+      simdInterpolateN2_M4<vext, isLast>( src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c );
+#endif
+      return;
+    }
+  }
+
+  for( row = 0; row < height; row++ )
+  {
+    for( col = 0; col < width; col++ )
+    {
+      int sum;
+
+      sum  = src[col + 0 * cStride] * c[0];
+      sum += src[col + 1 * cStride] * c[1];
+      if( N >= 4 )
+      {
+        sum += src[col + 2 * cStride] * c[2];
+        sum += src[col + 3 * cStride] * c[3];
+      }
+      if( N >= 6 )
+      {
+        sum += src[col + 4 * cStride] * c[4];
+        sum += src[col + 5 * cStride] * c[5];
+      }
+      if( N == 8 )
+      {
+        sum += src[col + 6 * cStride] * c[6];
+        sum += src[col + 7 * cStride] * c[7];
+      }
+
+      Pel val = ( sum + offset ) >> shift;
+      if( isLast )
+      {
+        val = ClipPel( val, clpRng );
+      }
+      dst[col] = val;
+    }
+
+    src += srcStride;
+    dst += dstStride;
+  }
+}
+
+template< X86_VEXT vext >
+void xWeightedGeoBlk_SSE(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const uint8_t splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1)
+{
+  Pel* dst = predDst.get(compIdx).buf;
+  Pel* src0 = predSrc0.get(compIdx).buf;
+  Pel* src1 = predSrc1.get(compIdx).buf;
   int32_t strideDst = predDst.get(compIdx).stride;
   int32_t strideSrc0 = predSrc0.get(compIdx).stride;
   int32_t strideSrc1 = predSrc1.get(compIdx).stride;
@@ -1550,6 +2595,45 @@ void xWeightedGeoBlk_SSE(const PredictionUnit &pu, const uint32_t width, const u
 template <X86_VEXT vext>
 void InterpolationFilter::_initInterpolationFilterX86()
 {
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+  // [taps][bFirst][bLast]
+  m_filterHor[0][0][0] = simdFilter<vext, 8, false, false, false>;
+  m_filterHor[0][0][1] = simdFilter<vext, 8, false, false, true>;
+  m_filterHor[0][1][0] = simdFilter<vext, 8, false, true, false>;
+  m_filterHor[0][1][1] = simdFilter<vext, 8, false, true, true>;
+
+  m_filterHor[1][0][0] = simdFilter<vext, 4, false, false, false>;
+  m_filterHor[1][0][1] = simdFilter<vext, 4, false, false, true>;
+  m_filterHor[1][1][0] = simdFilter<vext, 4, false, true, false>;
+  m_filterHor[1][1][1] = simdFilter<vext, 4, false, true, true>;
+
+  m_filterHor[2][0][0] = simdFilter<vext, 2, false, false, false>;
+  m_filterHor[2][0][1] = simdFilter<vext, 2, false, false, true>;
+  m_filterHor[2][1][0] = simdFilter<vext, 2, false, true, false>;
+  m_filterHor[2][1][1] = simdFilter<vext, 2, false, true, true>;
+
+  m_filterVer[0][0][0] = simdFilter<vext, 8, true, false, false>;
+  m_filterVer[0][0][1] = simdFilter<vext, 8, true, false, true>;
+  m_filterVer[0][1][0] = simdFilter<vext, 8, true, true, false>;
+  m_filterVer[0][1][1] = simdFilter<vext, 8, true, true, true>;
+
+  m_filterVer[1][0][0] = simdFilter<vext, 4, true, false, false>;
+  m_filterVer[1][0][1] = simdFilter<vext, 4, true, false, true>;
+  m_filterVer[1][1][0] = simdFilter<vext, 4, true, true, false>;
+  m_filterVer[1][1][1] = simdFilter<vext, 4, true, true, true>;
+
+  m_filterVer[2][0][0] = simdFilter<vext, 2, true, false, false>;
+  m_filterVer[2][0][1] = simdFilter<vext, 2, true, false, true>;
+  m_filterVer[2][1][0] = simdFilter<vext, 2, true, true, false>;
+  m_filterVer[2][1][1] = simdFilter<vext, 2, true, true, true>;
+
+  m_filterCopy[0][0] = simdFilterCopy_HBD<vext, false, false>;
+  m_filterCopy[0][1] = simdFilterCopy_HBD<vext, false, true>;
+  m_filterCopy[1][0] = simdFilterCopy_HBD<vext, true, false>;
+  m_filterCopy[1][1] = simdFilterCopy_HBD<vext, true, true>;
+
+  m_weightedGeoBlk = xWeightedGeoBlk_HBD_SIMD<vext>;
+#else
   // [taps][bFirst][bLast]
   m_filterHor[0][0][0] = simdFilter<vext, 8, false, false, false>;
   m_filterHor[0][0][1] = simdFilter<vext, 8, false, false, true>;
@@ -1587,6 +2671,7 @@ void InterpolationFilter::_initInterpolationFilterX86()
   m_filterCopy[1][1]   = simdFilterCopy<vext, true, true>;
 
   m_weightedGeoBlk = xWeightedGeoBlk_SSE<vext>;
+#endif
 }
 
 template void InterpolationFilter::_initInterpolationFilterX86<SIMDX86>();
diff --git a/source/Lib/CommonLib/x86/RdCostX86.h b/source/Lib/CommonLib/x86/RdCostX86.h
index a41f090aec0e2223bcab752a138762df74de2673..eb44a8c3abe3dea1865995119795f20c065710c7 100644
--- a/source/Lib/CommonLib/x86/RdCostX86.h
+++ b/source/Lib/CommonLib/x86/RdCostX86.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -455,7 +455,1714 @@ Distortion RdCost::xGetSAD_NxN_SIMD( const DistParam &rcDtParam )
   return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
 }
 
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+static Distortion xCalcHAD2x2_HBD_SSE(const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur)
+{
+  // 2x2 Hadamard SATD of (org - cur), computed in 32-bit lanes (assumes Torg/Tcur are 32-bit samples); no normalisation is applied.
+  __m128i m1[2], m2[2];
+  for (int k = 0; k < 2; k++)
+  {
+    m1[k] = _mm_sub_epi32(_mm_loadl_epi64((const __m128i*)piOrg), _mm_loadl_epi64((const __m128i*)piCur));   // 64-bit load: the two samples of row k
+    piOrg += iStrideOrg;
+    piCur += iStrideCur;
+  }
+
+  // vertical butterfly: sum and difference of the two rows
+  m2[0] = _mm_add_epi32(m1[0], m1[1]);
+  m2[1] = _mm_sub_epi32(m1[0], m1[1]);
+
+  // transpose: interleave the results, then copy the upper 64 bits down so both columns sit in lanes 0-1
+  m1[0] = _mm_unpacklo_epi32(m2[0], m2[1]);
+  m1[1] = _mm_shuffle_epi32(m1[0], 0xee);
+
+  // horizontal butterfly followed by abs; only lanes 0 and 1 of each result are meaningful
+  m2[0] = _mm_abs_epi32(_mm_add_epi32(m1[0], m1[1]));
+  m2[1] = _mm_abs_epi32(_mm_sub_epi32(m1[0], m1[1]));
+
+  // sum of absolute coefficients: a single hadd folds lanes 0 and 1 into lane 0
+  __m128i Sum = _mm_add_epi32(m2[0], m2[1]);
+  Sum = _mm_hadd_epi32(Sum, Sum);
+
+  Distortion sad = _mm_cvtsi128_si32(Sum);
+#if JVET_R0164_MEAN_SCALED_SATD
+  Distortion absDc = _mm_cvtsi128_si32(m2[0]);   // lane 0 of m2[0] is |sum of all four differences| (DC); declared here so it is not unused when the macro is off
+  sad -= absDc;
+  sad += absDc >> 2;   // the DC term contributes only a quarter of its magnitude
+#endif
+
+  return sad;
+}
+
+static Distortion xCalcHAD4x4_HBD_SSE(const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur)
+{   // 4x4 Hadamard SATD of (org - cur) in 32-bit lanes (assumes Torg/Tcur are 32-bit samples); result is halved with rounding.
+  __m128i r0 = _mm_lddqu_si128((const __m128i*)&piOrg[0]);                // unaligned loads: one 128-bit vector (four samples) per row
+  __m128i r1 = _mm_lddqu_si128((const __m128i*)&piOrg[iStrideOrg]);
+  __m128i r2 = _mm_lddqu_si128((const __m128i*)&piOrg[2 * iStrideOrg]);
+  __m128i r3 = _mm_lddqu_si128((const __m128i*)&piOrg[3 * iStrideOrg]);
+  __m128i r4 = _mm_lddqu_si128((const __m128i*)&piCur[0]);
+  __m128i r5 = _mm_lddqu_si128((const __m128i*)&piCur[iStrideCur]);
+  __m128i r6 = _mm_lddqu_si128((const __m128i*)&piCur[2 * iStrideCur]);
+  __m128i r7 = _mm_lddqu_si128((const __m128i*)&piCur[3 * iStrideCur]);
+
+  r0 = _mm_sub_epi32(r0, r4);   // r0..r3 = org - cur, one row each
+  r1 = _mm_sub_epi32(r1, r5);
+  r2 = _mm_sub_epi32(r2, r6);
+  r3 = _mm_sub_epi32(r3, r7);
+
+  // first stage: 4-point butterflies across the four row vectors (r4/r5 keep copies of r0/r1)
+  r4 = r0;
+  r5 = r1;
+
+  r0 = _mm_add_epi32(r0, r3);
+  r1 = _mm_add_epi32(r1, r2);
+
+  r4 = _mm_sub_epi32(r4, r3);
+  r5 = _mm_sub_epi32(r5, r2);
+
+  r2 = r0;
+  r3 = r4;
+
+  r0 = _mm_add_epi32(r0, r1);   // the four outputs of this stage end up in r0, r5, r2, r3
+  r2 = _mm_sub_epi32(r2, r1);
+  r3 = _mm_sub_epi32(r3, r5);
+  r5 = _mm_add_epi32(r5, r4);
+
+  // shuffle - flip matrix for vertical transform (4x4 transpose of r0, r5, r2, r3 into r0..r3)
+  r4 = _mm_unpacklo_epi32(r0, r5);
+  r5 = _mm_unpackhi_epi32(r0, r5);
+  r6 = _mm_unpacklo_epi32(r2, r3);
+  r7 = _mm_unpackhi_epi32(r2, r3);
+
+  r0 = _mm_unpacklo_epi64(r4, r6);
+  r1 = _mm_unpackhi_epi64(r4, r6);
+  r2 = _mm_unpacklo_epi64(r5, r7);
+  r3 = _mm_unpackhi_epi64(r5, r7);
+
+  // second stage: the same butterfly sequence applied to the transposed data
+  r4 = r0;
+  r5 = r1;
+
+  r0 = _mm_add_epi32(r0, r3);
+  r1 = _mm_add_epi32(r1, r2);
+
+  r4 = _mm_sub_epi32(r4, r3);
+  r5 = _mm_sub_epi32(r5, r2);
+
+  r2 = r0;
+  r3 = r4;
+
+  r0 = _mm_add_epi32(r0, r1);   // final coefficients live in r0, r2, r3 and r5
+  r2 = _mm_sub_epi32(r2, r1);
+  r3 = _mm_sub_epi32(r3, r5);
+  r5 = _mm_add_epi32(r5, r4);
+
+  // abs: accumulate |coefficient| over the four result vectors
+  __m128i Sum = _mm_abs_epi32(r0);
+#if JVET_R0164_MEAN_SCALED_SATD
+  Distortion absDc = _mm_cvtsi128_si32(Sum);   // lane 0 of |r0|, read before the other vectors are added; used as the DC magnitude
+#endif
+  Sum = _mm_add_epi32(Sum, _mm_abs_epi32(r2));
+  Sum = _mm_add_epi32(Sum, _mm_abs_epi32(r3));
+  Sum = _mm_add_epi32(Sum, _mm_abs_epi32(r5));
+  Sum = _mm_hadd_epi32(Sum, Sum);   // two horizontal adds leave the total of all four lanes in lane 0
+  Sum = _mm_hadd_epi32(Sum, Sum);
+
+  Distortion sad = _mm_cvtsi128_si32(Sum);
+
+#if JVET_R0164_MEAN_SCALED_SATD
+  sad -= absDc;
+  sad += absDc >> 2;   // the DC term contributes only a quarter of its magnitude
+#endif
+  sad = ((sad + 1) >> 1);   // divide by 2 with rounding
+
+  return sad;
+}
+
+static Distortion xCalcHAD8x8_HBD_SSE(const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur)
+{   // 8x8 Hadamard SATD of (org - cur) in 32-bit lanes (assumes Torg/Tcur are 32-bit samples); result is divided by 4 with rounding.
+  __m128i m1[8][2], m2[8][2];   // indexed [row][half]: half 0 = columns 0-3, half 1 = columns 4-7
+
+  for (int k = 0; k < 8; k++)
+  {
+    m2[k][0] = _mm_sub_epi32(_mm_lddqu_si128((__m128i *) piOrg), _mm_lddqu_si128((__m128i *) piCur));               // row k, columns 0-3
+    m2[k][1] = _mm_sub_epi32(_mm_lddqu_si128((__m128i *)(piOrg + 4)), _mm_lddqu_si128((__m128i *)(piCur + 4)));     // row k, columns 4-7
+    piCur += iStrideCur;
+    piOrg += iStrideOrg;
+  }
+
+  for (int i = 0; i < 2; i++)
+  {
+    // vertical: three butterfly stages across the eight rows (row distance 4, then 2, then 1)
+    m1[0][i] = _mm_add_epi32(m2[0][i], m2[4][i]);
+    m1[1][i] = _mm_add_epi32(m2[1][i], m2[5][i]);
+    m1[2][i] = _mm_add_epi32(m2[2][i], m2[6][i]);
+    m1[3][i] = _mm_add_epi32(m2[3][i], m2[7][i]);
+    m1[4][i] = _mm_sub_epi32(m2[0][i], m2[4][i]);
+    m1[5][i] = _mm_sub_epi32(m2[1][i], m2[5][i]);
+    m1[6][i] = _mm_sub_epi32(m2[2][i], m2[6][i]);
+    m1[7][i] = _mm_sub_epi32(m2[3][i], m2[7][i]);
+
+    m2[0][i] = _mm_add_epi32(m1[0][i], m1[2][i]);
+    m2[1][i] = _mm_add_epi32(m1[1][i], m1[3][i]);
+    m2[2][i] = _mm_sub_epi32(m1[0][i], m1[2][i]);
+    m2[3][i] = _mm_sub_epi32(m1[1][i], m1[3][i]);
+    m2[4][i] = _mm_add_epi32(m1[4][i], m1[6][i]);
+    m2[5][i] = _mm_add_epi32(m1[5][i], m1[7][i]);
+    m2[6][i] = _mm_sub_epi32(m1[4][i], m1[6][i]);
+    m2[7][i] = _mm_sub_epi32(m1[5][i], m1[7][i]);
+
+    m1[0][i] = _mm_add_epi32(m2[0][i], m2[1][i]);
+    m1[1][i] = _mm_sub_epi32(m2[0][i], m2[1][i]);
+    m1[2][i] = _mm_add_epi32(m2[2][i], m2[3][i]);
+    m1[3][i] = _mm_sub_epi32(m2[2][i], m2[3][i]);
+    m1[4][i] = _mm_add_epi32(m2[4][i], m2[5][i]);
+    m1[5][i] = _mm_sub_epi32(m2[4][i], m2[5][i]);
+    m1[6][i] = _mm_add_epi32(m2[6][i], m2[7][i]);
+    m1[7][i] = _mm_sub_epi32(m2[6][i], m2[7][i]);
+
+    // transpose each 4x4 tile (rows 0-3 and rows 4-7 of this half) in place
+    m2[0][i] = _mm_unpacklo_epi32(m1[0][i], m1[1][i]);
+    m2[1][i] = _mm_unpacklo_epi32(m1[2][i], m1[3][i]);
+    m2[2][i] = _mm_unpackhi_epi32(m1[0][i], m1[1][i]);
+    m2[3][i] = _mm_unpackhi_epi32(m1[2][i], m1[3][i]);
+    m2[4][i] = _mm_unpacklo_epi32(m1[4][i], m1[5][i]);
+    m2[5][i] = _mm_unpacklo_epi32(m1[6][i], m1[7][i]);
+    m2[6][i] = _mm_unpackhi_epi32(m1[4][i], m1[5][i]);
+    m2[7][i] = _mm_unpackhi_epi32(m1[6][i], m1[7][i]);
+
+    m1[0][i] = _mm_unpacklo_epi64(m2[0][i], m2[1][i]);
+    m1[1][i] = _mm_unpackhi_epi64(m2[0][i], m2[1][i]);
+    m1[2][i] = _mm_unpacklo_epi64(m2[2][i], m2[3][i]);
+    m1[3][i] = _mm_unpackhi_epi64(m2[2][i], m2[3][i]);
+    m1[4][i] = _mm_unpacklo_epi64(m2[4][i], m2[5][i]);
+    m1[5][i] = _mm_unpackhi_epi64(m2[4][i], m2[5][i]);
+    m1[6][i] = _mm_unpacklo_epi64(m2[6][i], m2[7][i]);
+    m1[7][i] = _mm_unpackhi_epi64(m2[6][i], m2[7][i]);
+  }
+
+  // transpose: regroup the transposed 4x4 tiles so that n2[c][h] holds column c for rows 4h..4h+3
+  __m128i n1[8][2];
+  __m128i n2[8][2];
+
+  for (int i = 0; i < 8; i++)
+  {
+    int ii = i % 4;    // column index inside its 4-column half
+    int ij = i >> 2;   // which half (0 or 1) column i came from
+
+    n2[i][0] = m1[ii][ij];
+    n2[i][1] = m1[ii + 4][ij];
+  }
+
+  for (int i = 0; i < 2; i++)
+  {
+    // horizontal: three butterfly stages across the eight column vectors; abs is applied in the last stage
+    n1[0][i] = _mm_add_epi32(n2[0][i], n2[4][i]);
+    n1[1][i] = _mm_add_epi32(n2[1][i], n2[5][i]);
+    n1[2][i] = _mm_add_epi32(n2[2][i], n2[6][i]);
+    n1[3][i] = _mm_add_epi32(n2[3][i], n2[7][i]);
+    n1[4][i] = _mm_sub_epi32(n2[0][i], n2[4][i]);
+    n1[5][i] = _mm_sub_epi32(n2[1][i], n2[5][i]);
+    n1[6][i] = _mm_sub_epi32(n2[2][i], n2[6][i]);
+    n1[7][i] = _mm_sub_epi32(n2[3][i], n2[7][i]);
+
+    n2[0][i] = _mm_add_epi32(n1[0][i], n1[2][i]);
+    n2[1][i] = _mm_add_epi32(n1[1][i], n1[3][i]);
+    n2[2][i] = _mm_sub_epi32(n1[0][i], n1[2][i]);
+    n2[3][i] = _mm_sub_epi32(n1[1][i], n1[3][i]);
+    n2[4][i] = _mm_add_epi32(n1[4][i], n1[6][i]);
+    n2[5][i] = _mm_add_epi32(n1[5][i], n1[7][i]);
+    n2[6][i] = _mm_sub_epi32(n1[4][i], n1[6][i]);
+    n2[7][i] = _mm_sub_epi32(n1[5][i], n1[7][i]);
+
+    n1[0][i] = _mm_abs_epi32(_mm_add_epi32(n2[0][i], n2[1][i]));
+    n1[1][i] = _mm_abs_epi32(_mm_sub_epi32(n2[0][i], n2[1][i]));
+    n1[2][i] = _mm_abs_epi32(_mm_add_epi32(n2[2][i], n2[3][i]));
+    n1[3][i] = _mm_abs_epi32(_mm_sub_epi32(n2[2][i], n2[3][i]));
+    n1[4][i] = _mm_abs_epi32(_mm_add_epi32(n2[4][i], n2[5][i]));
+    n1[5][i] = _mm_abs_epi32(_mm_sub_epi32(n2[4][i], n2[5][i]));
+    n1[6][i] = _mm_abs_epi32(_mm_add_epi32(n2[6][i], n2[7][i]));
+    n1[7][i] = _mm_abs_epi32(_mm_sub_epi32(n2[6][i], n2[7][i]));
+  }
+
+  for (int i = 0; i < 8; i++)
+  {
+    m1[i][0] = _mm_add_epi32(n1[i][0], n1[i][1]);   // m1[..][0] is reused as the accumulator; n1 itself is left intact
+  }
+
+  m1[0][0] = _mm_add_epi32(m1[0][0], m1[1][0]);   // pairwise reduction of the eight partial sums
+  m1[2][0] = _mm_add_epi32(m1[2][0], m1[3][0]);
+  m1[4][0] = _mm_add_epi32(m1[4][0], m1[5][0]);
+  m1[6][0] = _mm_add_epi32(m1[6][0], m1[7][0]);
+
+  m1[0][0] = _mm_add_epi32(m1[0][0], m1[2][0]);
+  m1[4][0] = _mm_add_epi32(m1[4][0], m1[6][0]);
+  __m128i iSum = _mm_add_epi32(m1[0][0], m1[4][0]);
+
+  iSum = _mm_hadd_epi32(iSum, iSum);   // two horizontal adds leave the total of all four lanes in lane 0
+  iSum = _mm_hadd_epi32(iSum, iSum);
+
+  Distortion sad = _mm_cvtsi128_si32(iSum);
+#if JVET_R0164_MEAN_SCALED_SATD
+  Distortion absDc = _mm_cvtsi128_si32(n1[0][0]);   // lane 0 of the first absolute-coefficient vector, used as the DC magnitude
+  sad -= absDc;
+  sad += absDc >> 2;   // the DC term contributes only a quarter of its magnitude
+#endif
+  sad = ((sad + 2) >> 2);   // divide by 4 with rounding
+
+  return sad;
+}
+
+static Distortion xCalcHAD4x8_HBD_SSE(const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur)
+{   // Hadamard SATD of (org - cur) for a block 4 samples wide and 8 rows high, in 32-bit lanes (assumes Torg/Tcur are 32-bit samples).
+  __m128i m1[8], m2[8];
+
+  for (int k = 0; k < 8; k++)
+  {
+    m2[k] = _mm_sub_epi32(_mm_lddqu_si128((__m128i*)piOrg), _mm_lddqu_si128((__m128i*)piCur));   // row k: four samples in one unaligned 128-bit load
+    piCur += iStrideCur;
+    piOrg += iStrideOrg;
+  }
+
+  // vertical: three butterfly stages across the eight rows (row distance 4, then 2, then 1)
+  m1[0] = _mm_add_epi32(m2[0], m2[4]);
+  m1[1] = _mm_add_epi32(m2[1], m2[5]);
+  m1[2] = _mm_add_epi32(m2[2], m2[6]);
+  m1[3] = _mm_add_epi32(m2[3], m2[7]);
+  m1[4] = _mm_sub_epi32(m2[0], m2[4]);
+  m1[5] = _mm_sub_epi32(m2[1], m2[5]);
+  m1[6] = _mm_sub_epi32(m2[2], m2[6]);
+  m1[7] = _mm_sub_epi32(m2[3], m2[7]);
+
+  m2[0] = _mm_add_epi32(m1[0], m1[2]);
+  m2[1] = _mm_add_epi32(m1[1], m1[3]);
+  m2[2] = _mm_sub_epi32(m1[0], m1[2]);
+  m2[3] = _mm_sub_epi32(m1[1], m1[3]);
+  m2[4] = _mm_add_epi32(m1[4], m1[6]);
+  m2[5] = _mm_add_epi32(m1[5], m1[7]);
+  m2[6] = _mm_sub_epi32(m1[4], m1[6]);
+  m2[7] = _mm_sub_epi32(m1[5], m1[7]);
+
+  m1[0] = _mm_add_epi32(m2[0], m2[1]);
+  m1[1] = _mm_sub_epi32(m2[0], m2[1]);
+  m1[2] = _mm_add_epi32(m2[2], m2[3]);
+  m1[3] = _mm_sub_epi32(m2[2], m2[3]);
+  m1[4] = _mm_add_epi32(m2[4], m2[5]);
+  m1[5] = _mm_sub_epi32(m2[4], m2[5]);
+  m1[6] = _mm_add_epi32(m2[6], m2[7]);
+  m1[7] = _mm_sub_epi32(m2[6], m2[7]);
+
+  // transpose: afterwards n1[c][h] holds column c for rows 4h..4h+3
+  __m128i n1[4][2], n2[4][2];
+
+  n2[0][0] = _mm_unpacklo_epi32(m1[0], m1[1]);
+  n2[0][1] = _mm_unpackhi_epi32(m1[0], m1[1]);
+  n2[1][0] = _mm_unpacklo_epi32(m1[2], m1[3]);
+  n2[1][1] = _mm_unpackhi_epi32(m1[2], m1[3]);
+  n2[2][0] = _mm_unpacklo_epi32(m1[4], m1[5]);
+  n2[2][1] = _mm_unpackhi_epi32(m1[4], m1[5]);
+  n2[3][0] = _mm_unpacklo_epi32(m1[6], m1[7]);
+  n2[3][1] = _mm_unpackhi_epi32(m1[6], m1[7]);
+
+  n1[0][0] = _mm_unpacklo_epi64(n2[0][0], n2[1][0]);
+  n1[0][1] = _mm_unpacklo_epi64(n2[2][0], n2[3][0]);
+  n1[1][0] = _mm_unpackhi_epi64(n2[0][0], n2[1][0]);
+  n1[1][1] = _mm_unpackhi_epi64(n2[2][0], n2[3][0]);
+  n1[2][0] = _mm_unpacklo_epi64(n2[0][1], n2[1][1]);
+  n1[2][1] = _mm_unpacklo_epi64(n2[2][1], n2[3][1]);
+  n1[3][0] = _mm_unpackhi_epi64(n2[0][1], n2[1][1]);
+  n1[3][1] = _mm_unpackhi_epi64(n2[2][1], n2[3][1]);
+
+  // horizontal: two butterfly stages across the four column vectors; abs is applied in the last stage
+  for (int i = 0; i < 2; i++)
+  {
+    n2[0][i] = _mm_add_epi32(n1[0][i], n1[2][i]);
+    n2[1][i] = _mm_add_epi32(n1[1][i], n1[3][i]);
+    n2[2][i] = _mm_sub_epi32(n1[0][i], n1[2][i]);
+    n2[3][i] = _mm_sub_epi32(n1[1][i], n1[3][i]);
+
+    n1[0][i] = _mm_abs_epi32(_mm_add_epi32(n2[0][i], n2[1][i]));
+    n1[1][i] = _mm_abs_epi32(_mm_sub_epi32(n2[0][i], n2[1][i]));
+    n1[2][i] = _mm_abs_epi32(_mm_add_epi32(n2[2][i], n2[3][i]));
+    n1[3][i] = _mm_abs_epi32(_mm_sub_epi32(n2[2][i], n2[3][i]));
+  }
+
+  for (int i = 0; i < 4; i++)
+  {
+    m1[i] = _mm_add_epi32(n1[i][0], n1[i][1]);   // m1 is reused as the accumulator; n1 itself is left intact
+  }
+
+  m1[0] = _mm_add_epi32(m1[0], m1[1]);
+  m1[2] = _mm_add_epi32(m1[2], m1[3]);
+
+  __m128i iSum = _mm_add_epi32(m1[0], m1[2]);
+  iSum = _mm_hadd_epi32(iSum, iSum);   // two horizontal adds leave the total of all four lanes in lane 0
+  iSum = _mm_hadd_epi32(iSum, iSum);
+
+  Distortion sad = _mm_cvtsi128_si32(iSum);
+#if JVET_R0164_MEAN_SCALED_SATD
+  Distortion absDc = _mm_cvtsi128_si32(n1[0][0]);   // lane 0 of the first absolute-coefficient vector (DC); declared here so it is not unused when the macro is off
+  sad -= absDc;
+  sad += absDc >> 2;   // the DC term contributes only a quarter of its magnitude
+#endif
+  sad = (Distortion)(sad / sqrt(4.0 * 8) * 2);   // scale by 2 / sqrt(4 * 8) in floating point; the cast truncates
+
+  return sad;
+}
+
+static Distortion xCalcHAD8x4_HBD_SSE(const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur)
+{   // Hadamard SATD of (org - cur) for a block 8 samples wide and 4 rows high, in 32-bit lanes (assumes Torg/Tcur are 32-bit samples).
+  __m128i m1[8][2], m2[8][2];   // indexed [row][half]; only rows 0..3 are used in this function
+
+  for (int k = 0; k < 4; k++)
+  {
+    m1[k][0] = _mm_sub_epi32(_mm_lddqu_si128((__m128i*) piOrg), _mm_lddqu_si128((__m128i*) piCur));               // row k, columns 0-3
+    m1[k][1] = _mm_sub_epi32(_mm_lddqu_si128((__m128i*)(piOrg + 4)), _mm_lddqu_si128((__m128i*)(piCur + 4)));     // row k, columns 4-7
+    piCur += iStrideCur;
+    piOrg += iStrideOrg;
+  }
+
+  // vertical: two butterfly stages across the four rows, done separately for each 4-column half
+  for (int i = 0; i < 2; i++)
+  {
+    m2[0][i] = _mm_add_epi32(m1[0][i], m1[2][i]);
+    m2[1][i] = _mm_add_epi32(m1[1][i], m1[3][i]);
+    m2[2][i] = _mm_sub_epi32(m1[0][i], m1[2][i]);
+    m2[3][i] = _mm_sub_epi32(m1[1][i], m1[3][i]);
+
+    m1[0][i] = _mm_add_epi32(m2[0][i], m2[1][i]);
+    m1[1][i] = _mm_sub_epi32(m2[0][i], m2[1][i]);
+    m1[2][i] = _mm_add_epi32(m2[2][i], m2[3][i]);
+    m1[3][i] = _mm_sub_epi32(m2[2][i], m2[3][i]);
+  }
+
+  // transpose: afterwards n2[c] holds column c (c = 0..7) for the four rows
+  m2[0][0] = _mm_unpacklo_epi32(m1[0][0], m1[1][0]);
+  m2[0][1] = _mm_unpacklo_epi32(m1[0][1], m1[1][1]);
+  m2[1][0] = _mm_unpacklo_epi32(m1[2][0], m1[3][0]);
+  m2[1][1] = _mm_unpacklo_epi32(m1[2][1], m1[3][1]);
+  m2[2][0] = _mm_unpackhi_epi32(m1[0][0], m1[1][0]);
+  m2[2][1] = _mm_unpackhi_epi32(m1[0][1], m1[1][1]);
+  m2[3][0] = _mm_unpackhi_epi32(m1[2][0], m1[3][0]);
+  m2[3][1] = _mm_unpackhi_epi32(m1[2][1], m1[3][1]);
+
+  __m128i n1[8], n2[8];
+  n2[0] = _mm_unpacklo_epi64(m2[0][0], m2[1][0]);
+  n2[1] = _mm_unpackhi_epi64(m2[0][0], m2[1][0]);
+  n2[2] = _mm_unpacklo_epi64(m2[2][0], m2[3][0]);
+  n2[3] = _mm_unpackhi_epi64(m2[2][0], m2[3][0]);
+  n2[4] = _mm_unpacklo_epi64(m2[0][1], m2[1][1]);
+  n2[5] = _mm_unpackhi_epi64(m2[0][1], m2[1][1]);
+  n2[6] = _mm_unpacklo_epi64(m2[2][1], m2[3][1]);
+  n2[7] = _mm_unpackhi_epi64(m2[2][1], m2[3][1]);
+
+  // horizontal: three butterfly stages across the eight column vectors (distance 4, then 2, then 1)
+  n1[0] = _mm_add_epi32(n2[0], n2[4]);
+  n1[1] = _mm_add_epi32(n2[1], n2[5]);
+  n1[2] = _mm_add_epi32(n2[2], n2[6]);
+  n1[3] = _mm_add_epi32(n2[3], n2[7]);
+  n1[4] = _mm_sub_epi32(n2[0], n2[4]);
+  n1[5] = _mm_sub_epi32(n2[1], n2[5]);
+  n1[6] = _mm_sub_epi32(n2[2], n2[6]);
+  n1[7] = _mm_sub_epi32(n2[3], n2[7]);
+
+  n2[0] = _mm_add_epi32(n1[0], n1[2]);
+  n2[1] = _mm_add_epi32(n1[1], n1[3]);
+  n2[2] = _mm_sub_epi32(n1[0], n1[2]);
+  n2[3] = _mm_sub_epi32(n1[1], n1[3]);
+  n2[4] = _mm_add_epi32(n1[4], n1[6]);
+  n2[5] = _mm_add_epi32(n1[5], n1[7]);
+  n2[6] = _mm_sub_epi32(n1[4], n1[6]);
+  n2[7] = _mm_sub_epi32(n1[5], n1[7]);
+
+  n1[0] = _mm_abs_epi32(_mm_add_epi32(n2[0], n2[1]));   // last stage also takes the absolute value
+  n1[1] = _mm_abs_epi32(_mm_sub_epi32(n2[0], n2[1]));
+  n1[2] = _mm_abs_epi32(_mm_add_epi32(n2[2], n2[3]));
+  n1[3] = _mm_abs_epi32(_mm_sub_epi32(n2[2], n2[3]));
+  n1[4] = _mm_abs_epi32(_mm_add_epi32(n2[4], n2[5]));
+  n1[5] = _mm_abs_epi32(_mm_sub_epi32(n2[4], n2[5]));
+  n1[6] = _mm_abs_epi32(_mm_add_epi32(n2[6], n2[7]));
+  n1[7] = _mm_abs_epi32(_mm_sub_epi32(n2[6], n2[7]));
+
+#if JVET_R0164_MEAN_SCALED_SATD
+  Distortion absDc = _mm_cvtsi128_si32(n1[0]);   // must be read here: n1[0] is overwritten by the reduction below
+#endif
+  n1[0] = _mm_add_epi32(n1[0], n1[1]);   // pairwise reduction of the eight vectors
+  n1[1] = _mm_add_epi32(n1[2], n1[3]);
+  n1[2] = _mm_add_epi32(n1[4], n1[5]);
+  n1[3] = _mm_add_epi32(n1[6], n1[7]);
+
+  n1[0] = _mm_add_epi32(n1[0], n1[1]);
+  n1[1] = _mm_add_epi32(n1[2], n1[3]);
+
+  __m128i iSum = _mm_add_epi32(n1[0], n1[1]);
+  iSum = _mm_hadd_epi32(iSum, iSum);   // two horizontal adds leave the total of all four lanes in lane 0
+  iSum = _mm_hadd_epi32(iSum, iSum);
+
+  Distortion sad = _mm_cvtsi128_si32(iSum);
+#if JVET_R0164_MEAN_SCALED_SATD
+  sad -= absDc;
+  sad += absDc >> 2;   // the DC term contributes only a quarter of its magnitude
+#endif
+  sad = (Distortion)(sad / sqrt(4.0 * 8) * 2);   // scale by 2 / sqrt(4 * 8) in floating point; the cast truncates
+  return sad;
+}
+
+static Distortion xCalcHAD16x8_HBD_SSE(const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur)
+{   // Hadamard SATD of (org - cur) for a block 16 samples wide and 8 rows high, in 32-bit lanes (assumes Torg/Tcur are 32-bit samples).
+  __m128i m1[16][2][2], m2[16][2][2];   // indexed [row][8-column half][4-column quarter of that half]
+  __m128i iSum = _mm_setzero_si128();
+
+  for (int l = 0; l < 2; l++)
+  {
+    const Torg *piOrgPtr = piOrg + l * 8;   // start of columns 8*l .. 8*l+7
+    const Tcur *piCurPtr = piCur + l * 8;
+    for (int k = 0; k < 8; k++)
+    {
+      m2[k][l][0] = _mm_sub_epi32(_mm_lddqu_si128((__m128i*)  piOrgPtr), _mm_lddqu_si128((__m128i*)  piCurPtr));
+      m2[k][l][1] = _mm_sub_epi32(_mm_lddqu_si128((__m128i*) (piOrgPtr + 4)), _mm_lddqu_si128((__m128i*) (piCurPtr + 4)));
+      piCurPtr += iStrideCur;
+      piOrgPtr += iStrideOrg;
+    }
+
+    for (int i = 0; i < 2; i++)
+    {
+      //vertical: three butterfly stages across the eight rows (row distance 4, then 2, then 1)
+      m1[0][l][i] = _mm_add_epi32(m2[0][l][i], m2[4][l][i]);
+      m1[1][l][i] = _mm_add_epi32(m2[1][l][i], m2[5][l][i]);
+      m1[2][l][i] = _mm_add_epi32(m2[2][l][i], m2[6][l][i]);
+      m1[3][l][i] = _mm_add_epi32(m2[3][l][i], m2[7][l][i]);
+      m1[4][l][i] = _mm_sub_epi32(m2[0][l][i], m2[4][l][i]);
+      m1[5][l][i] = _mm_sub_epi32(m2[1][l][i], m2[5][l][i]);
+      m1[6][l][i] = _mm_sub_epi32(m2[2][l][i], m2[6][l][i]);
+      m1[7][l][i] = _mm_sub_epi32(m2[3][l][i], m2[7][l][i]);
+
+      m2[0][l][i] = _mm_add_epi32(m1[0][l][i], m1[2][l][i]);
+      m2[1][l][i] = _mm_add_epi32(m1[1][l][i], m1[3][l][i]);
+      m2[2][l][i] = _mm_sub_epi32(m1[0][l][i], m1[2][l][i]);
+      m2[3][l][i] = _mm_sub_epi32(m1[1][l][i], m1[3][l][i]);
+      m2[4][l][i] = _mm_add_epi32(m1[4][l][i], m1[6][l][i]);
+      m2[5][l][i] = _mm_add_epi32(m1[5][l][i], m1[7][l][i]);
+      m2[6][l][i] = _mm_sub_epi32(m1[4][l][i], m1[6][l][i]);
+      m2[7][l][i] = _mm_sub_epi32(m1[5][l][i], m1[7][l][i]);
+
+      m1[0][l][i] = _mm_add_epi32(m2[0][l][i], m2[1][l][i]);
+      m1[1][l][i] = _mm_sub_epi32(m2[0][l][i], m2[1][l][i]);
+      m1[2][l][i] = _mm_add_epi32(m2[2][l][i], m2[3][l][i]);
+      m1[3][l][i] = _mm_sub_epi32(m2[2][l][i], m2[3][l][i]);
+      m1[4][l][i] = _mm_add_epi32(m2[4][l][i], m2[5][l][i]);
+      m1[5][l][i] = _mm_sub_epi32(m2[4][l][i], m2[5][l][i]);
+      m1[6][l][i] = _mm_add_epi32(m2[6][l][i], m2[7][l][i]);
+      m1[7][l][i] = _mm_sub_epi32(m2[6][l][i], m2[7][l][i]);
+    }
+  }
+
+  // 4 x 8x4 blocks
+  // 0 1
+  // 2 3
+#if JVET_R0164_MEAN_SCALED_SATD
+  uint32_t absDc = 0;
+#endif
+
+  // transpose and do horizontal in two steps; here l selects the group of four rows (off .. off+3), not the column half
+  for (int l = 0; l < 2; l++)
+  {
+    int off = l * 4;
+
+    __m128i n1[16];   // after the transposes, n1[c] holds column c (c = 0..15) for the four rows of this pass
+    __m128i n2[16];
+
+    m2[0][0][0] = _mm_unpacklo_epi32(m1[0 + off][0][0], m1[1 + off][0][0]);   // m2 is reused as scratch from here on
+    m2[1][0][0] = _mm_unpacklo_epi32(m1[2 + off][0][0], m1[3 + off][0][0]);
+    m2[2][0][0] = _mm_unpackhi_epi32(m1[0 + off][0][0], m1[1 + off][0][0]);
+    m2[3][0][0] = _mm_unpackhi_epi32(m1[2 + off][0][0], m1[3 + off][0][0]);
+
+    m2[0][0][1] = _mm_unpacklo_epi32(m1[0 + off][0][1], m1[1 + off][0][1]);
+    m2[1][0][1] = _mm_unpacklo_epi32(m1[2 + off][0][1], m1[3 + off][0][1]);
+    m2[2][0][1] = _mm_unpackhi_epi32(m1[0 + off][0][1], m1[1 + off][0][1]);
+    m2[3][0][1] = _mm_unpackhi_epi32(m1[2 + off][0][1], m1[3 + off][0][1]);
+
+    n1[0] = _mm_unpacklo_epi64(m2[0][0][0], m2[1][0][0]);
+    n1[1] = _mm_unpackhi_epi64(m2[0][0][0], m2[1][0][0]);
+    n1[2] = _mm_unpacklo_epi64(m2[2][0][0], m2[3][0][0]);
+    n1[3] = _mm_unpackhi_epi64(m2[2][0][0], m2[3][0][0]);
+    n1[4] = _mm_unpacklo_epi64(m2[0][0][1], m2[1][0][1]);
+    n1[5] = _mm_unpackhi_epi64(m2[0][0][1], m2[1][0][1]);
+    n1[6] = _mm_unpacklo_epi64(m2[2][0][1], m2[3][0][1]);
+    n1[7] = _mm_unpackhi_epi64(m2[2][0][1], m2[3][0][1]);
+
+    // transpose 8x4 -> 4x8, block 1(3)
+    m2[8 + 0][0][0] = _mm_unpacklo_epi32(m1[0 + off][1][0], m1[1 + off][1][0]);
+    m2[8 + 1][0][0] = _mm_unpacklo_epi32(m1[2 + off][1][0], m1[3 + off][1][0]);
+    m2[8 + 2][0][0] = _mm_unpackhi_epi32(m1[0 + off][1][0], m1[1 + off][1][0]);
+    m2[8 + 3][0][0] = _mm_unpackhi_epi32(m1[2 + off][1][0], m1[3 + off][1][0]);
+
+    m2[8 + 0][0][1] = _mm_unpacklo_epi32(m1[0 + off][1][1], m1[1 + off][1][1]);
+    m2[8 + 1][0][1] = _mm_unpacklo_epi32(m1[2 + off][1][1], m1[3 + off][1][1]);
+    m2[8 + 2][0][1] = _mm_unpackhi_epi32(m1[0 + off][1][1], m1[1 + off][1][1]);
+    m2[8 + 3][0][1] = _mm_unpackhi_epi32(m1[2 + off][1][1], m1[3 + off][1][1]);
+
+    n1[8 + 0] = _mm_unpacklo_epi64(m2[8 + 0][0][0], m2[8 + 1][0][0]);
+    n1[8 + 1] = _mm_unpackhi_epi64(m2[8 + 0][0][0], m2[8 + 1][0][0]);
+    n1[8 + 2] = _mm_unpacklo_epi64(m2[8 + 2][0][0], m2[8 + 3][0][0]);
+    n1[8 + 3] = _mm_unpackhi_epi64(m2[8 + 2][0][0], m2[8 + 3][0][0]);
+    n1[8 + 4] = _mm_unpacklo_epi64(m2[8 + 0][0][1], m2[8 + 1][0][1]);
+    n1[8 + 5] = _mm_unpackhi_epi64(m2[8 + 0][0][1], m2[8 + 1][0][1]);
+    n1[8 + 6] = _mm_unpacklo_epi64(m2[8 + 2][0][1], m2[8 + 3][0][1]);
+    n1[8 + 7] = _mm_unpackhi_epi64(m2[8 + 2][0][1], m2[8 + 3][0][1]);
+
+    n2[0] = _mm_add_epi32(n1[0], n1[8]);   // horizontal stage 1 of 4: butterflies at column distance 8
+    n2[1] = _mm_add_epi32(n1[1], n1[9]);
+    n2[2] = _mm_add_epi32(n1[2], n1[10]);
+    n2[3] = _mm_add_epi32(n1[3], n1[11]);
+    n2[4] = _mm_add_epi32(n1[4], n1[12]);
+    n2[5] = _mm_add_epi32(n1[5], n1[13]);
+    n2[6] = _mm_add_epi32(n1[6], n1[14]);
+    n2[7] = _mm_add_epi32(n1[7], n1[15]);
+    n2[8] = _mm_sub_epi32(n1[0], n1[8]);
+    n2[9] = _mm_sub_epi32(n1[1], n1[9]);
+    n2[10] = _mm_sub_epi32(n1[2], n1[10]);
+    n2[11] = _mm_sub_epi32(n1[3], n1[11]);
+    n2[12] = _mm_sub_epi32(n1[4], n1[12]);
+    n2[13] = _mm_sub_epi32(n1[5], n1[13]);
+    n2[14] = _mm_sub_epi32(n1[6], n1[14]);
+    n2[15] = _mm_sub_epi32(n1[7], n1[15]);
+
+    n1[0] = _mm_add_epi32(n2[0], n2[4]);   // stage 2: distance 4
+    n1[1] = _mm_add_epi32(n2[1], n2[5]);
+    n1[2] = _mm_add_epi32(n2[2], n2[6]);
+    n1[3] = _mm_add_epi32(n2[3], n2[7]);
+    n1[4] = _mm_sub_epi32(n2[0], n2[4]);
+    n1[5] = _mm_sub_epi32(n2[1], n2[5]);
+    n1[6] = _mm_sub_epi32(n2[2], n2[6]);
+    n1[7] = _mm_sub_epi32(n2[3], n2[7]);
+    n1[8] = _mm_add_epi32(n2[8], n2[12]);
+    n1[9] = _mm_add_epi32(n2[9], n2[13]);
+    n1[10] = _mm_add_epi32(n2[10], n2[14]);
+    n1[11] = _mm_add_epi32(n2[11], n2[15]);
+    n1[12] = _mm_sub_epi32(n2[8], n2[12]);
+    n1[13] = _mm_sub_epi32(n2[9], n2[13]);
+    n1[14] = _mm_sub_epi32(n2[10], n2[14]);
+    n1[15] = _mm_sub_epi32(n2[11], n2[15]);
+
+    n2[0] = _mm_add_epi32(n1[0], n1[2]);   // stage 3: distance 2
+    n2[1] = _mm_add_epi32(n1[1], n1[3]);
+    n2[2] = _mm_sub_epi32(n1[0], n1[2]);
+    n2[3] = _mm_sub_epi32(n1[1], n1[3]);
+    n2[4] = _mm_add_epi32(n1[4], n1[6]);
+    n2[5] = _mm_add_epi32(n1[5], n1[7]);
+    n2[6] = _mm_sub_epi32(n1[4], n1[6]);
+    n2[7] = _mm_sub_epi32(n1[5], n1[7]);
+    n2[8] = _mm_add_epi32(n1[8], n1[10]);
+    n2[9] = _mm_add_epi32(n1[9], n1[11]);
+    n2[10] = _mm_sub_epi32(n1[8], n1[10]);
+    n2[11] = _mm_sub_epi32(n1[9], n1[11]);
+    n2[12] = _mm_add_epi32(n1[12], n1[14]);
+    n2[13] = _mm_add_epi32(n1[13], n1[15]);
+    n2[14] = _mm_sub_epi32(n1[12], n1[14]);
+    n2[15] = _mm_sub_epi32(n1[13], n1[15]);
+
+    n1[0] = _mm_abs_epi32(_mm_add_epi32(n2[0], n2[1]));   // stage 4: distance 1, followed by abs
+    n1[1] = _mm_abs_epi32(_mm_sub_epi32(n2[0], n2[1]));
+    n1[2] = _mm_abs_epi32(_mm_add_epi32(n2[2], n2[3]));
+    n1[3] = _mm_abs_epi32(_mm_sub_epi32(n2[2], n2[3]));
+    n1[4] = _mm_abs_epi32(_mm_add_epi32(n2[4], n2[5]));
+    n1[5] = _mm_abs_epi32(_mm_sub_epi32(n2[4], n2[5]));
+    n1[6] = _mm_abs_epi32(_mm_add_epi32(n2[6], n2[7]));
+    n1[7] = _mm_abs_epi32(_mm_sub_epi32(n2[6], n2[7]));
+    n1[8] = _mm_abs_epi32(_mm_add_epi32(n2[8], n2[9]));
+    n1[9] = _mm_abs_epi32(_mm_sub_epi32(n2[8], n2[9]));
+    n1[10] = _mm_abs_epi32(_mm_add_epi32(n2[10], n2[11]));
+    n1[11] = _mm_abs_epi32(_mm_sub_epi32(n2[10], n2[11]));
+    n1[12] = _mm_abs_epi32(_mm_add_epi32(n2[12], n2[13]));
+    n1[13] = _mm_abs_epi32(_mm_sub_epi32(n2[12], n2[13]));
+    n1[14] = _mm_abs_epi32(_mm_add_epi32(n2[14], n2[15]));
+    n1[15] = _mm_abs_epi32(_mm_sub_epi32(n2[14], n2[15]));
+
+#if JVET_R0164_MEAN_SCALED_SATD
+    if (l == 0)   // taken only on the first pass, before n1[0] is overwritten by the reduction below
+      absDc = _mm_cvtsi128_si32(n1[0]);
+#endif
+
+    // sum up: pairwise reduction of the sixteen vectors into n1[0], then accumulate across both passes
+    n1[0] = _mm_add_epi32(n1[0], n1[1]);
+    n1[2] = _mm_add_epi32(n1[2], n1[3]);
+    n1[4] = _mm_add_epi32(n1[4], n1[5]);
+    n1[6] = _mm_add_epi32(n1[6], n1[7]);
+    n1[8] = _mm_add_epi32(n1[8], n1[9]);
+    n1[10] = _mm_add_epi32(n1[10], n1[11]);
+    n1[12] = _mm_add_epi32(n1[12], n1[13]);
+    n1[14] = _mm_add_epi32(n1[14], n1[15]);
+
+    n1[0] = _mm_add_epi32(n1[0], n1[2]);
+    n1[4] = _mm_add_epi32(n1[4], n1[6]);
+    n1[8] = _mm_add_epi32(n1[8], n1[10]);
+    n1[12] = _mm_add_epi32(n1[12], n1[14]);
+
+    n1[0] = _mm_add_epi32(n1[0], n1[4]);
+    n1[8] = _mm_add_epi32(n1[8], n1[12]);
+
+    n1[0] = _mm_add_epi32(n1[0], n1[8]);
+    iSum = _mm_add_epi32(iSum, n1[0]);
+  }
+
+  iSum = _mm_hadd_epi32(iSum, iSum);   // two horizontal adds leave the total of all four lanes in lane 0
+  iSum = _mm_hadd_epi32(iSum, iSum);
+
+  Distortion sad = _mm_cvtsi128_si32(iSum);
+
+#if JVET_R0164_MEAN_SCALED_SATD
+  sad -= absDc;
+  sad += absDc >> 2;   // the DC term contributes only a quarter of its magnitude
+#endif
+  sad = (Distortion)(sad / sqrt(16.0 * 8) * 2);   // scale by 2 / sqrt(16 * 8) in floating point; the cast truncates
+
+  return sad;
+}
+
+// Hadamard-based distortion (SATD) of a block 8 samples wide and 16 rows tall, SSE path.
+//   piOrg / piCur           : top-left samples of the two blocks; each row is consumed as 32-bit integer lanes
+//   iStrideOrg / iStrideCur : step between consecutive rows, in samples
+// Returns the sum of absolute transform coefficients scaled by 2 / sqrt(16 * 8). With
+// JVET_R0164_MEAN_SCALED_SATD enabled, the DC magnitude only contributes a quarter of its value.
+static Distortion xCalcHAD8x16_HBD_SSE(const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur)
+{
+  __m128i coef[2][16], work[2][16];
+
+  // residual rows: coef[0][r] takes samples 0..3 of row r, coef[1][r] takes samples 4..7
+  for (int row = 0; row < 16; row++)
+  {
+    const __m128i orgLo = _mm_lddqu_si128((const __m128i *) piOrg);
+    const __m128i orgHi = _mm_lddqu_si128((const __m128i *) (piOrg + 4));
+    const __m128i curLo = _mm_lddqu_si128((const __m128i *) piCur);
+    const __m128i curHi = _mm_lddqu_si128((const __m128i *) (piCur + 4));
+
+    coef[0][row] = _mm_sub_epi32(orgLo, curLo);
+    coef[1][row] = _mm_sub_epi32(orgHi, curHi);
+
+    piOrg += iStrideOrg;
+    piCur += iStrideCur;
+  }
+
+  // vertical 16-point butterfly on both column halves (partner distance 8, 4, 2, 1)
+  for (int half = 0; half < 2; half++)
+  {
+    __m128i *a = coef[half];
+    __m128i *b = work[half];
+
+    for (int j = 0; j < 8; j++)
+    {
+      b[j]     = _mm_add_epi32(a[j], a[j + 8]);
+      b[j + 8] = _mm_sub_epi32(a[j], a[j + 8]);
+    }
+
+    for (int g = 0; g < 16; g += 8)
+    {
+      for (int j = g; j < g + 4; j++)
+      {
+        a[j]     = _mm_add_epi32(b[j], b[j + 4]);
+        a[j + 4] = _mm_sub_epi32(b[j], b[j + 4]);
+      }
+    }
+
+    for (int g = 0; g < 16; g += 4)
+    {
+      for (int j = g; j < g + 2; j++)
+      {
+        b[j]     = _mm_add_epi32(a[j], a[j + 2]);
+        b[j + 2] = _mm_sub_epi32(a[j], a[j + 2]);
+      }
+    }
+
+    for (int j = 0; j < 16; j += 2)
+    {
+      a[j]     = _mm_add_epi32(b[j], b[j + 1]);
+      a[j + 1] = _mm_sub_epi32(b[j], b[j + 1]);
+    }
+  }
+
+  // transpose every 4x4 tile in place, so that each vector afterwards holds four rows of one column
+  for (int half = 0; half < 2; half++)
+  {
+    for (int base = 0; base < 16; base += 4)
+    {
+      __m128i *r = &coef[half][base];
+
+      const __m128i lo01 = _mm_unpacklo_epi32(r[0], r[1]);
+      const __m128i hi01 = _mm_unpackhi_epi32(r[0], r[1]);
+      const __m128i lo23 = _mm_unpacklo_epi32(r[2], r[3]);
+      const __m128i hi23 = _mm_unpackhi_epi32(r[2], r[3]);
+
+      r[0] = _mm_unpacklo_epi64(lo01, lo23);
+      r[1] = _mm_unpackhi_epi64(lo01, lo23);
+      r[2] = _mm_unpacklo_epi64(hi01, hi23);
+      r[3] = _mm_unpackhi_epi64(hi01, hi23);
+    }
+  }
+
+#if JVET_R0164_MEAN_SCALED_SATD
+  uint32_t absDc = 0;
+#endif
+  __m128i iSum = _mm_setzero_si128();
+
+  // horizontal 8-point butterfly, handled as two groups of eight rows
+  for (int blk = 0; blk < 2; blk++)
+  {
+    const int base = blk * 8;
+
+    __m128i in[2][8];
+    __m128i out[2][8];
+
+    // set 0 collects the first four rows of the group, set 1 the last four; entries 0..3 come from
+    // the left column half and entries 4..7 from the right column half
+    for (int j = 0; j < 4; j++)
+    {
+      in[0][j]     = coef[0][base + j];
+      in[0][j + 4] = coef[1][base + j];
+      in[1][j]     = coef[0][base + j + 4];
+      in[1][j + 4] = coef[1][base + j + 4];
+    }
+
+    for (int s = 0; s < 2; s++)
+    {
+      __m128i *a = in[s];
+      __m128i *b = out[s];
+
+      for (int j = 0; j < 4; j++)
+      {
+        b[j]     = _mm_add_epi32(a[j], a[j + 4]);
+        b[j + 4] = _mm_sub_epi32(a[j], a[j + 4]);
+      }
+
+      for (int g = 0; g < 8; g += 4)
+      {
+        for (int j = g; j < g + 2; j++)
+        {
+          a[j]     = _mm_add_epi32(b[j], b[j + 2]);
+          a[j + 2] = _mm_sub_epi32(b[j], b[j + 2]);
+        }
+      }
+
+      // the last stage also takes the magnitude of every coefficient
+      for (int j = 0; j < 8; j += 2)
+      {
+        b[j]     = _mm_abs_epi32(_mm_add_epi32(a[j], a[j + 1]));
+        b[j + 1] = _mm_abs_epi32(_mm_sub_epi32(a[j], a[j + 1]));
+      }
+
+#if JVET_R0164_MEAN_SCALED_SATD
+      // the DC coefficient sits in lane 0 of the very first output vector
+      if (blk + s == 0)
+      {
+        absDc = _mm_cvtsi128_si32(b[0]);
+      }
+#endif
+
+      // 32-bit lane additions wrap, so the accumulation order does not affect the total
+      for (int j = 0; j < 8; j++)
+      {
+        iSum = _mm_add_epi32(iSum, b[j]);
+      }
+    }
+  }
+
+  // fold the four lanes into lane 0
+  iSum = _mm_hadd_epi32(iSum, iSum);
+  iSum = _mm_hadd_epi32(iSum, iSum);
+
+  Distortion sad = _mm_cvtsi128_si32(iSum);
+#if JVET_R0164_MEAN_SCALED_SATD
+  sad -= absDc;
+  sad += absDc >> 2;
+#endif
+  sad = (Distortion)(sad / sqrt(16.0 * 8) * 2);
+
+  return sad;
+}
+
+#ifdef USE_AVX2
+// Hadamard-based distortion (SATD) of a 4x4 block, AVX2 path.
+//   piOrg / piCur           : top-left samples of the two blocks; each row is consumed as four 32-bit lanes
+//   iStrideOrg / iStrideCur : step between consecutive rows, in samples
+// Returns (sum of absolute coefficients + 1) >> 1. With JVET_R0164_MEAN_SCALED_SATD enabled, the
+// DC magnitude only contributes a quarter of its value to that sum.
+static Distortion xCalcHAD4x4_HBD_AVX2(const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur)
+{
+  // Residual rows. Each row is a 128-bit load widened to 256 bits; the upper half is never
+  // consumed, because the regrouping between the two passes only picks from the lower half.
+  __m256i line[4];
+  for (int row = 0; row < 4; row++)
+  {
+    const __m256i org = _mm256_castsi128_si256(_mm_lddqu_si128((const __m128i*)&piOrg[row * iStrideOrg]));
+    const __m256i cur = _mm256_castsi128_si256(_mm_lddqu_si128((const __m128i*)&piCur[row * iStrideCur]));
+    line[row] = _mm256_sub_epi32(org, cur);
+  }
+
+  __m256i coef[4];
+  for (int pass = 0; pass < 2; pass++)
+  {
+    // 4-point butterfly across the four vectors
+    const __m256i outerSum  = _mm256_add_epi32(line[0], line[3]);
+    const __m256i innerSum  = _mm256_add_epi32(line[1], line[2]);
+    const __m256i outerDiff = _mm256_sub_epi32(line[0], line[3]);
+    const __m256i innerDiff = _mm256_sub_epi32(line[1], line[2]);
+
+    coef[0] = _mm256_add_epi32(outerSum, innerSum);
+    coef[1] = _mm256_sub_epi32(outerSum, innerSum);
+    coef[2] = _mm256_sub_epi32(outerDiff, innerDiff);
+    coef[3] = _mm256_add_epi32(innerDiff, outerDiff);
+
+    if (pass == 0)
+    {
+      // regroup the first-pass output so that the second pass runs along the other dimension
+      const __m256i p0 = _mm256_permute4x64_epi64(coef[0], 0x50);
+      const __m256i p1 = _mm256_permute4x64_epi64(coef[1], 0x50);
+      const __m256i p2 = _mm256_permute4x64_epi64(coef[2], 0x50);
+      const __m256i p3 = _mm256_permute4x64_epi64(coef[3], 0x50);
+
+      const __m256i mix03 = _mm256_unpacklo_epi32(p0, p3);
+      const __m256i mix12 = _mm256_unpacklo_epi32(p1, p2);
+
+      line[0] = _mm256_unpacklo_epi64(mix03, mix12);
+      line[1] = _mm256_unpackhi_epi64(mix03, mix12);
+      // 0xee copies the upper 128 bits into the lower half
+      line[2] = _mm256_permute4x64_epi64(line[0], 0xee);
+      line[3] = _mm256_permute4x64_epi64(line[1], 0xee);
+    }
+  }
+
+  // magnitudes; lane 0 of the first vector is the DC coefficient
+  __m256i Sum = _mm256_abs_epi32(coef[0]);
+#if JVET_R0164_MEAN_SCALED_SATD
+  Distortion absDc = _mm_cvtsi128_si32(_mm256_castsi256_si128(Sum));
+#endif
+  for (int k = 1; k < 4; k++)
+  {
+    Sum = _mm256_add_epi32(Sum, _mm256_abs_epi32(coef[k]));
+  }
+
+  // fold the lower four lanes into lane 0
+  Sum = _mm256_hadd_epi32(Sum, Sum);
+  Sum = _mm256_hadd_epi32(Sum, Sum);
+
+  Distortion sad = _mm_cvtsi128_si32(_mm256_castsi256_si128(Sum));
+
+#if JVET_R0164_MEAN_SCALED_SATD
+  sad -= absDc;
+  sad += absDc >> 2;
+#endif
+  sad = ((sad + 1) >> 1);
+
+  return sad;
+}
+
+// Hadamard-based distortion (SATD) of an 8x8 block, AVX2 path.
+//   piOrg / piCur           : top-left samples of the two blocks; each row is consumed as eight 32-bit lanes
+//   iStrideOrg / iStrideCur : step between consecutive rows, in samples
+// Returns (sum of absolute coefficients + 2) >> 2. With JVET_R0164_MEAN_SCALED_SATD enabled, the
+// DC magnitude only contributes a quarter of its value to that sum.
+static Distortion xCalcHAD8x8_HBD_AVX2(const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur)
+{
+  __m256i a[8], b[8];
+
+  // residual, one vector per row
+  for (int row = 0; row < 8; row++)
+  {
+    b[row] = _mm256_sub_epi32(_mm256_lddqu_si256((const __m256i *) piOrg), _mm256_lddqu_si256((const __m256i *) piCur));
+    piOrg += iStrideOrg;
+    piCur += iStrideCur;
+  }
+
+  // vertical 8-point butterfly (partner distance 4, 2, 1)
+  for (int j = 0; j < 4; j++)
+  {
+    a[j]     = _mm256_add_epi32(b[j], b[j + 4]);
+    a[j + 4] = _mm256_sub_epi32(b[j], b[j + 4]);
+  }
+  for (int g = 0; g < 8; g += 4)
+  {
+    for (int j = g; j < g + 2; j++)
+    {
+      b[j]     = _mm256_add_epi32(a[j], a[j + 2]);
+      b[j + 2] = _mm256_sub_epi32(a[j], a[j + 2]);
+    }
+  }
+  for (int j = 0; j < 8; j += 2)
+  {
+    a[j]     = _mm256_add_epi32(b[j], b[j + 1]);
+    a[j + 1] = _mm256_sub_epi32(b[j], b[j + 1]);
+  }
+
+  // transpose: interleave 32-bit lanes, then 64-bit pairs, then recombine the 128-bit halves
+  for (int j = 0; j < 4; j++)
+  {
+    b[j]     = _mm256_unpacklo_epi32(a[2 * j], a[2 * j + 1]);
+    b[j + 4] = _mm256_unpackhi_epi32(a[2 * j], a[2 * j + 1]);
+  }
+  for (int j = 0; j < 4; j++)
+  {
+    a[j]     = _mm256_unpacklo_epi64(b[2 * j], b[2 * j + 1]);
+    a[j + 4] = _mm256_unpackhi_epi64(b[2 * j], b[2 * j + 1]);
+  }
+
+  // 0x20 joins the two lower halves, 0x31 joins the two upper halves
+  b[0] = _mm256_permute2x128_si256(a[0], a[1], 0x20);
+  b[4] = _mm256_permute2x128_si256(a[0], a[1], 0x31);
+  b[1] = _mm256_permute2x128_si256(a[4], a[5], 0x20);
+  b[5] = _mm256_permute2x128_si256(a[4], a[5], 0x31);
+  b[2] = _mm256_permute2x128_si256(a[2], a[3], 0x20);
+  b[6] = _mm256_permute2x128_si256(a[2], a[3], 0x31);
+  b[3] = _mm256_permute2x128_si256(a[6], a[7], 0x20);
+  b[7] = _mm256_permute2x128_si256(a[6], a[7], 0x31);
+
+  // horizontal 8-point butterfly; the last stage also takes the magnitude
+  for (int j = 0; j < 4; j++)
+  {
+    a[j]     = _mm256_add_epi32(b[j], b[j + 4]);
+    a[j + 4] = _mm256_sub_epi32(b[j], b[j + 4]);
+  }
+  for (int g = 0; g < 8; g += 4)
+  {
+    for (int j = g; j < g + 2; j++)
+    {
+      b[j]     = _mm256_add_epi32(a[j], a[j + 2]);
+      b[j + 2] = _mm256_sub_epi32(a[j], a[j + 2]);
+    }
+  }
+  for (int j = 0; j < 8; j += 2)
+  {
+    a[j]     = _mm256_abs_epi32(_mm256_add_epi32(b[j], b[j + 1]));
+    a[j + 1] = _mm256_abs_epi32(_mm256_sub_epi32(b[j], b[j + 1]));
+  }
+
+#if JVET_R0164_MEAN_SCALED_SATD
+  // lane 0 of the first vector is the DC coefficient
+  Distortion absDc = _mm_cvtsi128_si32(_mm256_castsi256_si128(a[0]));
+#endif
+
+  // 32-bit lane additions wrap, so the accumulation order does not affect the total
+  __m256i iSum = a[0];
+  for (int j = 1; j < 8; j++)
+  {
+    iSum = _mm256_add_epi32(iSum, a[j]);
+  }
+
+  // reduce each 128-bit half to its lane 0, then add the two halves as scalars
+  iSum = _mm256_hadd_epi32(iSum, iSum);
+  iSum = _mm256_hadd_epi32(iSum, iSum);
+
+  Distortion sad = _mm_cvtsi128_si32(_mm256_castsi256_si128(iSum));
+  sad += _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute4x64_epi64(iSum, 0xee)));
+#if JVET_R0164_MEAN_SCALED_SATD
+  sad -= absDc;
+  sad += absDc >> 2;
+#endif
+  sad = ((sad + 2) >> 2);
+
+  return sad;
+}
+
+// Hadamard-based distortion (SATD) of a block 4 samples wide and 8 rows tall, AVX2 path.
+//   piOrg / piCur           : top-left samples of the two blocks; each row is consumed as four 32-bit lanes
+//   iStrideOrg / iStrideCur : step between consecutive rows, in samples
+// Returns the sum of absolute transform coefficients scaled by 2 / sqrt(4 * 8). With
+// JVET_R0164_MEAN_SCALED_SATD enabled, the DC magnitude only contributes a quarter of its value.
+static Distortion xCalcHAD4x8_HBD_AVX2(const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur)
+{
+  __m256i a[8], b[8];
+
+  // Residual rows. Only the lower 128 bits of each vector are loaded; the 0x50 permute at the end
+  // of the vertical stage reads exclusively from that lower half.
+  for (int row = 0; row < 8; row++)
+  {
+    const __m256i org = _mm256_castsi128_si256(_mm_lddqu_si128((const __m128i*)piOrg));
+    const __m256i cur = _mm256_castsi128_si256(_mm_lddqu_si128((const __m128i*)piCur));
+    b[row] = _mm256_sub_epi32(org, cur);
+    piOrg += iStrideOrg;
+    piCur += iStrideCur;
+  }
+
+  // vertical 8-point butterfly (partner distance 4, 2, 1)
+  for (int j = 0; j < 4; j++)
+  {
+    a[j]     = _mm256_add_epi32(b[j], b[j + 4]);
+    a[j + 4] = _mm256_sub_epi32(b[j], b[j + 4]);
+  }
+  for (int g = 0; g < 8; g += 4)
+  {
+    for (int j = g; j < g + 2; j++)
+    {
+      b[j]     = _mm256_add_epi32(a[j], a[j + 2]);
+      b[j + 2] = _mm256_sub_epi32(a[j], a[j + 2]);
+    }
+  }
+  // the last stage also spreads the four valid lanes: lanes 0,1 repeat in the lower half, lanes 2,3 in the upper half
+  for (int j = 0; j < 8; j += 2)
+  {
+    a[j]     = _mm256_permute4x64_epi64(_mm256_add_epi32(b[j], b[j + 1]), 0x50);
+    a[j + 1] = _mm256_permute4x64_epi64(_mm256_sub_epi32(b[j], b[j + 1]), 0x50);
+  }
+
+  // transpose
+  for (int j = 0; j < 4; j++)
+  {
+    b[j] = _mm256_unpacklo_epi32(a[2 * j], a[2 * j + 1]);
+  }
+
+  a[0] = _mm256_unpacklo_epi64(b[0], b[1]);
+  a[1] = _mm256_unpackhi_epi64(b[0], b[1]);
+  a[2] = _mm256_unpacklo_epi64(b[2], b[3]);
+  a[3] = _mm256_unpackhi_epi64(b[2], b[3]);
+
+  // gather the four columns, eight vertical coefficients per vector
+  const __m256i col0 = _mm256_inserti128_si256(a[0], _mm256_castsi256_si128(a[2]), 1);
+  const __m256i col1 = _mm256_inserti128_si256(a[1], _mm256_castsi256_si128(a[3]), 1);
+  const __m256i col2 = _mm256_inserti128_si256(a[2], _mm256_castsi256_si128(_mm256_permute4x64_epi64(a[0], 0xee)), 0);
+  const __m256i col3 = _mm256_inserti128_si256(a[3], _mm256_castsi256_si128(_mm256_permute4x64_epi64(a[1], 0xee)), 0);
+
+  // horizontal 4-point butterfly; the last stage also takes the magnitude
+  const __m256i sum02  = _mm256_add_epi32(col0, col2);
+  const __m256i sum13  = _mm256_add_epi32(col1, col3);
+  const __m256i diff02 = _mm256_sub_epi32(col0, col2);
+  const __m256i diff13 = _mm256_sub_epi32(col1, col3);
+
+  const __m256i mag0 = _mm256_abs_epi32(_mm256_add_epi32(sum02, sum13));
+  const __m256i mag1 = _mm256_abs_epi32(_mm256_sub_epi32(sum02, sum13));
+  const __m256i mag2 = _mm256_abs_epi32(_mm256_add_epi32(diff02, diff13));
+  const __m256i mag3 = _mm256_abs_epi32(_mm256_sub_epi32(diff02, diff13));
+#if JVET_R0164_MEAN_SCALED_SATD
+  // lane 0 of the first vector is the DC coefficient
+  Distortion absDc = _mm_cvtsi128_si32(_mm256_castsi256_si128(mag0));
+#endif
+
+  __m256i iSum = _mm256_add_epi32(_mm256_add_epi32(mag0, mag1), _mm256_add_epi32(mag2, mag3));
+
+  // reduce each 128-bit half to its lane 0, then add the two halves as scalars
+  iSum = _mm256_hadd_epi32(iSum, iSum);
+  iSum = _mm256_hadd_epi32(iSum, iSum);
+
+  Distortion sad = _mm_cvtsi128_si32(_mm256_castsi256_si128(iSum));
+  sad += _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute4x64_epi64(iSum, 0xee)));
+
+#if JVET_R0164_MEAN_SCALED_SATD
+  sad -= absDc;
+  sad += absDc >> 2;
+#endif
+  sad = (Distortion)(sad / sqrt(4.0 * 8) * 2);
+
+  return sad;
+}
+
+// Hadamard-based distortion (SATD) of a block 8 samples wide and 4 rows tall, AVX2 path.
+//   piOrg / piCur           : top-left samples of the two blocks; each row is consumed as eight 32-bit lanes
+//   iStrideOrg / iStrideCur : step between consecutive rows, in samples
+// Returns the sum of absolute transform coefficients scaled by 2 / sqrt(4 * 8). With
+// JVET_R0164_MEAN_SCALED_SATD enabled, the DC magnitude only contributes a quarter of its value.
+static Distortion xCalcHAD8x4_HBD_AVX2(const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur)
+{
+  __m256i a[8], b[8];
+
+  // residual, one vector per row
+  for (int row = 0; row < 4; row++)
+  {
+    a[row] = _mm256_sub_epi32(_mm256_lddqu_si256((const __m256i*) piOrg), _mm256_lddqu_si256((const __m256i*) piCur));
+    piOrg += iStrideOrg;
+    piCur += iStrideCur;
+  }
+
+  // vertical 4-point butterfly (partner distance 2, then 1)
+  for (int j = 0; j < 2; j++)
+  {
+    b[j]     = _mm256_add_epi32(a[j], a[j + 2]);
+    b[j + 2] = _mm256_sub_epi32(a[j], a[j + 2]);
+  }
+  for (int j = 0; j < 4; j += 2)
+  {
+    a[j]     = _mm256_add_epi32(b[j], b[j + 1]);
+    a[j + 1] = _mm256_sub_epi32(b[j], b[j + 1]);
+  }
+
+  // transpose within each 128-bit half
+  const __m256i lo01 = _mm256_unpacklo_epi32(a[0], a[1]);
+  const __m256i lo23 = _mm256_unpacklo_epi32(a[2], a[3]);
+  const __m256i hi01 = _mm256_unpackhi_epi32(a[0], a[1]);
+  const __m256i hi23 = _mm256_unpackhi_epi32(a[2], a[3]);
+
+  b[0] = _mm256_unpacklo_epi64(lo01, lo23);
+  b[1] = _mm256_unpackhi_epi64(lo01, lo23);
+  b[2] = _mm256_unpacklo_epi64(hi01, hi23);
+  b[3] = _mm256_unpackhi_epi64(hi01, hi23);
+
+  // 0xee copies the upper 128 bits into the lower half, giving the second set of four inputs
+  for (int j = 0; j < 4; j++)
+  {
+    b[j + 4] = _mm256_permute4x64_epi64(b[j], 0xee);
+  }
+
+  // horizontal 8-point butterfly; the last stage also takes the magnitude
+  for (int j = 0; j < 4; j++)
+  {
+    a[j]     = _mm256_add_epi32(b[j], b[j + 4]);
+    a[j + 4] = _mm256_sub_epi32(b[j], b[j + 4]);
+  }
+  for (int g = 0; g < 8; g += 4)
+  {
+    for (int j = g; j < g + 2; j++)
+    {
+      b[j]     = _mm256_add_epi32(a[j], a[j + 2]);
+      b[j + 2] = _mm256_sub_epi32(a[j], a[j + 2]);
+    }
+  }
+  for (int j = 0; j < 8; j += 2)
+  {
+    a[j]     = _mm256_abs_epi32(_mm256_add_epi32(b[j], b[j + 1]));
+    a[j + 1] = _mm256_abs_epi32(_mm256_sub_epi32(b[j], b[j + 1]));
+  }
+
+#if JVET_R0164_MEAN_SCALED_SATD
+  // lane 0 of the first vector is the DC coefficient
+  Distortion absDc = _mm_cvtsi128_si32(_mm256_castsi256_si128(a[0]));
+#endif
+
+  // 32-bit lane additions wrap, so the accumulation order does not affect the total
+  __m256i iSum = a[0];
+  for (int j = 1; j < 8; j++)
+  {
+    iSum = _mm256_add_epi32(iSum, a[j]);
+  }
+
+  // fold the lower 128-bit half into lane 0; the result is read from that half only
+  iSum = _mm256_hadd_epi32(iSum, iSum);
+  iSum = _mm256_hadd_epi32(iSum, iSum);
+
+  Distortion sad = _mm_cvtsi128_si32(_mm256_castsi256_si128(iSum));
+#if JVET_R0164_MEAN_SCALED_SATD
+  sad -= absDc;
+  sad += absDc >> 2;
+#endif
+  sad = (Distortion)(sad / sqrt(4.0 * 8) * 2);
+  return sad;
+}
+
+// Hadamard-based distortion (SATD) of a block 16 samples wide and 8 rows tall, AVX2 path.
+//   piOrg / piCur           : top-left samples of the two blocks; each row is consumed as sixteen 32-bit lanes
+//   iStrideOrg / iStrideCur : step between consecutive rows, in samples
+// Returns the sum of absolute transform coefficients scaled by 2 / sqrt(16 * 8). With
+// JVET_R0164_MEAN_SCALED_SATD enabled, the DC magnitude only contributes a quarter of its value.
+static Distortion xCalcHAD16x8_HBD_AVX2(const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur)
+{
+  __m256i m1[16], m2[16];
+
+  // residual: entries 0..7 hold samples 0..7 of each row, entries 8..15 hold samples 8..15
+  for (int k = 0; k < 8; k++)
+  {
+    m1[k]     = _mm256_sub_epi32(_mm256_lddqu_si256((const __m256i*) piOrg), _mm256_lddqu_si256((const __m256i*) piCur));
+    m1[k + 8] = _mm256_sub_epi32(_mm256_lddqu_si256((const __m256i*)(piOrg + 8)), _mm256_lddqu_si256((const __m256i*)(piCur + 8)));
+    piCur += iStrideCur;
+    piOrg += iStrideOrg;
+  }
+
+  // immediates for _mm256_permute2x128_si256: join the two lower / the two upper 128-bit halves
+  constexpr int perm_unpacklo_epi128 = (0 << 0) + (2 << 4);
+  constexpr int perm_unpackhi_epi128 = (1 << 0) + (3 << 4);
+
+  // each 8x8 half (h = 0: left, h = 8: right) gets a vertical butterfly followed by a transpose
+  for (int h = 0; h < 16; h += 8)
+  {
+    __m256i *a = &m1[h];
+    __m256i *b = &m2[h];
+
+    // vertical 8-point butterfly (partner distance 4, 2, 1)
+    for (int j = 0; j < 4; j++)
+    {
+      b[j]     = _mm256_add_epi32(a[j], a[j + 4]);
+      b[j + 4] = _mm256_sub_epi32(a[j], a[j + 4]);
+    }
+    for (int g = 0; g < 8; g += 4)
+    {
+      for (int j = g; j < g + 2; j++)
+      {
+        a[j]     = _mm256_add_epi32(b[j], b[j + 2]);
+        a[j + 2] = _mm256_sub_epi32(b[j], b[j + 2]);
+      }
+    }
+    for (int j = 0; j < 8; j += 2)
+    {
+      b[j]     = _mm256_add_epi32(a[j], a[j + 1]);
+      b[j + 1] = _mm256_sub_epi32(a[j], a[j + 1]);
+    }
+
+    // transpose: interleave 32-bit lanes, then 64-bit pairs, then recombine the 128-bit halves
+    for (int j = 0; j < 4; j++)
+    {
+      a[j]     = _mm256_unpacklo_epi32(b[2 * j], b[2 * j + 1]);
+      a[j + 4] = _mm256_unpackhi_epi32(b[2 * j], b[2 * j + 1]);
+    }
+    for (int j = 0; j < 8; j += 2)
+    {
+      b[j]     = _mm256_unpacklo_epi64(a[j], a[j + 1]);
+      b[j + 1] = _mm256_unpackhi_epi64(a[j], a[j + 1]);
+    }
+    for (int q = 0; q < 8; q += 4)
+    {
+      for (int j = 0; j < 2; j++)
+      {
+        a[q + 2 * j]     = _mm256_permute2x128_si256(b[q + j], b[q + j + 2], perm_unpacklo_epi128);
+        a[q + 2 * j + 1] = _mm256_permute2x128_si256(b[q + j], b[q + j + 2], perm_unpackhi_epi128);
+      }
+    }
+  }
+
+  // horizontal 16-point butterfly across both halves (partner distance 8, 4, 2, 1)
+  for (int j = 0; j < 8; j++)
+  {
+    m2[j]     = _mm256_add_epi32(m1[j], m1[j + 8]);
+    m2[j + 8] = _mm256_sub_epi32(m1[j], m1[j + 8]);
+  }
+  for (int g = 0; g < 16; g += 8)
+  {
+    for (int j = g; j < g + 4; j++)
+    {
+      m1[j]     = _mm256_add_epi32(m2[j], m2[j + 4]);
+      m1[j + 4] = _mm256_sub_epi32(m2[j], m2[j + 4]);
+    }
+  }
+  for (int g = 0; g < 16; g += 4)
+  {
+    for (int j = g; j < g + 2; j++)
+    {
+      m2[j]     = _mm256_add_epi32(m1[j], m1[j + 2]);
+      m2[j + 2] = _mm256_sub_epi32(m1[j], m1[j + 2]);
+    }
+  }
+  // the last stage also takes the magnitude of every coefficient
+  for (int j = 0; j < 16; j += 2)
+  {
+    m1[j]     = _mm256_abs_epi32(_mm256_add_epi32(m2[j], m2[j + 1]));
+    m1[j + 1] = _mm256_abs_epi32(_mm256_sub_epi32(m2[j], m2[j + 1]));
+  }
+
+#if JVET_R0164_MEAN_SCALED_SATD
+  // lane 0 of the first vector is the DC coefficient
+  Distortion absDc = _mm_cvtsi128_si32(_mm256_castsi256_si128(m1[0]));
+#endif
+
+  // sum up; 32-bit lane additions wrap, so the accumulation order does not affect the total
+  __m256i iSum = m1[0];
+  for (int j = 1; j < 16; j++)
+  {
+    iSum = _mm256_add_epi32(iSum, m1[j]);
+  }
+
+  // reduce each 128-bit half to its lane 0, then add the upper half onto the lower one
+  iSum = _mm256_hadd_epi32(iSum, iSum);
+  iSum = _mm256_hadd_epi32(iSum, iSum);
+  iSum = _mm256_add_epi32(iSum, _mm256_permute2x128_si256(iSum, iSum, 0x11));
+
+  Distortion sad = _mm_cvtsi128_si32(_mm256_castsi256_si128(iSum));
+#if JVET_R0164_MEAN_SCALED_SATD
+  sad -= absDc;
+  sad += absDc >> 2;
+#endif
+  // convert through Distortion, as the other HBD kernels in this file do, rather than through a
+  // fixed 32-bit type
+  sad = (Distortion)(sad / sqrt(16.0 * 8) * 2);
+
+  return sad;
+}
+
+static Distortion xCalcHAD8x16_HBD_AVX2(const Pel* piOrg, const Pel* piCur, const int iStrideOrg, const int iStrideCur)
+{   // 8-wide x 16-high Hadamard kernel on 32-bit lanes: vertical pass, transpose, horizontal pass, sum of magnitudes
+  __m256i m1[16], m2[16];   // ping-pong buffers: the butterfly and transpose stages alternate between them
+
+  for (int k = 0; k < 16; k++)   // 16 rows of residual (org - cur); each row is one unaligned 256-bit load handled as eight 32-bit lanes
+  {
+    m1[k] = _mm256_sub_epi32(_mm256_lddqu_si256((__m256i*)piOrg), _mm256_lddqu_si256((__m256i*)piCur));
+    piCur += iStrideCur;
+    piOrg += iStrideOrg;
+  }
+
+  // vertical: four add/sub butterfly stages across the 16 row vectors (partner distance 8, 4, 2, 1)
+  m2[0] = _mm256_add_epi32(m1[0], m1[8]);
+  m2[1] = _mm256_add_epi32(m1[1], m1[9]);
+  m2[2] = _mm256_add_epi32(m1[2], m1[10]);
+  m2[3] = _mm256_add_epi32(m1[3], m1[11]);
+  m2[4] = _mm256_add_epi32(m1[4], m1[12]);
+  m2[5] = _mm256_add_epi32(m1[5], m1[13]);
+  m2[6] = _mm256_add_epi32(m1[6], m1[14]);
+  m2[7] = _mm256_add_epi32(m1[7], m1[15]);
+  m2[8] = _mm256_sub_epi32(m1[0], m1[8]);
+  m2[9] = _mm256_sub_epi32(m1[1], m1[9]);
+  m2[10] = _mm256_sub_epi32(m1[2], m1[10]);
+  m2[11] = _mm256_sub_epi32(m1[3], m1[11]);
+  m2[12] = _mm256_sub_epi32(m1[4], m1[12]);
+  m2[13] = _mm256_sub_epi32(m1[5], m1[13]);
+  m2[14] = _mm256_sub_epi32(m1[6], m1[14]);
+  m2[15] = _mm256_sub_epi32(m1[7], m1[15]);
+
+  m1[0] = _mm256_add_epi32(m2[0], m2[4]);
+  m1[1] = _mm256_add_epi32(m2[1], m2[5]);
+  m1[2] = _mm256_add_epi32(m2[2], m2[6]);
+  m1[3] = _mm256_add_epi32(m2[3], m2[7]);
+  m1[4] = _mm256_sub_epi32(m2[0], m2[4]);
+  m1[5] = _mm256_sub_epi32(m2[1], m2[5]);
+  m1[6] = _mm256_sub_epi32(m2[2], m2[6]);
+  m1[7] = _mm256_sub_epi32(m2[3], m2[7]);
+  m1[8] = _mm256_add_epi32(m2[8], m2[12]);
+  m1[9] = _mm256_add_epi32(m2[9], m2[13]);
+  m1[10] = _mm256_add_epi32(m2[10], m2[14]);
+  m1[11] = _mm256_add_epi32(m2[11], m2[15]);
+  m1[12] = _mm256_sub_epi32(m2[8], m2[12]);
+  m1[13] = _mm256_sub_epi32(m2[9], m2[13]);
+  m1[14] = _mm256_sub_epi32(m2[10], m2[14]);
+  m1[15] = _mm256_sub_epi32(m2[11], m2[15]);
+
+  m2[0] = _mm256_add_epi32(m1[0], m1[2]);
+  m2[1] = _mm256_add_epi32(m1[1], m1[3]);
+  m2[2] = _mm256_sub_epi32(m1[0], m1[2]);
+  m2[3] = _mm256_sub_epi32(m1[1], m1[3]);
+  m2[4] = _mm256_add_epi32(m1[4], m1[6]);
+  m2[5] = _mm256_add_epi32(m1[5], m1[7]);
+  m2[6] = _mm256_sub_epi32(m1[4], m1[6]);
+  m2[7] = _mm256_sub_epi32(m1[5], m1[7]);
+  m2[8] = _mm256_add_epi32(m1[8], m1[10]);
+  m2[9] = _mm256_add_epi32(m1[9], m1[11]);
+  m2[10] = _mm256_sub_epi32(m1[8], m1[10]);
+  m2[11] = _mm256_sub_epi32(m1[9], m1[11]);
+  m2[12] = _mm256_add_epi32(m1[12], m1[14]);
+  m2[13] = _mm256_add_epi32(m1[13], m1[15]);
+  m2[14] = _mm256_sub_epi32(m1[12], m1[14]);
+  m2[15] = _mm256_sub_epi32(m1[13], m1[15]);
+
+  m1[0] = _mm256_add_epi32(m2[0], m2[1]);
+  m1[1] = _mm256_sub_epi32(m2[0], m2[1]);
+  m1[2] = _mm256_add_epi32(m2[2], m2[3]);
+  m1[3] = _mm256_sub_epi32(m2[2], m2[3]);
+  m1[4] = _mm256_add_epi32(m2[4], m2[5]);
+  m1[5] = _mm256_sub_epi32(m2[4], m2[5]);
+  m1[6] = _mm256_add_epi32(m2[6], m2[7]);
+  m1[7] = _mm256_sub_epi32(m2[6], m2[7]);
+  m1[8] = _mm256_add_epi32(m2[8], m2[9]);
+  m1[9] = _mm256_sub_epi32(m2[8], m2[9]);
+  m1[10] = _mm256_add_epi32(m2[10], m2[11]);
+  m1[11] = _mm256_sub_epi32(m2[10], m2[11]);
+  m1[12] = _mm256_add_epi32(m2[12], m2[13]);
+  m1[13] = _mm256_sub_epi32(m2[12], m2[13]);
+  m1[14] = _mm256_add_epi32(m2[14], m2[15]);
+  m1[15] = _mm256_sub_epi32(m2[14], m2[15]);
+
+  // transpose: each group of 8 row vectors is transposed as an 8x8 block of 32-bit lanes
+  constexpr int perm_unpacklo_epi128 = (0 << 0) + (2 << 4);   // 0x20: { low 128 bits of a, low 128 bits of b }
+  constexpr int perm_unpackhi_epi128 = (1 << 0) + (3 << 4);   // 0x31: { high 128 bits of a, high 128 bits of b }
+
+  // 1. 8x8: m1[0..7] -> m2[0..7]
+  m2[0] = _mm256_unpacklo_epi32(m1[0], m1[1]);
+  m2[1] = _mm256_unpacklo_epi32(m1[2], m1[3]);
+  m2[2] = _mm256_unpacklo_epi32(m1[4], m1[5]);
+  m2[3] = _mm256_unpacklo_epi32(m1[6], m1[7]);
+  m2[4] = _mm256_unpackhi_epi32(m1[0], m1[1]);
+  m2[5] = _mm256_unpackhi_epi32(m1[2], m1[3]);
+  m2[6] = _mm256_unpackhi_epi32(m1[4], m1[5]);
+  m2[7] = _mm256_unpackhi_epi32(m1[6], m1[7]);
+
+  m1[0] = _mm256_unpacklo_epi64(m2[0], m2[1]);
+  m1[1] = _mm256_unpackhi_epi64(m2[0], m2[1]);
+  m1[2] = _mm256_unpacklo_epi64(m2[2], m2[3]);
+  m1[3] = _mm256_unpackhi_epi64(m2[2], m2[3]);
+  m1[4] = _mm256_unpacklo_epi64(m2[4], m2[5]);
+  m1[5] = _mm256_unpackhi_epi64(m2[4], m2[5]);
+  m1[6] = _mm256_unpacklo_epi64(m2[6], m2[7]);
+  m1[7] = _mm256_unpackhi_epi64(m2[6], m2[7]);
+
+  m2[0] = _mm256_permute2x128_si256(m1[0], m1[2], perm_unpacklo_epi128);
+  m2[1] = _mm256_permute2x128_si256(m1[0], m1[2], perm_unpackhi_epi128);
+  m2[2] = _mm256_permute2x128_si256(m1[1], m1[3], perm_unpacklo_epi128);
+  m2[3] = _mm256_permute2x128_si256(m1[1], m1[3], perm_unpackhi_epi128);
+  m2[4] = _mm256_permute2x128_si256(m1[4], m1[6], perm_unpacklo_epi128);
+  m2[5] = _mm256_permute2x128_si256(m1[4], m1[6], perm_unpackhi_epi128);
+  m2[6] = _mm256_permute2x128_si256(m1[5], m1[7], perm_unpacklo_epi128);
+  m2[7] = _mm256_permute2x128_si256(m1[5], m1[7], perm_unpackhi_epi128);
+
+  // 2. 8x8: m1[8..15] -> m2[8..15]
+  m2[0 + 8] = _mm256_unpacklo_epi32(m1[0 + 8], m1[1 + 8]);
+  m2[1 + 8] = _mm256_unpacklo_epi32(m1[2 + 8], m1[3 + 8]);
+  m2[2 + 8] = _mm256_unpacklo_epi32(m1[4 + 8], m1[5 + 8]);
+  m2[3 + 8] = _mm256_unpacklo_epi32(m1[6 + 8], m1[7 + 8]);
+  m2[4 + 8] = _mm256_unpackhi_epi32(m1[0 + 8], m1[1 + 8]);
+  m2[5 + 8] = _mm256_unpackhi_epi32(m1[2 + 8], m1[3 + 8]);
+  m2[6 + 8] = _mm256_unpackhi_epi32(m1[4 + 8], m1[5 + 8]);
+  m2[7 + 8] = _mm256_unpackhi_epi32(m1[6 + 8], m1[7 + 8]);
+
+  m1[0 + 8] = _mm256_unpacklo_epi64(m2[0 + 8], m2[1 + 8]);
+  m1[1 + 8] = _mm256_unpackhi_epi64(m2[0 + 8], m2[1 + 8]);
+  m1[2 + 8] = _mm256_unpacklo_epi64(m2[2 + 8], m2[3 + 8]);
+  m1[3 + 8] = _mm256_unpackhi_epi64(m2[2 + 8], m2[3 + 8]);
+  m1[4 + 8] = _mm256_unpacklo_epi64(m2[4 + 8], m2[5 + 8]);
+  m1[5 + 8] = _mm256_unpackhi_epi64(m2[4 + 8], m2[5 + 8]);
+  m1[6 + 8] = _mm256_unpacklo_epi64(m2[6 + 8], m2[7 + 8]);
+  m1[7 + 8] = _mm256_unpackhi_epi64(m2[6 + 8], m2[7 + 8]);
+
+  m2[0 + 8] = _mm256_permute2x128_si256(m1[0 + 8], m1[2 + 8], perm_unpacklo_epi128);
+  m2[1 + 8] = _mm256_permute2x128_si256(m1[0 + 8], m1[2 + 8], perm_unpackhi_epi128);
+  m2[2 + 8] = _mm256_permute2x128_si256(m1[1 + 8], m1[3 + 8], perm_unpacklo_epi128);
+  m2[3 + 8] = _mm256_permute2x128_si256(m1[1 + 8], m1[3 + 8], perm_unpackhi_epi128);
+  m2[4 + 8] = _mm256_permute2x128_si256(m1[4 + 8], m1[6 + 8], perm_unpacklo_epi128);
+  m2[5 + 8] = _mm256_permute2x128_si256(m1[4 + 8], m1[6 + 8], perm_unpackhi_epi128);
+  m2[6 + 8] = _mm256_permute2x128_si256(m1[5 + 8], m1[7 + 8], perm_unpacklo_epi128);
+  m2[7 + 8] = _mm256_permute2x128_si256(m1[5 + 8], m1[7 + 8], perm_unpackhi_epi128);
+
+  // horizontal: three add/sub butterfly stages on m2[0..7]; the last one also takes absolute values
+  m1[0] = _mm256_add_epi32(m2[0], m2[4]);
+  m1[1] = _mm256_add_epi32(m2[1], m2[5]);
+  m1[2] = _mm256_add_epi32(m2[2], m2[6]);
+  m1[3] = _mm256_add_epi32(m2[3], m2[7]);
+  m1[4] = _mm256_sub_epi32(m2[0], m2[4]);
+  m1[5] = _mm256_sub_epi32(m2[1], m2[5]);
+  m1[6] = _mm256_sub_epi32(m2[2], m2[6]);
+  m1[7] = _mm256_sub_epi32(m2[3], m2[7]);
+
+  m2[0] = _mm256_add_epi32(m1[0], m1[2]);
+  m2[1] = _mm256_add_epi32(m1[1], m1[3]);
+  m2[2] = _mm256_sub_epi32(m1[0], m1[2]);
+  m2[3] = _mm256_sub_epi32(m1[1], m1[3]);
+  m2[4] = _mm256_add_epi32(m1[4], m1[6]);
+  m2[5] = _mm256_add_epi32(m1[5], m1[7]);
+  m2[6] = _mm256_sub_epi32(m1[4], m1[6]);
+  m2[7] = _mm256_sub_epi32(m1[5], m1[7]);
+
+  m1[0] = _mm256_abs_epi32(_mm256_add_epi32(m2[0], m2[1]));
+  m1[1] = _mm256_abs_epi32(_mm256_sub_epi32(m2[0], m2[1]));
+  m1[2] = _mm256_abs_epi32(_mm256_add_epi32(m2[2], m2[3]));
+  m1[3] = _mm256_abs_epi32(_mm256_sub_epi32(m2[2], m2[3]));
+  m1[4] = _mm256_abs_epi32(_mm256_add_epi32(m2[4], m2[5]));
+  m1[5] = _mm256_abs_epi32(_mm256_sub_epi32(m2[4], m2[5]));
+  m1[6] = _mm256_abs_epi32(_mm256_add_epi32(m2[6], m2[7]));
+  m1[7] = _mm256_abs_epi32(_mm256_sub_epi32(m2[6], m2[7]));
+
+#if JVET_R0164_MEAN_SCALED_SATD
+  int absDc = _mm_cvtsi128_si32(_mm256_castsi256_si128(m1[0]));   // lane 0 of m1[0]: magnitude of the all-additions (DC) term
+#endif
+
+  m1[0 + 8] = _mm256_add_epi32(m2[0 + 8], m2[4 + 8]);   // same three stages for the second group, m2[8..15]
+  m1[1 + 8] = _mm256_add_epi32(m2[1 + 8], m2[5 + 8]);
+  m1[2 + 8] = _mm256_add_epi32(m2[2 + 8], m2[6 + 8]);
+  m1[3 + 8] = _mm256_add_epi32(m2[3 + 8], m2[7 + 8]);
+  m1[4 + 8] = _mm256_sub_epi32(m2[0 + 8], m2[4 + 8]);
+  m1[5 + 8] = _mm256_sub_epi32(m2[1 + 8], m2[5 + 8]);
+  m1[6 + 8] = _mm256_sub_epi32(m2[2 + 8], m2[6 + 8]);
+  m1[7 + 8] = _mm256_sub_epi32(m2[3 + 8], m2[7 + 8]);
+
+  m2[0 + 8] = _mm256_add_epi32(m1[0 + 8], m1[2 + 8]);
+  m2[1 + 8] = _mm256_add_epi32(m1[1 + 8], m1[3 + 8]);
+  m2[2 + 8] = _mm256_sub_epi32(m1[0 + 8], m1[2 + 8]);
+  m2[3 + 8] = _mm256_sub_epi32(m1[1 + 8], m1[3 + 8]);
+  m2[4 + 8] = _mm256_add_epi32(m1[4 + 8], m1[6 + 8]);
+  m2[5 + 8] = _mm256_add_epi32(m1[5 + 8], m1[7 + 8]);
+  m2[6 + 8] = _mm256_sub_epi32(m1[4 + 8], m1[6 + 8]);
+  m2[7 + 8] = _mm256_sub_epi32(m1[5 + 8], m1[7 + 8]);
+
+  m1[0 + 8] = _mm256_abs_epi32(_mm256_add_epi32(m2[0 + 8], m2[1 + 8]));
+  m1[1 + 8] = _mm256_abs_epi32(_mm256_sub_epi32(m2[0 + 8], m2[1 + 8]));
+  m1[2 + 8] = _mm256_abs_epi32(_mm256_add_epi32(m2[2 + 8], m2[3 + 8]));
+  m1[3 + 8] = _mm256_abs_epi32(_mm256_sub_epi32(m2[2 + 8], m2[3 + 8]));
+  m1[4 + 8] = _mm256_abs_epi32(_mm256_add_epi32(m2[4 + 8], m2[5 + 8]));
+  m1[5 + 8] = _mm256_abs_epi32(_mm256_sub_epi32(m2[4 + 8], m2[5 + 8]));
+  m1[6 + 8] = _mm256_abs_epi32(_mm256_add_epi32(m2[6 + 8], m2[7 + 8]));
+  m1[7 + 8] = _mm256_abs_epi32(_mm256_sub_epi32(m2[6 + 8], m2[7 + 8]));
+
+  // sum up: the 16 vectors of absolute values are added pairwise into 8
+  m1[0] = _mm256_add_epi32(m1[0], m1[1]);
+  m1[1] = _mm256_add_epi32(m1[2], m1[3]);
+  m1[2] = _mm256_add_epi32(m1[4], m1[5]);
+  m1[3] = _mm256_add_epi32(m1[6], m1[7]);
+  m1[4] = _mm256_add_epi32(m1[8], m1[9]);
+  m1[5] = _mm256_add_epi32(m1[10], m1[11]);
+  m1[6] = _mm256_add_epi32(m1[12], m1[13]);
+  m1[7] = _mm256_add_epi32(m1[14], m1[15]);
+
+  // sum up: 8 -> 4 -> 2 vectors, then a single vector (iSum)
+  m1[0] = _mm256_add_epi32(m1[0], m1[1]);
+  m1[1] = _mm256_add_epi32(m1[2], m1[3]);
+  m1[2] = _mm256_add_epi32(m1[4], m1[5]);
+  m1[3] = _mm256_add_epi32(m1[6], m1[7]);
+
+  m1[0] = _mm256_add_epi32(m1[0], m1[1]);
+  m1[1] = _mm256_add_epi32(m1[2], m1[3]);
+
+  __m256i iSum = _mm256_add_epi32(m1[0], m1[1]);
+  iSum = _mm256_hadd_epi32(iSum, iSum);   // two hadds leave the sum of each 128-bit half in all of its lanes
+  iSum = _mm256_hadd_epi32(iSum, iSum);
+  iSum = _mm256_add_epi32(iSum, _mm256_permute2x128_si256(iSum, iSum, 0x11));   // 0x11 copies the upper half into both halves, so lane 0 becomes the full total
+
+  Distortion sad2 = _mm_cvtsi128_si32(_mm256_castsi256_si128(iSum));
+#if JVET_R0164_MEAN_SCALED_SATD
+  sad2 -= absDc;   // together with the next line: the DC term contributes absDc >> 2 instead of absDc
+  sad2 += absDc >> 2;
+#endif
+  Distortion sad = (uint32_t)(sad2 / sqrt(16.0 * 8) * 2);   // scale by 2 / sqrt(16 * 8), truncated to uint32_t
 
+  return (sad);
+}
+#endif
+#else
 static uint32_t xCalcHAD4x4_SSE( const Torg *piOrg, const Tcur *piCur, const int iStrideOrg, const int iStrideCur )
 {
   __m128i r0 = ( sizeof( Torg ) > 1 ) ? ( _mm_loadl_epi64( ( const __m128i* )&piOrg[0] ) ) : ( _mm_unpacklo_epi8( _mm_cvtsi32_si128( *(const int*)&piOrg[0] ), _mm_setzero_si128() ) );
@@ -1995,162 +3702,628 @@ static uint32_t xCalcHAD8x16_AVX2( const Pel* piOrg, const Pel* piCur, const int
     int absDc = _mm_cvtsi128_si32( _mm256_castsi256_si128( m1[0] ) );
 #endif
 
-    m1[0 + 8] = _mm256_add_epi32( m2[0 + 8], m2[4 + 8] );
-    m1[1 + 8] = _mm256_add_epi32( m2[1 + 8], m2[5 + 8] );
-    m1[2 + 8] = _mm256_add_epi32( m2[2 + 8], m2[6 + 8] );
-    m1[3 + 8] = _mm256_add_epi32( m2[3 + 8], m2[7 + 8] );
-    m1[4 + 8] = _mm256_sub_epi32( m2[0 + 8], m2[4 + 8] );
-    m1[5 + 8] = _mm256_sub_epi32( m2[1 + 8], m2[5 + 8] );
-    m1[6 + 8] = _mm256_sub_epi32( m2[2 + 8], m2[6 + 8] );
-    m1[7 + 8] = _mm256_sub_epi32( m2[3 + 8], m2[7 + 8] );
-
-    m2[0 + 8] = _mm256_add_epi32( m1[0 + 8], m1[2 + 8] );
-    m2[1 + 8] = _mm256_add_epi32( m1[1 + 8], m1[3 + 8] );
-    m2[2 + 8] = _mm256_sub_epi32( m1[0 + 8], m1[2 + 8] );
-    m2[3 + 8] = _mm256_sub_epi32( m1[1 + 8], m1[3 + 8] );
-    m2[4 + 8] = _mm256_add_epi32( m1[4 + 8], m1[6 + 8] );
-    m2[5 + 8] = _mm256_add_epi32( m1[5 + 8], m1[7 + 8] );
-    m2[6 + 8] = _mm256_sub_epi32( m1[4 + 8], m1[6 + 8] );
-    m2[7 + 8] = _mm256_sub_epi32( m1[5 + 8], m1[7 + 8] );
+    m1[0 + 8] = _mm256_add_epi32( m2[0 + 8], m2[4 + 8] );
+    m1[1 + 8] = _mm256_add_epi32( m2[1 + 8], m2[5 + 8] );
+    m1[2 + 8] = _mm256_add_epi32( m2[2 + 8], m2[6 + 8] );
+    m1[3 + 8] = _mm256_add_epi32( m2[3 + 8], m2[7 + 8] );
+    m1[4 + 8] = _mm256_sub_epi32( m2[0 + 8], m2[4 + 8] );
+    m1[5 + 8] = _mm256_sub_epi32( m2[1 + 8], m2[5 + 8] );
+    m1[6 + 8] = _mm256_sub_epi32( m2[2 + 8], m2[6 + 8] );
+    m1[7 + 8] = _mm256_sub_epi32( m2[3 + 8], m2[7 + 8] );
+
+    m2[0 + 8] = _mm256_add_epi32( m1[0 + 8], m1[2 + 8] );
+    m2[1 + 8] = _mm256_add_epi32( m1[1 + 8], m1[3 + 8] );
+    m2[2 + 8] = _mm256_sub_epi32( m1[0 + 8], m1[2 + 8] );
+    m2[3 + 8] = _mm256_sub_epi32( m1[1 + 8], m1[3 + 8] );
+    m2[4 + 8] = _mm256_add_epi32( m1[4 + 8], m1[6 + 8] );
+    m2[5 + 8] = _mm256_add_epi32( m1[5 + 8], m1[7 + 8] );
+    m2[6 + 8] = _mm256_sub_epi32( m1[4 + 8], m1[6 + 8] );
+    m2[7 + 8] = _mm256_sub_epi32( m1[5 + 8], m1[7 + 8] );
+
+    m1[0 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[0 + 8], m2[1 + 8] ) );
+    m1[1 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[0 + 8], m2[1 + 8] ) );
+    m1[2 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[2 + 8], m2[3 + 8] ) );
+    m1[3 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[2 + 8], m2[3 + 8] ) );
+    m1[4 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[4 + 8], m2[5 + 8] ) );
+    m1[5 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[4 + 8], m2[5 + 8] ) );
+    m1[6 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[6 + 8], m2[7 + 8] ) );
+    m1[7 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[6 + 8], m2[7 + 8] ) );
+
+    // sum up
+    m1[0] = _mm256_add_epi32( m1[0], m1[1] );
+    m1[1] = _mm256_add_epi32( m1[2], m1[3] );
+    m1[2] = _mm256_add_epi32( m1[4], m1[5] );
+    m1[3] = _mm256_add_epi32( m1[6], m1[7] );
+    m1[4] = _mm256_add_epi32( m1[8], m1[9] );
+    m1[5] = _mm256_add_epi32( m1[10], m1[11] );
+    m1[6] = _mm256_add_epi32( m1[12], m1[13] );
+    m1[7] = _mm256_add_epi32( m1[14], m1[15] );
+
+    // sum up
+    m1[ 0] = _mm256_add_epi32( m1[ 0], m1[ 1] );
+    m1[ 1] = _mm256_add_epi32( m1[ 2], m1[ 3] );
+    m1[ 2] = _mm256_add_epi32( m1[ 4], m1[ 5] );
+    m1[ 3] = _mm256_add_epi32( m1[ 6], m1[ 7] );
+
+    m1[ 0] = _mm256_add_epi32( m1[ 0], m1[ 1] );
+    m1[ 1] = _mm256_add_epi32( m1[ 2], m1[ 3] );
+
+    __m256i iSum = _mm256_add_epi32( m1[0], m1[1] );
+
+    iSum = _mm256_hadd_epi32( iSum, iSum );
+    iSum = _mm256_hadd_epi32( iSum, iSum );
+    iSum = _mm256_add_epi32( iSum, _mm256_permute2x128_si256( iSum, iSum, 0x11 ) );
+
+    int sad2 = _mm_cvtsi128_si32( _mm256_castsi256_si128( iSum ) );
+
+#if JVET_R0164_MEAN_SCALED_SATD
+    sad2 -= absDc;
+    sad2 += absDc >> 2;
+#endif
+    sad   = (uint32_t)(sad2 / sqrt(16.0 * 8) * 2);
+  }
+
+#endif //USE_AVX2
+
+  return (sad);
+}
+#endif
+template< X86_VEXT vext >
+Distortion RdCost::xGetSADwMask_SIMD( const DistParam &rcDtParam )
+{   // Mask-weighted SAD: sum of mask * |org - cur| over the row-sub-sampled block
+  if (rcDtParam.org.width < 4  || rcDtParam.bitDepth > 10 || rcDtParam.applyWeight)
+    return RdCost::xGetSADwMask( rcDtParam );   // narrow, >10-bit or weighted cases are delegated to RdCost::xGetSADwMask
+
+  const short* src1   = (const short*)rcDtParam.org.buf;   // samples and mask are accessed as 16-bit values
+  const short* src2   = (const short*)rcDtParam.cur.buf;
+  const short* weightMask   = (const short*)rcDtParam.mask;
+  int  rows           = rcDtParam.org.height;
+  int  cols           = rcDtParam.org.width;
+  int  subShift       = rcDtParam.subShift;
+  int  subStep        = ( 1 << subShift);   // only every subStep-th row is visited (see the loop increments and strides)
+  const int strideSrc1 = rcDtParam.org.stride * subStep;
+  const int strideSrc2 = rcDtParam.cur.stride * subStep;
+  const int strideMask = rcDtParam.maskStride * subStep;
+
+  Distortion sum = 0;
+  if( vext >= AVX2 && (cols & 15 ) == 0 )
+  {
+#ifdef USE_AVX2
+    // Widths that are a multiple of 16: process 16 16-bit samples per iteration
+    __m256i vzero = _mm256_setzero_si256();
+    __m256i vsum32 = vzero;
+    for( int y = 0; y < rows; y+= subStep)
+    {
+      for( int x = 0; x < cols; x+=16 )
+      {
+        __m256i vsrc1 = _mm256_lddqu_si256( ( __m256i* )( &src1[x] ) );
+        __m256i vsrc2 = _mm256_lddqu_si256( ( __m256i* )( &src2[x] ) );
+        __m256i vmask;
+        if (rcDtParam.stepX == -1)   // mask runs backwards: load the 16 entries ending at weightMask[-x] and reverse their order
+        {
+          vmask = _mm256_lddqu_si256((__m256i*)((&weightMask[x]) - (x << 1) - (16 - 1)));
+          const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+          vmask = _mm256_shuffle_epi8(vmask, shuffle_mask);   // reverse the eight 16-bit entries inside each 128-bit half
+          vmask = _mm256_permute4x64_epi64(vmask, _MM_SHUFFLE(1, 0, 3, 2));   // swap the two 128-bit halves
+        }
+        else
+        {
+          vmask = _mm256_lddqu_si256((__m256i*)(&weightMask[x]));
+        }
+        vsum32 = _mm256_add_epi32( vsum32, _mm256_madd_epi16( vmask, _mm256_abs_epi16( _mm256_sub_epi16( vsrc1, vsrc2 ) ) ) );   // madd: 16-bit products, adjacent pairs added into 32-bit lanes
+      }
+      src1 += strideSrc1;
+      src2 += strideSrc2;
+      weightMask += strideMask;
+    }
+    vsum32 = _mm256_hadd_epi32( vsum32, vzero );   // two hadds collapse each 128-bit half to one 32-bit sum in its lane 0
+    vsum32 = _mm256_hadd_epi32( vsum32, vzero );
+    sum =  _mm_cvtsi128_si32( _mm256_castsi256_si128( vsum32 ) ) + _mm_cvtsi128_si32( _mm256_castsi256_si128( _mm256_permute2x128_si256( vsum32, vsum32, 0x11 ) ) );
+#endif   // NOTE(review): when USE_AVX2 is not defined this branch is empty and sum stays 0 - confirm vext >= AVX2 cannot occur in such builds
+  }
+  else
+  {
+    // Otherwise process 8 16-bit samples per iteration with 128-bit registers
+    __m128i vzero = _mm_setzero_si128();
+    __m128i vsum32 = vzero;
+    for( int y = 0; y < rows; y+= subStep)
+    {
+      for( int x = 0; x < cols; x+=8 )
+      {
+        __m128i vsrc1 = _mm_loadu_si128( ( const __m128i* )( &src1[x] ) );
+        __m128i vsrc2 = _mm_lddqu_si128( ( const __m128i* )( &src2[x] ) );
+        __m128i vmask;
+        if (rcDtParam.stepX == -1)   // mask runs backwards: load the 8 entries ending at weightMask[-x] and reverse their order
+        {
+          vmask = _mm_lddqu_si128((__m128i*)((&weightMask[x]) - (x << 1) - (8 - 1)));
+          const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+          vmask = _mm_shuffle_epi8(vmask, shuffle_mask);   // reverse the eight 16-bit entries
+        }
+        else
+        {
+          vmask = _mm_lddqu_si128((const __m128i*)(&weightMask[x]));
+        }
+        vsum32 = _mm_add_epi32( vsum32, _mm_madd_epi16( vmask, _mm_abs_epi16( _mm_sub_epi16( vsrc1, vsrc2 ) ) ) );   // madd: 16-bit products, adjacent pairs added into 32-bit lanes
+      }
+      src1 += strideSrc1;
+      src2 += strideSrc2;
+      weightMask += strideMask;
+    }
+    vsum32 = _mm_hadd_epi32( vsum32, vzero );   // two hadds leave the total in lane 0
+    vsum32 = _mm_hadd_epi32( vsum32, vzero );
+    sum =  _mm_cvtsi128_si32( vsum32 );
+  }
+  sum <<= subShift;   // scale by subStep (= 1 << subShift), the row sub-sampling factor
+
+  return sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
+}
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+template<X86_VEXT vext>
+Distortion RdCost::xGetHADs_HBD_SIMD(const DistParam &rcDtParam)
+{   // Sums HAD kernel results over a tiling of the block; the tile size is picked from the block's width and height
+  if (rcDtParam.applyWeight)   // weighted distortion: delegate to RdCostWeightPrediction::xGetHADsw
+  {
+    return RdCostWeightPrediction::xGetHADsw(rcDtParam);
+  }
+
+  const Pel* piOrg = rcDtParam.org.buf;
+  const Pel* piCur = rcDtParam.cur.buf;
+  const int  iRows = rcDtParam.org.height;
+  const int  iCols = rcDtParam.org.width;
+  const int  iStrideCur = rcDtParam.cur.stride;
+  const int  iStrideOrg = rcDtParam.org.stride;
+  const int  iStep = rcDtParam.step;
+
+  CHECK(iStep != 1, "the function only supports of iStep equal to 1");
+
+  int  x = 0, y = 0;
+  Distortion uiSum = 0;
+
+  if (iCols > iRows && (iRows & 7) == 0 && (iCols & 15) == 0)   // wider than tall: tile with 16x8 (width x height) kernels
+  {
+    for (y = 0; y < iRows; y += 8)
+    {
+      for (x = 0; x < iCols; x += 16)
+      {
+#ifdef USE_AVX2
+        if (vext >= AVX2)
+        {
+          uiSum += xCalcHAD16x8_HBD_AVX2(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);
+        }
+        else
+#endif
+          uiSum += xCalcHAD16x8_HBD_SSE(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);
+      }
+      piOrg += iStrideOrg * 8;
+      piCur += iStrideCur * 8;
+    }
+  }
+  else if (iCols < iRows && (iCols & 7) == 0 && (iRows & 15) == 0)   // taller than wide: tile with 8x16 kernels
+  {
+    for (y = 0; y < iRows; y += 16)
+    {
+      for (x = 0; x < iCols; x += 8)
+      {
+#ifdef USE_AVX2
+        if (vext >= AVX2)
+        {
+          uiSum += xCalcHAD8x16_HBD_AVX2(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);
+        }
+        else
+#endif
+          uiSum += xCalcHAD8x16_HBD_SSE(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);
+      }
+      piOrg += iStrideOrg * 16;
+      piCur += iStrideCur * 16;
+    }
+  }
+  else if (iCols > iRows && (iRows & 3) == 0 && (iCols & 7) == 0)   // wider than tall but not tileable by 16x8: use 8x4 kernels
+  {
+    for (y = 0; y < iRows; y += 4)
+    {
+      for (x = 0; x < iCols; x += 8)
+      {
+#ifdef USE_AVX2
+        if (vext >= AVX2)
+        {
+          uiSum += xCalcHAD8x4_HBD_AVX2(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);
+        }
+        else
+#endif
+          uiSum += xCalcHAD8x4_HBD_SSE(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);
+      }
+      piOrg += iStrideOrg * 4;
+      piCur += iStrideCur * 4;
+    }
+  }
+  else if (iCols < iRows && (iCols & 3) == 0 && (iRows & 7) == 0)   // taller than wide but not tileable by 8x16: use 4x8 kernels
+  {
+    for (y = 0; y < iRows; y += 8)
+    {
+      for (x = 0; x < iCols; x += 4)
+      {
+#ifdef USE_AVX2
+        if (vext >= AVX2)
+        {
+          uiSum += xCalcHAD4x8_HBD_AVX2(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);
+        }
+        else
+#endif
+          uiSum += xCalcHAD4x8_HBD_SSE(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);
+      }
+      piOrg += iStrideOrg * 8;
+      piCur += iStrideCur * 8;
+    }
+  }
+  else if ((iRows % 8 == 0) && (iCols % 8 == 0))   // remaining sizes with both dimensions multiples of 8: use 8x8 kernels
+  {
+    int  iOffsetOrg = iStrideOrg << 3;
+    int  iOffsetCur = iStrideCur << 3;
+    for (y = 0; y < iRows; y += 8)
+    {
+      for (x = 0; x < iCols; x += 8)
+      {
+#ifdef USE_AVX2
+        if (vext >= AVX2)
+        {
+          uiSum += xCalcHAD8x8_HBD_AVX2(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);
+        }
+        else
+#endif
+          uiSum += xCalcHAD8x8_HBD_SSE(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);
+      }
+      piOrg += iOffsetOrg;
+      piCur += iOffsetCur;
+    }
+  }
+  else if ((iRows % 4 == 0) && (iCols % 4 == 0))   // both dimensions multiples of 4: use 4x4 kernels
+  {
+    int  iOffsetOrg = iStrideOrg << 2;
+    int  iOffsetCur = iStrideCur << 2;
+
+    for (y = 0; y < iRows; y += 4)
+    {
+      for (x = 0; x < iCols; x += 4)
+      {
+#ifdef USE_AVX2
+        if (vext >= AVX2)
+        {
+          uiSum += xCalcHAD4x4_HBD_AVX2(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);
+        }
+        else
+#endif
+          uiSum += xCalcHAD4x4_HBD_SSE(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);
+      }
+      piOrg += iOffsetOrg;
+      piCur += iOffsetCur;
+    }
+  }
+  else if ((iRows % 2 == 0) && (iCols % 2 == 0))   // both dimensions even: use 2x2 kernels
+  {
+    int  iOffsetOrg = iStrideOrg << 1;
+    int  iOffsetCur = iStrideCur << 1;
+    for (y = 0; y < iRows; y += 2)
+    {
+      for (x = 0; x < iCols; x += 2)
+      {
+        uiSum += xCalcHAD2x2_HBD_SSE(&piOrg[x], &piCur[x], iStrideOrg, iStrideCur);   // only the SSE 2x2 kernel is called, regardless of vext
+      }
+      piOrg += iOffsetOrg;
+      piCur += iOffsetCur;
+    }
+  }
+  else
+  {
+    THROW("Invalid size");
+  }
 
-    m1[0 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[0 + 8], m2[1 + 8] ) );
-    m1[1 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[0 + 8], m2[1 + 8] ) );
-    m1[2 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[2 + 8], m2[3 + 8] ) );
-    m1[3 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[2 + 8], m2[3 + 8] ) );
-    m1[4 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[4 + 8], m2[5 + 8] ) );
-    m1[5 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[4 + 8], m2[5 + 8] ) );
-    m1[6 + 8] = _mm256_abs_epi32( _mm256_add_epi32( m2[6 + 8], m2[7 + 8] ) );
-    m1[7 + 8] = _mm256_abs_epi32( _mm256_sub_epi32( m2[6 + 8], m2[7 + 8] ) );
+  return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth));
+}
 
-    // sum up
-    m1[0] = _mm256_add_epi32( m1[0], m1[1] );
-    m1[1] = _mm256_add_epi32( m1[2], m1[3] );
-    m1[2] = _mm256_add_epi32( m1[4], m1[5] );
-    m1[3] = _mm256_add_epi32( m1[6], m1[7] );
-    m1[4] = _mm256_add_epi32( m1[8], m1[9] );
-    m1[5] = _mm256_add_epi32( m1[10], m1[11] );
-    m1[6] = _mm256_add_epi32( m1[12], m1[13] );
-    m1[7] = _mm256_add_epi32( m1[14], m1[15] );
+template< X86_VEXT vext >
+Distortion RdCost::xGetSAD_HBD_SIMD(const DistParam &rcDtParam)
+{   // SAD on 32-bit lanes: sum of |org - cur| over the row-sub-sampled block
+  if (rcDtParam.applyWeight)   // weighted distortion: delegate to RdCost::xGetSAD
+  {
+    return RdCost::xGetSAD(rcDtParam);
+  }
 
-    // sum up
-    m1[ 0] = _mm256_add_epi32( m1[ 0], m1[ 1] );
-    m1[ 1] = _mm256_add_epi32( m1[ 2], m1[ 3] );
-    m1[ 2] = _mm256_add_epi32( m1[ 4], m1[ 5] );
-    m1[ 3] = _mm256_add_epi32( m1[ 6], m1[ 7] );
+  const Pel* pSrc1 = (const Pel*)rcDtParam.org.buf;
+  const Pel* pSrc2 = (const Pel*)rcDtParam.cur.buf;
+  int  iRows = rcDtParam.org.height;
+  int  iCols = rcDtParam.org.width;
+  int  iSubShift = rcDtParam.subShift;
+  int  iSubStep = (1 << iSubShift);   // only every iSubStep-th row is visited (see the loop increments and strides)
+  const int iStrideSrc1 = rcDtParam.org.stride * iSubStep;
+  const int iStrideSrc2 = rcDtParam.cur.stride * iSubStep;
 
-    m1[ 0] = _mm256_add_epi32( m1[ 0], m1[ 1] );
-    m1[ 1] = _mm256_add_epi32( m1[ 2], m1[ 3] );
+  if ((iCols < 4) && (iRows < (iSubStep << 1)))   // narrower than 4 and fewer than two sub-sampled rows: use RdCost::xGetSAD
+  {
+    return RdCost::xGetSAD(rcDtParam);
+  }
 
-    __m256i iSum = _mm256_add_epi32( m1[0], m1[1] );
+  uint32_t uiSum = 0;
+#ifdef USE_AVX2
+  if ((vext >= AVX2) && ((iCols & 7) == 0))   // width multiple of 8: eight 32-bit differences per iteration
+  {
+    __m256i vzero = _mm256_setzero_si256();
+    __m256i vsum32 = vzero;
+    __m256i vsrc1, vsrc2, vsum;
+    for (int iY = 0; iY < iRows; iY += iSubStep)
+    {
+      for (int iX = 0; iX < iCols; iX += 8)
+      {
+        vsrc1 = _mm256_lddqu_si256((__m256i*)(&pSrc1[iX]));
+        vsrc2 = _mm256_lddqu_si256((__m256i*)(&pSrc2[iX]));
+        vsum = _mm256_abs_epi32(_mm256_sub_epi32(vsrc1, vsrc2));
+        vsum32 = _mm256_add_epi32(vsum32, vsum);
+      }
+      pSrc1 += iStrideSrc1;
+      pSrc2 += iStrideSrc2;
+    }
+    vsum32 = _mm256_hadd_epi32(vsum32, vzero);   // two hadds collapse each 128-bit half to one 32-bit sum in its lane 0
+    vsum32 = _mm256_hadd_epi32(vsum32, vzero);
+    uiSum = _mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32)) + _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute2x128_si256(vsum32, vsum32, 0x11)));
+  }
+  else
+#endif
+    if ((iCols & 3) == 0)   // width multiple of 4: four 32-bit differences per iteration
+    {
+      __m128i vzero = _mm_setzero_si128();
+      __m128i vsum32 = vzero;
+      __m128i vsrc1, vsrc2, vsum;
+      for (int iY = 0; iY < iRows; iY += iSubStep)
+      {
+        for (int iX = 0; iX < iCols; iX += 4)
+        {
+          vsrc1 = _mm_lddqu_si128((const __m128i*)(&pSrc1[iX]));
+          vsrc2 = _mm_lddqu_si128((const __m128i*)(&pSrc2[iX]));
+          vsum = _mm_abs_epi32(_mm_sub_epi32(vsrc1, vsrc2));
+          vsum32 = _mm_add_epi32(vsum32, vsum);
+        }
+        pSrc1 += iStrideSrc1;
+        pSrc2 += iStrideSrc2;
+      }
+      vsum32 = _mm_hadd_epi32(vsum32, vzero);   // two hadds leave the total in lane 0
+      vsum32 = _mm_hadd_epi32(vsum32, vzero);
+      uiSum = _mm_cvtsi128_si32(vsum32);
+    }
+    else   // remaining widths: two sub-sampled rows per outer iteration, two samples (64 bits) per row per inner iteration
+    {
+      __m128i vzero = _mm_setzero_si128();
+      __m128i vsum32 = vzero;
+      __m128i vsrc10, vsrc20, vsrc11, vsrc21, vsum0, vsum1, vsum;
 
-    iSum = _mm256_hadd_epi32( iSum, iSum );
-    iSum = _mm256_hadd_epi32( iSum, iSum );
-    iSum = _mm256_add_epi32( iSum, _mm256_permute2x128_si256( iSum, iSum, 0x11 ) );
+      int i2StrideSrc1 = (iStrideSrc1 << 1);
+      int i2StrideSrc2 = (iStrideSrc2 << 1);
 
-    int sad2 = _mm_cvtsi128_si32( _mm256_castsi256_si128( iSum ) );
+      for (int iY = 0; iY < iRows; iY += (iSubStep << 1))
+      {
+        for (int iX = 0; iX < iCols; iX += 2)
+        {
+          vsrc10 = _mm_loadl_epi64((const __m128i*)(&pSrc1[iX]));
+          vsrc20 = _mm_loadl_epi64((const __m128i*)(&pSrc2[iX]));
+          vsum0 = _mm_abs_epi32(_mm_sub_epi32(vsrc10, vsrc20));
 
-#if JVET_R0164_MEAN_SCALED_SATD
-    sad2 -= absDc;
-    sad2 += absDc >> 2;
-#endif
-    sad   = (uint32_t)(sad2 / sqrt(16.0 * 8) * 2);
-  }
+          vsrc11 = _mm_loadl_epi64((const __m128i*)(&pSrc1[iX + iStrideSrc1]));
+          vsrc21 = _mm_loadl_epi64((const __m128i*)(&pSrc2[iX + iStrideSrc2]));
+          vsum1 = _mm_abs_epi32(_mm_sub_epi32(vsrc11, vsrc21));
 
-#endif //USE_AVX2
+          vsum = _mm_unpacklo_epi32(vsum0, vsum1);   // interleave the two rows' pairs so all four lanes carry a difference
+          vsum32 = _mm_add_epi32(vsum32, vsum);
+        }
+        pSrc1 += i2StrideSrc1;
+        pSrc2 += i2StrideSrc2;
+      }
+      vsum32 = _mm_hadd_epi32(vsum32, vzero);   // two hadds leave the total in lane 0
+      vsum32 = _mm_hadd_epi32(vsum32, vzero);
+      uiSum = _mm_cvtsi128_si32(vsum32);
+    }
 
-  return (sad);
+  uiSum <<= iSubShift;   // scale by iSubStep (= 1 << iSubShift), the row sub-sampling factor
+  return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
 }
 
 template< X86_VEXT vext >
-Distortion RdCost::xGetSADwMask_SIMD( const DistParam &rcDtParam )
+Distortion RdCost::xGetSADwMask_HBD_SIMD(const DistParam &rcDtParam)
 {
-  if (rcDtParam.org.width < 4  || rcDtParam.bitDepth > 10 || rcDtParam.applyWeight)
-    return RdCost::xGetSADwMask( rcDtParam );
-
-  const short* src1   = (const short*)rcDtParam.org.buf;
-  const short* src2   = (const short*)rcDtParam.cur.buf;
-  const short* weightMask   = (const short*)rcDtParam.mask;
-  int  rows           = rcDtParam.org.height;
-  int  cols           = rcDtParam.org.width;
-  int  subShift       = rcDtParam.subShift;
-  int  subStep        = ( 1 << subShift);
+  CHECK((rcDtParam.org.width & 7), "the function only support width multiple of 8");
+  CHECK(rcDtParam.applyWeight, "the function does not support weighted distortion");
+
+  const Pel* src1 = rcDtParam.org.buf;
+  const Pel* src2 = rcDtParam.cur.buf;
+  const Pel* weightMask = rcDtParam.mask;
+  int  rows = rcDtParam.org.height;
+  int  cols = rcDtParam.org.width;
+  int  subShift = rcDtParam.subShift;
+  int  subStep = (1 << subShift);
   const int strideSrc1 = rcDtParam.org.stride * subStep;
   const int strideSrc2 = rcDtParam.cur.stride * subStep;
   const int strideMask = rcDtParam.maskStride * subStep;
 
   Distortion sum = 0;
-  if( vext >= AVX2 && (cols & 15 ) == 0 )
-  {
+
 #ifdef USE_AVX2
-    // Do for width that multiple of 16
+  if (vext >= AVX2)
+  {
     __m256i vzero = _mm256_setzero_si256();
     __m256i vsum32 = vzero;
-    for( int y = 0; y < rows; y+= subStep)
+    for (int y = 0; y < rows; y += subStep)
     {
-      for( int x = 0; x < cols; x+=16 )
+      for (int x = 0; x < cols; x += 8)
       {
-        __m256i vsrc1 = _mm256_lddqu_si256( ( __m256i* )( &src1[x] ) );
-        __m256i vsrc2 = _mm256_lddqu_si256( ( __m256i* )( &src2[x] ) );
-        __m256i vmask;
+        __m256i vsrc1 = _mm256_lddqu_si256((const __m256i*)(&src1[x]));
+        __m256i vsrc2 = _mm256_lddqu_si256((const __m256i*)(&src2[x]));
+        __m256i vmask, vsum;
         if (rcDtParam.stepX == -1)
         {
-          vmask = _mm256_lddqu_si256((__m256i*)((&weightMask[x]) - (x << 1) - (16 - 1)));
-          const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-          vmask = _mm256_shuffle_epi8(vmask, shuffle_mask);
-          vmask = _mm256_permute4x64_epi64(vmask, _MM_SHUFFLE(1, 0, 3, 2));
+          vmask = _mm256_lddqu_si256((__m256i*)((&weightMask[x]) - (x << 1) - (8 - 1)));
+          vmask = _mm256_permute4x64_epi64(_mm256_shuffle_epi32(vmask, 0x1b), 0x4e);
         }
         else
         {
-          vmask = _mm256_lddqu_si256((__m256i*)(&weightMask[x]));
+          vmask = _mm256_lddqu_si256((const __m256i*)(&weightMask[x]));
         }
-        vsum32 = _mm256_add_epi32( vsum32, _mm256_madd_epi16( vmask, _mm256_abs_epi16( _mm256_sub_epi16( vsrc1, vsrc2 ) ) ) );
+
+        vsum = _mm256_mullo_epi32(vmask, _mm256_abs_epi32(_mm256_sub_epi32(vsrc1, vsrc2)));
+        vsum32 = _mm256_add_epi32(vsum32, vsum);
       }
       src1 += strideSrc1;
       src2 += strideSrc2;
       weightMask += strideMask;
     }
-    vsum32 = _mm256_hadd_epi32( vsum32, vzero );
-    vsum32 = _mm256_hadd_epi32( vsum32, vzero );
-    sum =  _mm_cvtsi128_si32( _mm256_castsi256_si128( vsum32 ) ) + _mm_cvtsi128_si32( _mm256_castsi256_si128( _mm256_permute2x128_si256( vsum32, vsum32, 0x11 ) ) );
-#endif
+
+    vsum32 = _mm256_add_epi32(vsum32, _mm256_permute4x64_epi64(vsum32, 0x4e));
+    vsum32 = _mm256_add_epi32(vsum32, _mm256_permute4x64_epi64(vsum32, 0xb1));
+    vsum32 = _mm256_add_epi32(vsum32, _mm256_shuffle_epi32(vsum32, 0x1b));
+    sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32));
   }
   else
+#endif
   {
-    // Do with step of 8
     __m128i vzero = _mm_setzero_si128();
     __m128i vsum32 = vzero;
-    for( int y = 0; y < rows; y+= subStep)
+    for (int y = 0; y < rows; y += subStep)
     {
-      for( int x = 0; x < cols; x+=8 )
+      for (int x = 0; x < cols; x += 8)
       {
-        __m128i vsrc1 = _mm_loadu_si128( ( const __m128i* )( &src1[x] ) );
-        __m128i vsrc2 = _mm_lddqu_si128( ( const __m128i* )( &src2[x] ) );
-        __m128i vmask;
+        __m128i vsrc11 = _mm_lddqu_si128((const __m128i*)(&src1[x]));
+        __m128i vsrc12 = _mm_lddqu_si128((const __m128i*)(&src1[x + 4]));
+        __m128i vsrc21 = _mm_lddqu_si128((const __m128i*)(&src2[x]));
+        __m128i vsrc22 = _mm_lddqu_si128((const __m128i*)(&src2[x + 4]));
+
+        __m128i vmask1, vmask2, vsum1, vsum2;
         if (rcDtParam.stepX == -1)
         {
-          vmask = _mm_lddqu_si128((__m128i*)((&weightMask[x]) - (x << 1) - (8 - 1)));
-          const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-          vmask = _mm_shuffle_epi8(vmask, shuffle_mask);
+          vmask1 = _mm_lddqu_si128((__m128i*)((&weightMask[x]) - (x << 1) - (8 - 1) + 4));
+          vmask1 = _mm_shuffle_epi32(vmask1, 0x1b);
+          vmask2 = _mm_lddqu_si128((__m128i*)((&weightMask[x]) - (x << 1) - (8 - 1)));
+          vmask2 = _mm_shuffle_epi32(vmask2, 0x1b);
         }
         else
         {
-          vmask = _mm_lddqu_si128((const __m128i*)(&weightMask[x]));
+          vmask1 = _mm_lddqu_si128((const __m128i*)(&weightMask[x]));
+          vmask2 = _mm_lddqu_si128((const __m128i*)(&weightMask[x + 4]));
         }
-        vsum32 = _mm_add_epi32( vsum32, _mm_madd_epi16( vmask, _mm_abs_epi16( _mm_sub_epi16( vsrc1, vsrc2 ) ) ) );
+
+        vsum1 = _mm_mullo_epi32(vmask1, _mm_abs_epi32(_mm_sub_epi32(vsrc11, vsrc21)));
+        vsum2 = _mm_mullo_epi32(vmask2, _mm_abs_epi32(_mm_sub_epi32(vsrc12, vsrc22)));
+        vsum32 = _mm_add_epi32(vsum32, vsum1);
+        vsum32 = _mm_add_epi32(vsum32, vsum2);
       }
       src1 += strideSrc1;
       src2 += strideSrc2;
       weightMask += strideMask;
     }
-    vsum32 = _mm_hadd_epi32( vsum32, vzero );
-    vsum32 = _mm_hadd_epi32( vsum32, vzero );
-    sum =  _mm_cvtsi128_si32( vsum32 );
+    vsum32 = _mm_hadd_epi32(vsum32, vzero);
+    vsum32 = _mm_hadd_epi32(vsum32, vzero);
+    sum = _mm_cvtsi128_si32(vsum32);
   }
-  sum <<= subShift;
 
+  sum <<= subShift;
   return sum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
 }
 
+// Sum of squared differences between pcDtParam.org and pcDtParam.cur, with samples
+// handled as 32-bit elements and the squares accumulated in 64-bit lanes.
+// No precision shift is applied to the result and weighting is rejected via CHECK.
+// NOTE(review): the guard below fires only when FULL_NBIT is undefined, not when it is 0.
+template< X86_VEXT vext >
+Distortion RdCost::xGetSSE_HBD_SIMD(const DistParam& pcDtParam)
+{
+#ifndef FULL_NBIT
+#error the function only supports full bit-depth
+#endif
+  CHECK(pcDtParam.applyWeight, "the function does not support weighted SSE");
+
+  const Pel* piOrg = pcDtParam.org.buf;
+  const Pel* piCur = pcDtParam.cur.buf;
+  int  iRows = pcDtParam.org.height;
+  int  iCols = pcDtParam.org.width;
+  int  iStrideCur = pcDtParam.cur.stride;
+  int  iStrideOrg = pcDtParam.org.stride;
+
+  Distortion uiSum = 0;
+#ifdef USE_AVX2
+  if ((vext >= AVX2) && ((iCols & 7) == 0))
+  {
+    __m256i vsum = _mm256_setzero_si256();
+    for (int iY = 0; iY < iRows; iY++)
+    {
+      for (int iX = 0; iX < iCols; iX += 8)
+      {
+        __m256i vorg = _mm256_lddqu_si256((const __m256i*)(&piOrg[iX]));
+        __m256i vcur = _mm256_lddqu_si256((const __m256i*)(&piCur[iX]));
+        __m256i vtemp = _mm256_sub_epi32(vorg, vcur);
+        // mul_epi32 squares the even 32-bit elements only; a 4-byte shift of the
+        // difference brings the odd elements into the even slots for the second pass.
+        vsum = _mm256_add_epi64(vsum, _mm256_mul_epi32(vtemp, vtemp));
+        vtemp = _mm256_srli_si256(vtemp, 4);
+        vsum = _mm256_add_epi64(vsum, _mm256_mul_epi32(vtemp, vtemp));
+      }
+      piOrg += iStrideOrg;
+      piCur += iStrideCur;
+    }
+    uiSum += _mm256_extract_epi64(vsum, 0) + _mm256_extract_epi64(vsum, 1) + _mm256_extract_epi64(vsum, 2) + _mm256_extract_epi64(vsum, 3);
+  }
+  else
+#endif
+    if ((iCols & 3) == 0)
+    {
+      __m128i vsum = _mm_setzero_si128();
+      for (int iY = 0; iY < iRows; iY++)
+      {
+        for (int iX = 0; iX < iCols; iX += 4)
+        {
+          __m128i vorg = _mm_lddqu_si128((const __m128i*)(&piOrg[iX]));
+          __m128i vcur = _mm_lddqu_si128((const __m128i*)(&piCur[iX]));
+          __m128i vtemp = _mm_sub_epi32(vorg, vcur);
+          // Even elements first, then the odd ones after shifting the difference.
+          vsum = _mm_add_epi64(vsum, _mm_mul_epi32(vtemp, vtemp));
+          vtemp = _mm_srli_si128(vtemp, 4);
+          vsum = _mm_add_epi64(vsum, _mm_mul_epi32(vtemp, vtemp));
+        }
+        piOrg += iStrideOrg;
+        piCur += iStrideCur;
+      }
+      uiSum += _mm_extract_epi64(vsum, 0) + _mm_extract_epi64(vsum, 1);
+    }
+    else if ((iCols & 1) == 0)
+    {
+      __m128i vsum = _mm_setzero_si128();
+      for (int iY = 0; iY < iRows; iY++)
+      {
+        for (int iX = 0; iX < iCols; iX += 2)
+        {
+          __m128i vorg = _mm_loadl_epi64((const __m128i*)(&piOrg[iX]));
+          __m128i vcur = _mm_loadl_epi64((const __m128i*)(&piCur[iX]));
+          // Spread the two differences to elements 0 and 2 so one mul_epi32 squares both.
+          __m128i vtemp = _mm_shuffle_epi32(_mm_sub_epi32(vorg, vcur), 0xd8);
+          vsum = _mm_add_epi64(vsum, _mm_mul_epi32(vtemp, vtemp));
+        }
+        piOrg += iStrideOrg;
+        piCur += iStrideCur;
+      }
+      uiSum += _mm_extract_epi64(vsum, 0) + _mm_extract_epi64(vsum, 1);
+    }
+    else
+    {
+      // Odd widths fall back to scalar arithmetic.
+      for (int iY = 0; iY < iRows; iY++)
+      {
+        for (int iX = 0; iX < iCols; iX++)
+        {
+          const Intermediate_Int iTemp = piOrg[iX] - piCur[iX];
+          uiSum += Distortion(iTemp * iTemp);
+        }
+        piOrg += iStrideOrg;
+        piCur += iStrideCur;
+      }
+    }
+
+  return uiSum;
+}
+#else
 template<X86_VEXT vext>
 Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam )
 {
@@ -2289,7 +4462,7 @@ Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam )
 
   return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
 }
-
+#endif
 template <X86_VEXT vext>
 void RdCost::_initRdCostX86()
 {
@@ -2303,7 +4476,41 @@ void RdCost::_initRdCostX86()
   //m_afpDistortFunc[DF_SSE32  ] = xGetSSE_NxN_SIMD<Pel, Pel, 32, vext>;
   //m_afpDistortFunc[DF_SSE64  ] = xGetSSE_NxN_SIMD<Pel, Pel, 64, vext>;
   //m_afpDistortFunc[DF_SSE16N ] = xGetSSE_SIMD<Pel, Pel, vext>;
-
+#if RExt__HIGH_BIT_DEPTH_SUPPORT
+  m_afpDistortFunc[DF_SAD] = xGetSAD_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SAD2] = xGetSAD_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SAD4] = xGetSAD_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SAD8] = xGetSAD_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SAD16] = xGetSAD_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SAD32] = xGetSAD_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SAD64] = xGetSAD_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SAD16N] = xGetSAD_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SAD12] = xGetSAD_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SAD24] = xGetSAD_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SAD48] = xGetSAD_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SAD_INTERMEDIATE_BITDEPTH] = xGetSAD_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SAD_WITH_MASK] = xGetSADwMask_HBD_SIMD<vext>;
+
+  m_afpDistortFunc[DF_HAD] = xGetHADs_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD2] = xGetHADs_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD4] = xGetHADs_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD8] = xGetHADs_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD16] = xGetHADs_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD32] = xGetHADs_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD64] = xGetHADs_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_HAD16N] = xGetHADs_HBD_SIMD<vext>;
+
+#if FULL_NBIT
+  m_afpDistortFunc[DF_SSE] = xGetSSE_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SSE2] = xGetSSE_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SSE4] = xGetSSE_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SSE8] = xGetSSE_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SSE16] = xGetSSE_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SSE32] = xGetSSE_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SSE64] = xGetSSE_HBD_SIMD<vext>;
+  m_afpDistortFunc[DF_SSE16N] = xGetSSE_HBD_SIMD<vext>;
+#endif
+#else
   m_afpDistortFunc[DF_SAD    ] = xGetSAD_SIMD<vext>;
   m_afpDistortFunc[DF_SAD2   ] = xGetSAD_SIMD<vext>;
   m_afpDistortFunc[DF_SAD4   ] = xGetSAD_NxN_SIMD<4,  vext>;
@@ -2329,6 +4536,7 @@ void RdCost::_initRdCostX86()
   m_afpDistortFunc[DF_SAD_INTERMEDIATE_BITDEPTH] = RdCost::xGetSAD_IBD_SIMD<vext>;
 
   m_afpDistortFunc[DF_SAD_WITH_MASK] = xGetSADwMask_SIMD<vext>;
+#endif
 }
 
 template void RdCost::_initRdCostX86<SIMDX86>();
diff --git a/source/Lib/DecoderAnalyserLib/CMakeLists.txt b/source/Lib/DecoderAnalyserLib/CMakeLists.txt
index 4fbd3463e03b7c3a6b7e3eef48f11f6b4a417a30..d91d1d50a3ab43374223ca6eb2b0aa987df05c5f 100644
--- a/source/Lib/DecoderAnalyserLib/CMakeLists.txt
+++ b/source/Lib/DecoderAnalyserLib/CMakeLists.txt
@@ -29,28 +29,8 @@ if( SET_ENABLE_TRACING )
   endif()
 endif()
 
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-else()
-  target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
-
 target_include_directories( ${LIB_NAME} PUBLIC ../DecoderLib )
-target_link_libraries( ${LIB_NAME} CommonAnalyserLib Threads::Threads )
+target_link_libraries( ${LIB_NAME} CommonAnalyserLib )
 
 # example: place header files in different folders
 source_group( "Natvis Files" FILES ${NATVIS_FILES} )
diff --git a/source/Lib/DecoderLib/AnnexBread.cpp b/source/Lib/DecoderLib/AnnexBread.cpp
index 7058de923b32d00ecf690cabd1961393eda7a466..3d488d9fe2ad5760a0316426cb4b9ab298f2b725 100644
--- a/source/Lib/DecoderLib/AnnexBread.cpp
+++ b/source/Lib/DecoderLib/AnnexBread.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -125,7 +125,10 @@ _byteStreamNALUnit(
   statBits.bits+=24; statBits.count+=3;
 #endif
 #endif
-  if(start_code_prefix_one_3bytes != 0x000001) { THROW( "Invalid code prefix" );}
+  if (start_code_prefix_one_3bytes != 0x000001)
+  {
+    THROW("Invalid code prefix");
+  }
   stats.m_numStartCodePrefixBytes += 3;
 
   /* 3. NumBytesInNALunit is set equal to the number of bytes starting with
diff --git a/source/Lib/DecoderLib/AnnexBread.h b/source/Lib/DecoderLib/AnnexBread.h
index 6f9c7334d7133ccddc0c402c11542109d3e12b90..a5f322c40b0d32101b9326d341220fd191030182 100644
--- a/source/Lib/DecoderLib/AnnexBread.h
+++ b/source/Lib/DecoderLib/AnnexBread.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/DecoderLib/BinDecoder.cpp b/source/Lib/DecoderLib/BinDecoder.cpp
index 81d4783ba41efb3cc7bcf75c1d203ea2c5ee40bc..df60845123c3ad2eddb8e0f2431ffc6a116ea257 100644
--- a/source/Lib/DecoderLib/BinDecoder.cpp
+++ b/source/Lib/DecoderLib/BinDecoder.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -97,6 +97,18 @@ void BinDecoderBase::reset( int qp, int initId )
   start();
 }
 
+// Forwards its arguments unchanged to Ctx::riceStatReset; the signature follows the macro.
+#if JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS
+void BinDecoderBase::riceStatReset(int bitDepth, bool persistentRiceAdaptationEnabledFlag)
+{
+  Ctx::riceStatReset(bitDepth, persistentRiceAdaptationEnabledFlag);
+}
+#else
+void BinDecoderBase::riceStatReset(int bitDepth)
+{
+  Ctx::riceStatReset(bitDepth);
+}
+#endif
 
 unsigned BinDecoderBase::decodeBinEP()
 {
diff --git a/source/Lib/DecoderLib/BinDecoder.h b/source/Lib/DecoderLib/BinDecoder.h
index 11a4260974e322f69032b782d7f64522b93b686d..15b6f0ce456751fc586eeef25afe3eedc5aa20b0 100644
--- a/source/Lib/DecoderLib/BinDecoder.h
+++ b/source/Lib/DecoderLib/BinDecoder.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -61,6 +61,11 @@ public:
   void      start   ();
   void      finish  ();
   void      reset   ( int qp, int initId );
+#if JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS
+  void      riceStatReset(int bitDepth, bool persistentRiceAdaptationEnabledFlag);
+#else
+  void      riceStatReset(int bitDepth);
+#endif
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
   void      set     ( const CodingStatisticsClassType& type) { ptype = &type; }
 #endif
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index 2980b49c3e0fa65fc548fe24306f6edf97349f32..d5c5d364bb853f9f4facea22f555eeb428719c88 100644
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -83,6 +83,12 @@ void CABACReader::initCtxModels( Slice& slice )
     }
   }
   m_BinDecoder.reset( qp, (int)sliceType );
+  m_BinDecoder.setBaseLevel(slice.getRiceBaseLevel());
+#if JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS
+  m_BinDecoder.riceStatReset(slice.getSPS()->getBitDepth(CHANNEL_TYPE_LUMA), slice.getSPS()->getSpsRangeExtension().getPersistentRiceAdaptationEnabledFlag());
+#else
+  m_BinDecoder.riceStatReset(slice.getSPS()->getBitDepth(CHANNEL_TYPE_LUMA));
+#endif
 }
 
 
@@ -144,7 +150,7 @@ void CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i
 
 
   sao( cs, ctuRsAddr );
-  if (cs.sps->getALFEnabledFlag() && (cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y)))
+  if (cs.sps->getALFEnabledFlag() && (cs.slice->getAlfEnabledFlag(COMPONENT_Y)))
   {
     const PreCalcValues& pcv = *cs.pcv;
     int                 frame_width_in_ctus = pcv.widthInCtus;
@@ -161,7 +167,7 @@ void CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i
 
     for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
     {
-      if (cs.slice->getTileGroupAlfEnabledFlag((ComponentID)compIdx))
+      if (cs.slice->getAlfEnabledFlag((ComponentID)compIdx))
       {
         uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compIdx );
         int ctx = 0;
@@ -177,7 +183,7 @@ void CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i
         }
         if( isChroma( (ComponentID)compIdx ) )
         {
-          int apsIdx = cs.slice->getTileGroupApsIdChroma();
+          int apsIdx = cs.slice->getAlfApsIdChroma();
           CHECK(cs.slice->getAlfAPSs()[apsIdx] == nullptr, "APS not initialized");
           const AlfParam& alfParam = cs.slice->getAlfAPSs()[apsIdx]->getAlfAPSParam();
           const int numAlts = alfParam.numAlternativesChroma;
@@ -243,7 +249,7 @@ void CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i
 void CABACReader::readAlfCtuFilterIndex(CodingStructure& cs, unsigned ctuRsAddr)
 {
   short* alfCtbFilterSetIndex = cs.slice->getPic()->getAlfCtbFilterIndex();
-  unsigned numAps = cs.slice->getTileGroupNumAps();
+  unsigned numAps = cs.slice->getNumAlfApsIdsLuma();
   unsigned numAvailableFiltSets = numAps + NUM_FIXED_FILTER_SETS;
   uint32_t filtIndex = 0;
   if (numAvailableFiltSets > NUM_FIXED_FILTER_SETS)
@@ -1140,7 +1146,7 @@ void CABACReader::pred_mode( CodingUnit& cu )
     else
     {
       cu.predMode = m_BinDecoder.decodeBin(Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu))) ? MODE_INTRA : MODE_INTER;
-      if (CU::isIntra(cu) && cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && ( ( (!isLuma(cu.chType)) && (cu.chromaSize().width * cu.chromaSize().height > 16) ) || ((isLuma(cu.chType)) && ((cu.lumaSize().width * cu.lumaSize().height) > 16 ) )  ) && (!cu.isLocalSepTree() || isLuma(cu.chType)  )  )
+      if (CU::isIntra(cu) && cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 &&  cu.lheight() <= 64 && ( ( (!isLuma(cu.chType)) && (cu.chromaSize().width * cu.chromaSize().height > 16) ) || ((isLuma(cu.chType)) && ((cu.lumaSize().width * cu.lumaSize().height) > 16 ) )  ) && (!cu.isLocalSepTree() || isLuma(cu.chType)  )  )
       {
         if (m_BinDecoder.decodeBin(Ctx::PLTFlag(0)))
         {
@@ -2961,6 +2967,15 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID, CUCtx&
   int ctxBinSampleRatio = (compID == COMPONENT_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
   cctx.regBinLimit = (tu.getTbAreaAfterCoefZeroOut(compID) * ctxBinSampleRatio) >> 4;
 
+  int baseLevel = m_BinDecoder.getCtx().getBaseLevel();
+  cctx.setBaseLevel(baseLevel);
+  if (tu.cs->slice->getSPS()->getSpsRangeExtension().getPersistentRiceAdaptationEnabledFlag())
+  {
+    cctx.setUpdateHist(1);
+    unsigned riceStats = m_BinDecoder.getCtx().getGRAdaptStats((unsigned)compID);
+    TCoeff historyValue = (TCoeff)1 << riceStats;
+    cctx.setHistValue(historyValue);
+  }
   for (int subSetId = (cctx.scanPosLast() >> cctx.log2CGSize()); subSetId >= 0; subSetId--)
   {
     cctx.initSubblock(subSetId);
@@ -3117,11 +3132,19 @@ int CABACReader::last_sig_coeff( CoeffCodingContext& cctx, TransformUnit& tu, Co
   unsigned PosLastX = 0, PosLastY = 0;
   unsigned maxLastPosX = cctx.maxLastPosX();
   unsigned maxLastPosY = cctx.maxLastPosY();
+#if JVET_W0046_RLSCP
+  unsigned zoTbWdith  = std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, cctx.width());
+  unsigned zoTbHeight = std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, cctx.height());
+#endif
 
   if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 && compID == COMPONENT_Y )
   {
-    maxLastPosX = ( tu.blocks[ compID ].width  == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosX;
-    maxLastPosY = ( tu.blocks[ compID ].height == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosY;
+    maxLastPosX = (tu.blocks[compID].width == 32) ? g_groupIdx[15] : maxLastPosX;
+    maxLastPosY = (tu.blocks[compID].height == 32) ? g_groupIdx[15] : maxLastPosY;
+#if JVET_W0046_RLSCP
+    zoTbWdith  = (tu.blocks[compID].width == 32) ? 16 : zoTbWdith;
+    zoTbHeight = (tu.blocks[compID].height == 32) ? 16 : zoTbHeight;
+#endif
   }
 
   for( ; PosLastX < maxLastPosX; PosLastX++ )
@@ -3140,25 +3163,32 @@ int CABACReader::last_sig_coeff( CoeffCodingContext& cctx, TransformUnit& tu, Co
   }
   if( PosLastX > 3 )
   {
-    uint32_t uiTemp  = 0;
+    uint32_t temp    = 0;
     uint32_t uiCount = ( PosLastX - 2 ) >> 1;
     for ( int i = uiCount - 1; i >= 0; i-- )
     {
-      uiTemp += m_BinDecoder.decodeBinEP( ) << i;
+      temp += m_BinDecoder.decodeBinEP() << i;
     }
-    PosLastX = g_uiMinInGroup[ PosLastX ] + uiTemp;
+    PosLastX = g_minInGroup[PosLastX] + temp;
   }
   if( PosLastY > 3 )
   {
-    uint32_t uiTemp  = 0;
+    uint32_t temp    = 0;
     uint32_t uiCount = ( PosLastY - 2 ) >> 1;
     for ( int i = uiCount - 1; i >= 0; i-- )
     {
-      uiTemp += m_BinDecoder.decodeBinEP( ) << i;
+      temp += m_BinDecoder.decodeBinEP() << i;
     }
-    PosLastY = g_uiMinInGroup[ PosLastY ] + uiTemp;
+    PosLastY = g_minInGroup[PosLastY] + temp;
   }
 
+#if JVET_W0046_RLSCP
+  if (tu.cu->slice->getReverseLastSigCoeffFlag())
+  {
+    PosLastX = zoTbWdith - 1 - PosLastX;
+    PosLastY = zoTbHeight - 1 - PosLastY;
+  }
+#endif
   int blkPos;
   blkPos = PosLastX + (PosLastY * cctx.width());
 
@@ -3196,6 +3226,8 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co
   const bool  isLast      = cctx.isLast();
   int         firstSigPos = ( isLast ? cctx.scanPosLast() : cctx.maxSubPos() );
   int         nextSigPos  = firstSigPos;
+  int baseLevel = cctx.getBaseLevel();
+  bool updateHistory = cctx.getUpdateHist();
 
   //===== decode significant_coeffgroup_flag =====
   RExt__DECODER_DEBUG_BIT_STATISTICS_SET( ctype_group );
@@ -3281,8 +3313,8 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co
   unsigned ricePar = 0;
   for( int scanPos = firstSigPos; scanPos > firstPosMode2; scanPos-- )
   {
-    int       sumAll = cctx.templateAbsSum(scanPos, coeff, 4);
-    ricePar = g_auiGoRiceParsCoeff[sumAll];
+    ricePar = (cctx.*(cctx.deriveRiceRRC))(scanPos, coeff, baseLevel);
+
     TCoeff& tcoeff = coeff[ cctx.blockPos( scanPos ) ];
     if( tcoeff >= 4 )
     {
@@ -3290,20 +3322,33 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co
       int       rem     = m_BinDecoder.decodeRemAbsEP( ricePar, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() );
       DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, ricePar );
       tcoeff += (rem<<1);
+      if ((updateHistory) && (rem > 0))
+      {
+        unsigned &riceStats = m_BinDecoder.getCtx().getGRAdaptStats((unsigned)(cctx.compID()));
+        cctx.updateRiceStat(riceStats, rem, 1);
+        cctx.setUpdateHist(0);
+        updateHistory = 0;
+      }
     }
   }
 
   //===== coeff bypass ====
   for( int scanPos = firstPosMode2; scanPos >= minSubPos; scanPos-- )
   {
-    int       sumAll = cctx.templateAbsSum(scanPos, coeff, 0);
-    int       rice      = g_auiGoRiceParsCoeff                        [sumAll];
-    int       pos0      = g_auiGoRicePosCoeff0(state, rice);
+    int rice = (cctx.*(cctx.deriveRiceRRC))(scanPos, coeff, 0);
+    int       pos0   = g_goRicePosCoeff0(state, rice);
     RExt__DECODER_DEBUG_BIT_STATISTICS_SET(ctype_escs);
     int       rem       = m_BinDecoder.decodeRemAbsEP( rice, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() );
     DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, rice );
     TCoeff    tcoeff  = ( rem == pos0 ? 0 : rem < pos0 ? rem+1 : rem );
     state = ( stateTransTable >> ((state<<2)+((tcoeff&1)<<1)) ) & 3;
+    if ((updateHistory) && (rem > 0))
+    {
+      unsigned &riceStats = m_BinDecoder.getCtx().getGRAdaptStats((unsigned)(cctx.compID()));
+      cctx.updateRiceStat(riceStats, rem, 0);
+      cctx.setUpdateHist(0);
+      updateHistory = 0;
+    }
     if( tcoeff )
     {
       int        blkPos         = cctx.blockPos( scanPos );
@@ -3352,11 +3397,16 @@ void CABACReader::residual_codingTS( TransformUnit& tu, ComponentID compID )
   for( int subSetId = 0; subSetId <= ( cctx.maxNumCoeff() - 1 ) >> cctx.log2CGSize(); subSetId++ )
   {
     cctx.initSubblock         ( subSetId );
-    residual_coding_subblockTS( cctx, coeff );
+    int goRiceParam = 1;
+    if (tu.cu->slice->getSPS()->getSpsRangeExtension().getTSRCRicePresentFlag() && tu.mtsIdx[compID] == MTS_SKIP)
+    {
+      goRiceParam = goRiceParam + tu.cu->slice->get_tsrc_index();
+    }
+    residual_coding_subblockTS( cctx, coeff, goRiceParam);
   }
 }
 
-void CABACReader::residual_coding_subblockTS( CoeffCodingContext& cctx, TCoeff* coeff )
+void CABACReader::residual_coding_subblockTS( CoeffCodingContext& cctx, TCoeff* coeff, int riceParam)
 {
   // NOTE: All coefficients of the subblock must be set to zero before calling this function
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
@@ -3498,7 +3548,7 @@ void CABACReader::residual_coding_subblockTS( CoeffCodingContext& cctx, TCoeff*
     }
     if( tcoeff >= cutoffVal )
     {
-      int       rice = cctx.templateAbsSumTS( scanPos, coeff );
+      int       rice = riceParam;
       int       rem  = m_BinDecoder.decodeRemAbsEP( rice, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() );
       DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_rem_val() bin=%d ctx=%d sp=%d\n", rem, rice, scanPos );
       tcoeff += (scanPos <= lastScanPosPass1) ? (rem << 1) : rem;
diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h
index 85869f9d53441808bc7d84374122ac1eb3578bdf..92e81dea14576a689db87bd91f51c646aae941e9 100644
--- a/source/Lib/DecoderLib/CABACReader.h
+++ b/source/Lib/DecoderLib/CABACReader.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -138,7 +138,7 @@ public:
   int         last_sig_coeff            ( CoeffCodingContext&           cctx,   TransformUnit& tu, ComponentID   compID );
   void        residual_coding_subblock  ( CoeffCodingContext&           cctx,   TCoeff*         coeff, const int stateTransTable, int& state );
   void        residual_codingTS         ( TransformUnit&                tu,     ComponentID     compID );
-  void        residual_coding_subblockTS( CoeffCodingContext&           cctx,   TCoeff*         coeff  );
+  void        residual_coding_subblockTS( CoeffCodingContext&           cctx,   TCoeff*         coeff, int riceParam);
   void        joint_cb_cr               ( TransformUnit&                tu,     const int cbfMask );
 
 
diff --git a/source/Lib/DecoderLib/CMakeLists.txt b/source/Lib/DecoderLib/CMakeLists.txt
index 23a3659a11518bf298d19a5155b0df1830d55398..9fdcfb250114c78f1f24332aa228ac0b3b282285 100644
--- a/source/Lib/DecoderLib/CMakeLists.txt
+++ b/source/Lib/DecoderLib/CMakeLists.txt
@@ -28,28 +28,8 @@ if( SET_ENABLE_TRACING )
   endif()
 endif()
 
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-else()
-  target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
-
 target_include_directories( ${LIB_NAME} PUBLIC . )
-target_link_libraries( ${LIB_NAME} CommonLib Threads::Threads )
+target_link_libraries( ${LIB_NAME} CommonLib )
 
 # example: place header files in different folders
 source_group( "Natvis Files" FILES ${NATVIS_FILES} )
diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp
index eeec34743555978b0171a48d8c77a5bb24d0f643..096e02e429b8cd9a6938f24d57ebc7db580457b4 100644
--- a/source/Lib/DecoderLib/DecCu.cpp
+++ b/source/Lib/DecoderLib/DecCu.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -612,26 +612,6 @@ void DecCu::xIntraRecACTQT(CodingUnit &cu)
   }
 }
 
-/** Function for filling the PCM buffer of a CU using its reconstructed sample array
-* \param pCU   pointer to current CU
-* \param depth CU Depth
-*/
-void DecCu::xFillPCMBuffer(CodingUnit &cu)
-{
-  for( auto &currTU : CU::traverseTUs( cu ) )
-  {
-    for (const CompArea &area : currTU.blocks)
-    {
-      if( !area.valid() ) continue;;
-
-      CPelBuf source      = cu.cs->getRecoBuf(area);
-       PelBuf destination = currTU.getPcmbuf(area.compID);
-
-      destination.copyFrom(source);
-    }
-  }
-}
-
 #include "CommonLib/dtrace_buffer.h"
 
 void DecCu::xReconInter(CodingUnit &cu)
diff --git a/source/Lib/DecoderLib/DecCu.h b/source/Lib/DecoderLib/DecCu.h
index 8789afadb46e306d9c6cadc7108d2fd949a7d613..4b8ce5b49907175039d56c38427d16968fd478a4 100644
--- a/source/Lib/DecoderLib/DecCu.h
+++ b/source/Lib/DecoderLib/DecCu.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -81,7 +81,6 @@ protected:
   void xReconInter        ( CodingUnit&      cu );
   void xDecodeInterTexture( CodingUnit&      cu );
   void xReconIntraQT      ( CodingUnit&      cu );
-  void xFillPCMBuffer     ( CodingUnit&      cu );
 
   void xIntraRecBlk       ( TransformUnit&   tu, const ComponentID compID );
   void xIntraRecACTBlk(TransformUnit&   tu);
diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp
index 17ebf60e2b6a3b7ea79e1f1d490146c89488fc1c..1347b946c11bea680c009af19d52d77be915c05b 100644
--- a/source/Lib/DecoderLib/DecLib.cpp
+++ b/source/Lib/DecoderLib/DecLib.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -212,17 +212,17 @@ bool tryDecodePicture( Picture* pcEncPic, const int expectedPoc, const std::stri
 
                   for( int i = 0; i < pic->slices.size(); i++ )
                   {
-                    pcEncPic->slices[i]->setTileGroupNumAps(pic->slices[i]->getTileGroupNumAps());
-                    pcEncPic->slices[i]->setAlfAPSs(pic->slices[i]->getTileGroupApsIdLuma());
+                    pcEncPic->slices[i]->setNumAlfApsIdsLuma(pic->slices[i]->getNumAlfApsIdsLuma());
+                    pcEncPic->slices[i]->setAlfApsIdsLuma(pic->slices[i]->getAlfApsIdsLuma());
                     pcEncPic->slices[i]->setAlfAPSs(pic->slices[i]->getAlfAPSs());
-                    pcEncPic->slices[i]->setTileGroupApsIdChroma(pic->slices[i]->getTileGroupApsIdChroma());
-                    pcEncPic->slices[i]->setTileGroupAlfEnabledFlag(COMPONENT_Y,  pic->slices[i]->getTileGroupAlfEnabledFlag(COMPONENT_Y));
-                    pcEncPic->slices[i]->setTileGroupAlfEnabledFlag(COMPONENT_Cb, pic->slices[i]->getTileGroupAlfEnabledFlag(COMPONENT_Cb));
-                    pcEncPic->slices[i]->setTileGroupAlfEnabledFlag(COMPONENT_Cr, pic->slices[i]->getTileGroupAlfEnabledFlag(COMPONENT_Cr));
-                    pcEncPic->slices[i]->setTileGroupCcAlfCbApsId(pic->slices[i]->getTileGroupCcAlfCbApsId());
-                    pcEncPic->slices[i]->setTileGroupCcAlfCbEnabledFlag(pic->slices[i]->getTileGroupCcAlfCbEnabledFlag());
-                    pcEncPic->slices[i]->setTileGroupCcAlfCrApsId(pic->slices[i]->getTileGroupCcAlfCrApsId());
-                    pcEncPic->slices[i]->setTileGroupCcAlfCrEnabledFlag(pic->slices[i]->getTileGroupCcAlfCrEnabledFlag());
+                    pcEncPic->slices[i]->setAlfApsIdChroma(pic->slices[i]->getAlfApsIdChroma());
+                    pcEncPic->slices[i]->setAlfEnabledFlag(COMPONENT_Y,  pic->slices[i]->getAlfEnabledFlag(COMPONENT_Y));
+                    pcEncPic->slices[i]->setAlfEnabledFlag(COMPONENT_Cb, pic->slices[i]->getAlfEnabledFlag(COMPONENT_Cb));
+                    pcEncPic->slices[i]->setAlfEnabledFlag(COMPONENT_Cr, pic->slices[i]->getAlfEnabledFlag(COMPONENT_Cr));
+                    pcEncPic->slices[i]->setCcAlfCbApsId(pic->slices[i]->getCcAlfCbApsId());
+                    pcEncPic->slices[i]->setCcAlfCbEnabledFlag(pic->slices[i]->getCcAlfCbEnabledFlag());
+                    pcEncPic->slices[i]->setCcAlfCrApsId(pic->slices[i]->getCcAlfCrApsId());
+                    pcEncPic->slices[i]->setCcAlfCrEnabledFlag(pic->slices[i]->getCcAlfCrEnabledFlag());
                   }
                 }
 
@@ -331,6 +331,8 @@ bool tryDecodePicture( Picture* pcEncPic, const int expectedPoc, const std::stri
           }
 
           pcDecLib->updateAssociatedIRAP();
+          pcDecLib->updatePrevGDRInSameLayer();
+          pcDecLib->updatePrevIRAPAndGDRSubpic();
           // LMCS APS will be assigned later in LMCS initialization step
           pcEncPic->cs->picHeader->setLmcsAPS( nullptr );
           if( bitstreamFile )
@@ -404,6 +406,11 @@ DecLib::DecLib()
   , m_parameterSetManager()
   , m_apcSlicePilot(NULL)
   , m_SEIs()
+  , m_sdiSEIInFirstAU(NULL)
+  , m_maiSEIInFirstAU(NULL)
+#if JVET_W0078_MVP_SEI 
+  , m_mvpSEIInFirstAU(NULL)
+#endif
   , m_cIntraPred()
   , m_cInterPred()
   , m_cTrQuant()
@@ -412,7 +419,7 @@ DecLib::DecLib()
   , m_cCuDecoder()
   , m_HLSReader()
   , m_seiReader()
-  , m_cLoopFilter()
+  , m_deblockingFilter()
   , m_cSAO()
   , m_cReshaper()
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
@@ -424,11 +431,13 @@ DecLib::DecLib()
   , m_prevPicPOC(MAX_INT)
   , m_prevTid0POC(0)
   , m_bFirstSliceInPicture(true)
-  , m_firstSliceInSequence{ true }
+  , m_firstPictureInSequence(true)
+  , m_colourTranfParams()
   , m_firstSliceInBitstream(true)
   , m_isFirstAuInCvs( true )
   , m_prevSliceSkipped(false)
-  , m_skippedPOC(0)
+  , m_skippedPOC(MAX_INT)
+  , m_skippedLayerID(MAX_INT)
   , m_lastPOCNoOutputPriorPics(-1)
   , m_isNoOutputPriorPics(false)
   , m_lastNoOutputBeforeRecoveryFlag{ false }
@@ -444,13 +453,11 @@ DecLib::DecLib()
   , m_prefixSEINALUs()
   , m_debugPOC( -1 )
   , m_debugCTU( -1 )
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   , m_opi( nullptr )
   , m_mTidExternalSet(false)
   , m_mTidOpiSet(false)
   , m_tOlsIdxTidExternalSet(false)
   , m_tOlsIdxTidOpiSet(false)
-#endif
   , m_vps( nullptr )
   , m_maxDecSubPicIdx(0)
   , m_maxDecSliceAddrInSubPic(-1)
@@ -465,6 +472,8 @@ DecLib::DecLib()
   memset(m_prevEOS, false, sizeof(m_prevEOS));
   memset(m_accessUnitEos, false, sizeof(m_accessUnitEos));
   std::fill_n(m_prevGDRInSameLayerPOC, MAX_VPS_LAYERS, -MAX_INT);
+  std::fill_n(m_prevGDRInSameLayerRecoveryPOC, MAX_VPS_LAYERS, -MAX_INT);
+  std::fill_n(m_firstSliceInSequence, MAX_VPS_LAYERS, true);
   std::fill_n(m_pocCRA, MAX_VPS_LAYERS, -MAX_INT);
 #if JVET_S0176_ITEM5
   std::fill_n(m_accessUnitSpsNumSubpic, MAX_VPS_LAYERS, 1);
@@ -486,7 +495,23 @@ DecLib::~DecLib()
     delete m_prefixSEINALUs.front();
     m_prefixSEINALUs.pop_front();
   }
-
+  if (m_sdiSEIInFirstAU != NULL)
+  {
+    delete m_sdiSEIInFirstAU;
+  }
+  m_sdiSEIInFirstAU = NULL;
+  if (m_maiSEIInFirstAU != NULL)
+  {
+    delete m_maiSEIInFirstAU;
+  }
+  m_maiSEIInFirstAU = NULL;
+#if JVET_W0078_MVP_SEI 
+  if (m_mvpSEIInFirstAU != NULL)
+  {
+    delete m_mvpSEIInFirstAU;
+  }
+  m_mvpSEIInFirstAU = NULL;
+#endif
 }
 
 void DecLib::create()
@@ -539,7 +564,7 @@ void DecLib::deletePicBuffer ( )
   }
   m_cALF.destroy();
   m_cSAO.destroy();
-  m_cLoopFilter.destroy();
+  m_deblockingFilter.destroy();
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   m_cacheModel.reportSequence( );
   m_cacheModel.destroy( );
@@ -601,6 +626,13 @@ Picture* DecLib::xGetNewPicBuffer( const SPS &sps, const PPS &pps, const uint32_
       pcPic->destroy();
       pcPic->create( sps.getChromaFormatIdc(), Size( pps.getPicWidthInLumaSamples(), pps.getPicHeightInLumaSamples() ), sps.getMaxCUWidth(), sps.getMaxCUWidth() + 16, true, layerId );
     }
+#if GDR_ENABLED // picHeader should be deleted in case pcPic slot gets reused
+    if (pcPic && pcPic->cs && pcPic->cs->picHeader)
+    {          
+      delete pcPic->cs->picHeader;
+      pcPic->cs->picHeader = nullptr;    
+    }
+#endif
   }
 
   pcPic->setBorderExtension( false );
@@ -643,7 +675,7 @@ void DecLib::executeLoopFilters()
       m_cSAO.setReshaper(&m_cReshaper);
   }
   // deblocking filter
-  m_cLoopFilter.loopFilterPic( cs );
+  m_deblockingFilter.deblockingFilterPic( cs );
   CS::setRefinedMotionField(cs);
   if( cs.sps->getSAOEnabledFlag() )
   {
@@ -691,6 +723,34 @@ void DecLib::finishPictureLight(int& poc, PicList*& rpcListPic )
   Slice*  pcSlice = m_pcPic->cs->slice;
 
   m_pcPic->neededForOutput = (pcSlice->getPicHeader()->getPicOutputFlag() ? true : false);
+
+  const VPS *vps = pcSlice->getVPS();
+  if (vps != nullptr)
+  {
+    if (!vps->getEachLayerIsAnOlsFlag())
+    {
+      const int layerId        = pcSlice->getNalUnitLayerId();
+      const int generalLayerId = vps->getGeneralLayerIdx(layerId);
+      bool      layerIsOutput  = true;
+
+      if (vps->getOlsModeIdc() == 0)
+      {
+        layerIsOutput = generalLayerId == vps->m_targetOlsIdx;
+      }
+      else if (vps->getOlsModeIdc() == 1)
+      {
+        layerIsOutput = generalLayerId <= vps->m_targetOlsIdx;
+      }
+      else if (vps->getOlsModeIdc() == 2)
+      {
+        layerIsOutput = vps->getOlsOutputLayerFlag(vps->m_targetOlsIdx, generalLayerId);
+      }
+      if (!layerIsOutput)
+      {
+        m_pcPic->neededForOutput = false;
+      }
+    }
+  }
   m_pcPic->reconstructed = true;
 
   Slice::sortPicList( m_cListPic ); // sorting for application output
@@ -717,6 +777,7 @@ void DecLib::finishPicture(int &poc, PicList *&rpcListPic, MsgLevel msgl, bool a
   }
 
   if (pcSlice->isDRAP()) c = 'D';
+  if (pcSlice->getEdrapRapId() > 0) c = 'E';
 
   //-- For time output for each slice
   msg( msgl, "POC %4d LId: %2d TId: %1d ( %s, %c-SLICE, QP%3d ) ", pcSlice->getPOC(), pcSlice->getPic()->layerId,
@@ -772,6 +833,24 @@ void DecLib::finishPicture(int &poc, PicList *&rpcListPic, MsgLevel msgl, bool a
       msg( WARNING, "Warning: Got multiple decoded picture hash SEI messages. Using first.");
     }
     m_numberOfChecksumErrorsDetected += calcAndPrintHashStatus(((const Picture*) m_pcPic)->getRecoBuf(), hash, pcSlice->getSPS()->getBitDepths(), msgl);
+
+    SEIMessages scalableNestingSeis = getSeisByType(m_pcPic->SEIs, SEI::SCALABLE_NESTING );
+    for (auto seiIt : scalableNestingSeis)
+    {
+      SEIScalableNesting *nestingSei = dynamic_cast<SEIScalableNesting*>(seiIt);
+      if (nestingSei->m_snSubpicFlag)
+      {
+        uint32_t subpicId = nestingSei->m_snSubpicId.front();
+        SEIMessages nestedPictureHashes = getSeisByType(nestingSei->m_nestedSEIs, SEI::DECODED_PICTURE_HASH);
+        for (auto decPicHash : nestedPictureHashes)
+        {
+          const SubPic& subpic = pcSlice->getPPS()->getSubPic(subpicId);
+          const UnitArea area = UnitArea(pcSlice->getSPS()->getChromaFormatIdc(), Area(subpic.getSubPicLeft(), subpic.getSubPicTop(), subpic.getSubPicWidthInLumaSample(), subpic.getSubPicHeightInLumaSample()));
+          PelUnitBuf recoBuf = m_pcPic->cs->getRecoBuf(area);
+          m_numberOfChecksumErrorsDetected += calcAndPrintHashStatus(recoBuf, dynamic_cast<SEIDecodedPictureHash*>(decPicHash), pcSlice->getSPS()->getBitDepths(), msgl);
+        }
+      }
+    }
   }
 
   msg( msgl, "\n");
@@ -792,7 +871,7 @@ void DecLib::finishPicture(int &poc, PicList *&rpcListPic, MsgLevel msgl, bool a
     else if (pcSlice->getPPS()->getMixedNaluTypesInPicFlag())
     {
       bool isRaslPic = true;
-      for (int i = 0; isRaslPic && i < m_pcPic->numSlices; i++) 
+      for (int i = 0; isRaslPic && i < m_pcPic->numSlices; i++)
       {
         if (!(pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RADL))
         {
@@ -805,8 +884,38 @@ void DecLib::finishPicture(int &poc, PicList *&rpcListPic, MsgLevel msgl, bool a
       }
     }
   }
+
+  const VPS *vps = pcSlice->getVPS();
+  if (vps != nullptr)
+  {
+    if (!vps->getEachLayerIsAnOlsFlag())
+    {
+      const int layerId        = pcSlice->getNalUnitLayerId();
+      const int generalLayerId = vps->getGeneralLayerIdx(layerId);
+      bool      layerIsOutput  = true;
+
+      if (vps->getOlsModeIdc() == 0)
+      {
+        layerIsOutput = generalLayerId == vps->m_targetOlsIdx;
+      }
+      else if (vps->getOlsModeIdc() == 1)
+      {
+        layerIsOutput = generalLayerId <= vps->m_targetOlsIdx;
+      }
+      else if (vps->getOlsModeIdc() == 2)
+      {
+        layerIsOutput = vps->getOlsOutputLayerFlag(vps->m_targetOlsIdx, generalLayerId);
+      }
+      if (!layerIsOutput)
+      {
+        m_pcPic->neededForOutput = false;
+      }
+    }
+  }
   m_pcPic->reconstructed = true;
 
+  // process buffered suffix APS NALUs
+  processSuffixApsNalus();
 
   Slice::sortPicList( m_cListPic ); // sorting for application output
   poc                 = pcSlice->getPOC();
@@ -901,15 +1010,25 @@ void DecLib::xCreateLostPicture( int iLostPoc, const int layerId )
 
 void  DecLib::xCreateUnavailablePicture( const PPS *pps, const int iUnavailablePoc, const bool longTermFlag, const int temporalId, const int layerId, const bool interLayerRefPicFlag )
 {
-  msg(INFO, "\ninserting unavailable poc : %d\n", iUnavailablePoc);
-  Picture* cFillPic = xGetNewPicBuffer( *( m_parameterSetManager.getFirstSPS() ), *( m_parameterSetManager.getFirstPPS() ), 0, layerId );
+  msg(INFO, "Note: Inserting unavailable POC : %d\n", iUnavailablePoc);
+  auto const sps = m_parameterSetManager.getSPS(pps->getSPSId());
+  Picture* cFillPic = xGetNewPicBuffer( *sps, *pps, 0, layerId );
 
-  CHECK(!cFillPic->slices.size(), "No slices in picture");
+  cFillPic->cs = new CodingStructure( g_globalUnitCache.cuCache, g_globalUnitCache.puCache, g_globalUnitCache.tuCache );
+  cFillPic->cs->sps = sps;
+  cFillPic->cs->pps = pps;
+  cFillPic->cs->vps = m_parameterSetManager.getVPS(sps->getVPSId());
+  cFillPic->cs->create(cFillPic->cs->sps->getChromaFormatIdc(), Area(0, 0, cFillPic->cs->pps->getPicWidthInLumaSamples(), cFillPic->cs->pps->getPicHeightInLumaSamples()), true, (bool)(cFillPic->cs->sps->getPLTMode()));
+  cFillPic->allocateNewSlice();
 
   cFillPic->slices[0]->initSlice();
 
-  uint32_t yFill = 1 << (m_parameterSetManager.getFirstSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 1);
-  uint32_t cFill = 1 << (m_parameterSetManager.getFirstSPS()->getBitDepth(CHANNEL_TYPE_CHROMA) - 1);
+  cFillPic->setDecodingOrderNumber(0);
+  cFillPic->subLayerNonReferencePictureDueToSTSA = false;
+  cFillPic->unscaledPic = cFillPic;
+
+  uint32_t yFill = 1 << (sps->getBitDepth(CHANNEL_TYPE_LUMA) - 1);
+  uint32_t cFill = 1 << (sps->getBitDepth(CHANNEL_TYPE_CHROMA) - 1);
   cFillPic->getRecoBuf().Y().fill(yFill);
   cFillPic->getRecoBuf().Cb().fill(cFill);
   cFillPic->getRecoBuf().Cr().fill(cFill);
@@ -919,7 +1038,12 @@ void  DecLib::xCreateUnavailablePicture( const PPS *pps, const int iUnavailableP
   cFillPic->interLayerRefPicFlag = interLayerRefPicFlag;
   cFillPic->longTerm = longTermFlag;
   cFillPic->slices[0]->setPOC(iUnavailablePoc);
-  xUpdatePreviousTid0POC(cFillPic->slices[0]);
+  cFillPic->poc = iUnavailablePoc;
+  if( (cFillPic->slices[0]->getTLayer() == 0) && (cFillPic->slices[0]->getNalUnitType() != NAL_UNIT_CODED_SLICE_RASL) && (cFillPic->slices[0]->getNalUnitType() != NAL_UNIT_CODED_SLICE_RADL) )
+  {
+    m_prevTid0POC = cFillPic->slices[0]->getPOC();
+  }
+
   cFillPic->reconstructed = true;
   cFillPic->neededForOutput = false;
   // picture header is not derived for generated reference picture
@@ -958,6 +1082,11 @@ void DecLib::checkLayerIdIncludedInCvss()
     for (auto pic = m_accessUnitPicInfo.begin(); pic != m_accessUnitPicInfo.end(); pic++)
     {
       bool layerIdFind;
+      if ( m_firstAccessUnitPicInfo.size() == 0 )
+      {
+        msg( NOTICE, "Note: checkIncludedInFirstAu(), m_firstAccessUnitPicInfo.size() is 0.\n");
+        continue;
+      }
       for (auto picFirst = m_firstAccessUnitPicInfo.begin(); picFirst != m_firstAccessUnitPicInfo.end(); picFirst++)
       {
         layerIdFind = pic->m_nuhLayerId == picFirst->m_nuhLayerId ? true : false;
@@ -1085,6 +1214,13 @@ void DecLib::checkSEIInAccessUnit()
   int olsIdxIncludeAllLayes = -1;
   bool isNonNestedSliFound = false;
 #endif
+
+  bool bSdiPresentInAu = false;
+#if JVET_W0078_MVP_SEI 
+  bool bAuxSEIsBeforeSdiSEIPresent[4] = { false, false, false, false };
+#else
+  bool bAuxSEIsBeforeSdiSEIPresent[3] = { false, false, false };
+#endif
   for (auto &sei : m_accessUnitSeiPayLoadTypes)
   {
     enum NalUnitType         naluType = std::get<0>(sei);
@@ -1124,7 +1260,34 @@ void DecLib::checkSEIInAccessUnit()
       }
       CHECK(!olsIncludeAllLayersFind, "When there is no OLS that includes all layers in the current CVS in the entire bitstream, there shall be no non-scalable-nested SEI message with payloadType equal to 0 (BP), 1 (PT), 130 (DUI), or 203 (SLI)");
     }
+    if (payloadType == SEI::SCALABILITY_DIMENSION_INFO)
+    {
+      bSdiPresentInAu = true;
+    }
+    else if (payloadType == SEI::MULTIVIEW_ACQUISITION_INFO && !bSdiPresentInAu)
+    {
+      bAuxSEIsBeforeSdiSEIPresent[0] = true;
+    }
+    else if (payloadType == SEI::ALPHA_CHANNEL_INFO && !bSdiPresentInAu)
+    {
+      bAuxSEIsBeforeSdiSEIPresent[1] = true;
+    }
+    else if (payloadType == SEI::DEPTH_REPRESENTATION_INFO && !bSdiPresentInAu)
+    {
+      bAuxSEIsBeforeSdiSEIPresent[2] = true;
+    }
+#if JVET_W0078_MVP_SEI 
+    else if (payloadType == SEI::MULTIVIEW_VIEW_POSITION && !bSdiPresentInAu)
+    {
+      bAuxSEIsBeforeSdiSEIPresent[3] = true;
+    }
+#endif
   }
+
+  CHECK(bSdiPresentInAu && bAuxSEIsBeforeSdiSEIPresent[0], "When an AU contains both an SDI SEI message and an MAI SEI message, the SDI SEI message shall precede the MAI SEI message in decoding order.");
+  CHECK(bSdiPresentInAu && bAuxSEIsBeforeSdiSEIPresent[1], "When an AU contains both an SDI SEI message with sdi_aux_id[i] equal to 1 for at least one value of i and an ACI SEI message, the SDI SEI message shall precede the ACI SEI message in decoding order.");
+  CHECK(bSdiPresentInAu && bAuxSEIsBeforeSdiSEIPresent[2], "When an AU contains both an SDI SEI message with sdi_aux_id[i] equal to 2 for at least one value of i and a DRI SEI message, the SDI SEI message shall precede the DRI SEI message in decoding order.");
+
 #if JVET_S0176_ITEM5
   if (m_isFirstAuInCvs)
   {
@@ -1175,7 +1338,7 @@ void DecLib::checkMultiSubpicNum(int olsIdx)
  - Count the number of identical SEI messages in the current picture
  */
 void DecLib::checkSeiInPictureUnit()
-{  
+{
   std::vector<std::tuple<int, uint32_t, uint8_t*>> seiList;
 
   // payload types subject to constrained SEI repetition
@@ -1187,7 +1350,7 @@ void DecLib::checkSeiInPictureUnit()
     InputBitstream bs = sei->getBitstream();
 
     do
-    {  
+    {
       int payloadType = 0;
       uint32_t val = 0;
 
@@ -1203,7 +1366,7 @@ void DecLib::checkSeiInPictureUnit()
         bs.readByte(val);
         payloadSize += val;
       } while (val==0xFF);
-    
+
       uint8_t *payload = new uint8_t[payloadSize];
       for (uint32_t i = 0; i < payloadSize; i++)
       {
@@ -1218,7 +1381,7 @@ void DecLib::checkSeiInPictureUnit()
   // count repeated messages in list
   for (uint32_t i = 0; i < seiList.size(); i++)
   {
-    int      k, count = 1;      
+    int      k, count = 1;
     int      payloadType1 = std::get<0>(seiList[i]);
     uint32_t payloadSize1 = std::get<1>(seiList[i]);
     uint8_t  *payload1    = std::get<2>(seiList[i]);
@@ -1242,7 +1405,7 @@ void DecLib::checkSeiInPictureUnit()
       int      payloadType2 = std::get<0>(seiList[j]);
       uint32_t payloadSize2 = std::get<1>(seiList[j]);
       uint8_t  *payload2    = std::get<2>(seiList[j]);
-      
+
       // check for identical SEI type, size, and payload
       if(payloadType1 == payloadType2 && payloadSize1 == payloadSize2)
       {
@@ -1251,7 +1414,7 @@ void DecLib::checkSeiInPictureUnit()
           count++;
         }
       }
-    }    
+    }
     CHECK(count > 4, "There shall be less than or equal to 4 identical sei_payload( ) syntax structures within a picture unit.");
   }
 
@@ -1267,7 +1430,7 @@ void DecLib::checkSeiInPictureUnit()
 /**
  - Reset list of SEI NAL units from the current picture
  */
-void DecLib::resetPictureSeiNalus()   
+void DecLib::resetPictureSeiNalus()
 {
   while (!m_pictureSeiNalus.empty())
   {
@@ -1276,6 +1439,19 @@ void DecLib::resetPictureSeiNalus()
   }
 }
 
+/**
+ - Decode each buffered suffix APS NALU in queue order, then free and dequeue it
+ */
+void DecLib::processSuffixApsNalus()
+{
+  for (; !m_suffixApsNalus.empty(); m_suffixApsNalus.pop_front())
+  {
+    auto suffixApsNalu = m_suffixApsNalus.front();   // entry stays queued until decoded and freed
+    xDecodeAPS(*suffixApsNalu);
+    delete suffixApsNalu;
+  }
+}
+
 /**
  - Determine if the first VCL NAL unit of a picture is also the first VCL NAL of an Access Unit
  */
@@ -1343,11 +1519,11 @@ void activateAPS(PicHeader* picHeader, Slice* pSlice, ParameterSetManager& param
 {
   const SPS *sps = parameterSetManager.getSPS(picHeader->getSPSId());
   //luma APSs
-  if (pSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y))
+  if (pSlice->getAlfEnabledFlag(COMPONENT_Y))
   {
-    for (int i = 0; i < pSlice->getTileGroupApsIdLuma().size(); i++)
+    for (int i = 0; i < pSlice->getAlfApsIdsLuma().size(); i++)
     {
-      int apsId = pSlice->getTileGroupApsIdLuma()[i];
+      int apsId = pSlice->getAlfApsIdsLuma()[i];
       APS* aps = parameterSetManager.getAPS(apsId, ALF_APS);
 
       if (aps)
@@ -1367,10 +1543,10 @@ void activateAPS(PicHeader* picHeader, Slice* pSlice, ParameterSetManager& param
       }
     }
   }
-  if (pSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cb)||pSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cr) )
+  if (pSlice->getAlfEnabledFlag(COMPONENT_Cb)||pSlice->getAlfEnabledFlag(COMPONENT_Cr) )
   {
     //chroma APS
-    int apsId = pSlice->getTileGroupApsIdChroma();
+    int apsId = pSlice->getAlfApsIdChroma();
     APS* aps = parameterSetManager.getAPS(apsId, ALF_APS);
     if (aps)
     {
@@ -1397,9 +1573,9 @@ void activateAPS(PicHeader* picHeader, Slice* pSlice, ParameterSetManager& param
   memset( filterParam.ccAlfFilterIdxEnabled[COMPONENT_Cb - 1], false, sizeof(filterParam.ccAlfFilterIdxEnabled[COMPONENT_Cb - 1]) );
   memset( filterParam.ccAlfFilterIdxEnabled[COMPONENT_Cr - 1], false, sizeof(filterParam.ccAlfFilterIdxEnabled[COMPONENT_Cr - 1]) );
 
-  if(pSlice->getTileGroupCcAlfCbEnabledFlag())
+  if(pSlice->getCcAlfCbEnabledFlag())
   {
-    int apsId = pSlice->getTileGroupCcAlfCbApsId();
+    int apsId = pSlice->getCcAlfCbApsId();
     APS *aps = parameterSetManager.getAPS(apsId, ALF_APS);
     if(aps)
     {
@@ -1425,9 +1601,9 @@ void activateAPS(PicHeader* picHeader, Slice* pSlice, ParameterSetManager& param
     }
   }
 
-  if(pSlice->getTileGroupCcAlfCrEnabledFlag())
+  if(pSlice->getCcAlfCrEnabledFlag())
   {
-    int apsId = pSlice->getTileGroupCcAlfCrApsId();
+    int apsId = pSlice->getCcAlfCrApsId();
     APS *aps = parameterSetManager.getAPS(apsId, ALF_APS);
     if(aps)
     {
@@ -1567,7 +1743,7 @@ void DecLib::xActivateParameterSets( const InputNALUnit nalu )
       //No VPS in bitstream: set defaults values of variables in VPS to the ones signalled in SPS
       m_vps->setMaxSubLayers( sps->getMaxTLayers() );
       m_vps->setLayerId( 0, sps->getLayerId() );
-      m_vps->deriveOutputLayerSets(); 
+      m_vps->deriveOutputLayerSets();
     }
     else
     {
@@ -1597,11 +1773,21 @@ void DecLib::xActivateParameterSets( const InputNALUnit nalu )
     }
 #endif
 
+    m_apcSlicePilot->applyReferencePictureListBasedMarking( m_cListPic, m_apcSlicePilot->getRPL0(), m_apcSlicePilot->getRPL1(), layerId, *pps);
+
     //  Get a new picture buffer. This will also set up m_pcPic, and therefore give us a SPS and PPS pointer that we can use.
     m_pcPic = xGetNewPicBuffer( *sps, *pps, m_apcSlicePilot->getTLayer(), layerId );
 
-    m_apcSlicePilot->applyReferencePictureListBasedMarking( m_cListPic, m_apcSlicePilot->getRPL0(), m_apcSlicePilot->getRPL1(), layerId, *pps);
+#if GDR_ENABLED
+    PicHeader *picHeader = new PicHeader;
+    *picHeader = m_picHeader;
+    m_apcSlicePilot->setPicHeader(picHeader);
+    m_pcPic->finalInit(vps, *sps, *pps, picHeader, apss, lmcsAPS, scalinglistAPS);
+#else
     m_pcPic->finalInit( vps, *sps, *pps, &m_picHeader, apss, lmcsAPS, scalinglistAPS );
+#endif
+    m_pcPic->createColourTransfProcessor(m_firstPictureInSequence, &m_colourTranfParams, &m_invColourTransfBuf, pps->getPicWidthInLumaSamples(), pps->getPicHeightInLumaSamples(), sps->getChromaFormatIdc(), sps->getBitDepth(CHANNEL_TYPE_LUMA));
+    m_firstPictureInSequence = false;
     m_pcPic->createTempBuffers( m_pcPic->cs->pps->pcv->maxCUWidth );
     m_pcPic->cs->createCoeffs((bool)m_pcPic->cs->sps->getPLTMode());
 
@@ -1638,7 +1824,7 @@ void DecLib::xActivateParameterSets( const InputNALUnit nalu )
                    sps->getMaxCUWidth(), sps->getMaxCUHeight(),
                    maxDepth,
                    log2SaoOffsetScaleLuma, log2SaoOffsetScaleChroma );
-    m_cLoopFilter.create(maxDepth);
+    m_deblockingFilter.create(maxDepth);
     m_cIntraPred.init( sps->getChromaFormatIdc(), sps->getBitDepth( CHANNEL_TYPE_LUMA ) );
     m_cInterPred.init( &m_cRdCost, sps->getChromaFormatIdc(), sps->getMaxCUHeight() );
     if (sps->getUseLmcs())
@@ -1782,7 +1968,7 @@ void DecLib::xCheckParameterSetConstraints(const int layerId)
   const SPS *sps = slice->getSPS();
   const PPS *pps = slice->getPPS();
   const VPS *vps = slice->getVPS();
-  
+
   if (sps->getVPSId() && (vps != nullptr))
   {
     if ((layerId == vps->getLayerId(0)) && m_firstSliceInSequence[layerId])
@@ -1837,7 +2023,7 @@ void DecLib::xCheckParameterSetConstraints(const int layerId)
   {
     CHECK(sps->getSubPicInfoPresentFlag() != 0, "When sps_res_change_in_clvs_allowed_flag is equal to 1, the value of sps_subpic_info_present_flag shall be equal to 0.");
   }
-  CHECK(sps->getResChangeInClvsEnabledFlag() && sps->getVirtualBoundariesEnabledFlag(), "when the value of sps_res_change_in_clvs_allowed_flag is equal to 1, the value of sps_virtual_boundaries_present_flag shall be equal to 0");
+  CHECK(sps->getResChangeInClvsEnabledFlag() && sps->getVirtualBoundariesPresentFlag(), "when the value of sps_res_change_in_clvs_allowed_flag is equal to 1, the value of sps_virtual_boundaries_present_flag shall be equal to 0");
 
   if( sps->getCTUSize() + 2 * ( 1 << sps->getLog2MinCodingBlockSize() ) > pps->getPicWidthInLumaSamples() )
   {
@@ -1881,6 +2067,14 @@ void DecLib::xCheckParameterSetConstraints(const int layerId)
         CHECK(curLayerChromaFormat != refLayerChromaFormat, "The chroma formats of the current layer and the reference layer are different");
         int refLayerBitDepth = m_layerBitDepth[i];
         CHECK(curLayerBitDepth != refLayerBitDepth, "The bit-depth of the current layer and the reference layer are different");
+        if (vps->getMaxTidIlRefPicsPlus1(curLayerIdx, i) == 0 && pps->getMixedNaluTypesInPicFlag())
+        {
+          for (int j = 0; j < m_uiSliceSegmentIdx; j++)
+          {
+            Slice* preSlice = m_pcPic->slices[j];
+            CHECK( (preSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || preSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP || preSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA), "mixed IRAP and non-IRAP NAL units in the picture when sps_video_parameter_set_id is greater than 0 and vps_max_tid_il_ref_pics_plus1[i][j] is equal to 0");
+          }
+        }
       }
     }
   }
@@ -1914,12 +2108,10 @@ void DecLib::xCheckParameterSetConstraints(const int layerId)
     CHECK(pps->getMixedNaluTypesInPicFlag(), "When gci_no_mixed_nalu_types_in_pic_constraint_flag equal to 1, the value of pps_mixed_nalu_types_in_pic_flag shall be equal to 0")
   }
 
-#if JVET_R0266_GCI
   if (sps->getProfileTierLevel()->getConstraintInfo()->getNoGdrConstraintFlag())
   {
     CHECK(sps->getGDREnabledFlag(), "gci_no_gdr_constraint_flag equal to 1 specifies that sps_gdr_enabled_flag for all pictures in OlsInScope shall be equal to 0");
   }
-#endif
 
   if (sps->getProfileTierLevel()->getConstraintInfo()->getNoRectSliceConstraintFlag())
   {
@@ -1988,7 +2180,7 @@ void DecLib::xCheckParameterSetConstraints(const int layerId)
   }
 
   if( sps->getVPSId() && vps->m_numLayersInOls[vps->m_targetOlsIdx] == 1 )
-  {    
+  {
     CHECK( !sps->getPtlDpbHrdParamsPresentFlag(), "When sps_video_parameter_set_id is greater than 0 and there is an OLS that contains only one layer with nuh_layer_id equal to the nuh_layer_id of the SPS, the value of sps_ptl_dpb_hrd_params_present_flag shall be equal to 1" );
   }
 
@@ -2069,6 +2261,7 @@ void DecLib::xParsePrefixSEImessages()
     }
   }
 #endif
+  xCheckDUISEIMessages(m_SEIs);
 }
 
 void DecLib::xCheckPrefixSEIMessages( SEIMessages& prefixSEIs )
@@ -2085,8 +2278,164 @@ void DecLib::xCheckPrefixSEIMessages( SEIMessages& prefixSEIs )
       msg( WARNING, "Warning: ffi_display_elemental_periods_minus1 is different in picture timing and frame field information SEI messages!");
     }
   }
+  if ((getVPS()->getMaxLayers() == 1 || m_audIrapOrGdrAuFlag) && (m_isFirstAuInCvs || m_accessUnitPicInfo.begin()->m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_N_LP || m_accessUnitPicInfo.begin()->m_nalUnitType == NAL_UNIT_CODED_SLICE_IDR_W_RADL || ((m_accessUnitPicInfo.begin()->m_nalUnitType == NAL_UNIT_CODED_SLICE_CRA || m_accessUnitPicInfo.begin()->m_nalUnitType == NAL_UNIT_CODED_SLICE_GDR) && m_lastNoOutputBeforeRecoveryFlag[m_accessUnitPicInfo.begin()->m_nuhLayerId])) && m_accessUnitPicInfo.size() == 1)
+  {
+    if (m_sdiSEIInFirstAU != NULL)
+    {
+      delete m_sdiSEIInFirstAU;
+    }
+    m_sdiSEIInFirstAU = NULL;
+    if (m_maiSEIInFirstAU != NULL)
+    {
+      delete m_maiSEIInFirstAU;
+    }
+    m_maiSEIInFirstAU = NULL;
+#if JVET_W0078_MVP_SEI 
+    if (m_mvpSEIInFirstAU != NULL) 
+    {
+      delete m_mvpSEIInFirstAU;
+    }
+    m_mvpSEIInFirstAU = NULL;
+#endif
+    SEIMessages sdiSEIs  = getSeisByType(prefixSEIs, SEI::SCALABILITY_DIMENSION_INFO);
+    if (!sdiSEIs.empty())
+    {
+      SEIScalabilityDimensionInfo *sdi = (SEIScalabilityDimensionInfo*)sdiSEIs.front();
+      m_sdiSEIInFirstAU = new SEIScalabilityDimensionInfo(*sdi);
+      if (sdiSEIs.size() > 1)
+      {
+        for (SEIMessages::const_iterator it=sdiSEIs.begin(); it!=sdiSEIs.end(); it++)
+        {
+          CHECK(!m_sdiSEIInFirstAU->isSDISameContent((SEIScalabilityDimensionInfo*)*it), "All SDI SEI messages in a CVS shall have the same content.")
+        }
+      }
+    }
+    SEIMessages maiSEIs  = getSeisByType(prefixSEIs, SEI::MULTIVIEW_ACQUISITION_INFO);
+    if (!maiSEIs.empty())
+    {
+      SEIMultiviewAcquisitionInfo *mai = (SEIMultiviewAcquisitionInfo*)maiSEIs.front();
+      m_maiSEIInFirstAU = new SEIMultiviewAcquisitionInfo(*mai);
+      if (maiSEIs.size() > 1)
+      {
+        for (SEIMessages::const_iterator it=maiSEIs.begin(); it!=maiSEIs.end(); it++)
+        {
+          CHECK(!m_maiSEIInFirstAU->isMAISameContent((SEIMultiviewAcquisitionInfo*)*it), "All MAI SEI messages in a CVS shall have the same content.")
+        }
+      }
+    }
+#if JVET_W0078_MVP_SEI 
+    SEIMessages mvpSEIs = getSeisByType(prefixSEIs, SEI::MULTIVIEW_VIEW_POSITION);
+    if (!mvpSEIs.empty())
+    {
+      SEIMultiviewViewPosition *mvp = (SEIMultiviewViewPosition*)mvpSEIs.front();
+      m_mvpSEIInFirstAU = new SEIMultiviewViewPosition(*mvp);
+      if (mvpSEIs.size() > 1)
+      {
+        for (SEIMessages::const_iterator it = mvpSEIs.begin(); it != mvpSEIs.end(); it++)
+        {
+          CHECK(!m_mvpSEIInFirstAU->isMVPSameContent((SEIMultiviewViewPosition*)*it), "All MVP SEI messages in a CVS shall have the same content.")
+        }
+      }
+    }
+#endif
+  }
+  else
+  {
+    SEIMessages sdiSEIs  = getSeisByType(prefixSEIs, SEI::SCALABILITY_DIMENSION_INFO);
+    CHECK(!m_sdiSEIInFirstAU && !sdiSEIs.empty(), "When an SDI SEI message is present in any AU of a CVS, an SDI SEI message shall be present for the first AU of the CVS.");
+    if (!sdiSEIs.empty())
+    {
+      for (SEIMessages::const_iterator it=sdiSEIs.begin(); it!=sdiSEIs.end(); it++)
+      {
+        CHECK(!m_sdiSEIInFirstAU->isSDISameContent((SEIScalabilityDimensionInfo*)*it), "All SDI SEI messages in a CVS shall have the same content.")
+      }
+    }
+    SEIMessages maiSEIs  = getSeisByType(prefixSEIs, SEI::MULTIVIEW_ACQUISITION_INFO);
+    CHECK(!m_maiSEIInFirstAU && !maiSEIs.empty(), "When an MAI SEI message is present in any AU of a CVS, an MAI SEI message shall be present for the first AU of the CVS.");
+    if (!maiSEIs.empty())
+    {
+      for (SEIMessages::const_iterator it=maiSEIs.begin(); it!=maiSEIs.end(); it++)
+      {
+        CHECK(!m_maiSEIInFirstAU->isMAISameContent((SEIMultiviewAcquisitionInfo*)*it), "All MAI SEI messages in a CVS shall have the same content.")
+      }
+    }
+#if JVET_W0078_MVP_SEI 
+    SEIMessages mvpSEIs = getSeisByType(prefixSEIs, SEI::MULTIVIEW_VIEW_POSITION);
+    CHECK(!m_mvpSEIInFirstAU && !mvpSEIs.empty(), "When an MVP SEI message is present in any AU of a CVS, an MVP SEI message shall be present for the first AU of the CVS.");
+    if (!mvpSEIs.empty())
+    {
+      for (SEIMessages::const_iterator it = mvpSEIs.begin(); it != mvpSEIs.end(); it++)
+      {
+        CHECK(!m_mvpSEIInFirstAU->isMVPSameContent((SEIMultiviewViewPosition*)*it), "All MVP SEI messages in a CVS shall have the same content.")
+      }
+    }
+#endif
+  }
+
+  for (SEIMessages::const_iterator it=prefixSEIs.begin(); it!=prefixSEIs.end(); it++)
+  {
+    if ((*it)->payloadType() == SEI::MULTIVIEW_ACQUISITION_INFO)
+    {
+      CHECK(!m_sdiSEIInFirstAU, "When a CVS does not contain an SDI SEI message, the CVS shall not contain an MAI SEI message.");
+      SEIMultiviewAcquisitionInfo *maiSei = (SEIMultiviewAcquisitionInfo*)*it;
+      CHECK(m_sdiSEIInFirstAU->m_sdiNumViews - 1 != maiSei->m_maiNumViewsMinus1, "The value of num_views_minus1 shall be equal to NumViews - 1");
+    }
+    else if ((*it)->payloadType() == SEI::ALPHA_CHANNEL_INFO)
+    {
+      CHECK(!m_sdiSEIInFirstAU, "When a CVS does not contain an SDI SEI message with sdi_aux_id[i] equal to 1 for at least one value of i, no picture in the CVS shall be associated with an ACI SEI message.");
+    }
+    else if ((*it)->payloadType() == SEI::DEPTH_REPRESENTATION_INFO)
+    {
+      CHECK(!m_sdiSEIInFirstAU, "When a CVS does not contain an SDI SEI message with sdi_aux_id[i] equal to 2 for at least one value of i, no picture in the CVS shall be associated with a DRI SEI message.");
+    }
+#if JVET_W0078_MVP_SEI
+    else if ((*it)->payloadType() == SEI::MULTIVIEW_VIEW_POSITION)
+    {
+      CHECK(!m_sdiSEIInFirstAU, "When a CVS does not contain an SDI SEI message, the CVS shall not contain an MVP SEI message.");
+      SEIMultiviewViewPosition *mvpSei = (SEIMultiviewViewPosition*)*it;
+      CHECK(m_sdiSEIInFirstAU->m_sdiNumViews - 1 != mvpSei->m_mvpNumViewsMinus1, "The value of num_views_minus1 shall be equal to NumViews - 1");
+    }
+#endif
+  }
 }
 
+/**
+ - Check the decoding unit information (DUI) SEI messages in the given SEI list.
+ - The check only applies when the list holds a buffering period (BP) SEI whose
+ - m_bpDecodingUnitHrdParamsPresentFlag is set and whose
+ - m_decodingUnitDpbDuParamsInPicTimingSeiFlag is not set, and at least one DUI
+ - SEI is present: then at least one DUI SEI has to carry
+ - m_picSptDpbOutputDuDelay different from -1.
+ */
+void DecLib::xCheckDUISEIMessages(SEIMessages &prefixSEIs)
+{
+  SEIMessages BPSEIs  = getSeisByType(prefixSEIs, SEI::BUFFERING_PERIOD);
+  SEIMessages DUISEIs = getSeisByType(prefixSEIs, SEI::DECODING_UNIT_INFO);
+  if (BPSEIs.empty())
+  {
+    return;   // nothing to verify without a BP SEI
+  }
+
+  const SEIBufferingPeriod *bufPeriod = (const SEIBufferingPeriod *) BPSEIs.front();   // only the first BP SEI is consulted
+  const bool checkApplies = bufPeriod->m_bpDecodingUnitHrdParamsPresentFlag && !bufPeriod->m_decodingUnitDpbDuParamsInPicTimingSeiFlag;
+  if (!checkApplies || DUISEIs.empty())
+  {
+    return;
+  }
+
+  bool duDelayFlag = false;
+  for (auto seiMsg : DUISEIs)
+  {
+    if (((const SEIDecodingUnitInfo *) seiMsg)->m_picSptDpbOutputDuDelay != -1)
+    {
+      duDelayFlag = true;
+      break;
+    }
+  }
+  CHECK(duDelayFlag == false, "At least one DUI SEI should have dui->m_picSptDpbOutputDuDelay not equal to -1")
+}
+
+
 void DecLib::xDecodePicHeader( InputNALUnit& nalu )
 {
   m_HLSReader.setBitstream( &nalu.getBitstream() );
@@ -2094,6 +2443,19 @@ void DecLib::xDecodePicHeader( InputNALUnit& nalu )
   m_picHeader.setValid();
 }
 
+bool DecLib::getMixedNaluTypesInPicFlag()
+{
+  // Mixed-NALU-types flag of the PPS referenced by the picture header; false while the header is not valid.
+  bool mixedNaluTypes = false;
+  if (m_picHeader.isValid())
+  {
+    PPS *referencedPps = m_parameterSetManager.getPPS(m_picHeader.getPPSId());
+    // A valid picture header must reference a PPS that has been stored.
+    CHECK(referencedPps == nullptr, "No PPS present");
+    mixedNaluTypes = referencedPps->getMixedNaluTypesInPicFlag();
+  }
+  return mixedNaluTypes;
+}
+
 bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDisplay )
 {
   m_apcSlicePilot->setPicHeader( &m_picHeader );
@@ -2119,9 +2481,7 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
   for( auto& naluTemporalId : m_accessUnitNals )
   {
     if (
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
       naluTemporalId.m_nalUnitType != NAL_UNIT_OPI &&
-#endif
       naluTemporalId.m_nalUnitType != NAL_UNIT_DCI
       && naluTemporalId.m_nalUnitType != NAL_UNIT_VPS
       && naluTemporalId.m_nalUnitType != NAL_UNIT_SPS
@@ -2147,6 +2507,11 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
     m_accessUnitNoOutputPriorPicFlags.push_back(m_apcSlicePilot->getNoOutputOfPriorPicsFlag());
   }
 
+  if (m_picHeader.getGdrPicFlag() && m_prevGDRInSameLayerPOC[nalu.m_nuhLayerId] == -MAX_INT ) // Only care about recovery POC if it is the first coded GDR picture in the layer
+  {
+    m_prevGDRInSameLayerRecoveryPOC[nalu.m_nuhLayerId] = m_apcSlicePilot->getPOC() + m_picHeader.getRecoveryPocCnt();
+  }
+
   PPS *pps = m_parameterSetManager.getPPS(m_picHeader.getPPSId());
   CHECK(pps == 0, "No PPS present");
   SPS *sps = m_parameterSetManager.getSPS(pps->getSPSId());
@@ -2264,6 +2629,10 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
         m_picHeader.setNoOutputBeforeRecoveryFlag( m_picHeader.getHandleGdrAsCvsStartFlag() );
       }
     }
+    else
+    {
+      m_picHeader.setNoOutputBeforeRecoveryFlag( false );
+    }
 
     //the inference for NoOutputOfPriorPicsFlag
     if( !m_firstSliceInBitstream && m_picHeader.getNoOutputBeforeRecoveryFlag() )
@@ -2343,14 +2712,17 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
 
   // Skip pictures due to random access
 
-  if (isRandomAccessSkipPicture(iSkipFrame, iPOCLastDisplay))
+  if (isRandomAccessSkipPicture(iSkipFrame, iPOCLastDisplay, pps->getMixedNaluTypesInPicFlag(), nalu.m_nuhLayerId))
   {
     m_prevSliceSkipped = true;
     m_skippedPOC = m_apcSlicePilot->getPOC();
+    m_skippedLayerID = nalu.m_nuhLayerId;
+
     // reset variables for bitstream conformance tests
     resetAccessUnitNals();
     resetAccessUnitApsNals();
     resetAccessUnitPicInfo();
+    resetPictureUnitNals();
     m_maxDecSubPicIdx = 0;
     m_maxDecSliceAddrInSubPic = -1;
     return false;
@@ -2389,13 +2761,13 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
     int refPicIndex;
     while ((lostPoc = m_apcSlicePilot->checkThatAllRefPicsAreAvailable(m_cListPic, m_apcSlicePilot->getRPL0(), 0, true, &refPicIndex, m_apcSlicePilot->getNumRefIdx(REF_PIC_LIST_0))) > 0)
     {
-      if( !pps->getMixedNaluTypesInPicFlag() && ( 
+      if( !pps->getMixedNaluTypesInPicFlag() && (
       ( ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP ) && ( sps->getIDRRefParamListPresent() || pps->getRplInfoInPhFlag() ) ) ||
-        ( ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA ) && m_picHeader.getNoOutputBeforeRecoveryFlag() ) ) ) 
+        ( ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA ) && m_picHeader.getNoOutputBeforeRecoveryFlag() ) ) )
       {
         if (m_apcSlicePilot->getRPL0()->isInterLayerRefPic(refPicIndex) == 0)
         {
-          xCreateUnavailablePicture( m_apcSlicePilot->getPPS(), lostPoc - 1, m_apcSlicePilot->getRPL0()->isRefPicLongterm( refPicIndex ), m_apcSlicePilot->getPic()->temporalId, m_apcSlicePilot->getPic()->layerId, m_apcSlicePilot->getRPL0()->isInterLayerRefPic( refPicIndex ) );
+          xCreateUnavailablePicture( pps, lostPoc, m_apcSlicePilot->getRPL0()->isRefPicLongterm( refPicIndex ), m_apcSlicePilot->getTLayer(), m_apcSlicePilot->getNalUnitLayerId(), m_apcSlicePilot->getRPL0()->isInterLayerRefPic( refPicIndex ) );
         }
       }
       else
@@ -2405,13 +2777,13 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
     }
     while ((lostPoc = m_apcSlicePilot->checkThatAllRefPicsAreAvailable(m_cListPic, m_apcSlicePilot->getRPL1(), 0, true, &refPicIndex, m_apcSlicePilot->getNumRefIdx(REF_PIC_LIST_1))) > 0)
     {
-      if( !pps->getMixedNaluTypesInPicFlag() && ( 
+      if( !pps->getMixedNaluTypesInPicFlag() && (
         ( ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP ) && ( sps->getIDRRefParamListPresent() || pps->getRplInfoInPhFlag() ) ) ||
         ( ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA ) && m_picHeader.getNoOutputBeforeRecoveryFlag() ) ) )
       {
         if (m_apcSlicePilot->getRPL1()->isInterLayerRefPic(refPicIndex) == 0)
         {
-          xCreateUnavailablePicture( m_apcSlicePilot->getPPS(), lostPoc - 1, m_apcSlicePilot->getRPL1()->isRefPicLongterm( refPicIndex ), m_apcSlicePilot->getPic()->temporalId, m_apcSlicePilot->getPic()->layerId, m_apcSlicePilot->getRPL1()->isInterLayerRefPic( refPicIndex ) );
+          xCreateUnavailablePicture( pps, lostPoc, m_apcSlicePilot->getRPL1()->isRefPicLongterm( refPicIndex ), m_apcSlicePilot->getTLayer(), m_apcSlicePilot->getNalUnitLayerId(), m_apcSlicePilot->getRPL1()->isInterLayerRefPic( refPicIndex ) );
         }
       }
       else
@@ -2443,9 +2815,20 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
   m_pcPic->layerId     = nalu.m_nuhLayerId;
   m_pcPic->subLayerNonReferencePictureDueToSTSA = false;
 
+  if (pcSlice->getSPS()->getSpsRangeExtension().getRrcRiceExtensionEnableFlag())
+  {
+    int bitDepth = pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA);
+    int baseLevel = (bitDepth > 12) ? (pcSlice->isIntra() ? 5 : 2 * 5 ) : (pcSlice->isIntra() ? 2 * 5 : 3 * 5);
+    pcSlice->setRiceBaseLevel(baseLevel);
+  }
+  else
+  {
+    pcSlice->setRiceBaseLevel(4);
+  }
+
   if (pcSlice->getSPS()->getProfileTierLevel()->getConstraintInfo()->getNoApsConstraintFlag())
   {
-    bool flag = pcSlice->getSPS()->getCCALFEnabledFlag() || pcSlice->getPicHeader()->getNumAlfAps() || pcSlice->getPicHeader()->getAlfEnabledFlag(COMPONENT_Cb) || pcSlice->getPicHeader()->getAlfEnabledFlag(COMPONENT_Cr);
+    bool flag = pcSlice->getSPS()->getCCALFEnabledFlag() || pcSlice->getPicHeader()->getNumAlfApsIdsLuma() || pcSlice->getPicHeader()->getAlfEnabledFlag(COMPONENT_Cb) || pcSlice->getPicHeader()->getAlfEnabledFlag(COMPONENT_Cr);
     CHECK(flag, "When no_aps_constraint_flag is equal to 1, the values of ph_num_alf_aps_ids_luma, sh_num_alf_aps_ids_luma, ph_alf_cb_flag, ph_alf_cr_flag, sh_alf_cb_flag, sh_alf_cr_flag, and sps_ccalf_enabled_flag shall all be equal to 0")
   }
   if( pcSlice->getNalUnitLayerId() != pcSlice->getSPS()->getLayerId() )
@@ -2525,8 +2908,12 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
     CU::checkConformanceILRP(pcSlice);
   }
 
+#if GDR_ENABLED
+  PicHeader *picHeader = nullptr; // picHeader is not necessary for scaledReference picture at decoder but should not share picHeader with non-scaled picture
+  pcSlice->scaleRefPicList(scaledRefPic, picHeader, m_parameterSetManager.getAPSs(), m_picHeader.getLmcsAPS(), m_picHeader.getScalingListAPS(), true);
+#else
   pcSlice->scaleRefPicList( scaledRefPic, m_pcPic->cs->picHeader, m_parameterSetManager.getAPSs(), m_picHeader.getLmcsAPS(), m_picHeader.getScalingListAPS(), true );
-
+#endif
 
     if (!pcSlice->isIntra())
     {
@@ -2655,6 +3042,23 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
       pcSlice->setLatestDRAPPOC(pcSlice->getPOC());
     }
     pcSlice->checkConformanceForDRAP(nalu.m_temporalId);
+    if (pcSlice->isIntra())
+      pcSlice->getPic()->setEdrapRapId(0);
+    SEIMessages edrapSEIs = getSeisByType(m_pcPic->SEIs, SEI::EXTENDED_DRAP_INDICATION );
+    if (!edrapSEIs.empty())
+    {
+      msg( NOTICE, "Extended DRAP indication SEI decoded\n");
+      SEIExtendedDrapIndication *seiEdrap = (SEIExtendedDrapIndication *)edrapSEIs.front();
+      pcSlice->setEdrapRapId(seiEdrap->m_edrapIndicationRapIdMinus1 + 1);
+      pcSlice->getPic()->setEdrapRapId(seiEdrap->m_edrapIndicationRapIdMinus1 + 1);
+      pcSlice->setEdrapNumRefRapPics(seiEdrap->m_edrapIndicationNumRefRapPicsMinus1 + 1);
+      for (int i = 0; i < pcSlice->getEdrapNumRefRapPics(); i++)
+      {
+        pcSlice->addEdrapRefRapIds(seiEdrap->m_edrapIndicationRefRapId[i]);
+      }
+      pcSlice->setLatestEDRAPPOC(pcSlice->getPOC());
+    }
+    pcSlice->checkConformanceForEDRAP(nalu.m_temporalId);
 
   Quant *quant = m_cTrQuant.getQuant();
 
@@ -2781,6 +3185,26 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
     m_cReshaper.setRecReshaped(false);
   }
 
+#if GDR_LEAK_TEST
+  if (m_gdrPocRandomAccess == pcSlice->getPOC())
+  {
+    for (int e = 0; e < 2; e++)
+    {
+      for (int ridx = 0; ridx < pcSlice->getNumRefIdx((RefPicList)e); ridx++)
+      {
+        Picture *pic = pcSlice->getRefPic((RefPicList)e, ridx);
+        if (pic)
+        {
+          CodingStructure& cs = *pic->cs;
+          cs.getRecoBuf().Y().fill(0 * 4); // for 8-bit sequence
+          cs.getRecoBuf().Cb().fill(0 * 4);
+          cs.getRecoBuf().Cr().fill(0 * 4);
+          cs.getMotionBuf().memset(0);    // clear MV storage
+        }
+      }
+    }
+  }
+#endif // GDR_LEAK_TEST
   //  Decode a picture
   m_cSliceDecoder.decompressSlice( pcSlice, &( nalu.getBitstream() ), ( m_pcPic->poc == getDebugPOC() ? getDebugCTU() : -1 ) );
 
@@ -2837,7 +3261,6 @@ void DecLib::updatePrevIRAPAndGDRSubpic()
   }
 }
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
 void DecLib::xDecodeOPI( InputNALUnit& nalu )
 {
   m_opi = new OPI();
@@ -2847,7 +3270,6 @@ void DecLib::xDecodeOPI( InputNALUnit& nalu )
 
   m_HLSReader.parseOPI( m_opi );
 }
-#endif
 
 void DecLib::xDecodeVPS( InputNALUnit& nalu )
 {
@@ -2929,7 +3351,12 @@ void DecLib::xDecodeAPS(InputNALUnit& nalu)
   {
     APS* apsEnc = new APS();
     *apsEnc = *aps;
-    m_apsMapEnc->storePS( ( apsEnc->getAPSId() << NUM_APS_TYPE_LEN ) + apsEnc->getAPSType(), apsEnc ); 
+    m_apsMapEnc->storePS( ( apsEnc->getAPSId() << NUM_APS_TYPE_LEN ) + apsEnc->getAPSType(), apsEnc );
+  }
+
+  if( nalu.m_nalUnitType == NAL_UNIT_SUFFIX_APS && m_prevSliceSkipped )
+  {
+    m_accessUnitApsNals.pop_back();
   }
 
   // aps will be deleted if it was already stored (and did not changed),
@@ -2940,18 +3367,27 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay, i
 {
   bool ret;
   // ignore all NAL units of layers > 0
-
-  AccessUnitInfo auInfo;
-  auInfo.m_nalUnitType = nalu.m_nalUnitType;
-  auInfo.m_nuhLayerId = nalu.m_nuhLayerId;
-  auInfo.m_temporalId = nalu.m_temporalId;
-  m_accessUnitNals.push_back(auInfo);
-  m_pictureUnitNals.push_back( nalu.m_nalUnitType );
+  if( (nalu.m_nalUnitType != NAL_UNIT_SUFFIX_APS       &&
+       nalu.m_nalUnitType != NAL_UNIT_EOS              &&
+       nalu.m_nalUnitType != NAL_UNIT_EOB              &&
+       nalu.m_nalUnitType != NAL_UNIT_SUFFIX_SEI       &&
+       nalu.m_nalUnitType != NAL_UNIT_FD               &&
+       nalu.m_nalUnitType != NAL_UNIT_RESERVED_NVCL_27 &&
+       nalu.m_nalUnitType != NAL_UNIT_UNSPECIFIED_30   &&
+       nalu.m_nalUnitType != NAL_UNIT_UNSPECIFIED_31)  ||
+       !m_prevSliceSkipped )
+  {
+    AccessUnitInfo auInfo;
+    auInfo.m_nalUnitType = nalu.m_nalUnitType;
+    auInfo.m_nuhLayerId = nalu.m_nuhLayerId;
+    auInfo.m_temporalId = nalu.m_temporalId;
+    m_accessUnitNals.push_back(auInfo);
+    m_pictureUnitNals.push_back( nalu.m_nalUnitType );
+  }
   switch (nalu.m_nalUnitType)
   {
     case NAL_UNIT_VPS:
       xDecodeVPS( nalu );
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
       if (getTOlsIdxExternalFlag())
       {
         m_vps->m_targetOlsIdx = iTargetOlsIdx;
@@ -2964,15 +3400,10 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay, i
       {
         m_vps->m_targetOlsIdx = m_vps->deriveTargetOLSIdx();
       }
-#else
-      m_vps->m_targetOlsIdx = iTargetOlsIdx;
-#endif
       return false;
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
     case NAL_UNIT_OPI:
       xDecodeOPI( nalu );
       return false;
-#endif
     case NAL_UNIT_DCI:
       xDecodeDCI( nalu );
       return false;
@@ -2989,10 +3420,20 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay, i
       return !m_bFirstSliceInPicture;
 
     case NAL_UNIT_PREFIX_APS:
-    case NAL_UNIT_SUFFIX_APS:
       xDecodeAPS(nalu);
       return false;
 
+    case NAL_UNIT_SUFFIX_APS:
+      if( m_prevSliceSkipped )
+      {
+        xDecodeAPS(nalu);
+      }
+      else
+      {
+        m_suffixApsNalus.push_back(new InputNALUnit(nalu));
+      }
+      return false;
+
     case NAL_UNIT_PREFIX_SEI:
       // Buffer up prefix SEI messages until SPS of associated VCL is known.
       m_prefixSEINALUs.push_back(new InputNALUnit(nalu));
@@ -3002,6 +3443,11 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay, i
     case NAL_UNIT_SUFFIX_SEI:
       if (m_pcPic)
       {
+        if ( m_prevSliceSkipped )
+        {
+          msg( NOTICE, "Note: received suffix SEI but current picture is skipped.\n");
+          return false;
+        }
         m_pictureSeiNalus.push_back(new InputNALUnit(nalu));
         m_accessUnitSeiTids.push_back(nalu.m_temporalId);
         const SPS *sps = m_parameterSetManager.getActiveSPS();
@@ -3033,6 +3479,7 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay, i
       m_associatedIRAPType[nalu.m_nuhLayerId] = NAL_UNIT_INVALID;
       m_pocCRA[nalu.m_nuhLayerId] = -MAX_INT;
       m_prevGDRInSameLayerPOC[nalu.m_nuhLayerId] = -MAX_INT;
+      m_prevGDRInSameLayerRecoveryPOC[nalu.m_nuhLayerId] = -MAX_INT;
       std::fill_n(m_prevGDRSubpicPOC[nalu.m_nuhLayerId], MAX_NUM_SUB_PICS, -MAX_INT);
       std::fill_n(m_prevIRAPSubpicPOC[nalu.m_nuhLayerId], MAX_NUM_SUB_PICS, -MAX_INT);
       memset(m_prevIRAPSubpicDecOrderNo[nalu.m_nuhLayerId], 0, sizeof(int)*MAX_NUM_SUB_PICS);
@@ -3067,9 +3514,6 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay, i
     }
 
     case NAL_UNIT_RESERVED_IRAP_VCL_11:
-#if !JVET_S0163_ON_TARGETOLS_SUBLAYERS
-    case NAL_UNIT_RESERVED_IRAP_VCL_12:
-#endif
       msg( NOTICE, "Note: found reserved VCL NAL unit.\n");
       xParsePrefixSEIsForUnknownVCLNal();
       return false;
@@ -3104,11 +3548,22 @@ bool DecLib::decode(InputNALUnit& nalu, int& iSkipFrame, int& iPOCLastDisplay, i
  *  equal to or greater than the random access point POC is attempted. For non IDR/CRA/BLA random
  *  access point there is no guarantee that the decoder will not crash.
  */
-bool DecLib::isRandomAccessSkipPicture( int& iSkipFrame, int& iPOCLastDisplay )
+bool DecLib::isRandomAccessSkipPicture( int& iSkipFrame, int& iPOCLastDisplay, bool mixedNaluInPicFlag, uint32_t layerId )
 {
+  if( (iSkipFrame > 0) &&
+      (m_apcSlicePilot->getFirstCtuRsAddrInSlice() == 0 && layerId == 0) &&
+      (m_skippedPOC != MAX_INT) && (m_skippedLayerID != MAX_INT))
+  {
+    // When the skipFrame count is greater than 0 and the current frame is not the first frame of the sequence, decrement the skipFrame count.
+    // If the skipFrame count is still greater than 0, the current frame will be skipped.
+    iSkipFrame--;
+  }
+
   if (iSkipFrame)
   {
     iSkipFrame--;   // decrement the counter
+    m_maxDecSubPicIdx = 0;
+    m_maxDecSliceAddrInSubPic = -1;
     return true;
   }
   else if ( m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_W_RADL || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP )
@@ -3117,7 +3572,7 @@ bool DecLib::isRandomAccessSkipPicture( int& iSkipFrame, int& iPOCLastDisplay )
   }
   else if (m_pocRandomAccess == MAX_INT) // start of random access point, m_pocRandomAccess has not been set yet.
   {
-    if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA )
+    if (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA || m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_GDR )
     {
       // set the POC random access since we need to skip the reordered pictures in the case of CRA/CRANT/BLA/BLANT.
       m_pocRandomAccess = m_apcSlicePilot->getPOC();
@@ -3126,16 +3581,24 @@ bool DecLib::isRandomAccessSkipPicture( int& iSkipFrame, int& iPOCLastDisplay )
     {
       if(!m_warningMessageSkipPicture)
       {
-        msg( WARNING, "\nWarning: this is not a valid random access point and the data is discarded until the first CRA picture");
+        msg( WARNING, "Warning: This is not a valid random access point and the data is discarded until the first CRA or GDR picture\n");
         m_warningMessageSkipPicture = true;
       }
+      iSkipFrame--;
+      m_maxDecSubPicIdx = 0;
+      m_maxDecSliceAddrInSubPic = -1;
       return true;
     }
   }
   // skip the reordered pictures, if necessary
-  else if (m_apcSlicePilot->getPOC() < m_pocRandomAccess && (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL))
+  else if (m_apcSlicePilot->getPOC() < m_pocRandomAccess &&
+      (m_apcSlicePilot->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL ||
+       mixedNaluInPicFlag))
   {
     iPOCLastDisplay++;
+    iSkipFrame--;
+    m_maxDecSubPicIdx = 0;
+    m_maxDecSliceAddrInSubPic = -1;
     return true;
   }
   // if we reach here, then the picture is not skipped.
@@ -3149,7 +3612,6 @@ void DecLib::checkNalUnitConstraints( uint32_t naluType )
     const ConstraintInfo *cInfo = m_parameterSetManager.getActiveSPS()->getProfileTierLevel()->getConstraintInfo();
     xCheckNalUnitConstraintFlags( cInfo, naluType );
   }
-
 }
 void DecLib::xCheckNalUnitConstraintFlags( const ConstraintInfo *cInfo, uint32_t naluType )
 {
@@ -3264,9 +3726,7 @@ bool DecLib::isNewPicture(std::ifstream *bitstreamFile, class InputByteStream *b
 
       // NUT that indicate the start of a new picture
       case NAL_UNIT_ACCESS_UNIT_DELIMITER:
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
       case NAL_UNIT_OPI:
-#endif
       case NAL_UNIT_DCI:
       case NAL_UNIT_VPS:
       case NAL_UNIT_SPS:
@@ -3289,9 +3749,6 @@ bool DecLib::isNewPicture(std::ifstream *bitstreamFile, class InputByteStream *b
       case NAL_UNIT_CODED_SLICE_CRA:
       case NAL_UNIT_CODED_SLICE_GDR:
       case NAL_UNIT_RESERVED_IRAP_VCL_11:
-#if !JVET_S0163_ON_TARGETOLS_SUBLAYERS
-      case NAL_UNIT_RESERVED_IRAP_VCL_12:
-#endif
         ret = checkPictureHeaderInSliceHeaderFlag(nalu);
         finished = true;
         break;
diff --git a/source/Lib/DecoderLib/DecLib.h b/source/Lib/DecoderLib/DecLib.h
index 6b89a8f4c558b90551b03ec8f78d9e3e0b09c559..5e61927540dac5022d83b7ee65d94a549472ff1f 100644
--- a/source/Lib/DecoderLib/DecLib.h
+++ b/source/Lib/DecoderLib/DecLib.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -49,7 +49,7 @@
 #include "CommonLib/TrQuant.h"
 #include "CommonLib/InterPrediction.h"
 #include "CommonLib/IntraPrediction.h"
-#include "CommonLib/LoopFilter.h"
+#include "CommonLib/DeblockingFilter.h"
 #include "CommonLib/AdaptiveLoopFilter.h"
 #include "CommonLib/SEI.h"
 #include "CommonLib/Unit.h"
@@ -73,6 +73,7 @@ private:
   GeneralHrdParams        m_prevGeneralHrdParams;
 
   int                     m_prevGDRInSameLayerPOC[MAX_VPS_LAYERS]; ///< POC number of the latest GDR picture
+  int                     m_prevGDRInSameLayerRecoveryPOC[MAX_VPS_LAYERS]; ///< Recovery POC number of the latest GDR picture
   NalUnitType             m_associatedIRAPType[MAX_VPS_LAYERS]; ///< NAL unit type of the previous IRAP picture
   int                     m_pocCRA[MAX_VPS_LAYERS];            ///< POC number of the previous CRA picture
   int                     m_associatedIRAPDecodingOrderNumber[MAX_VPS_LAYERS]; ///< Decoding order number of the previous IRAP picture
@@ -94,7 +95,11 @@ private:
 
 
   SEIMessages             m_SEIs; ///< List of SEI messages that have been received before the first slice and between slices, excluding prefix SEIs...
-
+  SEIScalabilityDimensionInfo* m_sdiSEIInFirstAU;
+  SEIMultiviewAcquisitionInfo* m_maiSEIInFirstAU;
+#if JVET_W0078_MVP_SEI 
+  SEIMultiviewViewPosition*    m_mvpSEIInFirstAU;
+#endif
 
   // functional classes
   IntraPrediction         m_cIntraPred;
@@ -109,7 +114,7 @@ private:
 #if JVET_S0257_DUMP_360SEI_MESSAGE
   SeiCfgFileDump          m_seiCfgDump;
 #endif
-  LoopFilter              m_cLoopFilter;
+  DeblockingFilter        m_deblockingFilter;
   SampleAdaptiveOffset    m_cSAO;
   AdaptiveLoopFilter      m_cALF;
   Reshape                 m_cReshaper;                        ///< reshaper class
@@ -119,7 +124,7 @@ private:
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   CacheModel              m_cacheModel;
 #endif
-  bool isRandomAccessSkipPicture(int& iSkipFrame,  int& iPOCLastDisplay);
+  bool isRandomAccessSkipPicture(int& iSkipFrame, int& iPOCLastDisplay, bool mixedNaluInPicFlag, uint32_t layerId);
   Picture*                m_pcPic;
   uint32_t                m_uiSliceSegmentIdx;
   uint32_t                m_prevLayerID;
@@ -127,12 +132,16 @@ private:
   int                     m_prevPicPOC;
   int                     m_prevTid0POC;
   bool                    m_bFirstSliceInPicture;
+  bool                    m_firstPictureInSequence;
+  SEIColourTransformApply m_colourTranfParams;
+  PelStorage              m_invColourTransfBuf;
   bool                    m_firstSliceInSequence[MAX_VPS_LAYERS];
   bool                    m_firstSliceInBitstream;
   bool                    m_isFirstAuInCvs;
   bool                    m_accessUnitEos[MAX_VPS_LAYERS];
   bool                    m_prevSliceSkipped;
   int                     m_skippedPOC;
+  uint32_t                m_skippedLayerID;
   int                     m_lastPOCNoOutputPriorPics;
   bool                    m_isNoOutputPriorPics;
   bool                    m_lastNoOutputBeforeRecoveryFlag[MAX_VPS_LAYERS];    //value of variable NoOutputBeforeRecoveryFlag of the assocated CRA/GDR pic
@@ -195,14 +204,13 @@ private:
 
   std::vector<NalUnitType> m_pictureUnitNals;
   std::list<InputNALUnit*> m_pictureSeiNalus; 
+  std::list<InputNALUnit*> m_suffixApsNalus; 
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   OPI*                    m_opi;
   bool                    m_mTidExternalSet;
   bool                    m_mTidOpiSet;
   bool                    m_tOlsIdxTidExternalSet;
   bool                    m_tOlsIdxTidOpiSet;
-#endif
   VPS*                    m_vps;
   int                     m_maxDecSubPicIdx;
   int                     m_maxDecSliceAddrInSubPic;
@@ -213,6 +221,11 @@ public:
 
   DCI*                    m_dci;
   ParameterSetMap<APS>*   m_apsMapEnc;
+#if GDR_LEAK_TEST
+public:
+  int                     m_gdrPocRandomAccess;
+#endif // GDR_LEAK_TEST
+
 public:
   DecLib();
   virtual ~DecLib();
@@ -239,10 +252,8 @@ public:
   void  updateAssociatedIRAP();
   void  updatePrevGDRInSameLayer();
   void  updatePrevIRAPAndGDRSubpic();
+  bool  getGDRRecoveryPocReached()          { return ( m_pcPic->getPOC() >= m_prevGDRInSameLayerRecoveryPOC[m_pcPic->layerId] ); }
 
-#if JVET_S0078_NOOUTPUTPRIORPICFLAG
-  bool  getAudIrapOrGdrAuFlag() const       { return m_audIrapOrGdrAuFlag;  }
-#endif
   bool  getNoOutputPriorPicsFlag () const   { return m_isNoOutputPriorPics; }
   void  setNoOutputPriorPicsFlag (bool val) { m_isNoOutputPriorPics = val; }
   void  setFirstSliceInPicture (bool val)  { m_bFirstSliceInPicture = val; }
@@ -279,6 +290,7 @@ public:
   void checkSeiInPictureUnit();
   void resetPictureSeiNalus();
   bool isSliceNaluFirstInAU( bool newPicture, InputNALUnit &nalu );
+  void processSuffixApsNalus();
 
   void checkAPSInPictureUnit();
   void resetPictureUnitNals() { m_pictureUnitNals.clear(); }
@@ -295,7 +307,6 @@ public:
   bool  isNewPicture( std::ifstream *bitstreamFile, class InputByteStream *bytestream );
   bool  isNewAccessUnit( bool newPicture, std::ifstream *bitstreamFile, class InputByteStream *bytestream );
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   bool      getHTidExternalSetFlag()               const { return m_mTidExternalSet; }
   void      setHTidExternalSetFlag(bool mTidExternalSet)  { m_mTidExternalSet = mTidExternalSet; }
   bool      getHTidOpiSetFlag()               const { return m_mTidOpiSet; }
@@ -305,7 +316,8 @@ public:
   bool      getTOlsIdxOpiFlag()               const { return m_tOlsIdxTidOpiSet; }
   void      setTOlsIdxOpiFlag(bool tOlsIdxOpiSet)  { m_tOlsIdxTidOpiSet = tOlsIdxOpiSet; }
   const OPI* getOPI()                     { return m_opi; }
-#endif
+
+  bool      getMixedNaluTypesInPicFlag();
 
 protected:
   void  xUpdateRasInit(Slice* slice);
@@ -318,9 +330,7 @@ protected:
   void  xCheckParameterSetConstraints( const int layerId );
   void      xDecodePicHeader( InputNALUnit& nalu );
   bool      xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDisplay);
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   void      xDecodeOPI( InputNALUnit& nalu );
-#endif
   void      xDecodeVPS( InputNALUnit& nalu );
   void      xDecodeDCI( InputNALUnit& nalu );
   void      xDecodeSPS( InputNALUnit& nalu );
@@ -336,6 +346,8 @@ protected:
   void      xParsePrefixSEImessages();
   void      xParsePrefixSEIsForUnknownVCLNal();
   void      xCheckPrefixSEIMessages( SEIMessages& prefixSEIs );
+  void      xCheckDUISEIMessages(SEIMessages &prefixSEIs);
+
 
   void  xCheckNalUnitConstraintFlags( const ConstraintInfo *cInfo, uint32_t naluType );
   void     xCheckMixedNalUnit(Slice* pcSlice, SPS *sps, InputNALUnit &nalu);
diff --git a/source/Lib/DecoderLib/DecSlice.cpp b/source/Lib/DecoderLib/DecSlice.cpp
index 57c1c92c42bc9774898714bf330f5dc96c316e72..df27bae1ce4b014468107f9a82983fdd40e95c98 100644
--- a/source/Lib/DecoderLib/DecSlice.cpp
+++ b/source/Lib/DecoderLib/DecSlice.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/DecoderLib/DecSlice.h b/source/Lib/DecoderLib/DecSlice.h
index 56cc1da07e3c4330a08d298017e8ed94221b9d6c..365c6a3a304ef110ee4f44e5a9c0777e87c2421e 100644
--- a/source/Lib/DecoderLib/DecSlice.h
+++ b/source/Lib/DecoderLib/DecSlice.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/DecoderLib/NALread.cpp b/source/Lib/DecoderLib/NALread.cpp
index a1cbcf760166c542dd07b02cefde7f61eda32e6a..ccda00687f253b72bdfe93a2ce1db302622097f4 100644
--- a/source/Lib/DecoderLib/NALread.cpp
+++ b/source/Lib/DecoderLib/NALread.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -111,7 +111,8 @@ static void xTraceNalUnitHeader(InputNALUnit& nalu)
 {
   DTRACE( g_trace_ctx, D_NALUNITHEADER, "*********** NAL UNIT (%s) ***********\n", nalUnitTypeToString(nalu.m_nalUnitType) );
   bool zeroTidRequiredFlag = 0;
-  if((nalu.m_nalUnitType >= 16) && (nalu.m_nalUnitType <= 31)) {
+  if ((nalu.m_nalUnitType >= 16) && (nalu.m_nalUnitType <= 31))
+  {
     zeroTidRequiredFlag = 1;
   }
   DTRACE( g_trace_ctx, D_NALUNITHEADER, "%-50s u(%d)  : %u\n", "zero_tid_required_flag", 1, zeroTidRequiredFlag );
@@ -163,10 +164,12 @@ void read(InputNALUnit& nalu)
   InputBitstream &bitstream = nalu.getBitstream();
   vector<uint8_t>& nalUnitBuf=bitstream.getFifo();
   // perform anti-emulation prevention
-  convertPayloadToRBSP(nalUnitBuf, &bitstream, (nalUnitBuf[0] & 64) == 0);
+  const NalUnitType nut = (NalUnitType)(nalUnitBuf[1] >> 3);
+  convertPayloadToRBSP(nalUnitBuf, &bitstream, nut <= NAL_UNIT_RESERVED_IRAP_VCL_11);
   bitstream.resetToStart();
   readNalUnitHeader(nalu);
 }
+
 bool checkPictureHeaderInSliceHeaderFlag(InputNALUnit& nalu)
 {
   InputBitstream& bitstream = nalu.getBitstream();
diff --git a/source/Lib/DecoderLib/NALread.h b/source/Lib/DecoderLib/NALread.h
index 4cf7bac937a0cc2b43d7f9d52498fa1d3d028742..e543b2f9d3649b62ff630f71924fbbeb62b37ff6 100644
--- a/source/Lib/DecoderLib/NALread.h
+++ b/source/Lib/DecoderLib/NALread.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/DecoderLib/SEIread.cpp b/source/Lib/DecoderLib/SEIread.cpp
index d5c99310226fc2102819f292a30981e16988617c..d6dd927c0b71d6996d230b1c761d92436ca49ff5 100644
--- a/source/Lib/DecoderLib/SEIread.cpp
+++ b/source/Lib/DecoderLib/SEIread.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -230,10 +230,22 @@ void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType
       sei = new SEIDependentRAPIndication;
       xParseSEIDependentRAPIndication((SEIDependentRAPIndication&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
+    case SEI::EXTENDED_DRAP_INDICATION:
+      sei = new SEIExtendedDrapIndication;
+      xParseSEIExtendedDrapIndication((SEIExtendedDrapIndication&) *sei, payloadSize, pDecodedMessageOutputStream);
+      break;
     case SEI::FRAME_PACKING:
       sei = new SEIFramePacking;
       xParseSEIFramePacking((SEIFramePacking&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
+    case SEI::DISPLAY_ORIENTATION:
+      sei = new SEIDisplayOrientation;
+      xParseSEIDisplayOrientation((SEIDisplayOrientation&)*sei, payloadSize, pDecodedMessageOutputStream);
+      break;
+    case SEI::ANNOTATED_REGIONS:
+      sei = new SEIAnnotatedRegions;
+      xParseSEIAnnotatedRegions((SEIAnnotatedRegions&)*sei, payloadSize, pDecodedMessageOutputStream);
+      break;
     case SEI::PARAMETER_SETS_INCLUSION_INDICATION:
       sei = new SEIParameterSetsInclusionIndication;
       xParseSEIParameterSetsInclusionIndication((SEIParameterSetsInclusionIndication&)*sei, payloadSize, pDecodedMessageOutputStream);
@@ -268,6 +280,28 @@ void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType
       sei = new SEIGeneralizedCubemapProjection;
       xParseSEIGeneralizedCubemapProjection((SEIGeneralizedCubemapProjection&) *sei, payloadSize, pDecodedMessageOutputStream);
       break;
+    case SEI::SCALABILITY_DIMENSION_INFO:
+      sei = new SEIScalabilityDimensionInfo;
+      xParseSEIScalabilityDimensionInfo((SEIScalabilityDimensionInfo&) *sei, payloadSize, pDecodedMessageOutputStream );
+      break;
+    case SEI::MULTIVIEW_ACQUISITION_INFO:
+      sei = new SEIMultiviewAcquisitionInfo;
+      xParseSEIMultiviewAcquisitionInfo((SEIMultiviewAcquisitionInfo&) *sei, payloadSize, pDecodedMessageOutputStream );
+      break;
+#if JVET_W0078_MVP_SEI 
+    case SEI::MULTIVIEW_VIEW_POSITION:
+      sei = new SEIMultiviewViewPosition;
+      xParseSEIMultiviewViewPosition((SEIMultiviewViewPosition&)*sei, payloadSize, pDecodedMessageOutputStream);
+      break;
+#endif
+    case SEI::ALPHA_CHANNEL_INFO:
+      sei = new SEIAlphaChannelInfo;
+      xParseSEIAlphaChannelInfo((SEIAlphaChannelInfo&) *sei, payloadSize, pDecodedMessageOutputStream );
+      break;
+    case SEI::DEPTH_REPRESENTATION_INFO:
+      sei = new SEIDepthRepresentationInfo;
+      xParseSEIDepthRepresentationInfo((SEIDepthRepresentationInfo&) *sei, payloadSize, pDecodedMessageOutputStream );
+      break;
     case SEI::SUBPICTURE_LEVEL_INFO:
       sei = new SEISubpicureLevelInfo;
       xParseSEISubpictureLevelInfo((SEISubpicureLevelInfo&) *sei, payloadSize, pDecodedMessageOutputStream);
@@ -296,6 +330,16 @@ void SEIReader::xReadSEImessage(SEIMessages& seis, const NalUnitType nalUnitType
       sei = new SEIContentColourVolume;
       xParseSEIContentColourVolume((SEIContentColourVolume&)*sei, payloadSize, pDecodedMessageOutputStream);
       break;
+    case SEI::COLOUR_TRANSFORM_INFO:
+      sei = new SEIColourTransformInfo;
+      xParseSEIColourTransformInfo((SEIColourTransformInfo&)*sei, payloadSize, pDecodedMessageOutputStream);
+      break;
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+    case SEI::CONSTRAINED_RASL_ENCODING:
+      sei = new SEIConstrainedRaslIndication;
+      xParseSEIConstrainedRaslIndication((SEIConstrainedRaslIndication&) *sei, payloadSize, pDecodedMessageOutputStream);
+      break;
+#endif
     default:
       for (uint32_t i = 0; i < payloadSize; i++)
       {
@@ -438,14 +482,12 @@ void SEIReader::xParseSEIDecodedPictureHash(SEIDecodedPictureHash& sei, uint32_t
   uint32_t val;
   sei_read_code( pDecodedMessageOutputStream, 8, val, "dph_sei_hash_type");
   sei.method = static_cast<HashType>(val); bytesRead++;
-#if FIX_TICKET_1405
   sei_read_code( pDecodedMessageOutputStream, 1, val, "dph_sei_single_component_flag");
   sei.singleCompFlag = val;
   sei_read_code( pDecodedMessageOutputStream, 7, val, "dph_sei_reserved_zero_7bits");
   bytesRead++;
   uint32_t expectedSize = ( sei.singleCompFlag ? 1 : 3 ) * (sei.method == 0 ? 16 : (sei.method == 1 ? 2 : 4));
   CHECK ((payloadSize - bytesRead) != expectedSize, "The size of the decoded picture hash does not match the expected size.");
-#endif
 
   const char *traceString="\0";
   switch (sei.method)
@@ -591,6 +633,9 @@ void SEIReader::xCheckScalableNestingConstraints(const SEIScalableNesting& sei,
   for (auto nestedsei : sei.m_nestedSEIs)
   {
     CHECK(nestedsei->payloadType() == SEI::FILLER_PAYLOAD || nestedsei->payloadType() == SEI::SCALABLE_NESTING, "An SEI message that has payloadType equal to filler payload or scalable nesting shall not be contained in a scalable nesting SEI message");
+    
+    CHECK(nestedsei->payloadType() == SEI::SCALABILITY_DIMENSION_INFO, "A scalability dimension information SEI message shall not be contained in a scalable nesting SEI message");
+    CHECK(nestedsei->payloadType() == SEI::MULTIVIEW_ACQUISITION_INFO, "A multiview acquisition information SEI message shall not be contained in a scalable nesting SEI message");
 
     CHECK(nestedsei->payloadType() != SEI::FILLER_PAYLOAD && nestedsei->payloadType() != SEI::DECODED_PICTURE_HASH && nalUnitType != NAL_UNIT_PREFIX_SEI, "When a scalable nesting SEI message contains an SEI message that has payloadType not equal to filler payload or decoded picture hash, the SEI NAL unit containing the scalable nesting SEI message shall have nal_unit_type equal to PREFIX_SEI_NUT");
 
@@ -658,7 +703,8 @@ void SEIReader::xParseSEIDecodingUnitInfo(SEIDecodingUnitInfo& sei, uint32_t pay
       sei.m_duSptCpbRemovalDelayIncrement[i] = 0;
     }
   }
-  if (bp.m_decodingUnitDpbDuParamsInPicTimingSeiFlag)
+  if (!bp.m_decodingUnitDpbDuParamsInPicTimingSeiFlag)
+
   {
     sei_read_flag(pDecodedMessageOutputStream, val, "dpb_output_du_delay_present_flag"); sei.m_dpbOutputDuDelayPresentFlag = (val != 0);
   }
@@ -669,6 +715,8 @@ void SEIReader::xParseSEIDecodingUnitInfo(SEIDecodingUnitInfo& sei, uint32_t pay
   if(sei.m_dpbOutputDuDelayPresentFlag)
   {
     sei_read_code( pDecodedMessageOutputStream, bp.getDpbOutputDelayDuLength(), val, "pic_spt_dpb_output_du_delay");
+    if (sei.m_picSptDpbOutputDuDelay != -1)
+       CHECK(sei.m_picSptDpbOutputDuDelay!=val,"When signaled m_picSptDpbOutputDuDelay value must be same for DUs");
     sei.m_picSptDpbOutputDuDelay = val;
   }
 }
@@ -758,15 +806,15 @@ void SEIReader::xParseSEIBufferingPeriod(SEIBufferingPeriod& sei, uint32_t paylo
           sei_read_code( pDecodedMessageOutputStream, sei.m_initialCpbRemovalDelayLength, code, nalOrVcl ? "vcl_initial_cpb_removal_delay[i][j]" : "nal_initial_cpb_removal_delay[i][j]" );
           sei.m_initialCpbRemovalDelay[i][j][nalOrVcl] = code;
           sei_read_code( pDecodedMessageOutputStream, sei.m_initialCpbRemovalDelayLength, code, nalOrVcl ? "vcl_initial_cpb_removal_offset[i][j]" : "nal_initial_cpb_removal_offset[i][j]" );
-          sei.m_initialCpbRemovalDelay[i][j][nalOrVcl] = code;
+          sei.m_initialCpbRemovalOffset[i][j][nalOrVcl] = code;
         }
       }
     }
   }
-  if (sei.m_bpMaxSubLayers-1 > 0) 
+  if (sei.m_bpMaxSubLayers-1 > 0)
   {
     sei_read_flag(pDecodedMessageOutputStream, code, "bp_sublayer_dpb_output_offsets_present_flag");
-    sei.m_sublayerDpbOutputOffsetsPresentFlag = code; 
+    sei.m_sublayerDpbOutputOffsetsPresentFlag = code;
   }
   else
   {
@@ -980,14 +1028,130 @@ void SEIReader::xParseSEIPictureTiming(SEIPictureTiming& sei, uint32_t payloadSi
       sei.m_duCommonCpbRemovalDelayFlag = 0;
     }
   }
-#if JVET_S0175_ASPECT5
   sei_read_code( pDecodedMessageOutputStream, 8, symbol,    "pt_display_elemental_periods_minus1" );
-#else
-  sei_read_uvlc( pDecodedMessageOutputStream, symbol,    "pt_display_elemental_periods_minus1" );
-#endif
   sei.m_ptDisplayElementalPeriodsMinus1 = symbol;
 }
 
+void SEIReader::xParseSEIAnnotatedRegions(SEIAnnotatedRegions& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+{
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
+  uint32_t val;
+  // Parses an annotated regions SEI payload into sei: header flags first, then label updates, then object updates.
+  sei_read_flag(pDecodedMessageOutputStream, val, "ar_cancel_flag");                                   sei.m_hdr.m_cancelFlag = val;
+  if (!sei.m_hdr.m_cancelFlag)
+  {
+    sei_read_flag(pDecodedMessageOutputStream, val, "ar_not_optimized_for_viewing_flag");              sei.m_hdr.m_notOptimizedForViewingFlag = val;
+    sei_read_flag(pDecodedMessageOutputStream, val, "ar_true_motion_flag");                            sei.m_hdr.m_trueMotionFlag = val;
+    sei_read_flag(pDecodedMessageOutputStream, val, "ar_occluded_object_flag");                        sei.m_hdr.m_occludedObjectFlag = val; // must be constant
+    sei_read_flag(pDecodedMessageOutputStream, val, "ar_partial_object_flag_present_flag");            sei.m_hdr.m_partialObjectFlagPresentFlag = val; // must be constant
+    sei_read_flag(pDecodedMessageOutputStream, val, "ar_object_label_present_flag");                   sei.m_hdr.m_objectLabelPresentFlag = val;
+    sei_read_flag(pDecodedMessageOutputStream, val, "ar_object_confidence_info_present_flag");         sei.m_hdr.m_objectConfidenceInfoPresentFlag = val; // must be constant
+    if (sei.m_hdr.m_objectConfidenceInfoPresentFlag)
+    {
+      sei_read_code(pDecodedMessageOutputStream, 4, val, "ar_object_confidence_length_minus_1"); sei.m_hdr.m_objectConfidenceLength = (val + 1); // must be constant
+    }
+    if (sei.m_hdr.m_objectLabelPresentFlag)
+    {
+      sei_read_flag(pDecodedMessageOutputStream, val, "ar_object_label_language_present_flag");      sei.m_hdr.m_objectLabelLanguagePresentFlag = val;
+      if (sei.m_hdr.m_objectLabelLanguagePresentFlag)
+      {
+        // byte alignment
+        while (m_pcBitstream->getNumBitsRead() % 8 != 0)
+        {
+          uint32_t code;
+          sei_read_flag(pDecodedMessageOutputStream, code, "ar_bit_equal_to_zero");
+        }
+        sei.m_hdr.m_annotatedRegionsObjectLabelLang.clear();
+        do
+        {
+          sei_read_code(pDecodedMessageOutputStream, 8, val, "ar_label_language");
+          if (val)
+          {
+            assert(sei.m_hdr.m_annotatedRegionsObjectLabelLang.size()<256);
+            sei.m_hdr.m_annotatedRegionsObjectLabelLang.push_back((char)val);
+          }
+        } while (val != '\0');
+      }
+    }
+    // Label updates: each entry has an index and a cancel flag; a non-cancelled label is read as a byte-aligned string ended by a zero byte (not stored).
+    uint32_t numLabelUpdates;
+    sei_read_uvlc(pDecodedMessageOutputStream, numLabelUpdates, "ar_num_label_updates");
+    assert(numLabelUpdates<256); // NOTE(review): limits in this function are enforced with assert only, so they vanish when NDEBUG is defined
+
+    sei.m_annotatedLabels.clear();
+    sei.m_annotatedLabels.resize(numLabelUpdates);
+    for (auto it=sei.m_annotatedLabels.begin(); it!=sei.m_annotatedLabels.end(); it++)
+    {
+      SEIAnnotatedRegions::AnnotatedRegionLabel &ar = it->second;
+      sei_read_uvlc(pDecodedMessageOutputStream, val, "ar_label_idx[]");             it->first = val;
+      assert(val<256);
+      sei_read_flag(pDecodedMessageOutputStream, val, "ar_label_cancel_flag");       ar.labelValid = !val;
+      if (ar.labelValid)
+      {
+        ar.label.clear();
+        // byte alignment
+        while (m_pcBitstream->getNumBitsRead() % 8 != 0)
+        {
+          uint32_t code;
+          sei_read_flag(pDecodedMessageOutputStream, code, "ar_bit_equal_to_zero");
+        }
+        do
+        {
+          sei_read_code(pDecodedMessageOutputStream, 8, val, "ar_label[]");
+          if (val)
+          {
+            assert(ar.label.size()<256);
+            ar.label.push_back((char)val);
+          }
+        } while (val != '\0');
+      }
+    }
+    // Object updates: index and cancel flag; unless cancelled, an optional label index and an optional bounding box follow.
+    uint32_t numObjUpdates;
+    sei_read_uvlc(pDecodedMessageOutputStream, numObjUpdates, "ar_num_object_updates");
+    assert(numObjUpdates<256);
+    sei.m_annotatedRegions.clear();
+    sei.m_annotatedRegions.resize(numObjUpdates);
+    for (auto it=sei.m_annotatedRegions.begin(); it!=sei.m_annotatedRegions.end(); it++)
+    {
+      sei_read_uvlc(pDecodedMessageOutputStream, val, "ar_object_idx"); it->first=val;
+      assert(val<256);
+      SEIAnnotatedRegions::AnnotatedRegionObject &ar = it->second;
+      sei_read_flag(pDecodedMessageOutputStream, val, "ar_object_cancel_flag");                           ar.objectCancelFlag = val;
+      ar.objectLabelValid=false;
+      ar.boundingBoxValid=false;
+
+      if (!ar.objectCancelFlag)
+      {
+        if (sei.m_hdr.m_objectLabelPresentFlag)
+        {
+          sei_read_flag(pDecodedMessageOutputStream, val, "ar_object_label_update_flag");             ar.objectLabelValid = val;
+          if (ar.objectLabelValid)
+          {
+            sei_read_uvlc(pDecodedMessageOutputStream, val, "ar_object_label_idx");                      ar.objLabelIdx = val;
+            assert(val<256);
+          }
+        }
+        sei_read_flag(pDecodedMessageOutputStream, val, "ar_bounding_box_update_flag");              ar.boundingBoxValid = val;
+        if (ar.boundingBoxValid)
+        {
+          sei_read_code(pDecodedMessageOutputStream, 16, val, "ar_bounding_box_top");                      ar.boundingBoxTop = val;
+          sei_read_code(pDecodedMessageOutputStream, 16, val, "ar_bounding_box_left");                     ar.boundingBoxLeft = val;
+          sei_read_code(pDecodedMessageOutputStream, 16, val, "ar_bounding_box_width");                    ar.boundingBoxWidth = val;
+          sei_read_code(pDecodedMessageOutputStream, 16, val, "ar_bounding_box_height");                   ar.boundingBoxHeight = val;
+          if (sei.m_hdr.m_partialObjectFlagPresentFlag)
+          {
+            sei_read_flag(pDecodedMessageOutputStream, val, "ar_partial_object_flag");                ar.partialObjectFlag = val;
+          }
+          if (sei.m_hdr.m_objectConfidenceInfoPresentFlag)
+          {
+            sei_read_code(pDecodedMessageOutputStream, sei.m_hdr.m_objectConfidenceLength, val, "ar_object_confidence"); ar.objectConfidence = val;
+          }
+        }
+      }
+    }
+  }
+}
 void SEIReader::xParseSEIFrameFieldinfo(SEIFrameFieldInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
@@ -1016,11 +1180,7 @@ void SEIReader::xParseSEIFrameFieldinfo(SEIFrameFieldInfo& sei, uint32_t payload
       sei_read_flag( pDecodedMessageOutputStream, symbol,  "ffi_top_field_first_flag" );
       sei.m_topFieldFirstFlag = symbol;
     }
-#if JVET_S0175_ASPECT5
     sei_read_code( pDecodedMessageOutputStream, 8, symbol, "ffi_display_elemental_periods_minus1" );
-#else
-    sei_read_uvlc( pDecodedMessageOutputStream, symbol,    "ffi_display_elemental_periods_minus1" );
-#endif
     sei.m_displayElementalPeriodsMinus1 = symbol;
   }
   sei_read_code( pDecodedMessageOutputStream, 2, symbol,   "ffi_source_scan_type" );
@@ -1072,6 +1232,20 @@ void SEIReader::xParseSEIFramePacking(SEIFramePacking& sei, uint32_t payloadSize
   sei_read_flag( pDecodedMessageOutputStream, val, "fp_upsampled_aspect_ratio_flag" );       sei.m_upsampledAspectRatio = val;
 }
 
+void SEIReader::xParseSEIDisplayOrientation(SEIDisplayOrientation& sei, uint32_t payloadSize, std::ostream* pDecodedMessageOutputStream)
+{
+  // Display orientation SEI: a cancel flag and, unless the message is a cancellation,
+  // a persistence flag followed by a 3-bit transform type.
+  uint32_t symbol;
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
+  sei_read_flag(pDecodedMessageOutputStream, symbol, "display_orientation_cancel_flag");
+  sei.m_doCancelFlag = symbol;
+  if (sei.m_doCancelFlag) { return; }   // a cancellation carries no further syntax elements
+  sei_read_flag(pDecodedMessageOutputStream, symbol, "display_orientation_persistence_flag");  sei.m_doPersistenceFlag = symbol;
+  sei_read_code(pDecodedMessageOutputStream, 3, symbol, "display_orientation_transform_type"); sei.m_doTransformType = symbol;
+  CHECK((sei.m_doTransformType < 0) || (sei.m_doTransformType > 7), "Invalid transform type");
+}
+
 void SEIReader::xParseSEIParameterSetsInclusionIndication(SEIParameterSetsInclusionIndication& sei, uint32_t payloadSize, std::ostream* pDecodedMessageOutputStream)
 {
   uint32_t val;
@@ -1208,6 +1382,84 @@ void SEIReader::xParseSEIAmbientViewingEnvironment(SEIAmbientViewingEnvironment&
   sei_read_code(pDecodedMessageOutputStream, 16, code, "ambient_light_y");     sei.m_ambientLightY = (uint16_t)code;
 }
 
+void SEIReader::xParseSEIColourTransformInfo(SEIColourTransformInfo& sei, uint32_t payloadSize, std::ostream* pDecodedMessageOutputStream)
+{
+  uint32_t code;
+  // Parses a colour transform information SEI payload: id, cancel flag, optional video signal info and the per-component look-up tables.
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
+
+  sei_read_uvlc(pDecodedMessageOutputStream, code, "colour_transform_id");               sei.m_id = code;
+  sei_read_flag(pDecodedMessageOutputStream, code, "colour_transform_cancel_flag");      bool colourTransformCancelFlag = code;
+  // Everything below is only parsed when the message is not a cancellation.
+  if (colourTransformCancelFlag == 0)
+  {
+    sei_read_flag(pDecodedMessageOutputStream, code, "colour_transform_persistence_flag"); // value is read but not stored in sei
+    sei_read_flag(pDecodedMessageOutputStream, code, "colour_transform_video_signal_info_present_flag"); sei.m_signalInfoFlag = code;
+
+    if (sei.m_signalInfoFlag)
+    {
+      sei_read_flag(pDecodedMessageOutputStream, code, "colour_transform_full_range_flag");        sei.m_fullRangeFlag = code;
+      sei_read_code(pDecodedMessageOutputStream, 8, code, "colour_transform_primaries");           sei.m_primaries = code;
+      sei_read_code(pDecodedMessageOutputStream, 8, code, "colour_transform_transfer_function");   sei.m_transferFunction = code;
+      sei_read_code(pDecodedMessageOutputStream, 8, code, "colour_transform_matrix_coefficients"); sei.m_matrixCoefs = code;
+    }
+    else
+    {
+      sei.m_fullRangeFlag = 0;
+      sei.m_primaries = 0;
+      sei.m_transferFunction = 0;
+      sei.m_matrixCoefs = 0;
+    }
+    sei_read_code(pDecodedMessageOutputStream, 4, code, "colour_transform_bit_depth_minus8");                       sei.m_bitdepth = 8+code;
+    sei_read_code(pDecodedMessageOutputStream, 3, code, "colour_transform_log2_number_of_points_per_lut_minus1");   sei.m_log2NumberOfPointsPerLut = code + 1;
+    int numLutValues = (1 << sei.m_log2NumberOfPointsPerLut) + 1; // 2^log2NumberOfPointsPerLut entries plus one
+    sei_read_flag(pDecodedMessageOutputStream, code, "colour_transform_cross_comp_flag");                sei.m_crossComponentFlag = code;
+    sei.m_crossComponentInferred = 0;
+    if (sei.m_crossComponentFlag == true)
+    {
+      sei_read_flag(pDecodedMessageOutputStream, code, "colour_transform_cross_comp_inferred");          sei.m_crossComponentInferred = code;
+    }
+    for (int i = 0; i < MAX_NUM_COMPONENT; i++) {
+      sei.m_lut[i].lutValues.resize(numLutValues);
+    }
+    // Each LUT entry (and the chroma offset) is coded with 2 + bit depth - log2(points per LUT) bits.
+    uint16_t lutCodingLength = 2 + sei.m_bitdepth - sei.m_log2NumberOfPointsPerLut;
+    for (uint32_t j = 0; j < numLutValues; j++)
+    {
+      sei_read_code(pDecodedMessageOutputStream, lutCodingLength, code, "colour_transform_lut[0][i]");
+      sei.m_lut[0].lutValues[j] = code;
+    }
+    sei.m_lut[0].numLutValues = numLutValues;
+    sei.m_lut[0].presentFlag = true;
+    if (sei.m_crossComponentFlag == 0 || sei.m_crossComponentInferred == 0)
+    {
+      sei_read_flag(pDecodedMessageOutputStream, code, "colour_transform_number_chroma_lut_minus1");      sei.m_numberChromaLutMinus1 = code;
+      for (uint32_t j = 0; j < numLutValues; j++)
+      {
+        sei_read_code(pDecodedMessageOutputStream, lutCodingLength, code, "colour_transform_lut[1][i]");
+        sei.m_lut[1].lutValues[j] = code;
+        sei.m_lut[2].lutValues[j] = code; // LUT 2 starts as a copy of LUT 1; overwritten below when a second chroma LUT is coded
+      }
+      if (sei.m_numberChromaLutMinus1 == 1)
+      {
+        for (uint32_t j = 0; j < numLutValues; j++)
+        {
+          sei_read_code(pDecodedMessageOutputStream, lutCodingLength, code, "colour_transform_lut[2][i]");
+          sei.m_lut[2].lutValues[j] = code;
+        }
+      }
+      sei.m_lut[1].numLutValues = numLutValues;
+      sei.m_lut[2].numLutValues = numLutValues;
+      sei.m_lut[1].presentFlag = true;
+      sei.m_lut[2].presentFlag = true;
+    }
+    else
+    {
+      sei_read_code(pDecodedMessageOutputStream, lutCodingLength, code, "colour_transform_chroma_offset");
+      sei.m_chromaOffset = code;
+    }
+  }
+}
 void SEIReader::xParseSEIContentColourVolume(SEIContentColourVolume& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
   int i;
@@ -1421,6 +1673,246 @@ void SEIReader::xParseSEIGeneralizedCubemapProjection(SEIGeneralizedCubemapProje
   }
 }
 
+// Parses a scalability dimension information (SDI) SEI message. Every per-layer array is sized to
+// sdi_max_layers_minus1 + 1 entries, matching the range of the layer loop that indexes it.
+void SEIReader::xParseSEIScalabilityDimensionInfo(SEIScalabilityDimensionInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+{
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
+  uint32_t val;
+  sei_read_code( pDecodedMessageOutputStream,   6,  val,    "sdi_max_layers_minus1" );            sei.m_sdiMaxLayersMinus1 = val;
+  sei_read_flag( pDecodedMessageOutputStream,       val,    "sdi_multiview_info_flag" );          sei.m_sdiMultiviewInfoFlag = val;
+  sei_read_flag( pDecodedMessageOutputStream,       val,    "sdi_auxiliary_info_flag" );          sei.m_sdiAuxiliaryInfoFlag = val;
+  if (sei.m_sdiMultiviewInfoFlag || sei.m_sdiAuxiliaryInfoFlag)
+  {
+    if (sei.m_sdiMultiviewInfoFlag)
+    {
+      sei_read_code( pDecodedMessageOutputStream, 4, val, "sdi_view_id_len_minus1" ); sei.m_sdiViewIdLenMinus1 = val;
+    }
+    for (int i = 0; i <= sei.m_sdiMaxLayersMinus1; i++)
+    {
+      sei.m_sdiLayerId.resize(sei.m_sdiMaxLayersMinus1 + 1);   // one entry per layer (no-op after the first iteration)
+      sei_read_code( pDecodedMessageOutputStream, 6, val, "sdi_layer_id" ); sei.m_sdiLayerId[i] = val;
+      if (sei.m_sdiMultiviewInfoFlag)
+      {
+        sei.m_sdiViewIdVal.resize(sei.m_sdiMaxLayersMinus1 + 1);
+        sei_read_code( pDecodedMessageOutputStream, sei.m_sdiViewIdLenMinus1 + 1, val, "sdi_view_id_val" ); sei.m_sdiViewIdVal[i] = val;
+      }
+      if (sei.m_sdiAuxiliaryInfoFlag)
+      {
+        sei.m_sdiAuxId.resize(sei.m_sdiMaxLayersMinus1 + 1);
+        sei.m_sdiNumAssociatedPrimaryLayersMinus1.resize(sei.m_sdiMaxLayersMinus1 + 1);
+        sei.m_sdiAssociatedPrimaryLayerIdx.resize(sei.m_sdiMaxLayersMinus1 + 1);
+        sei_read_code( pDecodedMessageOutputStream, 8, val, "sdi_aux_id" ); sei.m_sdiAuxId[i] = val;
+        if (sei.m_sdiAuxId[i] > 0)
+        {
+          sei_read_code( pDecodedMessageOutputStream, 6, val, "sdi_num_associated_primary_layers_minus1" ); sei.m_sdiNumAssociatedPrimaryLayersMinus1[i] = val;
+          sei.m_sdiAssociatedPrimaryLayerIdx[i].resize(sei.m_sdiNumAssociatedPrimaryLayersMinus1[i] + 1);
+          for (int j = 0; j <= sei.m_sdiNumAssociatedPrimaryLayersMinus1[i]; j++)
+          {
+            sei_read_code( pDecodedMessageOutputStream, 6, val, "sdi_associated_primary_layer_idx" );
+            sei.m_sdiAssociatedPrimaryLayerIdx[i][j] = val;
+          }
+        }
+      }
+    }
+    sei.m_sdiNumViews = 1;   // number of distinct view id values across the layers
+    if (sei.m_sdiMultiviewInfoFlag)
+    {
+      for (int i = 1; i <= sei.m_sdiMaxLayersMinus1; i++)
+      {
+        bool newViewFlag = true;
+        for (int j = 0; j < i; j++)
+        {
+          if (sei.m_sdiViewIdVal[i] == sei.m_sdiViewIdVal[j]) { newViewFlag = false; }
+        }
+        if (newViewFlag) { sei.m_sdiNumViews++; }
+      }
+    }
+  }
+}
+
+void SEIReader::xParseSEIMultiviewAcquisitionInfo(SEIMultiviewAcquisitionInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+{
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
+  uint32_t val;
+  // Parses a multiview acquisition information SEI payload: optional intrinsic parameters and optional extrinsic (r, t) parameters.
+  sei_read_flag( pDecodedMessageOutputStream, val, "intrinsic_param_flag" ); sei.m_maiIntrinsicParamFlag = (val == 1);
+  sei_read_flag( pDecodedMessageOutputStream, val, "extrinsic_param_flag" ); sei.m_maiExtrinsicParamFlag = (val == 1);
+  sei_read_uvlc( pDecodedMessageOutputStream, val, "num_views_minus1"     ); sei.m_maiNumViewsMinus1     =  val      ;
+  sei.resizeArrays( ); // presumably sizes the per-view arrays from the flags and view count just read - TODO confirm
+  if( sei.m_maiIntrinsicParamFlag )
+  {
+    sei_read_flag( pDecodedMessageOutputStream, val, "intrinsic_params_equal_flag" ); sei.m_maiIntrinsicParamsEqualFlag = (val == 1);
+    sei_read_uvlc( pDecodedMessageOutputStream, val, "prec_focal_length"           ); sei.m_maiPrecFocalLength          =  val      ;
+    sei_read_uvlc( pDecodedMessageOutputStream, val, "prec_principal_point"        ); sei.m_maiPrecPrincipalPoint       =  val      ;
+    sei_read_uvlc( pDecodedMessageOutputStream, val, "prec_skew_factor"            ); sei.m_maiPrecSkewFactor           =  val      ;
+    // Read once (index 0) when intrinsic_params_equal_flag is set, otherwise once per view; each value is sign, 6-bit exponent, mantissa.
+    for( int i = 0; i  <=  ( sei.m_maiIntrinsicParamsEqualFlag ? 0 : sei.m_maiNumViewsMinus1 ); i++ )
+    {
+      sei_read_flag( pDecodedMessageOutputStream,                                         val, "sign_focal_length_x"        ); sei.m_maiSignFocalLengthX       [i] = (val == 1);
+      sei_read_code( pDecodedMessageOutputStream, 6,                                      val, "exponent_focal_length_x"    ); sei.m_maiExponentFocalLengthX   [i] =  val      ;
+      sei_read_code( pDecodedMessageOutputStream, sei.getMantissaFocalLengthXLen   ( i ), val, "mantissa_focal_length_x"    ); sei.m_maiMantissaFocalLengthX   [i] =  val      ;
+      sei_read_flag( pDecodedMessageOutputStream,                                         val, "sign_focal_length_y"        ); sei.m_maiSignFocalLengthY       [i] = (val == 1);
+      sei_read_code( pDecodedMessageOutputStream, 6,                                      val, "exponent_focal_length_y"    ); sei.m_maiExponentFocalLengthY   [i] =  val      ;
+      sei_read_code( pDecodedMessageOutputStream, sei.getMantissaFocalLengthYLen   ( i ), val, "mantissa_focal_length_y"    ); sei.m_maiMantissaFocalLengthY   [i] =  val      ;
+      sei_read_flag( pDecodedMessageOutputStream,                                         val, "sign_principal_point_x"     ); sei.m_maiSignPrincipalPointX    [i] = (val == 1);
+      sei_read_code( pDecodedMessageOutputStream, 6,                                      val, "exponent_principal_point_x" ); sei.m_maiExponentPrincipalPointX[i] =  val      ;
+      sei_read_code( pDecodedMessageOutputStream, sei.getMantissaPrincipalPointXLen( i ), val, "mantissa_principal_point_x" ); sei.m_maiMantissaPrincipalPointX[i] =  val      ;
+      sei_read_flag( pDecodedMessageOutputStream,                                         val, "sign_principal_point_y"     ); sei.m_maiSignPrincipalPointY    [i] = (val == 1);
+      sei_read_code( pDecodedMessageOutputStream, 6,                                      val, "exponent_principal_point_y" ); sei.m_maiExponentPrincipalPointY[i] =  val      ;
+      sei_read_code( pDecodedMessageOutputStream, sei.getMantissaPrincipalPointYLen( i ), val, "mantissa_principal_point_y" ); sei.m_maiMantissaPrincipalPointY[i] =  val      ;
+      sei_read_flag( pDecodedMessageOutputStream,                                         val, "sign_skew_factor"           ); sei.m_maiSignSkewFactor         [i] = (val == 1);
+      sei_read_code( pDecodedMessageOutputStream, 6,                                      val, "exponent_skew_factor"       ); sei.m_maiExponentSkewFactor     [i] =  val      ;
+      sei_read_code( pDecodedMessageOutputStream, sei.getMantissaSkewFactorLen     ( i ), val, "mantissa_skew_factor"       ); sei.m_maiMantissaSkewFactor     [i] =  val      ;
+    }
+  }
+  if( sei.m_maiExtrinsicParamFlag )
+  {
+    sei_read_uvlc( pDecodedMessageOutputStream, val, "prec_rotation_param"    ); sei.m_maiPrecRotationParam    = val;
+    sei_read_uvlc( pDecodedMessageOutputStream, val, "prec_translation_param" ); sei.m_maiPrecTranslationParam = val;
+    // For every view: three rows, each holding three r elements followed by one t element.
+    for( int i = 0; i  <=  sei.m_maiNumViewsMinus1; i++ )
+    {
+      for( int j = 0; j  <=  2; j++ )  /* row */
+      {
+        for( int k = 0; k  <=  2; k++ )  /* column */
+        {
+          sei_read_flag( pDecodedMessageOutputStream,                                 val, "sign_r"     ); sei.m_maiSignR    [i][j][k] = (val == 1);
+          sei_read_code( pDecodedMessageOutputStream, 6,                              val, "exponent_r" ); sei.m_maiExponentR[i][j][k] =  val      ;
+          sei_read_code( pDecodedMessageOutputStream, sei.getMantissaRLen( i, j, k ), val, "mantissa_r" ); sei.m_maiMantissaR[i][j][k] =  val      ;
+        }
+        sei_read_flag( pDecodedMessageOutputStream,                              val, "sign_t"     ); sei.m_maiSignT    [i][j] = (val == 1);
+        sei_read_code( pDecodedMessageOutputStream, 6,                           val, "exponent_t" ); sei.m_maiExponentT[i][j] =  val      ;
+        sei_read_code( pDecodedMessageOutputStream, sei.getMantissaTLen( i, j ), val, "mantissa_t" ); sei.m_maiMantissaT[i][j] =  val      ;
+      }
+    }
+  }
+};
+
+#if JVET_W0078_MVP_SEI
+void SEIReader::xParseSEIMultiviewViewPosition(SEIMultiviewViewPosition& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+{
+  // Multiview view position SEI: the number of views followed by one view_position value per view.
+  uint32_t code;
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
+  sei_read_uvlc(pDecodedMessageOutputStream, code, "num_views_minus1");   sei.m_mvpNumViewsMinus1 = code;
+  sei.m_mvpViewPosition.resize(sei.m_mvpNumViewsMinus1 + 1);
+  for (auto& viewPosition : sei.m_mvpViewPosition)
+  {
+    sei_read_uvlc(pDecodedMessageOutputStream, code, "view_position");    viewPosition = code;
+  }
+}
+#endif
+
+void SEIReader::xParseSEIAlphaChannelInfo(SEIAlphaChannelInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+{
+  // Alpha channel information SEI: a cancel flag and, unless cancelled, the use idc, the bit depth,
+  // the transparent/opaque sample values and the increment/clip flags.
+  uint32_t code;
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
+  sei_read_flag(pDecodedMessageOutputStream, code, "alpha_channel_cancel_flag");              sei.m_aciCancelFlag = (code == 1);
+  if (sei.m_aciCancelFlag) { return; }   // a cancellation carries no further syntax elements
+  sei_read_code(pDecodedMessageOutputStream, 3, code, "alpha_channel_use_idc");               sei.m_aciUseIdc = code;
+  sei_read_code(pDecodedMessageOutputStream, 3, code, "alpha_channel_bit_depth_minus8");      sei.m_aciBitDepthMinus8 = code;
+  const uint32_t sampleValueBits = sei.m_aciBitDepthMinus8 + 9;   // coded length of both alpha sample values
+  sei_read_code(pDecodedMessageOutputStream, sampleValueBits, code, "alpha_transparent_value"); sei.m_aciTransparentValue = code;
+  sei_read_code(pDecodedMessageOutputStream, sampleValueBits, code, "alpha_opaque_value");      sei.m_aciOpaqueValue = code;
+  sei_read_flag(pDecodedMessageOutputStream, code, "alpha_channel_incr_flag");                sei.m_aciIncrFlag = (code == 1);
+  sei_read_flag(pDecodedMessageOutputStream, code, "alpha_channel_clip_flag");                sei.m_aciClipFlag = (code == 1);
+  if (sei.m_aciClipFlag)
+  {
+    sei_read_flag(pDecodedMessageOutputStream, code, "alpha_channel_clip_type_flag");         sei.m_aciClipTypeFlag = (code == 1);
+  }
+}
+
+void SEIReader::xParseSEIDepthRepresentationInfo(SEIDepthRepresentationInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+{
+  // Depth representation information SEI: four presence flags, the representation type, an optional
+  // disparity reference view id, the signalled z-near/z-far/d-min/d-max elements and, for
+  // representation type 3, the non-linear representation model entries.
+  uint32_t code;
+  double   element;   // scratch value filled by xParseSEIDepthRepInfoElement
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
+
+  sei_read_flag(pDecodedMessageOutputStream, code, "z_near_flag");               sei.m_driZNearFlag = (code == 1);
+  sei_read_flag(pDecodedMessageOutputStream, code, "z_far_flag");                sei.m_driZFarFlag  = (code == 1);
+  sei_read_flag(pDecodedMessageOutputStream, code, "d_min_flag");                sei.m_driDMinFlag  = (code == 1);
+  sei_read_flag(pDecodedMessageOutputStream, code, "d_max_flag");                sei.m_driDMaxFlag  = (code == 1);
+  sei_read_uvlc(pDecodedMessageOutputStream, code, "depth_representation_type"); sei.m_driDepthRepresentationType = code;
+  if (sei.m_driDMinFlag || sei.m_driDMaxFlag)
+  {
+    sei_read_uvlc(pDecodedMessageOutputStream, code, "disparity_ref_view_id");   sei.m_driDisparityRefViewId = code;
+  }
+  if (sei.m_driZNearFlag)
+  {
+    xParseSEIDepthRepInfoElement(element, pDecodedMessageOutputStream);
+    sei.m_driZNear = element;
+  }
+  if (sei.m_driZFarFlag)
+  {
+    xParseSEIDepthRepInfoElement(element, pDecodedMessageOutputStream);
+    sei.m_driZFar = element;
+  }
+  if (sei.m_driDMinFlag)
+  {
+    xParseSEIDepthRepInfoElement(element, pDecodedMessageOutputStream);
+    sei.m_driDMin = element;
+  }
+  if (sei.m_driDMaxFlag)
+  {
+    xParseSEIDepthRepInfoElement(element, pDecodedMessageOutputStream);
+    sei.m_driDMax = element;
+  }
+  if (sei.m_driDepthRepresentationType == 3)
+  {
+    sei_read_uvlc(pDecodedMessageOutputStream, code, "depth_nonlinear_representation_num_minus1");
+    sei.m_driDepthNonlinearRepresentationNumMinus1 = code;
+    for (int k = 1; k <= sei.m_driDepthNonlinearRepresentationNumMinus1 + 1; k++)
+    {
+      sei_read_uvlc(pDecodedMessageOutputStream, code, "DepthNonlinearRepresentationModel");
+      sei.m_driDepthNonlinearRepresentationModel.push_back(code);
+    }
+  }
+}
+
+void SEIReader::xParseSEIDepthRepInfoElement(double& f,std::ostream *pDecodedMessageOutputStream)
+{
+  // Reads one depth representation element (sign flag, 7-bit exponent, 5-bit mantissa length,
+  // mantissa) and stores the reconstructed value in f:
+  //   f = (-1)^sign * 2^(da_exponent - 31) * (1 + da_mantissa / 2^mantissaLen)
+  uint32_t code;
+
+  sei_read_flag(pDecodedMessageOutputStream,     code,"da_sign_flag");
+  const bool negative = (code != 0);
+  sei_read_code(pDecodedMessageOutputStream,  7, code, "da_exponent" );
+  const int exponent = code-31;
+  sei_read_code(pDecodedMessageOutputStream,  5, code, "da_mantissa_len_minus1" );
+  const uint32_t mantissaLen = code+1;
+  sei_read_code(pDecodedMessageOutputStream,  mantissaLen, code, "da_mantissa" );
+  const uint32_t mantissa = code;
+
+  // Fractional part: for lengths of 16 or more the division is done in two steps
+  // (first by 2^(mantissaLen-16), then by 2^16).
+  double value = mantissa*1.0;
+  if (mantissaLen>=16)
+  {
+    value = value/(1u<<(mantissaLen-16))/(256.0*256.0 );
+  }
+  else
+  {
+    value = value/(1u<<mantissaLen);
+  }
+  value = 1.0 + value;
+  // Scale by 2^|exponent| through repeated doubling; divide when the exponent is negative.
+  double scale = 1.0;
+  const int steps = (exponent<0) ? -exponent : exponent;
+  for (int k=0; k<steps; k++)
+  {
+    scale = scale * 2;
+  }
+  value = (exponent<0) ? value/scale : value*scale;
+  f = negative ? -value : value;
+}
+
 void SEIReader::xParseSEISubpictureLevelInfo(SEISubpicureLevelInfo& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
 {
   output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
@@ -1525,6 +2017,29 @@ void SEIReader::xParseSEISampleAspectRatioInfo(SEISampleAspectRatioInfo& sei, ui
   }
 }
 
+void SEIReader::xParseSEIExtendedDrapIndication(SEIExtendedDrapIndication& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream)
+{
+  // Parses the extended DRAP indication SEI payload into sei; each read below fills one member.
+  uint32_t code;
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);
+  sei_read_code( pDecodedMessageOutputStream, 16, code, "edrap_rap_id_minus1" );                    sei.m_edrapIndicationRapIdMinus1 = code;
+  sei_read_flag( pDecodedMessageOutputStream,     code, "edrap_leading_pictures_decodable_flag" );  sei.m_edrapIndicationLeadingPicturesDecodableFlag = code;
+  sei_read_code( pDecodedMessageOutputStream, 12, code, "edrap_reserved_zero_12bits" );             sei.m_edrapIndicationReservedZero12Bits = code;
+  sei_read_code( pDecodedMessageOutputStream, 3,  code, "edrap_num_ref_rap_pics_minus1" );          sei.m_edrapIndicationNumRefRapPicsMinus1 = code;
+  sei.m_edrapIndicationRefRapId.resize(sei.m_edrapIndicationNumRefRapPicsMinus1 + 1);
+  for (auto &refRapId : sei.m_edrapIndicationRefRapId)   // one 16-bit id per referenced RAP picture
+  {
+    sei_read_code( pDecodedMessageOutputStream, 16, code, "edrap_ref_rap_id[i]" );                  refRapId = code;
+  }
+}
+
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+void SEIReader::xParseSEIConstrainedRaslIndication( SEIConstrainedRaslIndication& sei, uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream )
+{
+  output_sei_message_header(sei, pDecodedMessageOutputStream, payloadSize);   // only the message header is reported; no payload syntax elements are read here
+}
+#endif
+
 #if JVET_S0257_DUMP_360SEI_MESSAGE
 void SeiCfgFileDump::write360SeiDump (std::string decoded360MessageFileName, SEIMessages& seis, const SPS* sps)
 {
@@ -1547,7 +2062,7 @@ void SeiCfgFileDump::write360SeiDump (std::string decoded360MessageFileName, SEI
     {
       SEIGeneralizedCubemapProjection* sei = (SEIGeneralizedCubemapProjection*)generalizedCubemapProjectionSEIs.front();
       xDumpSEIGeneralizedCubemapProjection(*sei, sps, decoded360MessageFileName);
-      m_360SEIMessageDumped = true; 
+      m_360SEIMessageDumped = true;
     }
   }
 }
diff --git a/source/Lib/DecoderLib/SEIread.h b/source/Lib/DecoderLib/SEIread.h
index 996f6e1f350bbc3309adabd7c575e5ecdec41f36..8a9f0aea74b5aa8521f88a3206de32f9ca82d78a 100644
--- a/source/Lib/DecoderLib/SEIread.h
+++ b/source/Lib/DecoderLib/SEIread.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -69,8 +69,10 @@ protected:
   void xParseSEIFrameFieldinfo                (SEIFrameFieldInfo& sei,                uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream);
   void xParseSEIDependentRAPIndication        (SEIDependentRAPIndication& sei,        uint32_t payLoadSize,                     std::ostream *pDecodedMessageOutputStream);
   void xParseSEIFramePacking                  (SEIFramePacking& sei,                  uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIDisplayOrientation            (SEIDisplayOrientation& sei,            uint32_t payloadSize,                     std::ostream* pDecodedMessageOutputStream);
   void xParseSEIParameterSetsInclusionIndication(SEIParameterSetsInclusionIndication& sei, uint32_t payloadSize,                std::ostream* pDecodedMessageOutputStream);
   void xParseSEIMasteringDisplayColourVolume  (SEIMasteringDisplayColourVolume& sei,  uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIAnnotatedRegions              (SEIAnnotatedRegions& sei,              uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
   void xParseSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics& sei,              uint32_t payLoadSize,                     std::ostream *pDecodedMessageOutputStream);
 #endif
@@ -79,6 +81,14 @@ protected:
   void xParseSEIOmniViewport                  (SEIOmniViewport& sei,                  uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
   void xParseSEIRegionWisePacking             (SEIRegionWisePacking& sei,             uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
   void xParseSEIGeneralizedCubemapProjection  (SEIGeneralizedCubemapProjection &sei,  uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIScalabilityDimensionInfo      (SEIScalabilityDimensionInfo& sei,      uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIMultiviewAcquisitionInfo      (SEIMultiviewAcquisitionInfo& sei,      uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream);
+#if JVET_W0078_MVP_SEI 
+  void xParseSEIMultiviewViewPosition         (SEIMultiviewViewPosition& sei,         uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream);
+#endif
+  void xParseSEIAlphaChannelInfo              (SEIAlphaChannelInfo& sei,              uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIDepthRepresentationInfo       (SEIDepthRepresentationInfo& sei,       uint32_t payloadSize, std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIDepthRepInfoElement           (double &f,std::ostream *pDecodedMessageOutputStream);
   void xParseSEISubpictureLevelInfo           (SEISubpicureLevelInfo& sei,            uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
   void xParseSEISampleAspectRatioInfo         (SEISampleAspectRatioInfo& sei,         uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
   void xParseSEIUserDataRegistered            (SEIUserDataRegistered& sei,            uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
@@ -86,6 +96,11 @@ protected:
   void xParseSEIContentLightLevelInfo         (SEIContentLightLevelInfo& sei,         uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
   void xParseSEIAmbientViewingEnvironment     (SEIAmbientViewingEnvironment& sei,     uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
   void xParseSEIContentColourVolume           (SEIContentColourVolume& sei,           uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIExtendedDrapIndication        (SEIExtendedDrapIndication& sei,        uint32_t payloadSize,                     std::ostream *pDecodedMessageOutputStream);
+  void xParseSEIColourTransformInfo           (SEIColourTransformInfo& sei, uint32_t payloadSize, std::ostream* pDecodedMessageOutputStream);
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  void xParseSEIConstrainedRaslIndication     (SEIConstrainedRaslIndication& sei,     uint32_t payLoadSize,                     std::ostream *pDecodedMessageOutputStream);
+#endif
 
   void sei_read_scode(std::ostream *pOS, uint32_t length, int& code, const char *pSymbolName);
   void sei_read_code(std::ostream *pOS, uint32_t uiLength, uint32_t& ruiCode, const char *pSymbolName);
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index 97651347a617ba1718e8b8c0c5eeaba7c9283c0a..83ccbe070079e5f635018b89ae808a6ec391d7dd 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -280,6 +280,9 @@ void FDReader::parseFillerData(InputBitstream* bs, uint32_t &fdSize)
 
 HLSyntaxReader::HLSyntaxReader()
 {
+#if GDR_ENABLED
+  m_lastGdrPoc = -1;   // NOTE(review): looks like a "no GDR picture parsed yet" sentinel - confirm against the getLastGdrPoc() users
+#endif
 }
 
 HLSyntaxReader::~HLSyntaxReader()
@@ -440,6 +443,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS )
     READ_UVLC(uiCode, "pps_conf_win_bottom_offset");             conf.setWindowBottomOffset(uiCode);
   }
   READ_FLAG( uiCode, "pps_scaling_window_explicit_signalling_flag" );
+  pcPPS->setExplicitScalingWindowFlag( uiCode );
   if( uiCode != 0 )
   {
     Window &scalingWindow = pcPPS->getScalingWindow();
@@ -563,7 +567,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS )
         }
 
         if( tileIdx / pcPPS->getNumTileColumns() != pcPPS->getNumTileRows() - 1  &&
-         ( pcPPS->getTileIdxDeltaPresentFlag() || tileIdx % pcPPS->getNumTileColumns() == 0 ) ) 
+         ( pcPPS->getTileIdxDeltaPresentFlag() || tileIdx % pcPPS->getNumTileColumns() == 0 ) )
         {
           READ_UVLC( uiCode, "pps_slice_height_in_tiles_minus1[i]" );
           pcPPS->setSliceHeightInTiles( i, uiCode + 1 );
@@ -689,7 +693,7 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS )
   READ_FLAG(uiCode, "pps_ref_wraparound_enabled_flag");           pcPPS->setWrapAroundEnabledFlag( uiCode ? true : false );
   if (pcPPS->getWrapAroundEnabledFlag())
   {
-    READ_UVLC(uiCode, "pps_ref_wraparound_offset");               
+    READ_UVLC(uiCode, "pps_ref_wraparound_offset");
     pcPPS->setPicWidthMinusWrapAroundOffset(uiCode);
   }
   else
@@ -1194,12 +1198,8 @@ void HLSyntaxReader::parseGeneralHrdParameters(GeneralHrdParams *hrd)
   READ_CODE(32, symbol, "time_scale");                       hrd->setTimeScale(symbol);
   READ_FLAG(symbol, "general_nal_hrd_parameters_present_flag");           hrd->setGeneralNalHrdParametersPresentFlag(symbol == 1 ? true : false);
   READ_FLAG(symbol, "general_vcl_hrd_parameters_present_flag");           hrd->setGeneralVclHrdParametersPresentFlag(symbol == 1 ? true : false);
-#if JVET_S0175_ASPECT6
   if(  hrd->getGeneralNalHrdParametersPresentFlag() || hrd->getGeneralVclHrdParametersPresentFlag() )
   {
-#else
-  CHECK((hrd->getGeneralNalHrdParametersPresentFlag() == 0) && (hrd->getGeneralVclHrdParametersPresentFlag() == 0), "general_nal_hrd_params_present_flag and general_vcl_hrd_params_present_flag in each general_hrd_parameters( ) syntax structure shall not be both equal to 0.");
-#endif
     READ_FLAG(symbol, "general_same_pic_timing_in_all_ols_flag");           hrd->setGeneralSamePicTimingInAllOlsFlag(symbol == 1 ? true : false);
     READ_FLAG(symbol, "general_decoding_unit_hrd_params_present_flag");     hrd->setGeneralDecodingUnitHrdParamsPresentFlag(symbol == 1 ? true : false);
     if (hrd->getGeneralDecodingUnitHrdParamsPresentFlag())
@@ -1214,9 +1214,7 @@ void HLSyntaxReader::parseGeneralHrdParameters(GeneralHrdParams *hrd)
     }
     READ_UVLC(symbol, "hrd_cpb_cnt_minus1");                      hrd->setHrdCpbCntMinus1(symbol);
     CHECK(symbol > 31,"The value of hrd_cpb_cnt_minus1 shall be in the range of 0 to 31, inclusive");
-#if JVET_S0175_ASPECT6
   }
-#endif
 }
 void HLSyntaxReader::parseOlsHrdParameters(GeneralHrdParams * generalHrd, OlsHrdParams *olsHrd, uint32_t firstSubLayer, uint32_t maxNumSubLayersMinus1)
 {
@@ -1241,11 +1239,7 @@ void HLSyntaxReader::parseOlsHrdParameters(GeneralHrdParams * generalHrd, OlsHrd
     {
       READ_UVLC(symbol, "elemental_duration_in_tc_minus1");             hrd->setElementDurationInTcMinus1(symbol);
     }
-#if JVET_S0175_ASPECT6
     else if((generalHrd->getGeneralNalHrdParametersPresentFlag() || generalHrd->getGeneralVclHrdParametersPresentFlag()) && generalHrd->getHrdCpbCntMinus1() == 0)
-#else
-    else if(generalHrd->getHrdCpbCntMinus1() == 0)
-#endif
     {
       READ_FLAG(symbol, "low_delay_hrd_flag");                      hrd->setLowDelayHrdFlag(symbol == 1 ? true : false);
     }
@@ -1316,6 +1310,16 @@ void HLSyntaxReader::dpb_parameters(int maxSubLayersMinus1, bool subLayerInfoFla
     READ_UVLC(code, "dpb_max_latency_increase_plus1[i]");
     pcSPS->setMaxLatencyIncreasePlus1(code, i);
   }
+
+  if (!subLayerInfoFlag)
+  {
+    for(int i = 0; i < maxSubLayersMinus1; ++i)
+    {
+      pcSPS->setMaxDecPicBuffering(pcSPS->getMaxDecPicBuffering(maxSubLayersMinus1), i);
+      pcSPS->setMaxNumReorderPics(pcSPS->getMaxNumReorderPics(maxSubLayersMinus1), i);
+      pcSPS->setMaxLatencyIncreasePlus1(pcSPS->getMaxLatencyIncreasePlus1(maxSubLayersMinus1), i);
+    }
+  }
 }
 
 
@@ -1357,12 +1361,10 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   READ_FLAG(uiCode, "sps_gdr_enabled_flag");
   pcSPS->setGDREnabledFlag(uiCode);
 
-#if JVET_R0266_GCI
   if (pcSPS->getProfileTierLevel()->getConstraintInfo()->getNoGdrConstraintFlag())
   {
     CHECK(uiCode != 0, "When gci_no_gdr_constraint_flag equal to 1 , the value of sps_gdr_enabled_flag shall be equal to 0");
   }
-#endif
 
   READ_FLAG(uiCode, "sps_ref_pic_resampling_enabled_flag");          pcSPS->setRprEnabledFlag(uiCode);
   if (pcSPS->getProfileTierLevel()->getConstraintInfo()->getNoRprConstraintFlag())
@@ -1546,7 +1548,7 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   READ_FLAG( uiCode, "sps_entry_point_offsets_present_flag");   pcSPS->setEntryPointsPresentFlag(uiCode == 1);
   READ_CODE(4, uiCode, "sps_log2_max_pic_order_cnt_lsb_minus4");     pcSPS->setBitsForPOC( 4 + uiCode );
   CHECK(uiCode > 12, "sps_log2_max_pic_order_cnt_lsb_minus4 shall be in the range of 0 to 12");
-  
+
   READ_FLAG(uiCode, "sps_poc_msb_cycle_flag");                    pcSPS->setPocMsbCycleFlag(uiCode ? true : false);
   if (pcSPS->getPocMsbCycleFlag())
   {
@@ -1786,7 +1788,7 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
     for (uint32_t ii = 0; ii < numberOfRPL; ii++)
       copyRefPicList(pcSPS, rplListSource->getReferencePictureList(ii), rplListDest->getReferencePictureList(ii));
   }
-  
+
 
   READ_FLAG(uiCode, "sps_ref_wraparound_enabled_flag");                  pcSPS->setWrapAroundEnabledFlag( uiCode ? true : false );
 
@@ -1922,6 +1924,10 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
     pcSPS->setVerCollocatedChromaFlag(true);
   }
   READ_FLAG( uiCode,  "sps_palette_enabled_flag");                                pcSPS->setPLTMode                ( uiCode != 0 );
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  CHECK((profile == Profile::MAIN_12 || profile == Profile::MAIN_12_INTRA || profile == Profile::MAIN_12_STILL_PICTURE)
+    && uiCode != 0, "sps_palette_enabled_flag shall be equal to 0 for Main 12 (420) profiles");
+#endif
   if (pcSPS->getChromaFormatIdc() == CHROMA_444 && pcSPS->getLog2MaxTbSize() != 6)
   {
     READ_FLAG(uiCode, "sps_act_enabled_flag");                                pcSPS->setUseColorTrans(uiCode != 0);
@@ -2076,6 +2082,7 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   // KJS: no SPS extensions defined yet
 
   READ_FLAG( uiCode, "sps_extension_present_flag");
+
   if (uiCode)
   {
 #if ENABLE_TRACING || RExt__DECODER_DEBUG_BIT_STATISTICS
@@ -2096,6 +2103,12 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
       sps_extension_flags[i] = uiCode!=0;
     }
 
+#if JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS
+    if (pcSPS->getBitDepth(CHANNEL_TYPE_LUMA) <= 10)
+      CHECK(sps_extension_flags[SPS_EXT__REXT] == 1,
+            "The value of sps_range_extension_flag shall be 0 when BitDepth is less than or equal to 10.");
+#endif
+
     bool bSkipTrailingExtensionBits=false;
     for(int i=0; i<NUM_SPS_EXTENSION_FLAGS; i++) // loop used so that the order is determined by the enum.
     {
@@ -2107,13 +2120,31 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
           CHECK(bSkipTrailingExtensionBits, "Skipping trailing extension bits not supported");
           {
             SPSRExt &spsRangeExtension = pcSPS->getSpsRangeExtension();
+#if !JVET_W2005_RANGE_EXTENSION_PROFILES
             READ_FLAG( uiCode, "transform_skip_rotation_enabled_flag");     spsRangeExtension.setTransformSkipRotationEnabledFlag(uiCode != 0);
             READ_FLAG( uiCode, "transform_skip_context_enabled_flag");      spsRangeExtension.setTransformSkipContextEnabledFlag (uiCode != 0);
+#endif
             READ_FLAG( uiCode, "extended_precision_processing_flag");       spsRangeExtension.setExtendedPrecisionProcessingFlag (uiCode != 0);
+#if JVET_W0070_W0121_SPSRE_CLEANUP
+            if (pcSPS->getTransformSkipEnabledFlag()) 
+            {
+              READ_FLAG( uiCode, "sps_ts_residual_coding_rice_present_in_sh_flag"); spsRangeExtension.setTSRCRicePresentFlag(uiCode != 0);
+            }
+#else
+            READ_FLAG( uiCode, "sps_ts_residual_coding_rice_present_in_sh_flag"); spsRangeExtension.setTSRCRicePresentFlag(uiCode != 0);
+#endif
+#if !JVET_W2005_RANGE_EXTENSION_PROFILES
             READ_FLAG( uiCode, "intra_smoothing_disabled_flag");            spsRangeExtension.setIntraSmoothingDisabledFlag      (uiCode != 0);
             READ_FLAG( uiCode, "high_precision_offsets_enabled_flag");      spsRangeExtension.setHighPrecisionOffsetsEnabledFlag (uiCode != 0);
+#endif
+            READ_FLAG(uiCode,  "rrc_rice_extension_flag");                  spsRangeExtension.setRrcRiceExtensionEnableFlag      (uiCode != 0);
             READ_FLAG( uiCode, "persistent_rice_adaptation_enabled_flag");  spsRangeExtension.setPersistentRiceAdaptationEnabledFlag (uiCode != 0);
+#if JVET_W0046_RLSCP
+            READ_FLAG( uiCode, "reverse_last_position_enabled_flag");       spsRangeExtension.setReverseLastSigCoeffEnabledFlag(uiCode != 0);
+#endif
+#if !JVET_W2005_RANGE_EXTENSION_PROFILES
             READ_FLAG( uiCode, "cabac_bypass_alignment_enabled_flag");      spsRangeExtension.setCabacBypassAlignmentEnabledFlag  (uiCode != 0);
+#endif
           }
           break;
         default:
@@ -2133,7 +2164,6 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   xReadRbspTrailingBits();
 }
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
 void HLSyntaxReader::parseOPI(OPI* opi)
 {
 #if ENABLE_TRACING
@@ -2146,13 +2176,13 @@ void HLSyntaxReader::parseOPI(OPI* opi)
   READ_FLAG(symbol, "opi_htid_info_present_flag");
   opi->setHtidInfoPresentFlag(symbol);
 
-  if (opi->getOlsInfoPresentFlag()) 
+  if (opi->getOlsInfoPresentFlag())
   {
-    READ_UVLC(symbol, "opi_ols_idx");  
+    READ_UVLC(symbol, "opi_ols_idx");
     opi->setOpiOlsIdx(symbol);
   }
 
-  if (opi->getHtidInfoPresentFlag()) 
+  if (opi->getHtidInfoPresentFlag())
   {
     READ_CODE(3, symbol, "opi_htid_plus1");
     opi->setOpiHtidPlus1(symbol);
@@ -2168,7 +2198,6 @@ void HLSyntaxReader::parseOPI(OPI* opi)
   }
   xReadRbspTrailingBits();
 }
-#endif
 
 
 void HLSyntaxReader::parseDCI(DCI* dci)
@@ -2236,11 +2265,9 @@ void HLSyntaxReader::parseVPS(VPS* pcVPS)
       pcVPS->setEachLayerIsAnOlsFlag(0);
     }
   }
-#if JVET_R0193
   std::vector<std::vector<uint32_t>> maxTidilRefPicsPlus1;
   maxTidilRefPicsPlus1.resize(pcVPS->getMaxLayers(), std::vector<uint32_t>(pcVPS->getMaxLayers(), NOT_VALID));
   pcVPS->setMaxTidIlRefPicsPlus1(maxTidilRefPicsPlus1);
-#endif
   for (uint32_t i = 0; i < pcVPS->getMaxLayers(); i++)
   {
     READ_CODE(6, uiCode, "vps_layer_id");                     pcVPS->setLayerId(i, uiCode);
@@ -2251,7 +2278,6 @@ void HLSyntaxReader::parseVPS(VPS* pcVPS)
       READ_FLAG(uiCode, "vps_independent_layer_flag");     pcVPS->setIndependentLayerFlag(i, uiCode);
       if (!pcVPS->getIndependentLayerFlag(i))
       {
-#if JVET_R0193
         READ_FLAG(uiCode, "max_tid_ref_present_flag[ i ]");
         bool presentFlag = uiCode;
         uint16_t sumUiCode = 0;
@@ -2275,30 +2301,6 @@ void HLSyntaxReader::parseVPS(VPS* pcVPS)
           }
         }
         CHECK(sumUiCode == 0, "There has to be at least one value of j such that the value of vps_direct_dependency_flag[ i ][ j ] is equal to 1,when vps_independent_layer_flag[ i ] is equal to 0 ");
-#else
-        uint16_t sumUiCode = 0;
-        for (int j = 0, k = 0; j < i; j++)
-        {
-          READ_FLAG(uiCode, "vps_direct_dependency_flag"); pcVPS->setDirectRefLayerFlag(i, j, uiCode);
-          if( uiCode )
-          {
-            pcVPS->setInterLayerRefIdc( i, j, k );
-            pcVPS->setDirectRefLayerIdx( i, k++, j );
-            sumUiCode++;
-          }
-        }
-        CHECK(sumUiCode == 0, "There has to be at least one value of j such that the value of vps_direct_dependency_flag[ i ][ j ] is equal to 1,when vps_independent_layer_flag[ i ] is equal to 0 ");
-        READ_FLAG(uiCode, "vps_max_tid_ref_present_flag[ i ]");
-        if (uiCode)
-        {
-          READ_CODE(3, uiCode, "vps_max_tid_il_ref_pics_plus1[ i ]");
-          pcVPS->setMaxTidIlRefPicsPlus1(i, uiCode);
-        }
-        else
-        {
-          pcVPS->setMaxTidIlRefPicsPlus1(i, 7);
-        }
-#endif
       }
     }
   }
@@ -2372,7 +2374,7 @@ void HLSyntaxReader::parseVPS(VPS* pcVPS)
   ptls.resize(pcVPS->getNumPtls());
   for (int i = 0; i < pcVPS->getNumPtls(); i++)
   {
-    parseProfileTierLevel(&ptls[i], pcVPS->getPtPresentFlag(i), pcVPS->getPtlMaxTemporalId(i) - 1);
+    parseProfileTierLevel(&ptls[i], pcVPS->getPtPresentFlag(i), pcVPS->getPtlMaxTemporalId(i));
   }
   pcVPS->setProfileTierLevel(ptls);
   for (int i = 0; i < pcVPS->getTotalNumOLSs(); i++)
@@ -2647,7 +2649,7 @@ void HLSyntaxReader::parsePictureHeader( PicHeader* picHeader, ParameterSetManag
       {
         READ_CODE(3, uiCode, "ph_num_alf_aps_ids_luma");
         int numAps = uiCode;
-        picHeader->setNumAlfAps(numAps);
+        picHeader->setNumAlfApsIdsLuma(numAps);
 
         std::vector<int> apsId(numAps, -1);
         for (int i = 0; i < numAps; i++)
@@ -2655,7 +2657,7 @@ void HLSyntaxReader::parsePictureHeader( PicHeader* picHeader, ParameterSetManag
           READ_CODE(3, uiCode, "ph_alf_aps_id_luma");
           apsId[i] = uiCode;
         }
-        picHeader->setAlfAPSs(apsId);
+        picHeader->setAlfApsIdsLuma(apsId);
 
         if (sps->getChromaFormatIdc() != CHROMA_400)
         {
@@ -2697,7 +2699,7 @@ void HLSyntaxReader::parsePictureHeader( PicHeader* picHeader, ParameterSetManag
       }
       else
       {
-        picHeader->setNumAlfAps(0);
+        picHeader->setNumAlfApsIdsLuma(0);
       }
       picHeader->setAlfEnabledFlag(COMPONENT_Cb, alfCbEnabledFlag);
       picHeader->setAlfEnabledFlag(COMPONENT_Cr, alfCrEnabledFlag);
@@ -2764,7 +2766,12 @@ void HLSyntaxReader::parsePictureHeader( PicHeader* picHeader, ParameterSetManag
     pps->getConformanceWindow().setWindowRightOffset(sps->getConformanceWindow().getWindowRightOffset());
     pps->getConformanceWindow().setWindowTopOffset(sps->getConformanceWindow().getWindowTopOffset());
     pps->getConformanceWindow().setWindowBottomOffset(sps->getConformanceWindow().getWindowBottomOffset());
+    if (!pps->getExplicitScalingWindowFlag())
+    {
+      pps->setScalingWindow(pps->getConformanceWindow());
+    }
   }
+  CHECK(!sps->getRprEnabledFlag() && pps->getExplicitScalingWindowFlag(), "When sps_ref_pic_resampling_enabled_flag is equal to 0, the value of pps_scaling_window_explicit_signalling_flag shall be equal to 0");
 
   // initialize tile/slice info for no partitioning case
 
@@ -2832,6 +2839,11 @@ void HLSyntaxReader::parsePictureHeader( PicHeader* picHeader, ParameterSetManag
         READ_UVLC(uiCode, "ph_virtual_boundary_pos_x_minus1[i]");        picHeader->setVirtualBoundariesPosX((uiCode + 1) << 3, i);
         CHECK(uiCode > (((pps->getPicWidthInLumaSamples() + 7) >> 3) - 2), "The value of ph_virtual_boundary_pos_x_minus1[ i ] shall be in the range of 0 to Ceil( pps_pic_width_in_luma_samples / 8 ) - 2, inclusive.");
       }
+#if GDR_DEC_TRACE
+      printf("\n");
+      printf("-num_ver_boundary :%d\n", picHeader->getNumVerVirtualBoundaries());
+      printf("-vir_boundary_pos :%d\n", picHeader->getVirtualBoundariesPosX(0));
+#endif
       READ_UVLC(uiCode, "ph_num_hor_virtual_boundaries");        picHeader->setNumHorVirtualBoundaries( uiCode );
       if (pps->getPicHeightInLumaSamples() <= 8)
       {
@@ -2937,12 +2949,10 @@ void HLSyntaxReader::parsePictureHeader( PicHeader* picHeader, ParameterSetManag
           *rpl = *sps->getRPLList( listIdx )->getReferencePictureList(picHeader->getRPLIdx( listIdx ));
         }
       }
-#if JVET_S0096_RPL_CONSTRAINT
       if (picHeader->getPicInterSliceAllowedFlag() && listIdx == 0)
       {
           CHECK(picHeader->getRPL(0)->getNumRefEntries() <= 0, "When pps_rpl_info_in_ph_flag is equal to 1 and ph_inter_slice_allowed_flag is equal to 1, the value of num_ref_entries[ 0 ][ RplsIdx[ 0 ] ] shall be greater than 0");
       }
-#endif
       // POC MSB cycle signalling for LTRP
       for (int i = 0; i < rpl->getNumberOfLongtermPictures() + rpl->getNumberOfShorttermPictures(); i++)
       {
@@ -3404,12 +3414,12 @@ void  HLSyntaxReader::checkAlfNaluTidAndPicTid(Slice* pcSlice, PicHeader* picHea
   PPS* pps = parameterSetManager->getPPS(picHeader->getPPSId());
   int curPicTid = pcSlice->getTLayer();
   APS* aps;
-  const std::vector<int>&   apsId = picHeader->getAlfAPSs();
+  const std::vector<int>&   apsId = picHeader->getAlfApsIdsLuma();
 
   if (sps->getALFEnabledFlag() && pps->getAlfInfoInPhFlag() && picHeader->getAlfEnabledFlag(COMPONENT_Y))
   {
     //luma
-    for (int i = 0; i < picHeader->getNumAlfAps(); i++)
+    for (int i = 0; i < picHeader->getNumAlfApsIdsLuma(); i++)
     {
       aps = parameterSetManager->getAPS(apsId[i], ALF_APS);
       CHECK(aps->getTemporalId() > curPicTid, "The TemporalId of the APS NAL unit having aps_params_type equal to ALF_APS and adaptation_parameter_set_id equal to ph_alf_aps_id_luma[ i ] shall be less than or equal to the TemporalId of the picture associated with the PH.");
@@ -3665,7 +3675,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
   if (sps->getALFEnabledFlag() && !pps->getAlfInfoInPhFlag())
   {
     READ_FLAG(uiCode, "sh_alf_enabled_flag");
-    pcSlice->setTileGroupAlfEnabledFlag(COMPONENT_Y, uiCode);
+    pcSlice->setAlfEnabledFlag(COMPONENT_Y, uiCode);
     int alfCbEnabledFlag = 0;
     int alfCrEnabledFlag = 0;
 
@@ -3673,7 +3683,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
     {
       READ_CODE(3, uiCode, "sh_num_alf_aps_ids_luma");
       int numAps = uiCode;
-      pcSlice->setTileGroupNumAps(numAps);
+      pcSlice->setNumAlfApsIdsLuma(numAps);
       std::vector<int> apsId(numAps, -1);
       for (int i = 0; i < numAps; i++)
       {
@@ -3685,7 +3695,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
       }
 
 
-      pcSlice->setAlfAPSs(apsId);
+      pcSlice->setAlfApsIdsLuma(apsId);
       if (bChroma)
       {
         READ_CODE(1, uiCode, "sh_alf_cb_enabled_flag");   alfCbEnabledFlag = uiCode;
@@ -3699,7 +3709,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
       if (alfCbEnabledFlag || alfCrEnabledFlag)
       {
         READ_CODE(3, uiCode, "sh_alf_aps_id_chroma");
-        pcSlice->setTileGroupApsIdChroma(uiCode);
+        pcSlice->setAlfApsIdChroma(uiCode);
         APS* APStoCheckChroma = parameterSetManager->getAPS(uiCode, ALF_APS);
         CHECK(APStoCheckChroma == nullptr, "referenced APS not found");
         CHECK(APStoCheckChroma->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_CHROMA] != 1, "bitstream conformance error, alf_chroma_filter_signal_flag shall be equal to 1");
@@ -3707,42 +3717,42 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
     }
     else
     {
-      pcSlice->setTileGroupNumAps(0);
+      pcSlice->setNumAlfApsIdsLuma(0);
     }
-    pcSlice->setTileGroupAlfEnabledFlag(COMPONENT_Cb, alfCbEnabledFlag);
-    pcSlice->setTileGroupAlfEnabledFlag(COMPONENT_Cr, alfCrEnabledFlag);
+    pcSlice->setAlfEnabledFlag(COMPONENT_Cb, alfCbEnabledFlag);
+    pcSlice->setAlfEnabledFlag(COMPONENT_Cr, alfCrEnabledFlag);
 
     CcAlfFilterParam &filterParam = pcSlice->m_ccAlfFilterParam;
-    if (sps->getCCALFEnabledFlag() && pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y))
+    if (sps->getCCALFEnabledFlag() && pcSlice->getAlfEnabledFlag(COMPONENT_Y))
     {
       READ_FLAG(uiCode, "sh_alf_cc_cb_enabled_flag");
-      pcSlice->setTileGroupCcAlfCbEnabledFlag(uiCode);
+      pcSlice->setCcAlfCbEnabledFlag(uiCode);
       filterParam.ccAlfFilterEnabled[COMPONENT_Cb - 1] = (uiCode == 1) ? true : false;
-      pcSlice->setTileGroupCcAlfCbApsId(-1);
+      pcSlice->setCcAlfCbApsId(-1);
       if (filterParam.ccAlfFilterEnabled[COMPONENT_Cb - 1])
       {
         // parse APS ID
         READ_CODE(3, uiCode, "sh_alf_cc_cb_aps_id");
-        pcSlice->setTileGroupCcAlfCbApsId(uiCode);
+        pcSlice->setCcAlfCbApsId(uiCode);
       }
       // Cr
       READ_FLAG(uiCode, "sh_alf_cc_cr_enabled_flag");
-      pcSlice->setTileGroupCcAlfCrEnabledFlag(uiCode);
+      pcSlice->setCcAlfCrEnabledFlag(uiCode);
       filterParam.ccAlfFilterEnabled[COMPONENT_Cr - 1] = (uiCode == 1) ? true : false;
-      pcSlice->setTileGroupCcAlfCrApsId(-1);
+      pcSlice->setCcAlfCrApsId(-1);
       if (filterParam.ccAlfFilterEnabled[COMPONENT_Cr - 1])
       {
         // parse APS ID
         READ_CODE(3, uiCode, "sh_alf_cc_cr_aps_id");
-        pcSlice->setTileGroupCcAlfCrApsId(uiCode);
+        pcSlice->setCcAlfCrApsId(uiCode);
       }
     }
     else
     {
       filterParam.ccAlfFilterEnabled[COMPONENT_Cb - 1] = false;
       filterParam.ccAlfFilterEnabled[COMPONENT_Cr - 1] = false;
-      pcSlice->setTileGroupCcAlfCbApsId(-1);
-      pcSlice->setTileGroupCcAlfCrApsId(-1);
+      pcSlice->setCcAlfCbApsId(-1);
+      pcSlice->setCcAlfCrApsId(-1);
     }
   }
   if (picHeader->getLmcsEnabledFlag() && !pcSlice->getPictureHeaderInSliceHeader())
@@ -3883,10 +3893,12 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
         {
           int numBits = ceilLog2(sps->getNumRPL1());
           READ_CODE(numBits, uiCode, "ref_pic_list_idx[1]");
+          pcSlice->setRPL1idx(uiCode);
           *rpl1 = *sps->getRPLList1()->getReferencePictureList(uiCode);
         }
         else if (sps->getNumRPL(1) == 1)
         {
+          pcSlice->setRPL1idx(0);
           *rpl1 = *sps->getRPLList1()->getReferencePictureList(0);
         }
         else
@@ -4282,6 +4294,22 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
     pcSlice->setTSResidualCodingDisabledFlag( false );
   }
 
+  if ((!pcSlice->getTSResidualCodingDisabledFlag()) && sps->getSpsRangeExtension().getTSRCRicePresentFlag())
+  {
+    READ_CODE(3, uiCode, "sh_ts_residual_coding_rice_idx_minus1");
+    pcSlice->set_tsrc_index(uiCode);
+  }
+#if JVET_W0046_RLSCP
+  if (sps->getSpsRangeExtension().getReverseLastSigCoeffEnabledFlag())
+  {
+    READ_FLAG(uiCode, "sh_reverse_last_sig_coeff_flag");
+    pcSlice->setReverseLastSigCoeffFlag(uiCode != 0);
+  }
+  else {
+    pcSlice->setReverseLastSigCoeffFlag(false);
+  }
+#endif
+
   if( pcSlice->getFirstCtuRsAddrInSlice() == 0 )
   {
     pcSlice->setDefaultClpRng( *sps );
@@ -4358,6 +4386,37 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
       pcSlice->addSubstreamSize(entryPointOffset [ idx ] );
     }
   }
+#if GDR_ENABLED
+  int curPoc = pcSlice->getPOC();
+
+  if (picHeader->getGdrPicFlag())
+  {
+    setLastGdrPoc(curPoc);
+    setLastGdrRecoveryPocCnt(pcSlice->getPicHeader()->getRecoveryPocCnt());
+  }
+
+  int recoveryPocCnt = getLastGdrRecoveryPocCnt();
+
+  if (getLastGdrPoc() > 0 && (getLastGdrPoc() <= curPoc) && (curPoc < (getLastGdrPoc() + recoveryPocCnt)))
+  {
+    picHeader->setInGdrInterval(true);
+  }
+  else
+  {
+    picHeader->setInGdrInterval(false);
+  }
+#endif
+
+#if GDR_DEC_TRACE
+  printf("-gdr_pic_flag:%d\n", picHeader->getGdrPicFlag() ? 1 : 0);
+  printf("-recovery_poc_cnt:%d\n", picHeader->getRecoveryPocCnt());
+#if GDR_ENABLED
+  printf("-inGdrInterval:%d\n", picHeader->getInGdrInterval());
+#endif
+
+  printf("-lmcs_enable : %d\n", picHeader->getLmcsEnabledFlag() ? 1 : 0);
+  printf("-lmcs_chroma : %d\n", picHeader->getLmcsChromaResidualScaleFlag() ? 1 : 0);
+#endif
   return;
 }
 
@@ -4446,7 +4505,11 @@ void HLSyntaxReader::getSlicePoc(Slice* pcSlice, PicHeader* picHeader, Parameter
   DTRACE_UPDATE( g_trace_ctx, std::make_pair( "final", 1 ) );
 }
 
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo, const ProfileTierLevel* ptl )
+#else
 void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo)
+#endif
 {
   uint32_t symbol;
   READ_FLAG(symbol, "gci_present_flag"); cinfo->setGciPresentFlag(symbol ? true : false);
@@ -4455,7 +4518,7 @@ void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo)
     /* general */
     READ_FLAG(symbol, "gci_intra_only_constraint_flag");                 cinfo->setIntraOnlyConstraintFlag(symbol ? true : false);
     READ_FLAG(symbol, "gci_all_layers_independent_constraint_flag");     cinfo->setAllLayersIndependentConstraintFlag(symbol ? true : false);
-    READ_FLAG(symbol, "gci_one_au_only_constraint_flag");                cinfo->setOnePictureOnlyConstraintFlag(symbol ? true : false); 
+    READ_FLAG(symbol, "gci_one_au_only_constraint_flag");                cinfo->setOnePictureOnlyConstraintFlag(symbol ? true : false);
 
     /* picture format */
     READ_CODE(4, symbol, "gci_sixteen_minus_max_bitdepth_constraint_idc"); cinfo->setMaxBitDepthConstraintIdc(symbol>8 ? 16 : (16 - symbol));
@@ -4538,7 +4601,24 @@ void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo)
     READ_FLAG(symbol, "gci_no_virtual_boundaries_constraint_flag");      cinfo->setNoVirtualBoundaryConstraintFlag(symbol > 0 ? true : false);
     READ_CODE(8, symbol, "gci_num_reserved_bits");
     uint32_t const numReservedBits = symbol;
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    int numReservedBitsUsed;
+    if (numReservedBits > 0)
+    {
+      READ_FLAG(symbol, "general_lower_bit_rate_constraint_flag");       cinfo->setLowerBitRateConstraintFlag(symbol > 0 ? true : false);
+      numReservedBitsUsed = 1;
+      Profile::Name profile = ptl->getProfileIdc();
+      CHECK((profile == Profile::MAIN_12 || profile == Profile::MAIN_12_444 || profile == Profile::MAIN_16_444) &&
+        symbol == 0, "general_lower_bitrate_constraint_flag shall be equal to 1 for non-Intra/Still Picture operation range extension profiles.");
+    }
+    else
+    {
+      numReservedBitsUsed = 0;
+    }
+    for (int i = 0; i < numReservedBits - numReservedBitsUsed; i++)
+#else
     for (int i = 0; i < numReservedBits; i++)
+#endif
     {
       READ_FLAG(symbol, "gci_reserved_zero_bit");                    CHECK(symbol != 0, "gci_reserved_zero_bit not equal to zero");
     }
@@ -4571,7 +4651,11 @@ void HLSyntaxReader::parseProfileTierLevel(ProfileTierLevel *ptl, bool profileTi
 
   if(profileTierPresentFlag)
   {
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    parseConstraintInfo(ptl->getConstraintInfo(), ptl);
+#else
     parseConstraintInfo(ptl->getConstraintInfo());
+#endif
   }
 
   for (int i = maxNumSubLayersMinus1 - 1; i >= 0; i--)
diff --git a/source/Lib/DecoderLib/VLCReader.h b/source/Lib/DecoderLib/VLCReader.h
index dfd0b53ad39547614869620a78cc027d6558bb75..ae43701999aa80b29a5eaf1f9760525bbd2de02d 100644
--- a/source/Lib/DecoderLib/VLCReader.h
+++ b/source/Lib/DecoderLib/VLCReader.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -146,6 +146,11 @@ public:
 
 class HLSyntaxReader : public VLCReader
 {
+#if GDR_ENABLED
+  int m_lastGdrPoc;
+  int m_lastGdrRecoveryPocCnt;
+#endif
+
 public:
   HLSyntaxReader();
   virtual ~HLSyntaxReader();
@@ -155,10 +160,14 @@ protected:
   void  parseRefPicList(SPS* pcSPS, ReferencePictureList* rpl, int rplIdx);
 
 public:
+#if GDR_ENABLED
+  void setLastGdrPoc(int poc) { m_lastGdrPoc = poc;  }          // records the POC of the latest picture whose header has the GDR flag set
+  int  getLastGdrPoc() const  { return m_lastGdrPoc; }          // read-only accessor, callable on a const reader
+  void setLastGdrRecoveryPocCnt(int recoveryPocCnt) { m_lastGdrRecoveryPocCnt = recoveryPocCnt; }   // recovery POC count stored alongside the last GDR POC
+  int  getLastGdrRecoveryPocCnt() const             { return m_lastGdrRecoveryPocCnt; }             // read-only accessor, callable on a const reader
+#endif
   void  setBitstream        ( InputBitstream* p )   { m_pcBitstream = p; }
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   void  parseOPI            ( OPI* opi );
-#endif
   void  parseVPS            ( VPS* pcVPS );
   void  parseDCI            ( DCI* dci );
   void  parseSPS            ( SPS* pcSPS );
@@ -168,7 +177,11 @@ public:
   void  parseLmcsAps        ( APS* pcAPS );
   void  parseScalingListAps ( APS* pcAPS );
   void  parseVUI            ( VUI* pcVUI, SPS* pcSPS );
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  void  parseConstraintInfo (ConstraintInfo *cinfo, const ProfileTierLevel* ptl );
+#else
   void  parseConstraintInfo   (ConstraintInfo *cinfo);
+#endif
   void  parseProfileTierLevel(ProfileTierLevel *ptl, bool profileTierPresentFlag, int maxNumSubLayersMinus1);
   void  parseOlsHrdParameters(GeneralHrdParams* generalHrd, OlsHrdParams *olsHrd, uint32_t firstSubLayer, uint32_t tempLevelHigh);
   void parseGeneralHrdParameters(GeneralHrdParams *generalHrd);
diff --git a/source/Lib/EncoderLib/AQp.cpp b/source/Lib/EncoderLib/AQp.cpp
index a157e1c854416e142fb11b39f1b5c27472732f29..6f7150380d19c79167d29a5bfcdbb216b2c986a6 100644
--- a/source/Lib/EncoderLib/AQp.cpp
+++ b/source/Lib/EncoderLib/AQp.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/AQp.h b/source/Lib/EncoderLib/AQp.h
index 4e62ed1eee3034cd005c2e036041e12e59cebfdd..b6f7f60a623d5223bf35246f2208b52e062da987 100644
--- a/source/Lib/EncoderLib/AQp.h
+++ b/source/Lib/EncoderLib/AQp.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/Analyze.h b/source/Lib/EncoderLib/Analyze.h
index d17b9fa091f24a6a7f29b8b01bcc59d2d1ac34da..9ef8471aa19ec935f786530ffed79a4c57d9bac0 100644
--- a/source/Lib/EncoderLib/Analyze.h
+++ b/source/Lib/EncoderLib/Analyze.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -99,8 +99,8 @@ public:
 
     m_uiNumPic++;
   }
-#if ENABLE_QPA
-  double  getWPSNR      (const ComponentID compID) const { return m_dPSNRSum[compID] / (double)m_uiNumPic; }
+#if ENABLE_QPA || JVET_W0134_UNIFORM_METRICS_LOG
+  double  getWPSNR(const ComponentID compID) const { return m_dPSNRSum[compID] / (double)m_uiNumPic; }
 #endif
   double  getPsnr(ComponentID compID) const { return  m_dPSNRSum[compID];  }
   double  getMsssim(ComponentID compID) const { return  m_msssim[compID];  }
@@ -189,8 +189,156 @@ public:
     PSNRyuv = (MSEyuv == 0) ? 999.99 : 10.0 * log10((maxval * maxval) / MSEyuv);
   }
 
-#if ENABLE_QPA || WCG_WPSNR
-  void    printOut( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printMSSSIM, 
+#if JVET_W0134_UNIFORM_METRICS_LOG
+  void printOut(std::string &header, std::string &metrics, const std::string &delim, ChromaFormat chFmt,
+                bool printMSEBasedSNR, bool printSequenceMSE, bool printMSSSIM,
+                bool printHexPsnr, bool printRprPSNR, const BitDepths &bitDepths,
+                bool useWPSNR = false, bool printHdrMetrics = false)
+  {
+    // Builds two parallel strings, one field at a time: 'header' receives column titles, 'metrics' the formatted values.
+    std::ostringstream headeross,metricoss;
+    // No generic lambdas in C++11, so there is one helper per value type; each appends a title and a snprintf-formatted value.
+    auto addFieldD=[&](const std::string &title,const char *fmt,double x, bool withchroma=true) {
+      if (!withchroma) return;
+      char buffer[512];
+      headeross<<title;
+      snprintf(buffer,sizeof(buffer),fmt,x);
+      metricoss<<buffer;
+    };
+    auto addFieldL=[&](const std::string &title,const char *fmt,uint64_t x, bool withchroma=true) {
+      if (!withchroma) return;
+      char buffer[512];
+      headeross<<title;
+      snprintf(buffer,sizeof(buffer),fmt,x);
+      metricoss<<buffer;
+    };
+    auto addFieldS=[&](const std::string &title,const char *fmt,const char *s) {
+      char buffer[512];
+      headeross<<title;
+      snprintf(buffer,sizeof(buffer),fmt,s);
+      metricoss<<buffer;
+    };
+    // Copies the bytes of a double into a uint64_t so the value can be printed as hexadecimal.
+    auto hexValue=[](double x) {
+      uint64_t ui;
+      std::copy(reinterpret_cast<uint8_t *>(&x),
+                reinterpret_cast<uint8_t *>(&x) + sizeof(x),
+                reinterpret_cast<uint8_t *>(&ui));
+      return ui;
+    };
+    // scale = frame rate / 1000 / number of pictures; it is applied to the accumulated bit count for the Bitrate field.
+    double fps     =   m_dFrmRate; //--CFG_KDY
+    double scale   = fps / 1000 / (double)m_uiNumPic;
+
+    double mseBasedSNR[MAX_NUM_COMPONENT];
+    if (printMSEBasedSNR||printRprPSNR)
+    {
+      for (uint32_t componentIndex = 0; componentIndex < MAX_NUM_COMPONENT; componentIndex++)
+      {
+        const ComponentID compID = ComponentID(componentIndex);
+        if (getNumPic() == 0) mseBasedSNR[compID] = 0 * scale; // this is the same calculation that will be evaluated for any other statistic when there are no frames (it should result in NaN). We use it here so all the output is consistent.
+        else
+        {
+          const uint32_t maxval = /*useWPSNR ? (1 << bitDepths.recon[toChannelType(compID)]) - 1 :*/ 255 << (bitDepths.recon[toChannelType(compID)] - 8);
+          const double MSE  = m_MSEyuvframe[compID];
+          mseBasedSNR[compID] = (MSE == 0) ? 999.99 : 10.0 * log10((maxval * maxval) / (MSE / (double)getNumPic()));
+        }
+      }
+    }
+
+
+    // addFieldL receives the picture count as uint64_t, so the format needs a 64-bit conversion (PRIu64) rather than %d.
+    addFieldL("\tTotal Frames","\t%-8" PRIu64 "    ",getNumPic());
+    addFieldS(" |  ",                  " %s ",delim.c_str());
+    addFieldD("Bitrate      ", "%-12.4lf ",getBits() * scale);
+
+    const bool withchroma=(chFmt != CHROMA_400);
+    double psnrYUV = MAX_DOUBLE;
+    double mseYUV  = MAX_DOUBLE;
+    if (withchroma)  calculateCombinedValues(chFmt, psnrYUV, mseYUV, bitDepths);
+
+    if (useWPSNR)
+    {
+      addFieldD("Y-WPSNR   ", "%-8.4lf  ", getWPSNR(COMPONENT_Y));
+      addFieldD("U-WPSNR   ", "%-8.4lf  ", getWPSNR(COMPONENT_Cb), withchroma);
+      addFieldD("V-WPSNR   ", "%-8.4lf  ", getWPSNR(COMPONENT_Cr), withchroma);
+      addFieldD("YUV-WPSNR ", "%-8.4lf  ", psnrYUV, withchroma);
+    }
+    else
+    {
+      addFieldD("Y-PSNR   ", "%-8.4lf ", getPsnr(COMPONENT_Y) / (double) getNumPic());
+      addFieldD("U-PSNR   ", "%-8.4lf ", getPsnr(COMPONENT_Cb) / (double) getNumPic(), withchroma);
+      addFieldD("V-PSNR   ", "%-8.4lf ", getPsnr(COMPONENT_Cr) / (double) getNumPic(), withchroma);
+      addFieldD("YUV-PSNR ", "%-8.4lf ", psnrYUV, withchroma);
+    }
+#if JVET_O0756_CALCULATE_HDRMETRICS
+    if (printHdrMetrics && withchroma)
+    {
+      addFieldD("DeltaE   ", "%-8.4lf ", getDeltaE() / (double) getNumPic());
+      addFieldD("PSNRL    ", "%-8.4lf ", getPsnrL() / (double) getNumPic());
+    }
+#endif
+#if EXTENSION_360_VIDEO
+    m_ext360.printInfos(headeross,metricoss,getNumPic());
+#endif
+    if (printHexPsnr)
+    {
+      if (useWPSNR) {
+        addFieldL("xY-WPSNR         ", "%-16" PRIx64 " ", hexValue(getWPSNR(COMPONENT_Y) ));
+        addFieldL("xU-WPSNR         ", "%-16" PRIx64 " ", hexValue(getWPSNR(COMPONENT_Cb)), withchroma);
+        addFieldL("xV-WPSNR         ", "%-16" PRIx64 " ", hexValue(getWPSNR(COMPONENT_Cr)), withchroma);
+
+      } else {
+      addFieldL("xY-PSNR          ", "%-16" PRIx64 " ", hexValue(getPsnr(COMPONENT_Y) / (double) getNumPic()));
+      addFieldL("xU-PSNR          ", "%-16" PRIx64 " ", hexValue(getPsnr(COMPONENT_Cb) / (double) getNumPic()), withchroma);
+      addFieldL("xV-PSNR          ", "%-16" PRIx64 " ", hexValue(getPsnr(COMPONENT_Cr) / (double) getNumPic()), withchroma);
+      }
+    }
+#if JVET_O0756_CALCULATE_HDRMETRICS
+    if (printHdrMetrics && printHexPsnr && withchroma)
+    {
+      addFieldL("xDeltaE          ", "%-16" PRIx64 " ", hexValue(getDeltaE() / (double) getNumPic()));
+      addFieldL("xPSNRL           ", "%-16" PRIx64 " " , hexValue(getPsnrL() / (double) getNumPic()));
+    }
+#endif
+    if (printMSSSIM)
+    {
+      addFieldD("Y-MS-SSIM  ", "%-9.7lf  ", getMsssim(COMPONENT_Y) / (double) getNumPic());
+      addFieldD("U-MS-SSIM  ", "%-9.7lf  ", getMsssim(COMPONENT_Cb) / (double) getNumPic(), withchroma);
+      addFieldD("V-MS-SSIM  ", "%-9.7lf  ", getMsssim(COMPONENT_Cr) / (double) getNumPic(), withchroma);
+    }
+    if (printSequenceMSE)
+    {
+      addFieldD("Y-MSE      ", "%-10.4lf ", m_MSEyuvframe[COMPONENT_Y] / (double) getNumPic());
+      addFieldD("U-MSE      ", "%-10.4lf ", m_MSEyuvframe[COMPONENT_Cb] / (double) getNumPic(), withchroma);
+      addFieldD("V-MSE      ", "%-10.4lf ", m_MSEyuvframe[COMPONENT_Cr] / (double) getNumPic(), withchroma);
+      addFieldD("YUV-MSE    ", "%-10.4lf ",mseYUV, withchroma);
+    }
+
+    if (printMSEBasedSNR&&!printRprPSNR) {
+      addFieldD("MSE-Y-PSNR   ", "%-8.4lf     ", mseBasedSNR[COMPONENT_Y]);
+      addFieldD("MSE-U-PSNR   ", "%-8.4lf     ", mseBasedSNR[COMPONENT_Cb], withchroma);
+      addFieldD("MSE-V-PSNR   ", "%-8.4lf     ", mseBasedSNR[COMPONENT_Cr], withchroma);
+      addFieldD("MSE-YUV-PSNR ", "%-8.4lf     ", psnrYUV, withchroma);
+    }
+    if (printRprPSNR) {
+      addFieldD("Y-PSNR1  ", "%-8.4lf ", mseBasedSNR[COMPONENT_Y]);
+      addFieldD("U-PSNR1  ", "%-8.4lf ", mseBasedSNR[COMPONENT_Cb], withchroma);
+      addFieldD("V-PSNR1  ", "%-8.4lf ", mseBasedSNR[COMPONENT_Cr], withchroma);
+      addFieldD("Y-PSNR2  ", "%-8.4lf ", m_upscaledPSNR[COMPONENT_Y]/ (double)getNumPic());
+      addFieldD("U-PSNR2  ", "%-8.4lf ", m_upscaledPSNR[COMPONENT_Cb]/ (double)getNumPic(), withchroma);
+      addFieldD("V-PSNR2  ", "%-8.4lf ", m_upscaledPSNR[COMPONENT_Cr]/ (double)getNumPic(), withchroma);
+    }
+    header=headeross.str();
+    metrics=metricoss.str();
+  }
+
+#endif
+
+#if !JVET_W0134_UNIFORM_METRICS_LOG
+#if (ENABLE_QPA || WCG_WPSNR)
+  void    printOut(
+      char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printMSSSIM,
     const bool printHexPsnr, const bool printRprPSNR, const BitDepths &bitDepths, const bool useWPSNR = false
 #if JVET_O0756_CALCULATE_HDRMETRICS
       , const bool printHdrMetrics = false
@@ -635,7 +783,7 @@ public:
         break;
     }
   }
-
+#endif
 
   void    printSummary(const ChromaFormat chFmt, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths, const std::string &sFilename)
   {
diff --git a/source/Lib/EncoderLib/AnnexBwrite.h b/source/Lib/EncoderLib/AnnexBwrite.h
index d9007085fbe75a4cc0b1bbaf37f9c8a4e88f5975..0fb5115ccd96a0d96a9cf44b7577bea9a16f4a55 100644
--- a/source/Lib/EncoderLib/AnnexBwrite.h
+++ b/source/Lib/EncoderLib/AnnexBwrite.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -78,13 +78,8 @@ std::vector<uint32_t> writeAnnexBAccessUnit(std::ostream& out, const AccessUnit&
   for (AccessUnit::const_iterator it = au.begin(); it != au.end(); it++)
   {
     const NALUnitEBSP& nalu = **it;
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
     const bool useLongStartCode = (it == au.begin() || nalu.m_nalUnitType == NAL_UNIT_OPI || nalu.m_nalUnitType == NAL_UNIT_DCI || nalu.m_nalUnitType == NAL_UNIT_VPS || nalu.m_nalUnitType == NAL_UNIT_SPS
                                    || nalu.m_nalUnitType == NAL_UNIT_PPS || nalu.m_nalUnitType == NAL_UNIT_PREFIX_APS || nalu.m_nalUnitType == NAL_UNIT_SUFFIX_APS);
-#else
-    const bool useLongStartCode = (it == au.begin() || nalu.m_nalUnitType == NAL_UNIT_DCI || nalu.m_nalUnitType == NAL_UNIT_VPS || nalu.m_nalUnitType == NAL_UNIT_SPS
-                                   || nalu.m_nalUnitType == NAL_UNIT_PPS || nalu.m_nalUnitType == NAL_UNIT_PREFIX_APS || nalu.m_nalUnitType == NAL_UNIT_SUFFIX_APS);
-#endif
 
     const uint32_t size = writeAnnexBNalUnit(out, nalu, useLongStartCode);
 
diff --git a/source/Lib/EncoderLib/BinEncoder.cpp b/source/Lib/EncoderLib/BinEncoder.cpp
index 9636d2f78cafefaf4a87a9963b1faf838bf7d3b1..9f4f7e91aecf0ec1fa9e6d5621f24503b50361cd 100644
--- a/source/Lib/EncoderLib/BinEncoder.cpp
+++ b/source/Lib/EncoderLib/BinEncoder.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/BinEncoder.h b/source/Lib/EncoderLib/BinEncoder.h
index 67500723eaac4809945a80156075f2846b8ffbb1..ddd5d878fcd46545f96e47e8d7ff457fd60844e8 100644
--- a/source/Lib/EncoderLib/BinEncoder.h
+++ b/source/Lib/EncoderLib/BinEncoder.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -172,6 +172,11 @@ public:
   void      finish  ();
   void      restart ();
   void      reset   ( int qp, int initId );
+#if JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS
+  void      riceStatReset(int bitDepth, bool persistentRiceAdaptationEnabledFlag);
+#else
+  void      riceStatReset(int bitDepth);
+#endif
 public:
   void      resetBits           ();
   uint64_t  getEstFracBits      ()                    const { THROW( "not supported" ); return 0; }
diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp
index 087cb0c179cb7c632cbebc16f4f94fac166108d1..40a1dd2634ebabed818de3500f9b12bf65c253e2 100644
--- a/source/Lib/EncoderLib/CABACWriter.cpp
+++ b/source/Lib/EncoderLib/CABACWriter.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -61,6 +61,12 @@ void CABACWriter::initCtxModels( const Slice& slice )
     sliceType = encCABACTableIdx;
   }
   m_BinEncoder.reset( qp, (int)sliceType );
+  m_BinEncoder.setBaseLevel(slice.getRiceBaseLevel());
+#if JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS
+  m_BinEncoder.riceStatReset(slice.getSPS()->getBitDepth(CHANNEL_TYPE_LUMA), slice.getSPS()->getSpsRangeExtension().getPersistentRiceAdaptationEnabledFlag()); // provide bit depth for derivation (CE14_C method)
+#else
+  m_BinEncoder.riceStatReset(slice.getSPS()->getBitDepth(CHANNEL_TYPE_LUMA)); // provide bit depth for derivation (CE14_C method)
+#endif
 }
 
 
@@ -174,11 +180,11 @@ void CABACWriter::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i
       codeAlfCtuEnableFlag(cs, ctuRsAddr, compIdx, NULL);
       if (isLuma(ComponentID(compIdx)))
       {
-        codeAlfCtuFilterIndex(cs, ctuRsAddr, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y));
+        codeAlfCtuFilterIndex(cs, ctuRsAddr, cs.slice->getAlfEnabledFlag(COMPONENT_Y));
       }
       if (isChroma(ComponentID(compIdx)))
       {
-        uint8_t* ctbAlfFlag = cs.slice->getTileGroupAlfEnabledFlag((ComponentID)compIdx) ? cs.slice->getPic()->getAlfCtuEnableFlag( compIdx ) : nullptr;
+        uint8_t* ctbAlfFlag = cs.slice->getAlfEnabledFlag((ComponentID)compIdx) ? cs.slice->getPic()->getAlfCtuEnableFlag( compIdx ) : nullptr;
         if( ctbAlfFlag && ctbAlfFlag[ctuRsAddr] )
         {
           codeAlfCtuAlternative( cs, ctuRsAddr, compIdx );
@@ -430,61 +436,62 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione
 
   if( splitMode != CU_DONT_SPLIT )
   {
-      if (CS::isDualITree(cs) && pPartitionerChroma != nullptr && (partitioner.currArea().lwidth() >= 64 || partitioner.currArea().lheight() >= 64))
-      {
-        partitioner.splitCurrArea(CU_QUAD_SPLIT, cs);
-        pPartitionerChroma->splitCurrArea(CU_QUAD_SPLIT, cs);
-        bool beContinue = true;
-        bool lumaContinue = true;
-        bool chromaContinue = true;
+    if (CS::isDualITree(cs) && pPartitionerChroma != nullptr
+        && (partitioner.currArea().lwidth() >= 64 || partitioner.currArea().lheight() >= 64))
+    {
+      partitioner.splitCurrArea(CU_QUAD_SPLIT, cs);
+      pPartitionerChroma->splitCurrArea(CU_QUAD_SPLIT, cs);
+      bool beContinue     = true;
+      bool lumaContinue   = true;
+      bool chromaContinue = true;
 
-        while (beContinue)
+      while (beContinue)
+      {
+        if (partitioner.currArea().lwidth() > 64 || partitioner.currArea().lheight() > 64)
         {
-          if (partitioner.currArea().lwidth() > 64 || partitioner.currArea().lheight() > 64)
+          if (cs.picture->blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos()))
           {
-            if (cs.picture->blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos()))
-            {
-              coding_tree(cs, partitioner, cuCtx, pPartitionerChroma, pCuCtxChroma);
-            }
-            lumaContinue = partitioner.nextPart(cs);
-            chromaContinue = pPartitionerChroma->nextPart(cs);
-            CHECK(lumaContinue != chromaContinue, "luma chroma partition should be matched");
-            beContinue = lumaContinue;
+            coding_tree(cs, partitioner, cuCtx, pPartitionerChroma, pCuCtxChroma);
           }
-          else
+          lumaContinue   = partitioner.nextPart(cs);
+          chromaContinue = pPartitionerChroma->nextPart(cs);
+          CHECK(lumaContinue != chromaContinue, "luma chroma partition should be matched");
+          beContinue = lumaContinue;
+        }
+        else
+        {
+          // dual tree coding under 64x64 block
+          if (cs.picture->blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos()))
           {
-            //dual tree coding under 64x64 block
-            if (cs.picture->blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos()))
-            {
-              coding_tree(cs, partitioner, cuCtx);
-            }
-            lumaContinue = partitioner.nextPart(cs);
-            if (cs.picture->blocks[pPartitionerChroma->chType].contains(pPartitionerChroma->currArea().blocks[pPartitionerChroma->chType].pos()))
-            {
-              coding_tree(cs, *pPartitionerChroma, *pCuCtxChroma);
-            }
-            chromaContinue = pPartitionerChroma->nextPart(cs);
-            CHECK(lumaContinue != chromaContinue, "luma chroma partition should be matched");
-            beContinue = lumaContinue;
+            coding_tree(cs, partitioner, cuCtx);
           }
+          lumaContinue = partitioner.nextPart(cs);
+          if (cs.picture->blocks[pPartitionerChroma->chType].contains(
+                pPartitionerChroma->currArea().blocks[pPartitionerChroma->chType].pos()))
+          {
+            coding_tree(cs, *pPartitionerChroma, *pCuCtxChroma);
+          }
+          chromaContinue = pPartitionerChroma->nextPart(cs);
+          CHECK(lumaContinue != chromaContinue, "luma chroma partition should be matched");
+          beContinue = lumaContinue;
         }
-        partitioner.exitCurrSplit();
-        pPartitionerChroma->exitCurrSplit();
-
       }
-      else
-      {
-        const ModeType modeTypeParent = partitioner.modeType;
-        const ModeType modeTypeChild = CU::getModeTypeAtDepth( cu, partitioner.currDepth );
-        mode_constraint( splitMode, cs, partitioner, modeTypeChild );
-        partitioner.modeType = modeTypeChild;
+      partitioner.exitCurrSplit();
+      pPartitionerChroma->exitCurrSplit();
+    }
+    else
+    {
+      const ModeType modeTypeParent = partitioner.modeType;
+      const ModeType modeTypeChild  = CU::getModeTypeAtDepth(cu, partitioner.currDepth);
+      mode_constraint(splitMode, cs, partitioner, modeTypeChild);
+      partitioner.modeType = modeTypeChild;
 
-        bool chromaNotSplit = modeTypeParent == MODE_TYPE_ALL && modeTypeChild == MODE_TYPE_INTRA ? true : false;
-        CHECK( chromaNotSplit && partitioner.chType != CHANNEL_TYPE_LUMA, "chType must be luma" );
-        if( partitioner.treeType == TREE_D )
-        {
-          partitioner.treeType = chromaNotSplit ? TREE_L : TREE_D;
-        }
+      bool chromaNotSplit = modeTypeParent == MODE_TYPE_ALL && modeTypeChild == MODE_TYPE_INTRA ? true : false;
+      CHECK(chromaNotSplit && partitioner.chType != CHANNEL_TYPE_LUMA, "chType must be luma");
+      if (partitioner.treeType == TREE_D)
+      {
+        partitioner.treeType = chromaNotSplit ? TREE_L : TREE_D;
+      }
       partitioner.splitCurrArea( splitMode, cs );
 
       do
@@ -500,14 +507,14 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione
       {
         if (isChromaEnabled(cs.pcv->chrFormat))
         {
-        CHECK( partitioner.chType != CHANNEL_TYPE_LUMA, "must be luma status" );
-        partitioner.chType = CHANNEL_TYPE_CHROMA;
-        partitioner.treeType = TREE_C;
+          CHECK(partitioner.chType != CHANNEL_TYPE_LUMA, "must be luma status");
+          partitioner.chType   = CHANNEL_TYPE_CHROMA;
+          partitioner.treeType = TREE_C;
 
-        if( cs.picture->blocks[partitioner.chType].contains( partitioner.currArea().blocks[partitioner.chType].pos() ) )
-        {
-          coding_tree( cs, partitioner, cuCtx );
-        }
+          if (cs.picture->blocks[partitioner.chType].contains(partitioner.currArea().blocks[partitioner.chType].pos()))
+          {
+            coding_tree(cs, partitioner, cuCtx);
+          }
         }
 
         //recover
@@ -515,8 +522,8 @@ void CABACWriter::coding_tree(const CodingStructure& cs, Partitioner& partitione
         partitioner.treeType = TREE_D;
       }
       partitioner.modeType = modeTypeParent;
-      }
-      return;
+    }
+    return;
   }
 
   // Predict QP on start of quantization group
@@ -743,9 +750,9 @@ void CABACWriter::cu_skip_flag( const CodingUnit& cu )
       {
         return;
       }
-    unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu);
-    m_BinEncoder.encodeBin(CU::isIBC(cu) ? 1 : 0, Ctx::IBCFlag(ctxidx));
-    DTRACE(g_trace_ctx, D_SYNTAX, "ibc() ctx=%d cu.predMode=%d\n", ctxidx, cu.predMode);
+      unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu);
+      m_BinEncoder.encodeBin(CU::isIBC(cu) ? 1 : 0, Ctx::IBCFlag(ctxidx));
+      DTRACE(g_trace_ctx, D_SYNTAX, "ibc() ctx=%d cu.predMode=%d\n", ctxidx, cu.predMode);
     }
   }
 }
@@ -765,8 +772,8 @@ void CABACWriter::pred_mode( const CodingUnit& cu )
     {
       if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64
       {
-      unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu);
-      m_BinEncoder.encodeBin(CU::isIBC(cu), Ctx::IBCFlag(ctxidx));
+        unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu);
+        m_BinEncoder.encodeBin(CU::isIBC(cu), Ctx::IBCFlag(ctxidx));
       }
       if (!CU::isIBC(cu) && cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64 && (cu.lumaSize().width * cu.lumaSize().height > 16) )
       {
@@ -783,14 +790,16 @@ void CABACWriter::pred_mode( const CodingUnit& cu )
       if (CU::isIntra(cu) || CU::isPLT(cu))
       {
         if (cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64 && (cu.lumaSize().width * cu.lumaSize().height > 16) )
+        {
           m_BinEncoder.encodeBin(CU::isPLT(cu), Ctx::PLTFlag(0));
+        }
       }
       else
       {
         if (cu.lwidth() < 128 && cu.lheight() < 128) // disable IBC mode larger than 64x64
         {
-        unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu);
-        m_BinEncoder.encodeBin(CU::isIBC(cu), Ctx::IBCFlag(ctxidx));
+          unsigned ctxidx = DeriveCtx::CtxIBCFlag(cu);
+          m_BinEncoder.encodeBin(CU::isIBC(cu), Ctx::IBCFlag(ctxidx));
         }
       }
     }
@@ -806,7 +815,9 @@ void CABACWriter::pred_mode( const CodingUnit& cu )
     if ( cu.cs->slice->isIntra() || ( cu.lwidth() == 4 && cu.lheight() == 4 ) || cu.isConsIntra() )
     {
       if (cu.cs->slice->getSPS()->getPLTMode() && cu.lwidth() <= 64 && cu.lheight() <= 64 && ( ( (!isLuma(cu.chType)) && (cu.chromaSize().width * cu.chromaSize().height > 16) ) || ((isLuma(cu.chType)) && ((cu.lumaSize().width * cu.lumaSize().height) > 16 ) )  ) && (!cu.isLocalSepTree() || isLuma(cu.chType)  ) )
+      {
         m_BinEncoder.encodeBin((CU::isPLT(cu)), Ctx::PLTFlag(0));
+      }
       return;
     }
     m_BinEncoder.encodeBin((CU::isIntra(cu) || CU::isPLT(cu)), Ctx::PredMode(DeriveCtx::CtxPredModeFlag(cu)));
@@ -818,8 +829,14 @@ void CABACWriter::pred_mode( const CodingUnit& cu )
 }
 void CABACWriter::bdpcm_mode( const CodingUnit& cu, const ComponentID compID )
 {
-  if( !cu.cs->sps->getBDPCMEnabledFlag() ) return;
-  if( !CU::bdpcmAllowed( cu, compID ) ) return;
+  if (!cu.cs->sps->getBDPCMEnabledFlag())
+  {
+    return;
+  }
+  if (!CU::bdpcmAllowed(cu, compID))
+  {
+    return;
+  }
 
   int bdpcmMode = isLuma(compID) ? cu.bdpcmMode : cu.bdpcmModeChroma;
 
@@ -871,7 +888,6 @@ void CABACWriter::cu_pred_data( const CodingUnit& cu )
   affine_amvr_mode( cu );
 
   cu_bcw_flag( cu );
-
 }
 
 void CABACWriter::cu_bcw_flag(const CodingUnit& cu)
@@ -978,7 +994,6 @@ void CABACWriter::extend_ref_line(const PredictionUnit& pu)
 
 void CABACWriter::extend_ref_line(const CodingUnit& cu)
 {
-
   if ( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.bdpcmMode )
   {
     return;
@@ -1006,7 +1021,6 @@ void CABACWriter::extend_ref_line(const CodingUnit& cu)
       {
         m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], Ctx::MultiRefLineIdx(1));
       }
-
     }
     pu = pu->next;
   }
@@ -1082,26 +1096,26 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu )
     const unsigned& mpm_idx = mpm_idxs[k];
     if( mpm_idx < numMPMs )
     {
+      unsigned ctx = (pu->cu->ispMode == NOT_INTRA_SUBPARTITIONS ? 1 : 0);
+      if (pu->multiRefIdx == 0)
       {
-        unsigned ctx = (pu->cu->ispMode == NOT_INTRA_SUBPARTITIONS ? 1 : 0);
-        if (pu->multiRefIdx == 0)
-          m_BinEncoder.encodeBin(mpm_idx > 0, Ctx::IntraLumaPlanarFlag(ctx));
-        if( mpm_idx )
-        {
-          m_BinEncoder.encodeBinEP( mpm_idx > 1 );
-        }
-        if (mpm_idx > 1)
-        {
-          m_BinEncoder.encodeBinEP(mpm_idx > 2);
-        }
-        if (mpm_idx > 2)
-        {
-          m_BinEncoder.encodeBinEP(mpm_idx > 3);
-        }
-        if (mpm_idx > 3)
-        {
-          m_BinEncoder.encodeBinEP(mpm_idx > 4);
-        }
+        m_BinEncoder.encodeBin(mpm_idx > 0, Ctx::IntraLumaPlanarFlag(ctx));
+      }
+      if (mpm_idx)
+      {
+        m_BinEncoder.encodeBinEP(mpm_idx > 1);
+      }
+      if (mpm_idx > 1)
+      {
+        m_BinEncoder.encodeBinEP(mpm_idx > 2);
+      }
+      if (mpm_idx > 2)
+      {
+        m_BinEncoder.encodeBinEP(mpm_idx > 3);
+      }
+      if (mpm_idx > 3)
+      {
+        m_BinEncoder.encodeBinEP(mpm_idx > 4);
       }
     }
     else
@@ -1112,17 +1126,16 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu )
       // sorting of MPMs
       std::sort( mpm_pred, mpm_pred + numMPMs );
 
+      for (int idx = numMPMs - 1; idx >= 0; idx--)
       {
-        for (int idx = numMPMs - 1; idx >= 0; idx--)
+        if (ipred_mode > mpm_pred[idx])
         {
-          if (ipred_mode > mpm_pred[idx])
-          {
-            ipred_mode--;
-          }
+          ipred_mode--;
         }
-        CHECK(ipred_mode >= 64, "Incorrect mode");
-        xWriteTruncBinCode(ipred_mode, NUM_LUMA_MODE - NUM_MOST_PROBABLE_MODES);  // Remaining mode is truncated binary coded
       }
+      CHECK(ipred_mode >= 64, "Incorrect mode");
+      xWriteTruncBinCode(ipred_mode,
+                         NUM_LUMA_MODE - NUM_MOST_PROBABLE_MODES);   // Remaining mode is truncated binary coded
     }
 
     DTRACE( g_trace_ctx, D_SYNTAX, "intra_luma_pred_modes() idx=%d pos=(%d,%d) mode=%d\n", k, pu->lumaPos().x, pu->lumaPos().y, pu->intraDir[0] );
@@ -1133,8 +1146,10 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu )
 
 void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu )
 {
-
-  if( pu.cu->bdpcmMode ) return;
+  if (pu.cu->bdpcmMode)
+  {
+    return;
+  }
   mip_flag(*pu.cu);
   if (pu.cu->mipFlag)
   {
@@ -1173,41 +1188,40 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu )
   // mpm_idx / rem_intra_luma_pred_mode
   if( mpm_idx < numMPMs )
   {
+    unsigned ctx = (pu.cu->ispMode == NOT_INTRA_SUBPARTITIONS ? 1 : 0);
+    if (pu.multiRefIdx == 0)
     {
-      unsigned ctx = (pu.cu->ispMode == NOT_INTRA_SUBPARTITIONS ? 1 : 0);
-      if (pu.multiRefIdx == 0)
-        m_BinEncoder.encodeBin( mpm_idx > 0, Ctx::IntraLumaPlanarFlag(ctx) );
-      if( mpm_idx )
-      {
-        m_BinEncoder.encodeBinEP( mpm_idx > 1 );
-      }
-      if (mpm_idx > 1)
-      {
-        m_BinEncoder.encodeBinEP(mpm_idx > 2);
-      }
-      if (mpm_idx > 2)
-      {
-        m_BinEncoder.encodeBinEP(mpm_idx > 3);
-      }
-      if (mpm_idx > 3)
-      {
-        m_BinEncoder.encodeBinEP(mpm_idx > 4);
-      }
+      m_BinEncoder.encodeBin(mpm_idx > 0, Ctx::IntraLumaPlanarFlag(ctx));
+    }
+    if (mpm_idx)
+    {
+      m_BinEncoder.encodeBinEP(mpm_idx > 1);
+    }
+    if (mpm_idx > 1)
+    {
+      m_BinEncoder.encodeBinEP(mpm_idx > 2);
+    }
+    if (mpm_idx > 2)
+    {
+      m_BinEncoder.encodeBinEP(mpm_idx > 3);
+    }
+    if (mpm_idx > 3)
+    {
+      m_BinEncoder.encodeBinEP(mpm_idx > 4);
     }
   }
   else
   {
     std::sort( mpm_pred, mpm_pred + numMPMs );
+    for (int idx = numMPMs - 1; idx >= 0; idx--)
     {
-      for (int idx = numMPMs - 1; idx >= 0; idx--)
+      if (ipred_mode > mpm_pred[idx])
       {
-        if (ipred_mode > mpm_pred[idx])
-        {
-          ipred_mode--;
-        }
+        ipred_mode--;
       }
-      xWriteTruncBinCode(ipred_mode, NUM_LUMA_MODE - NUM_MOST_PROBABLE_MODES);  // Remaining mode is truncated binary coded
     }
+    xWriteTruncBinCode(ipred_mode,
+                       NUM_LUMA_MODE - NUM_MOST_PROBABLE_MODES);   // Remaining mode is truncated binary coded
   }
 }
 
@@ -1439,7 +1453,6 @@ void CABACWriter::end_of_ctu( const CodingUnit& cu, CUCtx& cuCtx )
       )
   {
     cuCtx.isDQPCoded = ( cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded );
-
   }
 }
 
@@ -1453,7 +1466,6 @@ void CABACWriter::cu_palette_info(const CodingUnit& cu, ComponentID compBegin, u
 
   if (cu.lastPLTSize[compBegin])
   {
-
     xEncodePLTPredIndicator(cu, maxPltSize, compBegin);
   }
 
@@ -1489,9 +1501,13 @@ void CABACWriter::cu_palette_info(const CodingUnit& cu, ComponentID compBegin, u
   m_scanOrder = g_scanOrder[SCAN_UNGROUPED][(cu.useRotation[compBegin]) ? SCAN_TRAV_VER : SCAN_TRAV_HOR][gp_sizeIdxInfo->idxFrom(width)][gp_sizeIdxInfo->idxFrom(height)];
   uint32_t total = height * width;
   if (indexMaxSize > 1)
+  {
     codeScanRotationModeFlag(cu, compBegin);
+  }
   else
+  {
     assert(!cu.useRotation[compBegin]);
+  }
 
   if (cu.useEscape[compBegin] && cu.cs->pps->getUseDQP() && !cuCtx.isDQPCoded)
   {
@@ -1519,6 +1535,7 @@ void CABACWriter::cu_palette_info(const CodingUnit& cu, ComponentID compBegin, u
   }
   CHECK(cu.curPLTSize[compBegin] > maxPltSize, " Current palette size is larger than maximum palette size");
 }
+
 void CABACWriter::cuPaletteSubblockInfo(const CodingUnit& cu, ComponentID compBegin, uint32_t numComp, int subSetId, uint32_t& prevRunPos, unsigned& prevRunType)
 {
   const SPS&      sps = *(cu.cs->sps);
@@ -1534,10 +1551,14 @@ void CABACWriter::cuPaletteSubblockInfo(const CodingUnit& cu, ComponentID compBe
 
   unsigned runCopyFlag[(1 << LOG2_PALETTE_CG_SIZE)];
   for (int i = 0; i < (1 << LOG2_PALETTE_CG_SIZE); i++)
+  {
     runCopyFlag[i] = MAX_INT;
+  }
 
   if (minSubPos == 0)
+  {
     runCopyFlag[0] = 0;
+  }
 
 // PLT runCopy flag and runType - context coded
   int curPos = minSubPos;
@@ -1609,19 +1630,21 @@ void CABACWriter::cuPaletteSubblockInfo(const CodingUnit& cu, ComponentID compBe
       uint32_t posx = m_scanOrder[curPos].x;
       if (curPLTIdx.at(posx, posy) == cu.curPLTSize[compBegin])
       {
-          PLTescapeBuf    escapeValue = tu.getescapeValue((ComponentID)comp);
-          if (compID == COMPONENT_Y || compBegin != COMPONENT_Y)
-          {
-            exp_golomb_eqprob((unsigned)escapeValue.at(posx, posy), 5);
-            DTRACE(g_trace_ctx, D_SYNTAX, "plt_escape_val() value=%d etype=%d sp=%d\n", escapeValue.at(posx, posy), comp, curPos);
-          }
-          if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && posy % (1 << scaleY) == 0 && posx % (1 << scaleX) == 0)
-          {
-            uint32_t posxC = posx >> scaleX;
-            uint32_t posyC = posy >> scaleY;
-            exp_golomb_eqprob((unsigned)escapeValue.at(posxC, posyC), 5);
-            DTRACE(g_trace_ctx, D_SYNTAX, "plt_escape_val() value=%d etype=%d sp=%d\n", escapeValue.at(posx, posy), comp, curPos);
-          }
+        PLTescapeBuf escapeValue = tu.getescapeValue((ComponentID) comp);
+        if (compID == COMPONENT_Y || compBegin != COMPONENT_Y)
+        {
+          exp_golomb_eqprob((unsigned) escapeValue.at(posx, posy), 5);
+          DTRACE(g_trace_ctx, D_SYNTAX, "plt_escape_val() value=%d etype=%d sp=%d\n", escapeValue.at(posx, posy), comp,
+                 curPos);
+        }
+        if (compBegin == COMPONENT_Y && compID != COMPONENT_Y && posy % (1 << scaleY) == 0 && posx % (1 << scaleX) == 0)
+        {
+          uint32_t posxC = posx >> scaleX;   // coordinates shifted by scaleX/scaleY index the chroma escape sample
+          uint32_t posyC = posy >> scaleY;
+          exp_golomb_eqprob((unsigned) escapeValue.at(posxC, posyC), 5);
+          DTRACE(g_trace_ctx, D_SYNTAX, "plt_escape_val() value=%d etype=%d sp=%d\n", escapeValue.at(posxC, posyC),
+                 comp, curPos);   // trace the sample that was actually encoded (was posx/posy)
+        }
       }
     }
   }
@@ -1663,6 +1686,7 @@ void CABACWriter::xEncodePLTPredIndicator(const CodingUnit& cu, uint32_t maxPLTS
     exp_golomb_eqprob(1, 0);
   }
 }
+
 Pel CABACWriter::writePLTIndex(const CodingUnit& cu, uint32_t idx, PelBuf& paletteIdx, PLTtypeBuf& paletteRunType, int maxSymbol, ComponentID compBegin)
 {
   uint32_t posy = m_scanOrder[idx].y;
@@ -1739,11 +1763,7 @@ Pel CABACWriter::writePLTIndex(const CodingUnit& cu, uint32_t idx, PelBuf& palet
 void CABACWriter::prediction_unit( const PredictionUnit& pu )
 {
   CHECK( pu.cu->treeType == TREE_C, "cannot be chroma CU" );
-#if ENABLE_SPLIT_PARALLELISM
-  CHECK( pu.cacheUsed, "Processing a PU that should be in cache!" );
-  CHECK( pu.cu->cacheUsed, "Processing a CU that should be in cache!" );
 
-#endif
   if( pu.cu->skip )
   {
     CHECK( !pu.mergeFlag, "merge_flag must be true for skipped CUs" );
@@ -1767,7 +1787,9 @@ void CABACWriter::prediction_unit( const PredictionUnit& pu )
       CHECK( pu.mvpIdx[REF_PIC_LIST_0], "mvpIdx for IBC mode should be 0" );
     }
     else
-    mvp_flag(pu, REF_PIC_LIST_0);
+    {
+      mvp_flag(pu, REF_PIC_LIST_0);
+    }
   }
   else
   {
@@ -1804,31 +1826,31 @@ void CABACWriter::prediction_unit( const PredictionUnit& pu )
     {
       if ( pu.cu->smvdMode != 1 )
       {
-      ref_idx     ( pu, REF_PIC_LIST_1 );
-      if( !pu.cs->picHeader->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */ )
-      {
-        if ( pu.cu->affine )
+        ref_idx(pu, REF_PIC_LIST_1);
+        if (!pu.cs->picHeader->getMvdL1ZeroFlag() || pu.interDir != 3 /* PRED_BI */)
         {
-          Mv mvd = pu.mvdAffi[REF_PIC_LIST_1][0];
-          mvd.changeAffinePrecInternal2Amvr(pu.cu->imv);
-          mvd_coding(mvd, 0); // already changed to signaling precision
-          mvd = pu.mvdAffi[REF_PIC_LIST_1][1];
-          mvd.changeAffinePrecInternal2Amvr(pu.cu->imv);
-          mvd_coding(mvd, 0); // already changed to signaling precision
-          if ( pu.cu->affineType == AFFINEMODEL_6PARAM )
+          if (pu.cu->affine)
           {
-            mvd = pu.mvdAffi[REF_PIC_LIST_1][2];
+            Mv mvd = pu.mvdAffi[REF_PIC_LIST_1][0];
             mvd.changeAffinePrecInternal2Amvr(pu.cu->imv);
             mvd_coding(mvd, 0); // already changed to signaling precision
+            mvd = pu.mvdAffi[REF_PIC_LIST_1][1];
+            mvd.changeAffinePrecInternal2Amvr(pu.cu->imv);
+            mvd_coding(mvd, 0);   // already changed to signaling precision
+            if (pu.cu->affineType == AFFINEMODEL_6PARAM)
+            {
+              mvd = pu.mvdAffi[REF_PIC_LIST_1][2];
+              mvd.changeAffinePrecInternal2Amvr(pu.cu->imv);
+              mvd_coding(mvd, 0);   // already changed to signaling precision
+            }
+          }
+          else
+          {
+            Mv mvd = pu.mvd[REF_PIC_LIST_1];
+            mvd.changeTransPrecInternal2Amvr(pu.cu->imv);
+            mvd_coding(mvd, 0);   // already changed to signaling precision
           }
         }
-        else
-        {
-          Mv mvd = pu.mvd[REF_PIC_LIST_1];
-          mvd.changeTransPrecInternal2Amvr(pu.cu->imv);
-          mvd_coding(mvd, 0); // already changed to signaling precision
-        }
-      }
       }
       mvp_flag    ( pu, REF_PIC_LIST_1 );
     }
@@ -1969,8 +1991,8 @@ void CABACWriter::imv_mode( const CodingUnit& cu )
     }
     if (cu.imv < IMV_HPEL)
     {
-    m_BinEncoder.encodeBin( (cu.imv > 1), Ctx::ImvFlag( 1 ) );
-    DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", (cu.imv > 1), 1 );
+      m_BinEncoder.encodeBin((cu.imv > 1), Ctx::ImvFlag(1));
+      DTRACE(g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", (cu.imv > 1), 1);
     }
   }
 
@@ -2021,7 +2043,7 @@ void CABACWriter::merge_idx( const PredictionUnit& pu )
         m_BinEncoder.encodeBin( 1, Ctx::AffMergeIdx() );
         for ( unsigned idx = 1; idx < numCandminus1; idx++ )
         {
-            m_BinEncoder.encodeBinEP( pu.mergeIdx == idx ? 0 : 1 );
+          m_BinEncoder.encodeBinEP(pu.mergeIdx == idx ? 0 : 1);
           if ( pu.mergeIdx == idx )
           {
             break;
@@ -2065,31 +2087,35 @@ void CABACWriter::merge_idx( const PredictionUnit& pu )
     }
     int numCandminus1;
     if (pu.cu->predMode == MODE_IBC)
+    {
       numCandminus1 = int(pu.cs->sps->getMaxNumIBCMergeCand()) - 1;
+    }
     else
-      numCandminus1 = int(pu.cs->sps->getMaxNumMergeCand()) - 1;
-  if( numCandminus1 > 0 )
-  {
-    if( pu.mergeIdx == 0 )
     {
-      m_BinEncoder.encodeBin( 0, Ctx::MergeIdx() );
-      DTRACE( g_trace_ctx, D_SYNTAX, "merge_idx() merge_idx=%d\n", pu.mergeIdx );
-      return;
+      numCandminus1 = int(pu.cs->sps->getMaxNumMergeCand()) - 1;
     }
-    else
+    if (numCandminus1 > 0)
     {
-      m_BinEncoder.encodeBin( 1, Ctx::MergeIdx() );
-      for( unsigned idx = 1; idx < numCandminus1; idx++ )
+      if (pu.mergeIdx == 0)
+      {
+        m_BinEncoder.encodeBin(0, Ctx::MergeIdx());
+        DTRACE(g_trace_ctx, D_SYNTAX, "merge_idx() merge_idx=%d\n", pu.mergeIdx);
+        return;
+      }
+      else
       {
-          m_BinEncoder.encodeBinEP( pu.mergeIdx == idx ? 0 : 1 );
-        if( pu.mergeIdx == idx )
+        m_BinEncoder.encodeBin(1, Ctx::MergeIdx());
+        for (unsigned idx = 1; idx < numCandminus1; idx++)
         {
-          break;
+          m_BinEncoder.encodeBinEP(pu.mergeIdx == idx ? 0 : 1);
+          if (pu.mergeIdx == idx)
+          {
+            break;
+          }
         }
       }
     }
-  }
-  DTRACE( g_trace_ctx, D_SYNTAX, "merge_idx() merge_idx=%d\n", pu.mergeIdx );
+    DTRACE(g_trace_ctx, D_SYNTAX, "merge_idx() merge_idx=%d\n", pu.mergeIdx);
   }
 }
 void CABACWriter::mmvd_merge_idx(const PredictionUnit& pu)
@@ -2134,6 +2160,7 @@ void CABACWriter::mmvd_merge_idx(const PredictionUnit& pu)
   DTRACE(g_trace_ctx, D_SYNTAX, "pos() pos=%d\n", var2);
   DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_merge_idx() mmvd_merge_idx=%d\n", pu.mmvdMergeIdx);
 }
+
 void CABACWriter::inter_pred_idc( const PredictionUnit& pu )
 {
   if( !pu.cs->slice->isInterB() )
@@ -2172,7 +2199,9 @@ void CABACWriter::ref_idx( const PredictionUnit& pu, RefPicList eRefList )
   if (eRefList == REF_PIC_LIST_0 && pu.cs->sps->getIBCFlag())
   {
     if (CU::isIBC(*pu.cu))
+    {
       return;
+    }
   }
 
   if( numRef <= 1 )
@@ -2260,8 +2289,9 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
     CHECK( !split, "transform split implied - sbt" );
   }
   else
-  CHECK( split && !cu.ispMode, "transform split not allowed with QTBT" );
-
+  {
+    CHECK(split && !cu.ispMode, "transform split not allowed with QTBT");
+  }
 
   if( split )
   {
@@ -2284,7 +2314,9 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
       partitioner.splitCurrArea( PartSplit( cu.getSbtTuSplit() ), cs );
     }
     else
+    {
       THROW( "Implicit TU split not available" );
+    }
 
     do
     {
@@ -2311,24 +2343,26 @@ void CABACWriter::cbf_comp( const CodingStructure& cs, bool cbf, const CompArea&
    || (area.compID != COMPONENT_Y && cs.getCU(area.pos(), toChannelType(area.compID)) != NULL && cs.getCU(area.pos(), toChannelType(area.compID))->bdpcmModeChroma))
   {
     if (area.compID == COMPONENT_Y)
+    {
       ctxId = 1;
+    }
     else if (area.compID == COMPONENT_Cb)
+    {
       ctxId = 1;
+    }
     else
+    {
       ctxId = 2;
+    }
     m_BinEncoder.encodeBin(cbf, ctxSet(ctxId));
   }
   else
   {
-  m_BinEncoder.encodeBin( cbf, ctxSet( ctxId ) );
+    m_BinEncoder.encodeBin(cbf, ctxSet(ctxId));
   }
   DTRACE( g_trace_ctx, D_SYNTAX, "cbf_comp() etype=%d pos=(%d,%d) ctx=%d cbf=%d\n", area.compID, area.x, area.y, ctxId, cbf );
 }
 
-
-
-
-
 //================================================================================
 //  clause 7.3.8.9
 //--------------------------------------------------------------------------------
@@ -2393,9 +2427,6 @@ void CABACWriter::mvd_coding( const Mv &rMvd, int8_t imv )
   }
 }
 
-
-
-
 //================================================================================
 //  clause 7.3.8.10
 //--------------------------------------------------------------------------------
@@ -2417,28 +2448,24 @@ void CABACWriter::transform_unit( const TransformUnit& tu, CUCtx& cuCtx, Partiti
   {
     const bool              chromaCbfISP = area.blocks[COMPONENT_Cb].valid() && cu.ispMode;
     if (area.blocks[COMPONENT_Cb].valid() && (!cu.isSepTree() || partitioner.chType == CHANNEL_TYPE_CHROMA) && (!cu.ispMode || chromaCbfISP))
-  {
     {
       unsigned cbfDepth = chromaCbfISP ? trDepth - 1 : trDepth;
+      chromaCbfs.Cb     = TU::getCbfAtDepth(tu, COMPONENT_Cb, trDepth);
+      if (!(cu.sbtInfo && tu.noResidual))
       {
-        chromaCbfs.Cb = TU::getCbfAtDepth(tu, COMPONENT_Cb, trDepth);
-        //if (!(cu.sbtInfo && trDepth == 1))
-        if (!(cu.sbtInfo && tu.noResidual))
-          cbf_comp(cs, chromaCbfs.Cb, area.blocks[COMPONENT_Cb], cbfDepth);
+        cbf_comp(cs, chromaCbfs.Cb, area.blocks[COMPONENT_Cb], cbfDepth);
       }
 
+      chromaCbfs.Cr = TU::getCbfAtDepth(tu, COMPONENT_Cr, trDepth);
+      if (!(cu.sbtInfo && tu.noResidual))
       {
-        chromaCbfs.Cr = TU::getCbfAtDepth(tu, COMPONENT_Cr, trDepth);
-        //if (!(cu.sbtInfo && trDepth == 1))
-        if (!(cu.sbtInfo && tu.noResidual))
-          cbf_comp(cs, chromaCbfs.Cr, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb);
+        cbf_comp(cs, chromaCbfs.Cr, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb);
       }
     }
-  }
-  else if (cu.isSepTree())
-  {
-    chromaCbfs = ChromaCbfs(false);
-  }
+    else if (cu.isSepTree())
+    {
+      chromaCbfs = ChromaCbfs(false);
+    }
   }
   else if (cu.isSepTree())
   {
@@ -2536,20 +2563,22 @@ void CABACWriter::transform_unit( const TransformUnit& tu, CUCtx& cuCtx, Partiti
     joint_cb_cr( tu, ( cbf[COMPONENT_Cb] ? 2 : 0 ) + ( cbf[COMPONENT_Cr] ? 1 : 0 ) );
   }
 
-    if( cbfLuma )
-    {
-      residual_coding( tu, COMPONENT_Y, &cuCtx );
-    }
-    if( !lumaOnly )
+  if (cbfLuma)
+  {
+    residual_coding(tu, COMPONENT_Y, &cuCtx);
+  }
+  if (!lumaOnly)
+  {
+    for (ComponentID compID = COMPONENT_Cb; compID <= COMPONENT_Cr; compID = ComponentID(compID + 1))
     {
-      for( ComponentID compID = COMPONENT_Cb; compID <= COMPONENT_Cr; compID = ComponentID( compID + 1 ) )
+      if (cbf[compID])
       {
-        if( cbf[ compID ] )
-        {
-          residual_coding( tu, compID, &cuCtx );
+        residual_coding(tu, compID, &cuCtx);
       }
     }
   }
+
+  DTRACE_COND( ( isEncoding() ), g_trace_ctx, D_DQP, "x=%d, y=%d, d=%d, qpAdj=%d\n", cu.blocks[cu.chType].lumaPos().x, cu.blocks[cu.chType].lumaPos().y, cu.qtDepth, cu.chromaQpAdj );
 }
 
 void CABACWriter::cu_qp_delta( const CodingUnit& cu, int predQP, const int8_t qp )
@@ -2594,10 +2623,6 @@ void CABACWriter::cu_chroma_qp_offset( const CodingUnit& cu )
   }
 }
 
-
-
-
-
 //================================================================================
 //  clause 7.3.8.11
 //--------------------------------------------------------------------------------
@@ -2627,7 +2652,9 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID,
   DTRACE( g_trace_ctx, D_SYNTAX, "residual_coding() etype=%d pos=(%d,%d) size=%dx%d predMode=%d\n", tu.blocks[compID].compID, tu.blocks[compID].x, tu.blocks[compID].y, tu.blocks[compID].width, tu.blocks[compID].height, cu.predMode );
 
   if( compID == COMPONENT_Cr && tu.jointCbCr == 3 )
+  {
     return;
+  }
 
   ts_flag            ( tu, compID );
 
@@ -2685,6 +2712,15 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID,
   int ctxBinSampleRatio = (compID == COMPONENT_Y) ? MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_LUMA : MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT_CHROMA;
   cctx.regBinLimit = (tu.getTbAreaAfterCoefZeroOut(compID) * ctxBinSampleRatio) >> 4;
 
+  int baseLevel = m_BinEncoder.getCtx().getBaseLevel();
+  cctx.setBaseLevel(baseLevel);
+  if (tu.cs->slice->getSPS()->getSpsRangeExtension().getPersistentRiceAdaptationEnabledFlag())
+  {
+    cctx.setUpdateHist(1);
+    unsigned riceStats = m_BinEncoder.getCtx().getGRAdaptStats((unsigned)compID);
+    TCoeff historyValue = (TCoeff)1 << riceStats;
+    cctx.setHistValue(historyValue);
+  }
   for( int subSetId = ( cctx.scanPosLast() >> cctx.log2CGSize() ); subSetId >= 0; subSetId--)
   {
     cctx.initSubblock       ( subSetId, sigGroupFlags[subSetId] );
@@ -2834,17 +2870,47 @@ void CABACWriter::last_sig_coeff( CoeffCodingContext& cctx, const TransformUnit&
   }
 
   unsigned CtxLast;
-  unsigned GroupIdxX = g_uiGroupIdx[ posX ];
-  unsigned GroupIdxY = g_uiGroupIdx[ posY ];
+  unsigned GroupIdxX = g_groupIdx[posX];
+  unsigned GroupIdxY = g_groupIdx[posY];
 
   unsigned maxLastPosX = cctx.maxLastPosX();
   unsigned maxLastPosY = cctx.maxLastPosY();
 
+#if JVET_W0046_RLSCP
+  unsigned zoTbWdith  = std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, cctx.width());
+  unsigned zoTbHeight = std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, cctx.height());
+#endif
+
   if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 && compID == COMPONENT_Y )
   {
-    maxLastPosX = ( tu.blocks[compID].width  == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosX;
-    maxLastPosY = ( tu.blocks[compID].height == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosY;
+    maxLastPosX = (tu.blocks[compID].width == 32) ? g_groupIdx[15] : maxLastPosX;
+    maxLastPosY = (tu.blocks[compID].height == 32) ? g_groupIdx[15] : maxLastPosY;
+#if JVET_W0046_RLSCP
+    zoTbWdith = (tu.blocks[compID].width == 32) ? 16 : zoTbWdith;
+    zoTbHeight = (tu.blocks[compID].height == 32) ? 16 : zoTbHeight;
+#endif
+  }
+#if JVET_W0046_RLSCP
+  if (isEncoding())
+  {
+    if ((posX + posY) > ((zoTbWdith + zoTbHeight + 2) / 2))
+    {
+      tu.cu->slice->updateCntRightBottom(1);
+    }
+    else
+    {
+      tu.cu->slice->updateCntRightBottom(-1);
+    }
+  }
+  if (tu.cu->slice->getReverseLastSigCoeffFlag())
+  {
+    posX = zoTbWdith - 1 - posX;
+    posY = zoTbHeight - 1 - posY;
+
+    GroupIdxX = g_groupIdx[posX];
+    GroupIdxY = g_groupIdx[posY];
   }
+#endif
 
   for( CtxLast = 0; CtxLast < GroupIdxX; CtxLast++ )
   {
@@ -2864,7 +2930,7 @@ void CABACWriter::last_sig_coeff( CoeffCodingContext& cctx, const TransformUnit&
   }
   if( GroupIdxX > 3 )
   {
-    posX -= g_uiMinInGroup[ GroupIdxX ];
+    posX -= g_minInGroup[GroupIdxX];
     for (int i = ( ( GroupIdxX - 2 ) >> 1 ) - 1 ; i >= 0; i-- )
     {
       m_BinEncoder.encodeBinEP( ( posX >> i ) & 1 );
@@ -2872,7 +2938,7 @@ void CABACWriter::last_sig_coeff( CoeffCodingContext& cctx, const TransformUnit&
   }
   if( GroupIdxY > 3 )
   {
-    posY -= g_uiMinInGroup[ GroupIdxY ];
+    posY -= g_minInGroup[GroupIdxY];
     for ( int i = ( ( GroupIdxY - 2 ) >> 1 ) - 1 ; i >= 0; i-- )
     {
       m_BinEncoder.encodeBinEP( ( posY >> i ) & 1 );
@@ -2880,8 +2946,6 @@ void CABACWriter::last_sig_coeff( CoeffCodingContext& cctx, const TransformUnit&
   }
 }
 
-
-
 void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoeff* coeff, const int stateTransTable, int& state )
 {
   //===== init =====
@@ -2889,6 +2953,8 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe
   const bool  isLast      = cctx.isLast();
   int         firstSigPos = ( isLast ? cctx.scanPosLast() : cctx.maxSubPos() );
   int         nextSigPos  = firstSigPos;
+  int baseLevel = cctx.getBaseLevel();
+  bool updateHistory = cctx.getUpdateHist();
 
   //===== encode significant_coeffgroup_flag =====
   if( !isLast && cctx.isNotFirst() )
@@ -2974,14 +3040,21 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe
   unsigned ricePar = 0;
   for( int scanPos = firstSigPos; scanPos > firstPosMode2; scanPos-- )
   {
-    int       sumAll = cctx.templateAbsSum(scanPos, coeff, 4);
-    ricePar = g_auiGoRiceParsCoeff[sumAll];
+     ricePar = (cctx.*(cctx.deriveRiceRRC))(scanPos, coeff, baseLevel);
+
     unsigned absLevel = (unsigned) abs( coeff[ cctx.blockPos( scanPos ) ] );
     if( absLevel >= 4 )
     {
       unsigned rem      = ( absLevel - 4 ) >> 1;
       m_BinEncoder.encodeRemAbsEP( rem, ricePar, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() );
       DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, ricePar );
+      if ((updateHistory) && (rem > 0))
+      {
+        unsigned &riceStats = m_BinEncoder.getCtx().getGRAdaptStats((unsigned)(cctx.compID()));
+        cctx.updateRiceStat(riceStats, rem, 1);
+        cctx.setUpdateHist(0);
+        updateHistory = 0;
+      }
     }
   }
 
@@ -2990,13 +3063,19 @@ void CABACWriter::residual_coding_subblock( CoeffCodingContext& cctx, const TCoe
   {
     TCoeff    Coeff     = coeff[ cctx.blockPos( scanPos ) ];
     unsigned    absLevel  = (unsigned) abs( Coeff );
-    int       sumAll = cctx.templateAbsSum(scanPos, coeff, 0);
-    int       rice      = g_auiGoRiceParsCoeff                        [sumAll];
-    int       pos0      = g_auiGoRicePosCoeff0(state, rice);
+    int rice = (cctx.*(cctx.deriveRiceRRC))(scanPos, coeff, 0);
+    int         pos0      = g_goRicePosCoeff0(state, rice);
     unsigned  rem       = ( absLevel == 0 ? pos0 : absLevel <= pos0 ? absLevel-1 : absLevel );
     m_BinEncoder.encodeRemAbsEP( rem, rice, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() );
     DTRACE( g_trace_ctx, D_SYNTAX_RESI, "rem_val() bin=%d ctx=%d\n", rem, rice );
     state = ( stateTransTable >> ((state<<2)+((absLevel&1)<<1)) ) & 3;
+    if ((updateHistory) && (rem > 0))
+    {
+      unsigned &riceStats = m_BinEncoder.getCtx().getGRAdaptStats((unsigned)cctx.compID());
+      cctx.updateRiceStat(riceStats, rem, 0);
+      cctx.setUpdateHist(0);
+      updateHistory = 0;
+    }
     if( absLevel )
     {
       numNonZero++;
@@ -3042,11 +3121,33 @@ void CABACWriter::residual_codingTS( const TransformUnit& tu, ComponentID compID
   for( int subSetId = 0; subSetId <= ( cctx.maxNumCoeff() - 1 ) >> cctx.log2CGSize(); subSetId++ )
   {
     cctx.initSubblock         ( subSetId, sigGroupFlags[subSetId] );
-    residual_coding_subblockTS( cctx, coeff );
+    int goRiceParam = 1;
+    bool ricePresentFlag = false;
+    unsigned RiceBit[8]   = { 0, 0, 0, 0, 0, 0, 0, 0 };
+    if (tu.cu->slice->getSPS()->getSpsRangeExtension().getTSRCRicePresentFlag() && tu.mtsIdx[compID] == MTS_SKIP)
+    {
+      goRiceParam = goRiceParam + tu.cu->slice->get_tsrc_index();
+      if (isEncoding())
+      {
+        ricePresentFlag = true;
+        for (int i = 0; i < MAX_TSRC_RICE; i++)
+        {
+          RiceBit[i] = tu.cu->slice->getRiceBit(i);
+        }
+      }
+    }
+    residual_coding_subblockTS( cctx, coeff, RiceBit, goRiceParam, ricePresentFlag);
+    if (tu.cu->slice->getSPS()->getSpsRangeExtension().getTSRCRicePresentFlag() && tu.mtsIdx[compID] == MTS_SKIP && isEncoding())
+    {
+      for (int i = 0; i < MAX_TSRC_RICE; i++)
+      {
+        tu.cu->slice->setRiceBit(i, RiceBit[i]);
+      }
+    }
   }
 }
 
-void CABACWriter::residual_coding_subblockTS( CoeffCodingContext& cctx, const TCoeff* coeff )
+void CABACWriter::residual_coding_subblockTS( CoeffCodingContext& cctx, const TCoeff* coeff, unsigned (&RiceBit)[8], int riceParam, bool ricePresentFlag)
 {
   //===== init =====
   const int   minSubPos   = cctx.maxSubPos();
@@ -3058,13 +3159,13 @@ void CABACWriter::residual_coding_subblockTS( CoeffCodingContext& cctx, const TC
   {
     if( cctx.isSigGroup() )
     {
-        m_BinEncoder.encodeBin( 1, cctx.sigGroupCtxId( true ) );
-        DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sigGroup() bin=%d ctx=%d\n", 1, cctx.sigGroupCtxId() );
+      m_BinEncoder.encodeBin(1, cctx.sigGroupCtxId(true));
+      DTRACE(g_trace_ctx, D_SYNTAX_RESI, "ts_sigGroup() bin=%d ctx=%d\n", 1, cctx.sigGroupCtxId());
     }
     else
     {
-        m_BinEncoder.encodeBin( 0, cctx.sigGroupCtxId( true ) );
-        DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sigGroup() bin=%d ctx=%d\n", 0, cctx.sigGroupCtxId() );
+      m_BinEncoder.encodeBin(0, cctx.sigGroupCtxId(true));
+      DTRACE(g_trace_ctx, D_SYNTAX_RESI, "ts_sigGroup() bin=%d ctx=%d\n", 0, cctx.sigGroupCtxId());
       return;
     }
   }
@@ -3084,19 +3185,19 @@ void CABACWriter::residual_coding_subblockTS( CoeffCodingContext& cctx, const TC
     unsigned  sigFlag    = ( Coeff != 0 );
     if( numNonZero || nextSigPos != inferSigPos )
     {
-        const unsigned sigCtxId = cctx.sigCtxIdAbsTS( nextSigPos, coeff );
-        m_BinEncoder.encodeBin( sigFlag, sigCtxId );
-        DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_sig_bin() bin=%d ctx=%d\n", sigFlag, sigCtxId );
-        cctx.decimateNumCtxBins(1);
+      const unsigned sigCtxId = cctx.sigCtxIdAbsTS(nextSigPos, coeff);
+      m_BinEncoder.encodeBin(sigFlag, sigCtxId);
+      DTRACE(g_trace_ctx, D_SYNTAX_RESI, "ts_sig_bin() bin=%d ctx=%d\n", sigFlag, sigCtxId);
+      cctx.decimateNumCtxBins(1);
     }
 
     if( sigFlag )
     {
       //===== encode sign's =====
       int sign = Coeff < 0;
-        const unsigned signCtxId = cctx.signCtxIdAbsTS(nextSigPos, coeff, cctx.bdpcm());
-        m_BinEncoder.encodeBin(sign, signCtxId);
-        cctx.decimateNumCtxBins(1);
+      const unsigned signCtxId = cctx.signCtxIdAbsTS(nextSigPos, coeff, cctx.bdpcm());
+      m_BinEncoder.encodeBin(sign, signCtxId);
+      cctx.decimateNumCtxBins(1);
       numNonZero++;
       cctx.neighTS(rightPixel, belowPixel, nextSigPos, coeff);
       modAbsCoeff = cctx.deriveModCoeff(rightPixel, belowPixel, abs(Coeff), cctx.bdpcm());
@@ -3104,9 +3205,9 @@ void CABACWriter::residual_coding_subblockTS( CoeffCodingContext& cctx, const TC
 
       unsigned gt1 = !!remAbsLevel;
       const unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(nextSigPos, coeff, cctx.bdpcm());
-        m_BinEncoder.encodeBin(gt1, gt1CtxId);
-        DTRACE(g_trace_ctx, D_SYNTAX_RESI, "ts_gt1_flag() bin=%d ctx=%d\n", gt1, gt1CtxId);
-        cctx.decimateNumCtxBins(1);
+      m_BinEncoder.encodeBin(gt1, gt1CtxId);
+      DTRACE(g_trace_ctx, D_SYNTAX_RESI, "ts_gt1_flag() bin=%d ctx=%d\n", gt1, gt1CtxId);
+      cctx.decimateNumCtxBins(1);
 
       if( gt1 )
       {
@@ -3151,10 +3252,33 @@ void CABACWriter::residual_coding_subblockTS( CoeffCodingContext& cctx, const TC
 
     if( absLevel >= cutoffVal )
     {
-      int       rice = cctx.templateAbsSumTS( scanPos, coeff );
+      int       rice = riceParam;
       unsigned  rem = scanPos <= lastScanPosPass1 ? (absLevel - cutoffVal) >> 1 : absLevel;
       m_BinEncoder.encodeRemAbsEP( rem, rice, COEF_REMAIN_BIN_REDUCTION, cctx.maxLog2TrDRange() );
       DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_rem_val() bin=%d ctx=%d sp=%d\n", rem, rice, scanPos );
+      if ( ricePresentFlag && (isEncoding()) && (cctx.compID() == COMPONENT_Y))
+      {
+        for (int idx = 1; idx < 9; idx++)
+        {
+          uint32_t length;
+          uint32_t symbol = rem;
+          if (rem < (5 << idx))
+          {
+            length = rem >> idx;
+            RiceBit[idx - 1] += (length + 1 + idx);
+          }
+          else
+          {
+            length = idx;
+            symbol = symbol - (5 << idx);
+            while (symbol >= (1 << length))
+            {
+              symbol -= (1 << (length++));
+            }
+            RiceBit[idx - 1] += (5 + length + 1 - idx + length);
+          }
+        }
+      }
 
       if (absLevel && scanPos > lastScanPosPass1)
       {
@@ -3165,12 +3289,6 @@ void CABACWriter::residual_coding_subblockTS( CoeffCodingContext& cctx, const TC
   }
 }
 
-
-
-
-
-
-
 //================================================================================
 //  helper functions
 //--------------------------------------------------------------------------------
@@ -3240,14 +3358,20 @@ void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ChannelType channe
   if( isLuma( channel ) )
   {
     if (alfParam->enabledFlag[COMPONENT_Y])
+    {
       codeAlfCtuEnableFlags( cs, COMPONENT_Y, alfParam );
+    }
   }
   else
   {
     if (alfParam->enabledFlag[COMPONENT_Cb])
+    {
       codeAlfCtuEnableFlags( cs, COMPONENT_Cb, alfParam );
+    }
     if (alfParam->enabledFlag[COMPONENT_Cr])
+    {
       codeAlfCtuEnableFlags( cs, COMPONENT_Cr, alfParam );
+    }
   }
 }
 void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ComponentID compID, AlfParam* alfParam)
@@ -3262,7 +3386,7 @@ void CABACWriter::codeAlfCtuEnableFlags( CodingStructure& cs, ComponentID compID
 
 void CABACWriter::codeAlfCtuEnableFlag( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, AlfParam* alfParam)
 {
-  const bool alfComponentEnabled = (alfParam != NULL) ? alfParam->enabledFlag[compIdx] : cs.slice->getTileGroupAlfEnabledFlag((ComponentID)compIdx);
+  const bool alfComponentEnabled = (alfParam != NULL) ? alfParam->enabledFlag[compIdx] : cs.slice->getAlfEnabledFlag((ComponentID)compIdx);
 
   if( cs.sps->getALFEnabledFlag() && alfComponentEnabled )
   {
@@ -3396,7 +3520,7 @@ void CABACWriter::codeAlfCtuFilterIndex(CodingStructure& cs, uint32_t ctuRsAddr,
 
   short* alfCtbFilterIndex = cs.slice->getPic()->getAlfCtbFilterIndex();
   const unsigned filterSetIdx = alfCtbFilterIndex[ctuRsAddr];
-  unsigned numAps = cs.slice->getTileGroupNumAps();
+  unsigned numAps = cs.slice->getNumAlfApsIdsLuma();
   unsigned numAvailableFiltSets = numAps + NUM_FIXED_FILTER_SETS;
   if (numAvailableFiltSets > NUM_FIXED_FILTER_SETS)
   {
@@ -3422,20 +3546,28 @@ void CABACWriter::codeAlfCtuFilterIndex(CodingStructure& cs, uint32_t ctuRsAddr,
     xWriteTruncBinCode(filterSetIdx, NUM_FIXED_FILTER_SETS);
   }
 }
+
 void CABACWriter::codeAlfCtuAlternatives( CodingStructure& cs, ChannelType channel, AlfParam* alfParam)
 {
   if( isChroma( channel ) )
   {
     if (alfParam->enabledFlag[COMPONENT_Cb])
+    {
       codeAlfCtuAlternatives( cs, COMPONENT_Cb, alfParam );
+    }
     if (alfParam->enabledFlag[COMPONENT_Cr])
+    {
       codeAlfCtuAlternatives( cs, COMPONENT_Cr, alfParam );
+    }
   }
 }
+
 void CABACWriter::codeAlfCtuAlternatives( CodingStructure& cs, ComponentID compID, AlfParam* alfParam)
 {
   if( compID == COMPONENT_Y )
+  {
     return;
+  }
   uint32_t numCTUs = cs.pcv->sizeInCtus;
   uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compID );
 
@@ -3451,11 +3583,13 @@ void CABACWriter::codeAlfCtuAlternatives( CodingStructure& cs, ComponentID compI
 void CABACWriter::codeAlfCtuAlternative( CodingStructure& cs, uint32_t ctuRsAddr, const int compIdx, const AlfParam* alfParam)
 {
   if( compIdx == COMPONENT_Y )
+  {
     return;
-  int apsIdx = alfParam ? 0 : cs.slice->getTileGroupApsIdChroma();
+  }
+  int apsIdx = alfParam ? 0 : cs.slice->getAlfApsIdChroma();
   const AlfParam& alfParamRef = alfParam ? (*alfParam) : cs.slice->getAlfAPSs()[apsIdx]->getAlfAPSParam();
 
-  if( alfParam || (cs.sps->getALFEnabledFlag() && cs.slice->getTileGroupAlfEnabledFlag( (ComponentID)compIdx )) )
+  if( alfParam || (cs.sps->getALFEnabledFlag() && cs.slice->getAlfEnabledFlag( (ComponentID)compIdx )) )
   {
     uint8_t* ctbAlfFlag = cs.slice->getPic()->getAlfCtuEnableFlag( compIdx );
 
@@ -3466,9 +3600,13 @@ void CABACWriter::codeAlfCtuAlternative( CodingStructure& cs, uint32_t ctuRsAddr
       unsigned numOnes = ctbAlfAlternative[ctuRsAddr];
       assert( ctbAlfAlternative[ctuRsAddr] < numAlts );
       for( int i = 0; i < numOnes; ++i )
+      {
         m_BinEncoder.encodeBin( 1, Ctx::ctbAlfAlternative( compIdx-1 ) );
+      }
       if( numOnes < numAlts-1 )
+      {
         m_BinEncoder.encodeBin( 0, Ctx::ctbAlfAlternative( compIdx-1 ) );
+      }
     }
   }
 }
diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h
index 62a39d7a6d29c9fbc2b9755412fae7abfbc2d7ad..a230571fd3930860f3b4ded5ec2d6c447d230d62 100644
--- a/source/Lib/EncoderLib/CABACWriter.h
+++ b/source/Lib/EncoderLib/CABACWriter.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -149,7 +149,7 @@ public:
   void        last_sig_coeff            ( CoeffCodingContext&           cctx,     const TransformUnit& tu, ComponentID       compID );
   void        residual_coding_subblock  ( CoeffCodingContext&           cctx,     const TCoeff*     coeff, const int stateTransTable, int& state );
   void        residual_codingTS         ( const TransformUnit&          tu,       ComponentID       compID );
-  void        residual_coding_subblockTS( CoeffCodingContext&           cctx,     const TCoeff*     coeff  );
+  void        residual_coding_subblockTS( CoeffCodingContext&           cctx,     const TCoeff*     coeff, unsigned (&RiceBit)[8], int riceParam, bool ricePresentFlag);
   void        joint_cb_cr               ( const TransformUnit&          tu,       const int cbfMask );
 
 
diff --git a/source/Lib/EncoderLib/CMakeLists.txt b/source/Lib/EncoderLib/CMakeLists.txt
index 2a50346f2c45c4a57d7c73788df086a19645d7f0..9585d923b317c94d1a7aa24d346db8adf568fd91 100644
--- a/source/Lib/EncoderLib/CMakeLists.txt
+++ b/source/Lib/EncoderLib/CMakeLists.txt
@@ -32,28 +32,8 @@ if( SET_ENABLE_TRACING )
   endif()
 endif()
 
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-else()
-  target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
-
 target_include_directories( ${LIB_NAME} PUBLIC . )
-target_link_libraries( ${LIB_NAME} CommonLib Threads::Threads )
+target_link_libraries( ${LIB_NAME} CommonLib )
 
 if( CMAKE_COMPILER_IS_GNUCC )
   # this is quite certainly a compiler problem
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
index c4f260b2ee8a45a4b4e1dfaf18bc60347a256ebd..60cfb552ce1f65b52f618086c3acbf7fe539f836 100644
--- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
+++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -807,8 +807,8 @@ void EncAdaptiveLoopFilter::xSetupCcAlfAPS( CodingStructure &cs )
 {
   if (m_ccAlfFilterParam.ccAlfFilterEnabled[COMPONENT_Cb - 1])
   {
-    int  ccAlfCbApsId = cs.slice->getTileGroupCcAlfCbApsId();
-    APS* aps = m_apsMap->getPS((cs.slice->getTileGroupCcAlfCbApsId() << NUM_APS_TYPE_LEN) + ALF_APS);
+    int  ccAlfCbApsId = cs.slice->getCcAlfCbApsId();
+    APS* aps = m_apsMap->getPS((cs.slice->getCcAlfCbApsId() << NUM_APS_TYPE_LEN) + ALF_APS);
     if (aps == NULL)
     {
       aps = m_apsMap->allocatePS((ccAlfCbApsId << NUM_APS_TYPE_LEN) + ALF_APS);
@@ -831,16 +831,16 @@ void EncAdaptiveLoopFilter::xSetupCcAlfAPS( CodingStructure &cs )
       m_apsMap->setChangedFlag((ccAlfCbApsId << NUM_APS_TYPE_LEN) + ALF_APS, true);
       aps->setTemporalId(cs.slice->getTLayer());
     }
-    cs.slice->setTileGroupCcAlfCbEnabledFlag(true);
+    cs.slice->setCcAlfCbEnabledFlag(true);
   }
   else
   {
-    cs.slice->setTileGroupCcAlfCbEnabledFlag(false);
+    cs.slice->setCcAlfCbEnabledFlag(false);
   }
   if (m_ccAlfFilterParam.ccAlfFilterEnabled[COMPONENT_Cr - 1])
   {
-    int  ccAlfCrApsId = cs.slice->getTileGroupCcAlfCrApsId();
-    APS* aps = m_apsMap->getPS((cs.slice->getTileGroupCcAlfCrApsId() << NUM_APS_TYPE_LEN) + ALF_APS);
+    int  ccAlfCrApsId = cs.slice->getCcAlfCrApsId();
+    APS* aps = m_apsMap->getPS((cs.slice->getCcAlfCrApsId() << NUM_APS_TYPE_LEN) + ALF_APS);
     if (aps == NULL)
     {
       aps = m_apsMap->allocatePS((ccAlfCrApsId << NUM_APS_TYPE_LEN) + ALF_APS);
@@ -863,11 +863,11 @@ void EncAdaptiveLoopFilter::xSetupCcAlfAPS( CodingStructure &cs )
       aps->setTemporalId(cs.slice->getTLayer());
     }
     aps->setAPSType(ALF_APS);
-    cs.slice->setTileGroupCcAlfCrEnabledFlag(true);
+    cs.slice->setCcAlfCrEnabledFlag(true);
   }
   else
   {
-    cs.slice->setTileGroupCcAlfCrEnabledFlag(false);
+    cs.slice->setCcAlfCrEnabledFlag(false);
   }
 }
 
@@ -881,7 +881,11 @@ void EncAdaptiveLoopFilter::ALFProcess(CodingStructure& cs, const double *lambda
   int layerIdx = cs.vps == nullptr ? 0 : cs.vps->getGeneralLayerIdx( cs.slice->getPic()->layerId );
 
    // IRAP AU is assumed
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  if( !layerIdx && ( cs.slice->getPendingRasInit() || cs.slice->isIDRorBLA() || ( cs.slice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA && m_encCfg->getCraAPSreset() ) ) )
+#else
   if( !layerIdx && ( cs.slice->getPendingRasInit() || cs.slice->isIDRorBLA() ) )
+#endif
   {
     memset(cs.slice->getAlfAPSs(), 0, sizeof(*cs.slice->getAlfAPSs())*ALF_CTB_MAX_NUM_APS);
     m_apsIdStart = ALF_CTB_MAX_NUM_APS;
@@ -925,8 +929,11 @@ void EncAdaptiveLoopFilter::ALFProcess(CodingStructure& cs, const double *lambda
   m_lambda[COMPONENT_Y] = lambdas[COMPONENT_Y] * double(1 << shiftLuma);
   m_lambda[COMPONENT_Cb] = lambdas[COMPONENT_Cb] * double(1 << shiftChroma);
   m_lambda[COMPONENT_Cr] = lambdas[COMPONENT_Cr] * double(1 << shiftChroma);
-
-  PelUnitBuf orgYuv = cs.getOrgBuf();
+#if JVET_W0129_ENABLE_ALF_TRUEORG
+  PelUnitBuf orgYuv = m_encCfg->getAlfTrueOrg() ? cs.getTrueOrgBuf() : cs.getOrgBuf();
+#else
+  PelUnitBuf orgYuv = m_encCfg->getAlfSaoTrueOrg() ? cs.getTrueOrgBuf() : cs.getOrgBuf();
+#endif
 
   m_tempBuf.copyFrom( cs.getRecoBuf() );
   PelUnitBuf recYuv = m_tempBuf.getBuf( cs.area );
@@ -1009,7 +1016,7 @@ void EncAdaptiveLoopFilter::ALFProcess(CodingStructure& cs, const double *lambda
   // consider using new filter (only)
   alfParam.newFilterFlag[CHANNEL_TYPE_LUMA] = true;
   alfParam.newFilterFlag[CHANNEL_TYPE_CHROMA] = true;
-  cs.slice->setTileGroupNumAps(1); // Only new filter for RD cost optimization
+  cs.slice->setNumAlfApsIdsLuma(1); // Only new filter for RD cost optimization
   // derive filter (luma)
   alfEncoder( cs, alfParam, orgYuv, recYuv, cs.getRecoBuf(), CHANNEL_TYPE_LUMA
 #if ENABLE_QPA
@@ -1030,7 +1037,7 @@ void EncAdaptiveLoopFilter::ALFProcess(CodingStructure& cs, const double *lambda
   // let alfEncoderCtb decide now
   alfParam.newFilterFlag[CHANNEL_TYPE_LUMA] = false;
   alfParam.newFilterFlag[CHANNEL_TYPE_CHROMA] = false;
-  cs.slice->setTileGroupNumAps(0);
+  cs.slice->setNumAlfApsIdsLuma(0);
   m_CABACEstimator->getCtx() = AlfCtx(ctxStart);
   alfEncoderCtb(cs, alfParam
 #if ENABLE_QPA
@@ -1055,9 +1062,9 @@ void EncAdaptiveLoopFilter::ALFProcess(CodingStructure& cs, const double *lambda
   alfReconstructor(cs, recYuv);
 
   // Do not transmit CC ALF if it is unchanged
-  if (cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y))
+  if (cs.slice->getAlfEnabledFlag(COMPONENT_Y))
   {
-    for (int32_t lumaAlfApsId : cs.slice->getTileGroupApsIdLuma())
+    for (int32_t lumaAlfApsId : cs.slice->getAlfApsIdsLuma())
     {
       APS* aps = (lumaAlfApsId >= 0) ? m_apsMap->getPS((lumaAlfApsId << NUM_APS_TYPE_LEN) + ALF_APS) : nullptr;
       if (aps && m_apsMap->getChangedFlag((lumaAlfApsId << NUM_APS_TYPE_LEN) + ALF_APS))
@@ -1067,7 +1074,7 @@ void EncAdaptiveLoopFilter::ALFProcess(CodingStructure& cs, const double *lambda
       }
     }
   }
-  int chromaAlfApsId = ( cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) || cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr) ) ? cs.slice->getTileGroupApsIdChroma() : -1;
+  int chromaAlfApsId = ( cs.slice->getAlfEnabledFlag(COMPONENT_Cb) || cs.slice->getAlfEnabledFlag(COMPONENT_Cr) ) ? cs.slice->getAlfApsIdChroma() : -1;
   APS* aps = (chromaAlfApsId >= 0) ? m_apsMap->getPS((chromaAlfApsId << NUM_APS_TYPE_LEN) + ALF_APS) : nullptr;
   if (aps && m_apsMap->getChangedFlag((chromaAlfApsId << NUM_APS_TYPE_LEN) + ALF_APS))
   {
@@ -1084,7 +1091,7 @@ void EncAdaptiveLoopFilter::ALFProcess(CodingStructure& cs, const double *lambda
   m_tempBuf.get(COMPONENT_Cr).copyFrom(cs.getRecoBuf().get(COMPONENT_Cr));
   recYuv = m_tempBuf.getBuf(cs.area);
   recYuv.extendBorderPel(MAX_ALF_FILTER_LENGTH >> 1);
-  
+
   deriveStatsForCcAlfFiltering(orgYuv, recYuv, COMPONENT_Cb, m_numCTUsInWidth, (0 + 1), cs);
   deriveStatsForCcAlfFiltering(orgYuv, recYuv, COMPONENT_Cr, m_numCTUsInWidth, (0 + 1), cs);
   initDistortionCcalf();
@@ -1162,7 +1169,7 @@ double EncAdaptiveLoopFilter::deriveCtbAlfEnableFlags( CodingStructure& cs, cons
       {
         // Evaluate cost of signaling filter set index for convergence of filters enabled flag / filter derivation
         assert( cs.slice->getPic()->getAlfCtbFilterIndex()[ctuIdx] == NUM_FIXED_FILTER_SETS );
-        assert( cs.slice->getTileGroupNumAps() == 1 );
+        assert( cs.slice->getNumAlfApsIdsLuma() == 1 );
         m_CABACEstimator->codeAlfCtuFilterIndex(cs, ctuIdx, &m_alfParamTemp.enabledFlag[COMPONENT_Y]);
       }
       double costOn = distUnfilterCtu + ctuLambda * FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
@@ -1459,7 +1466,7 @@ double EncAdaptiveLoopFilter::getFilterCoeffAndCost( CodingStructure& cs, double
     {
       // Evaluate cost of signaling filter set index for convergence of filters enabled flag / filter derivation
       assert( cs.slice->getPic()->getAlfCtbFilterIndex()[ctuIdx] == NUM_FIXED_FILTER_SETS );
-      assert( cs.slice->getTileGroupNumAps() == 1 );
+      assert( cs.slice->getNumAlfApsIdsLuma() == 1 );
       m_CABACEstimator->codeAlfCtuFilterIndex(cs, ctuIdx, &m_alfParamTemp.enabledFlag[COMPONENT_Y]);
     }
   }
@@ -1707,7 +1714,9 @@ int EncAdaptiveLoopFilter::lengthFilterCoeffs( AlfFilterShape& alfShape, const i
     {
       bitCnt += lengthUvlc( abs( FilterCoeff[ ind ][ i ] ) );
       if( abs( FilterCoeff[ ind ][ i ] ) != 0 )
+      {
         bitCnt += 1;
+      }
     }
   }
   return bitCnt;
@@ -1767,23 +1776,23 @@ double EncAdaptiveLoopFilter::getDistCoeffForce0( bool* codedVarBins, double err
   return distForce0;
 }
 
-int EncAdaptiveLoopFilter::lengthUvlc( int uiCode )
+int EncAdaptiveLoopFilter::lengthUvlc(int code)
 {
-  int uiLength = 1;
-  int uiTemp = ++uiCode;
+  CHECK(code < 0,        "Unsigned VLC cannot be negative");
+  CHECK(code == MAX_INT, "Maximum supported UVLC code is MAX_INT-1");
 
-  CHECK( !uiTemp, "Integer overflow" );
+  int length = 1;
+  int temp = ++code;
 
-  while( 1 != uiTemp )
+  while (1 != temp)
   {
-    uiTemp >>= 1;
-    uiLength += 2;
+    temp >>= 1;
+    length += 2;
   }
-  // Take care of cases where uiLength > 32
-  return ( uiLength >> 1 ) + ( ( uiLength + 1 ) >> 1 );
+  // Take care of cases where length > 32
+  return (length >> 1) + ((length + 1) >> 1);
 }
 
-
 double EncAdaptiveLoopFilter::deriveFilterCoeffs( AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2], AlfParam& alfParam )
 {
   double error = 0.0;
@@ -1834,12 +1843,11 @@ double EncAdaptiveLoopFilter::deriveCoeffQuant( int *filterClipp, int *filterCoe
   filterCoeffQuant[numCoeff - 1] = 0;
 
   int modified=1;
-#if JVET_T0064
-  if( m_encCfg->getALFStrength() != 1.0 )
+  const bool isLumaFilter = numCoeff > 7;   // NOTE(review): 7 presumably separates chroma-sized from luma-sized filters - confirm
+  if ((isLumaFilter ? m_encCfg->getALFStrengthLuma() : m_encCfg->getALFStrengthChroma()) != 1.0)
   {
     modified = 0;
   }
-#endif
   double errRef=cov.calcErrorForCoeffs( filterClipp, filterCoeffQuant, numCoeff, bitDepth );
   while( modified )
   {
@@ -1880,14 +1888,12 @@ double EncAdaptiveLoopFilter::deriveCoeffQuant( int *filterClipp, int *filterCoe
 
 void EncAdaptiveLoopFilter::roundFiltCoeff( int *filterCoeffQuant, double *filterCoeff, const int numCoeff, const int factor )
 {
+  // Strength applied to every coefficient before rounding: luma setting for filters with more than 7 coefficients, chroma setting otherwise.
+  const double alfStrength = (numCoeff > 7) ? m_encCfg->getALFStrengthLuma() : m_encCfg->getALFStrengthChroma();
   for( int i = 0; i < numCoeff; i++ )
   {
     int sign = filterCoeff[i] > 0 ? 1 : -1;
-#if JVET_T0064
-    filterCoeffQuant[i] = int((filterCoeff[i] * m_encCfg->getALFStrength()) * sign * factor + 0.5) * sign;
-#else
-    filterCoeffQuant[i] = int( filterCoeff[i] * sign * factor + 0.5 ) * sign;
-#endif
+    filterCoeffQuant[i] = int((filterCoeff[i] * alfStrength) * sign * factor + 0.5) * sign;
   }
 }
 
@@ -1901,11 +1907,7 @@ void EncAdaptiveLoopFilter::roundFiltCoeffCCALF(int16_t *filterCoeffQuant, doubl
     int best_index = 0;
     for(int k = 0; k < CCALF_CANDS_COEFF_NR; k++)
     {
-#if JVET_T0064
       double err = ((filterCoeff[i] * m_encCfg->getCCALFStrength()) * sign * factor - CCALF_SMALL_TAB[k]);
-#else
-      double err = (filterCoeff[i] * sign * factor - CCALF_SMALL_TAB[k]);
-#endif
       err = err*err;
       if(err < best_err)
       {
@@ -2273,7 +2275,19 @@ void EncAdaptiveLoopFilter::getBlkStats(AlfCovariance* alfCovariance, const AlfF
   const int numBins = AlfNumClippingValues[channel];
   int transposeIdx = 0;
   int classIdx = 0;
-
+  // Scale factors applied to the accumulated statistics below: y is weighted by 1/target and
+  // E by 1/target^2. A target of exactly 0 zeroes the y weight and leaves the E weight at 1.
+  const bool   isLumaFilter         = shape.numCoeff > 7;
+  const double filterStrengthTarget = isLumaFilter ? m_encCfg->getALFStrengthTargetLuma()
+                                                   : m_encCfg->getALFStrengthTargetChroma();
+  double filterStrengthTargetY = 0.0;
+  double filterStrengthTargetE = 1.0;
+  if (filterStrengthTarget != 0.0)
+  {
+    const double invTarget = 1 / filterStrengthTarget;
+    filterStrengthTargetY  = invTarget;
+    filterStrengthTargetE  = invTarget * invTarget;
+  }
   for( int i = 0; i < area.height; i++ )
   {
     int vbDistance = ((areaDst.y + i) % vbCTUHeight) - vbPos;
@@ -2308,11 +2322,11 @@ void EncAdaptiveLoopFilter::getBlkStats(AlfCovariance* alfCovariance, const AlfF
             {
               if (m_alfWSSD)
               {
-                alfCovariance[classIdx].E[b0][b1][k][l] += weight * (ELocal[k][b0] * (double)ELocal[l][b1]);
+                alfCovariance[classIdx].E[b0][b1][k][l] += filterStrengthTargetE * weight * (ELocal[k][b0] * (double)ELocal[l][b1]);
               }
               else
               {
-                alfCovariance[classIdx].E[b0][b1][k][l] += ELocal[k][b0] * (double)ELocal[l][b1];
+                alfCovariance[classIdx].E[b0][b1][k][l] += filterStrengthTargetE * ELocal[k][b0] * (double)ELocal[l][b1];
               }
             }
           }
@@ -2321,11 +2335,11 @@ void EncAdaptiveLoopFilter::getBlkStats(AlfCovariance* alfCovariance, const AlfF
         {
           if (m_alfWSSD)
           {
-            alfCovariance[classIdx].y[b][k] += weight * (ELocal[k][b] * (double)yLocal);
+            alfCovariance[classIdx].y[b][k] += filterStrengthTargetY * weight * (ELocal[k][b] * (double)yLocal);
           }
           else
           {
-            alfCovariance[classIdx].y[b][k] += ELocal[k][b] * (double)yLocal;
+            alfCovariance[classIdx].y[b][k] += filterStrengthTargetY * ELocal[k][b] * (double)yLocal;
           }
         }
       }
@@ -2563,8 +2577,8 @@ std::vector<int> EncAdaptiveLoopFilter::getAvaiApsIdsLuma(CodingStructure& cs, i
       curApsId = (curApsId + 1) % ALF_CTB_MAX_NUM_APS;
     }
   }
-  cs.slice->setTileGroupNumAps((int)result.size());
-  cs.slice->setAlfAPSs(result);
+  cs.slice->setNumAlfApsIdsLuma((int)result.size());
+  cs.slice->setAlfApsIdsLuma(result);
   newApsId = m_apsIdStart - 1;
   if (newApsId < 0)
   {
@@ -2649,7 +2663,7 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
       {
         continue;
       }
-      cs.slice->setTileGroupNumAps(numTemporalAps + useNewFilter);
+      cs.slice->setNumAlfApsIdsLuma(numTemporalAps + useNewFilter);
       int numFilterSet = NUM_FIXED_FILTER_SETS + numTemporalAps + useNewFilter;
       if (numTemporalAps == apsIds.size() && numTemporalAps > 0 && useNewFilter && newApsId == apsIds.back()) //last temporalAPS is occupied by new filter set and this temporal APS becomes unavailable
       {
@@ -2734,16 +2748,12 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
           double         costOn = MAX_DOUBLE;
           ctxTempStart = AlfCtx(m_CABACEstimator->getCtx());
           int iBestFilterSetIdx = 0;
-#if JVET_T0064
           int firstFilterSetIdx = 0;
           if (!m_encCfg->getALFAllowPredefinedFilters())
           {
             firstFilterSetIdx = NUM_FIXED_FILTER_SETS;
           }
           for (int filterSetIdx = firstFilterSetIdx; filterSetIdx < numFilterSet; filterSetIdx++)
-#else
-          for (int filterSetIdx = 0; filterSetIdx < numFilterSet; filterSetIdx++)
-#endif
           {
             //rate
             m_CABACEstimator->getCtx() = AlfCtx(ctxTempStart);
@@ -2849,22 +2859,22 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
     }// for (int numTemporalAps = 0; numTemporalAps < apsIds.size(); numTemporalAps++)
   }//for (int useNewFilter = 0; useNewFilter <= 1; useNewFilter++)
 
-  cs.slice->setTileGroupCcAlfCbApsId(newApsId);
-  cs.slice->setTileGroupCcAlfCrApsId(newApsId);
+  cs.slice->setCcAlfCbApsId(newApsId);
+  cs.slice->setCcAlfCrApsId(newApsId);
 
   if (costOff <= costMin)
   {
-    cs.slice->resetTileGroupAlfEnabledFlag();
-    cs.slice->setTileGroupNumAps(0);
+    cs.slice->resetAlfEnabledFlag();
+    cs.slice->setNumAlfApsIdsLuma(0);
     setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_LUMA, 0);
     setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_CHROMA, 0);
     return;
   }
   else
   {
-    cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Y, true);
-    cs.slice->setTileGroupNumAps((int)bestApsIds.size());
-    cs.slice->setAlfAPSs(bestApsIds);
+    cs.slice->setAlfEnabledFlag(COMPONENT_Y, true);
+    cs.slice->setNumAlfApsIdsLuma((int)bestApsIds.size());
+    cs.slice->setAlfApsIdsLuma(bestApsIds);
     copyCtuEnableFlag(m_ctuEnableFlag, m_ctuEnableFlagTmp, CHANNEL_TYPE_LUMA);
     for (int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++)
     {
@@ -2886,8 +2896,8 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
       m_apsIdStart = newApsId;
     }
 
-    std::vector<int> apsIds = cs.slice->getTileGroupApsIdLuma();
-    for (int i = 0; i < (int)cs.slice->getTileGroupNumAps(); i++)
+    std::vector<int> apsIds = cs.slice->getAlfApsIdsLuma();
+    for (int i = 0; i < (int)cs.slice->getNumAlfApsIdsLuma(); i++)
     {
       apss[apsIds[i]] = m_apsMap->getPS((apsIds[i] << NUM_APS_TYPE_LEN) + ALF_APS);
     }
@@ -3048,9 +3058,9 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
       if (curCost < costMin)
       {
         costMin = curCost;
-        cs.slice->setTileGroupApsIdChroma(curApsId);
-        cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Cb, m_alfParamTemp.enabledFlag[COMPONENT_Cb]);
-        cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Cr, m_alfParamTemp.enabledFlag[COMPONENT_Cr]);
+        cs.slice->setAlfApsIdChroma(curApsId);
+        cs.slice->setAlfEnabledFlag(COMPONENT_Cb, m_alfParamTemp.enabledFlag[COMPONENT_Cb]);
+        cs.slice->setAlfEnabledFlag(COMPONENT_Cr, m_alfParamTemp.enabledFlag[COMPONENT_Cr]);
         copyCtuEnableFlag(m_ctuEnableFlagTmp, m_ctuEnableFlag, CHANNEL_TYPE_CHROMA);
         copyCtuAlternativeChroma(m_ctuAlternativeTmp, m_ctuAlternative);
       }
@@ -3058,20 +3068,20 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
 
     if (newApsIdChroma >= 0)
     {
-      cs.slice->setTileGroupCcAlfCbApsId(newApsIdChroma);
-      cs.slice->setTileGroupCcAlfCrApsId(newApsIdChroma);
+      cs.slice->setCcAlfCbApsId(newApsIdChroma);
+      cs.slice->setCcAlfCrApsId(newApsIdChroma);
     }
     if (costOff < costMin)
     {
-      cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Cb, false);
-      cs.slice->setTileGroupAlfEnabledFlag(COMPONENT_Cr, false);
+      cs.slice->setAlfEnabledFlag(COMPONENT_Cb, false);
+      cs.slice->setAlfEnabledFlag(COMPONENT_Cr, false);
       setCtuEnableFlag(m_ctuEnableFlag, CHANNEL_TYPE_CHROMA, 0);
     }
     else
     {
       copyCtuEnableFlag(m_ctuEnableFlag, m_ctuEnableFlagTmp, CHANNEL_TYPE_CHROMA);
       copyCtuAlternativeChroma(m_ctuAlternative, m_ctuAlternativeTmp);
-      if (cs.slice->getTileGroupApsIdChroma() == newApsIdChroma)   // new filter
+      if (cs.slice->getAlfApsIdChroma() == newApsIdChroma)   // new filter
       {
         APS *newAPS = m_apsMap->getPS((newApsIdChroma << NUM_APS_TYPE_LEN) + ALF_APS);
         if (newAPS == NULL)
@@ -3101,19 +3111,19 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
         m_apsMap->setChangedFlag((newApsIdChroma << NUM_APS_TYPE_LEN) + ALF_APS);
         m_apsIdStart = newApsIdChroma;
       }
-      apss[cs.slice->getTileGroupApsIdChroma()] =
-        m_apsMap->getPS((cs.slice->getTileGroupApsIdChroma() << NUM_APS_TYPE_LEN) + ALF_APS);
+      apss[cs.slice->getAlfApsIdChroma()] =
+        m_apsMap->getPS((cs.slice->getAlfApsIdChroma() << NUM_APS_TYPE_LEN) + ALF_APS);
     }
   }
 }
 
 void EncAdaptiveLoopFilter::alfReconstructor(CodingStructure& cs, const PelUnitBuf& recExtBuf)
 {
-  if (!cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y))
+  if (!cs.slice->getAlfEnabledFlag(COMPONENT_Y))
   {
     return;
   }
-  reconstructCoeffAPSs(cs, true, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) || cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr), false);
+  reconstructCoeffAPSs(cs, true, cs.slice->getAlfEnabledFlag(COMPONENT_Cb) || cs.slice->getAlfEnabledFlag(COMPONENT_Cr), false);
   short* alfCtuFilterIndex = cs.slice->getPic()->getAlfCtbFilterIndex();
   PelUnitBuf& recBuf = cs.getRecoBufRef();
   const PreCalcValues& pcv = *cs.pcv;
@@ -3359,12 +3369,10 @@ void EncAdaptiveLoopFilter::deriveCcAlfFilterCoeff( ComponentID compID, const Pe
 
   // Refine quanitzation
   int modified       = 1;
-#if JVET_T0064
   if (m_encCfg->getCCALFStrength() != 1.0)
   {
     modified = 0;
   }
-#endif
   double errRef      = m_alfCovarianceFrameCcAlf[compID - 1][0][filterIdx].calcErrorForCcAlfCoeffs(filterCoeffInt, size, (m_scaleBits+1));
   while (modified)
   {
@@ -3535,7 +3543,7 @@ void EncAdaptiveLoopFilter::determineControlIdcValues(CodingStructure &cs, const
   {
     std::copy_n(curFilterEnabled, MAX_NUM_CC_ALF_FILTERS, filterEnabled);
 
-    std::sort(filterIdxCount, filterIdxCount + MAX_NUM_CC_ALF_FILTERS, compareCounts);
+    std::stable_sort(filterIdxCount, filterIdxCount + MAX_NUM_CC_ALF_FILTERS, compareCounts);
 
     int filterIdc = 1;
     ccAlfFilterCount = 0;
@@ -3636,7 +3644,7 @@ void EncAdaptiveLoopFilter::getFrameStatsCcalf(ComponentID compIdx, int filterId
 
 void EncAdaptiveLoopFilter::deriveCcAlfFilter( CodingStructure& cs, ComponentID compID, const PelUnitBuf& orgYuv, const PelUnitBuf& tempDecYuvBuf, const PelUnitBuf& dstYuv )
 {
-  if (!cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y))
+  if (!cs.slice->getAlfEnabledFlag(COMPONENT_Y))
   {
     m_ccAlfFilterParam.ccAlfFilterEnabled[compID - 1] = false;
     return;
@@ -3927,11 +3935,11 @@ void EncAdaptiveLoopFilter::deriveCcAlfFilter( CodingStructure& cs, ComponentID
       m_reuseApsId[compID - 1] = ccalfReuseApsId;
       if (compID == COMPONENT_Cb)
       {
-        cs.slice->setTileGroupCcAlfCbApsId(ccalfReuseApsId);
+        cs.slice->setCcAlfCbApsId(ccalfReuseApsId);
       }
       else
       {
-        cs.slice->setTileGroupCcAlfCrApsId(ccalfReuseApsId);
+        cs.slice->setCcAlfCrApsId(ccalfReuseApsId);
       }
     }
   }
@@ -4076,7 +4084,18 @@ void EncAdaptiveLoopFilter::getBlkStatsCcAlf(AlfCovariance &alfCovariance, const
   }
 
   Pel ELocal[MAX_NUM_CC_ALF_CHROMA_COEFF][1];
-
+  // Scale factors for the CC-ALF statistics accumulated below: y is weighted by 1/target and
+  // E by 1/target^2. A target of exactly 0 zeroes the y weight and leaves the E weight at 1.
+  const double filterStrengthTarget = m_encCfg->getCCALFStrengthTarget();
+  const bool   hasStrengthTarget    = filterStrengthTarget != 0.0;
+  double       filterStrengthTargetY = 0.0;
+  double       filterStrengthTargetE = 1.0;
+  if (hasStrengthTarget)
+  {
+    const double invTarget = 1 / filterStrengthTarget;
+    filterStrengthTargetY  = invTarget;
+    filterStrengthTargetE  = invTarget * invTarget;
+  }
   for (int i = 0; i < compArea.height; i++)
   {
     int vbDistance = ((i << getComponentScaleY(compID, m_chromaFormat)) % vbCTUHeight) - vbPos;
@@ -4105,11 +4124,11 @@ void EncAdaptiveLoopFilter::getBlkStatsCcAlf(AlfCovariance &alfCovariance, const
             {
               if (m_alfWSSD)
               {
-                alfCovariance.E[b0][b1][k][l] += weight * (ELocal[k][b0] * (double)ELocal[l][b1]);
+                alfCovariance.E[b0][b1][k][l] += filterStrengthTargetE * weight * (ELocal[k][b0] * (double)ELocal[l][b1]);
               }
               else
               {
-                alfCovariance.E[b0][b1][k][l] += ELocal[k][b0] * (double)ELocal[l][b1];
+                alfCovariance.E[b0][b1][k][l] += filterStrengthTargetE * ELocal[k][b0] * (double)ELocal[l][b1];
               }
             }
           }
@@ -4118,11 +4137,11 @@ void EncAdaptiveLoopFilter::getBlkStatsCcAlf(AlfCovariance &alfCovariance, const
         {
           if (m_alfWSSD)
           {
-            alfCovariance.y[b][k] += weight * (ELocal[k][b] * (double)yLocal);
+            alfCovariance.y[b][k] += filterStrengthTargetY * weight * (ELocal[k][b] * (double)yLocal);
           }
           else
           {
-            alfCovariance.y[b][k] += ELocal[k][b] * (double)yLocal;
+            alfCovariance.y[b][k] += filterStrengthTargetY * ELocal[k][b] * (double)yLocal;
           }
         }
       }
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h
index 12dbc00c29c0f888a6504878a22145deb8fbeea2..6e3f25d9ff0c3df1a41089320d96af757c9e297c 100644
--- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h
+++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index fa16f2895268bf28700edff711e19d0ead12c15d..2fdec7f1d61ff425c92fcf3d56c210c943775a12 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -161,9 +161,10 @@ protected:
   int       m_iFrameRate;
   int       m_FrameSkip;
   uint32_t      m_temporalSubsampleRatio;
-  int       m_iSourceWidth;
-  int       m_iSourceHeight;
+  int       m_sourceWidth;
+  int       m_sourceHeight;
   Window    m_conformanceWindow;
+  int       m_sourcePadding[2];
   int       m_framesToBeEncoded;
   double    m_adLambdaModifier[ MAX_TLAYER ];
   std::vector<double> m_adIntraLambdaModifier;
@@ -174,6 +175,7 @@ protected:
   bool      m_printFrameMSE;
   bool      m_printSequenceMSE;
   bool      m_printMSSSIM;
+  bool      m_printWPSNR;
   bool      m_cabacZeroWordPaddingEnabled;
 
   bool      m_gciPresentFlag;
@@ -233,6 +235,9 @@ protected:
   bool      m_noCraConstraintFlag;
   bool      m_noGdrConstraintFlag;
   bool      m_noApsConstraintFlag;
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  bool      m_generalLowerBitRateConstraintFlag;
+#endif
 
   /* profile & level */
   Profile::Name m_profile;
@@ -269,6 +274,7 @@ protected:
   int       m_maxDecPicBuffering[MAX_TLAYER];
   int       m_maxNumReorderPics[MAX_TLAYER];
   int       m_drapPeriod;
+  int       m_edrapPeriod;
 
   int       m_iQP;                              //  if (AdaptiveQP == OFF)
   ChromaQpMappingTableParams m_chromaQpMappingTableParams;
@@ -276,7 +282,6 @@ protected:
   int       m_intraQPOffset;                    ///< QP offset for intra slice (integer)
   int       m_lambdaFromQPEnable;               ///< enable lambda derivation from QP
 #endif
-  int       m_aiPad[2];
 
   bool      m_AccessUnitDelimiter;               ///< add Access Unit Delimiter NAL units
   bool      m_enablePictureHeaderInSliceHeader;  ///< Enable Picture Header in Slice Header
@@ -299,6 +304,13 @@ protected:
   bool                  m_subPicIdMappingInSpsFlag;
   unsigned              m_subPicIdLen;
   std::vector<uint16_t> m_subPicId;
+#if GDR_ENABLED
+  bool      m_gdrEnabled;  
+  unsigned  m_gdrPocStart;
+  unsigned  m_gdrPeriod;
+  int       m_gdrInterval;  
+  bool      m_gdrNoHash;  
+#endif
   bool      m_useSplitConsOverride;
   unsigned  m_uiMinQT[3]; //0: I slice; 1: P/B slice, 2: I slice chroma
   unsigned  m_uiMaxBT[3]; //0: I slice; 1: P/B slice, 2: I slice chroma
@@ -394,20 +406,23 @@ protected:
   uint32_t  m_log2MaxTbSize;
 
   //====== Loop/Deblock Filter ========
-  bool      m_bLoopFilterDisable;
-  bool      m_loopFilterOffsetInPPS;
-  int       m_loopFilterBetaOffsetDiv2;
-  int       m_loopFilterTcOffsetDiv2;
-  int       m_loopFilterCbBetaOffsetDiv2;
-  int       m_loopFilterCbTcOffsetDiv2;
-  int       m_loopFilterCrBetaOffsetDiv2;
-  int       m_loopFilterCrTcOffsetDiv2;
+  bool      m_deblockingFilterDisable;
+  bool      m_deblockingFilterOffsetInPPS;
+  int       m_deblockingFilterBetaOffsetDiv2;
+  int       m_deblockingFilterTcOffsetDiv2;
+  int       m_deblockingFilterCbBetaOffsetDiv2;
+  int       m_deblockingFilterCbTcOffsetDiv2;
+  int       m_deblockingFilterCrBetaOffsetDiv2;
+  int       m_deblockingFilterCrTcOffsetDiv2;
 #if W0038_DB_OPT
   int       m_deblockingFilterMetric;
 #else
   bool      m_DeblockingFilterMetric;
 #endif
   bool      m_bUseSAO;
+#if JVET_W0129_ENABLE_ALF_TRUEORG
+  bool      m_saoTrueOrg;
+#endif
   bool      m_bTestSAODisableAtPictureLevel;
   double    m_saoEncodingRate;       // When non-0 SAO early picture termination is enabled for luma and chroma
   double    m_saoEncodingRateChroma; // The SAO early picture termination rate to use for chroma (when m_SaoEncodingRate is >0). If <=0, use results for luma.
@@ -428,7 +443,9 @@ protected:
   //====== Quality control ========
   int       m_iMaxDeltaQP;                      //  Max. absolute delta QP (1:default)
   int       m_cuQpDeltaSubdiv;                  //  Max. subdivision level for a CuDQP (0:default)
-  int       m_cuChromaQpOffsetSubdiv;           ///< If negative, then do not apply chroma qp offsets.
+  unsigned  m_cuChromaQpOffsetSubdiv;           ///< Max. subdivision level for a chroma QP adjustment (0:default)
+  bool      m_cuChromaQpOffsetEnabled;          ///< Local chroma QP offset enable flag
+  std::vector<ChromaQpAdj> m_cuChromaQpOffsetList; ///< Local chroma QP offsets list (to be signalled in PPS)
 
   int       m_chromaCbQpOffset;                 //  Chroma Cb QP Offset (0:default)
   int       m_chromaCrQpOffset;                 //  Chroma Cr Qp Offset (0:default)
@@ -447,6 +464,10 @@ protected:
   ChromaFormat m_chromaFormatIDC;
 
   bool      m_extendedPrecisionProcessingFlag;
+  bool      m_tsrcRicePresentFlag;
+#if JVET_W0046_RLSCP
+  bool      m_reverseLastSigCoeffEnabledFlag;
+#endif
   bool      m_highPrecisionOffsetsEnabledFlag;
   bool      m_bUseAdaptiveQP;
   int       m_iQPAdaptationRange;
@@ -478,10 +499,29 @@ protected:
   uint32_t      m_log2MaxTransformSkipBlockSize;
   bool      m_transformSkipRotationEnabledFlag;
   bool      m_transformSkipContextEnabledFlag;
+  bool      m_rrcRiceExtensionEnableFlag;
   bool      m_persistentRiceAdaptationEnabledFlag;
   bool      m_cabacBypassAlignmentEnabledFlag;
 #if SHARP_LUMA_DELTA_QP
   LumaLevelToDeltaQPMapping m_lumaLevelToDeltaQPMapping; ///< mapping from luma level to delta QP.
+#endif
+  bool      m_smoothQPReductionEnable;
+#if JVET_W0043
+  int       m_smoothQPReductionPeriodicity;
+  double    m_smoothQPReductionThresholdIntra;
+  double    m_smoothQPReductionModelScaleIntra;
+  double    m_smoothQPReductionModelOffsetIntra;
+  int       m_smoothQPReductionLimitIntra;
+  double    m_smoothQPReductionThresholdInter;
+  double    m_smoothQPReductionModelScaleInter;
+  double    m_smoothQPReductionModelOffsetInter;
+  int       m_smoothQPReductionLimitInter;
+#else
+  double    m_smoothQPReductionThreshold;
+  double    m_smoothQPReductionModelScale;
+  double    m_smoothQPReductionModelOffset;
+  int       m_smoothQPReductionPeriodicity;
+  int       m_smoothQPReductionLimit;
 #endif
   int*      m_aidQP;
   uint32_t      m_uiDeltaQpRD;
@@ -518,11 +558,16 @@ protected:
   bool      m_pictureTimingSEIEnabled;
   bool      m_frameFieldInfoSEIEnabled;
   bool      m_dependentRAPIndicationSEIEnabled;
+  bool      m_edrapIndicationSEIEnabled;
   bool      m_framePackingSEIEnabled;
   int       m_framePackingSEIType;
   int       m_framePackingSEIId;
   int       m_framePackingSEIQuincunx;
   int       m_framePackingSEIInterpretation;
+  bool      m_doSEIEnabled;
+  bool      m_doSEICancelFlag;
+  bool      m_doSEIPersistenceFlag;
+  int       m_doSEITransformType;
   bool      m_parameterSetsInclusionIndicationSEIEnabled;
   bool      m_selfContainedClvsFlag;
   bool      m_bpDeltasGOPStructure;
@@ -624,6 +669,19 @@ protected:
   uint32_t  m_aveSEIAmbientIlluminance;
   uint16_t  m_aveSEIAmbientLightX;
   uint16_t  m_aveSEIAmbientLightY;
+  // colour transform information sei
+  bool      m_ctiSEIEnabled;
+  uint32_t  m_ctiSEIId;
+  bool      m_ctiSEISignalInfoFlag;
+  bool      m_ctiSEIFullRangeFlag;
+  uint32_t  m_ctiSEIPrimaries;
+  uint32_t  m_ctiSEITransferFunction;
+  uint32_t  m_ctiSEIMatrixCoefs;
+  bool      m_ctiSEICrossComponentFlag;
+  bool      m_ctiSEICrossComponentInferred;
+  uint32_t  m_ctiSEINumberChromaLut;
+  int       m_ctiSEIChromaOffset;
+  LutModel  m_ctiSEILut[MAX_NUM_COMPONENT];
 // ccv sei
   bool      m_ccvSEIEnabled;
   bool      m_ccvSEICancelFlag;
@@ -637,6 +695,77 @@ protected:
   double    m_ccvSEIMinLuminanceValue;
   double    m_ccvSEIMaxLuminanceValue;
   double    m_ccvSEIAvgLuminanceValue;
+  // sdi sei
+  bool              m_sdiSEIEnabled;
+  int               m_sdiSEIMaxLayersMinus1;
+  bool              m_sdiSEIMultiviewInfoFlag;
+  bool              m_sdiSEIAuxiliaryInfoFlag;
+  int               m_sdiSEIViewIdLenMinus1;
+  std::vector<uint32_t>  m_sdiSEILayerId;
+  std::vector<uint32_t>  m_sdiSEIViewIdVal;
+  std::vector<uint32_t>  m_sdiSEIAuxId;
+  std::vector<uint32_t>  m_sdiSEINumAssociatedPrimaryLayersMinus1;
+  // mai sei
+  bool              m_maiSEIEnabled;
+  bool              m_maiSEIIntrinsicParamFlag;
+  bool              m_maiSEIExtrinsicParamFlag;
+  int               m_maiSEINumViewsMinus1;
+  bool              m_maiSEIIntrinsicParamsEqualFlag;
+  int               m_maiSEIPrecFocalLength;
+  int               m_maiSEIPrecPrincipalPoint;
+  int               m_maiSEIPrecSkewFactor;
+  std::vector<bool> m_maiSEISignFocalLengthX;
+  std::vector<uint32_t>  m_maiSEIExponentFocalLengthX;
+  std::vector<uint32_t>  m_maiSEIMantissaFocalLengthX;
+  std::vector<bool> m_maiSEISignFocalLengthY;
+  std::vector<uint32_t>  m_maiSEIExponentFocalLengthY;
+  std::vector<uint32_t>  m_maiSEIMantissaFocalLengthY;
+  std::vector<bool> m_maiSEISignPrincipalPointX;
+  std::vector<uint32_t>  m_maiSEIExponentPrincipalPointX;
+  std::vector<uint32_t>  m_maiSEIMantissaPrincipalPointX;
+  std::vector<bool> m_maiSEISignPrincipalPointY;
+  std::vector<uint32_t>  m_maiSEIExponentPrincipalPointY;
+  std::vector<uint32_t>  m_maiSEIMantissaPrincipalPointY;
+  std::vector<bool> m_maiSEISignSkewFactor;
+  std::vector<uint32_t>  m_maiSEIExponentSkewFactor;
+  std::vector<uint32_t>  m_maiSEIMantissaSkewFactor;
+  int               m_maiSEIPrecRotationParam;
+  int               m_maiSEIPrecTranslationParam;
+#if JVET_W0078_MVP_SEI 
+  // mvp sei
+  bool              m_mvpSEIEnabled;
+  int               m_mvpSEINumViewsMinus1;
+  std::vector<uint32_t>  m_mvpSEIViewPosition;
+#endif
+  // aci sei
+  bool      m_aciSEIEnabled;
+  bool      m_aciSEICancelFlag;
+  int       m_aciSEIUseIdc;
+  int       m_aciSEIBitDepthMinus8;
+  int       m_aciSEITransparentValue;
+  int       m_aciSEIOpaqueValue;
+  bool      m_aciSEIIncrFlag;
+  bool      m_aciSEIClipFlag;
+  bool      m_aciSEIClipTypeFlag;
+  // dri sei
+  bool      m_driSEIEnabled;
+  bool      m_driSEIZNearFlag;
+  bool      m_driSEIZFarFlag;
+  bool      m_driSEIDMinFlag;
+  bool      m_driSEIDMaxFlag;
+  double    m_driSEIZNear;
+  double    m_driSEIZFar;
+  double    m_driSEIDMin;
+  double    m_driSEIDMax;
+  int       m_driSEIDepthRepresentationType;
+  int       m_driSEIDisparityRefViewId;
+  int       m_driSEINonlinearNumMinus1;
+  std::vector<uint32_t> m_driSEINonlinearModel;
+  std::string           m_arSEIFileRoot;  // Annotated region SEI - initialized from external file
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  bool      m_constrainedRaslEncoding;
+#endif
+
   //====== Weighted Prediction ========
   bool      m_useWeightedPred;       //< Use of Weighting Prediction (P_SLICE)
   bool      m_useWeightedBiPred;    //< Use of Bi-directional Weighting Prediction (B_SLICE)
@@ -683,10 +812,8 @@ protected:
   CostMode  m_costMode;                                       ///< The cost function to use, primarily when considering lossless coding.
   bool      m_TSRCdisableLL;                                  ///< Disable TSRC for lossless
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   OPI       m_opi;
   bool      m_OPIEnabled;                                     ///< enable Operating Point Information (OPI)
-#endif
 
   DCI       m_dci;
   bool      m_DCIEnabled;                                     ///< enable Decoding Capability Information (DCI)
@@ -713,8 +840,9 @@ protected:
   bool      m_overscanAppropriateFlag;                        ///< Indicates whether conformant decoded pictures are suitable for display using overscan
   bool      m_videoFullRangeFlag;                             ///< Indicates the black level and range of luma and chroma signals
 
-  bool      m_bEfficientFieldIRAPEnabled;                     ///< enable to code fields in a specific, potentially more efficient, order.
-  bool      m_bHarmonizeGopFirstFieldCoupleEnabled;
+  bool m_fieldSeqFlag;
+  bool m_efficientFieldIRAPEnabled;   ///< enable to code fields in a specific, potentially more efficient, order.
+  bool m_harmonizeGopFirstFieldCoupleEnabled;
 
   std::string m_summaryOutFilename;                           ///< filename to use for producing summary output file.
   std::string m_summaryPicFilenameBase;                       ///< Base filename to use for producing summary picture output files. The actual filenames used will have I.txt, P.txt and B.txt appended.
@@ -732,17 +860,19 @@ protected:
 
   CfgVPSParameters m_cfgVPSParameters;
 
-#if ENABLE_SPLIT_PARALLELISM
-  int         m_numSplitThreads;
-  bool        m_forceSingleSplitThread;
-#endif
-
   bool        m_alf;                                          ///< Adaptive Loop Filter
-#if JVET_T0064
-  double      m_alfStrength;
+#if JVET_W0129_ENABLE_ALF_TRUEORG
+  bool        m_alfTrueOrg;
+#else
+  bool        m_alfSaoTrueOrg;
+#endif
+  double      m_alfStrengthLuma;
   bool        m_alfAllowPredefinedFilters;
   double      m_ccalfStrength;
-#endif
+  double      m_alfStrengthChroma;
+  double      m_alfStrengthTargetLuma;
+  double      m_alfStrengthTargetChroma;
+  double      m_ccalfStrengthTarget;
   bool        m_ccalf;
   int         m_ccalfQpThreshold;
 #if JVET_O0756_CALCULATE_HDRMETRICS
@@ -767,6 +897,10 @@ protected:
   int         m_upscaledOutput;
   int         m_numRefLayers[MAX_VPS_LAYERS];
   bool        m_avoidIntraInDepLayer;
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  bool        m_craAPSreset;
+  bool        m_rprRASLtoolSwitch;
+#endif
 
 public:
   EncCfg()
@@ -775,7 +909,7 @@ public:
 
   virtual ~EncCfg()
   {}
-
+  std::map<uint32_t, SEIAnnotatedRegions::AnnotatedRegionObject> m_arObjects;
   void setProfile(Profile::Name profile) { m_profile = profile; }
   void setLevel(Level::Tier tier, Level::Name level) { m_levelTier = tier; m_level = level; }
   bool      getFrameOnlyConstraintFlag() const { return m_frameOnlyConstraintFlag; }
@@ -900,13 +1034,16 @@ public:
   void      setNoGdrConstraintFlag(bool val) { m_noGdrConstraintFlag = val; }
   bool      getNoApsConstraintFlag() const { return m_noApsConstraintFlag; }
   void      setNoApsConstraintFlag(bool val) { m_noApsConstraintFlag = val; }
-
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  bool      getGeneralLowerBitRateConstraintFlag() const { return m_generalLowerBitRateConstraintFlag; }
+  void      setGeneralLowerBitRateConstraintFlag(bool val) { m_generalLowerBitRateConstraintFlag = val; }
+#endif
 
   void      setFrameRate                    ( int   i )      { m_iFrameRate = i; }
   void      setFrameSkip                    ( uint32_t  i )      { m_FrameSkip = i; }
   void      setTemporalSubsampleRatio       ( uint32_t  i )      { m_temporalSubsampleRatio = i; }
-  void      setSourceWidth                  ( int   i )      { m_iSourceWidth = i; }
-  void      setSourceHeight                 ( int   i )      { m_iSourceHeight = i; }
+  void      setSourceWidth                  ( int   i )      { m_sourceWidth = i; }
+  void      setSourceHeight                 ( int   i )      { m_sourceHeight = i; }
 
   Window   &getConformanceWindow()                           { return m_conformanceWindow; }
   void      setConformanceWindow (int confLeft, int confRight, int confTop, int confBottom ) { m_conformanceWindow.setWindow (confLeft, confRight, confTop, confBottom); }
@@ -928,6 +1065,9 @@ public:
   bool      getPrintMSSSIM                  ()         const { return m_printMSSSIM;               }
   void      setPrintMSSSIM                  (bool value)     { m_printMSSSIM = value;              }
 
+  bool      getPrintWPSNR                   ()         const { return m_printWPSNR;               }
+  void      setPrintWPSNR                   (bool value)     { m_printWPSNR = value;              }
+
   bool      getCabacZeroWordPaddingEnabled()           const { return m_cabacZeroWordPaddingEnabled;  }
   void      setCabacZeroWordPaddingEnabled(bool value)       { m_cabacZeroWordPaddingEnabled = value; }
 
@@ -964,6 +1104,7 @@ public:
   void      setMaxDecPicBuffering           ( uint32_t u, uint32_t tlayer ) { m_maxDecPicBuffering[tlayer] = u;    }
   void      setMaxNumReorderPics            ( int  i, uint32_t tlayer ) { m_maxNumReorderPics[tlayer] = i;    }
   void      setDrapPeriod                   (int drapPeriod) { m_drapPeriod = drapPeriod; }
+  void      setEdrapPeriod                  (int edrapPeriod) { m_edrapPeriod = edrapPeriod; }
 
   void      setBaseQP                       ( int   i )      { m_iQP = i; }
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
@@ -972,7 +1113,7 @@ public:
 #endif
   void      setChromaQpMappingTableParams   (const ChromaQpMappingTableParams &params) { m_chromaQpMappingTableParams = params; }
 
-  void      setPad                          ( int*  iPad                   )      { for ( int i = 0; i < 2; i++ ) m_aiPad[i] = iPad[i]; }
+  void      setSourcePadding                ( const int* padding )          { for ( int i = 0; i < 2; i++ ) m_sourcePadding[i] = padding[i]; } // copies padding[0] and padding[1]; the caller's array is only read
 
   int       getMaxRefPicNum                 ()                              { return m_iMaxRefPicNum;           }
   void      setMaxRefPicNum                 ( int iMaxRefPicNum )           { m_iMaxRefPicNum = iMaxRefPicNum;  }
@@ -984,6 +1125,19 @@ public:
   void      setMinQTSizes                   ( unsigned* minQT)   { m_uiMinQT[0] = minQT[0]; m_uiMinQT[1] = minQT[1]; m_uiMinQT[2] = minQT[2]; }
   void      setMaxBTSizes                   ( unsigned* maxBT)   { m_uiMaxBT[0] = maxBT[0]; m_uiMaxBT[1] = maxBT[1]; m_uiMaxBT[2] = maxBT[2]; }
   void      setMaxTTSizes                   ( unsigned* maxTT)   { m_uiMaxTT[0] = maxTT[0]; m_uiMaxTT[1] = maxTT[1]; m_uiMaxTT[2] = maxTT[2]; }
+#if GDR_ENABLED
+  void      setGdrEnabled(bool b)       { m_gdrEnabled  = b; }
+  void      setGdrPeriod(unsigned u)    { m_gdrPeriod   = u; }
+  void      setGdrPocStart(unsigned u)  { m_gdrPocStart = u; }
+  void      setGdrInterval(int i)       { m_gdrInterval = i; }  
+  void      setGdrNoHash(bool b)        { m_gdrNoHash   = b; }    
+
+  bool      getGdrEnabled()  const      { return m_gdrEnabled;  } // GDR getters only read members, so they are const
+  unsigned  getGdrPeriod()   const      { return m_gdrPeriod;   }
+  unsigned  getGdrPocStart() const      { return m_gdrPocStart; }
+  int       getGdrInterval() const      { return m_gdrInterval; }
+  bool      getGdrNoHash()   const      { return m_gdrNoHash;   }
+#endif
   void      setMaxMTTHierarchyDepth         ( unsigned uiMaxMTTHierarchyDepth, unsigned uiMaxMTTHierarchyDepthI, unsigned uiMaxMTTHierarchyDepthIChroma )
                                                              { m_uiMaxMTTHierarchyDepth = uiMaxMTTHierarchyDepth; m_uiMaxMTTHierarchyDepthI = uiMaxMTTHierarchyDepthI; m_uiMaxMTTHierarchyDepthIChroma = uiMaxMTTHierarchyDepthIChroma; }
   unsigned  getMaxMTTHierarchyDepth         ()         const { return m_uiMaxMTTHierarchyDepth; }
@@ -1209,14 +1363,14 @@ public:
   void      setLog2MaxTbSize                ( uint32_t  u )   { m_log2MaxTbSize = u; }
 
   //====== Loop/Deblock Filter ========
-  void      setLoopFilterDisable            ( bool  b )      { m_bLoopFilterDisable       = b; }
-  void      setLoopFilterOffsetInPPS        ( bool  b )      { m_loopFilterOffsetInPPS      = b; }
-  void      setLoopFilterBetaOffset         ( int   i )      { m_loopFilterBetaOffsetDiv2  = i; }
-  void      setLoopFilterTcOffset           ( int   i )      { m_loopFilterTcOffsetDiv2    = i; }
-  void      setLoopFilterCbBetaOffset       ( int   i )      { m_loopFilterCbBetaOffsetDiv2  = i; }
-  void      setLoopFilterCbTcOffset         ( int   i )      { m_loopFilterCbTcOffsetDiv2    = i; }
-  void      setLoopFilterCrBetaOffset       ( int   i )      { m_loopFilterCrBetaOffsetDiv2  = i; }
-  void      setLoopFilterCrTcOffset         ( int   i )      { m_loopFilterCrTcOffsetDiv2    = i; }
+  void      setDeblockingFilterDisable      ( bool  b )      { m_deblockingFilterDisable           = b; }
+  void      setDeblockingFilterOffsetInPPS  ( bool  b )      { m_deblockingFilterOffsetInPPS       = b; }
+  void      setDeblockingFilterBetaOffset   ( int   i )      { m_deblockingFilterBetaOffsetDiv2    = i; }
+  void      setDeblockingFilterTcOffset     ( int   i )      { m_deblockingFilterTcOffsetDiv2      = i; }
+  void      setDeblockingFilterCbBetaOffset ( int   i )      { m_deblockingFilterCbBetaOffsetDiv2  = i; }
+  void      setDeblockingFilterCbTcOffset   ( int   i )      { m_deblockingFilterCbTcOffsetDiv2    = i; }
+  void      setDeblockingFilterCrBetaOffset ( int   i )      { m_deblockingFilterCrBetaOffsetDiv2  = i; }
+  void      setDeblockingFilterCrTcOffset   ( int   i )      { m_deblockingFilterCrTcOffsetDiv2    = i; }
 #if W0038_DB_OPT
   void      setDeblockingFilterMetric       ( int   i )      { m_deblockingFilterMetric = i; }
 #else
@@ -1235,8 +1389,11 @@ public:
   //====== Quality control ========
   void      setMaxDeltaQP                   ( int   i )      { m_iMaxDeltaQP = i; }
   void      setCuQpDeltaSubdiv              ( int   i )      { m_cuQpDeltaSubdiv = i; }
-  int       getCuChromaQpOffsetSubdiv       ()         const { return m_cuChromaQpOffsetSubdiv;  }
-  void      setCuChromaQpOffsetSubdiv       (int value)      { m_cuChromaQpOffsetSubdiv = value; }
+  unsigned  getCuChromaQpOffsetSubdiv       ()         const { return m_cuChromaQpOffsetSubdiv;  }
+  void      setCuChromaQpOffsetSubdiv       ( unsigned value ) { m_cuChromaQpOffsetSubdiv = value; }
+  bool      getCuChromaQpOffsetEnabled      ()         const { return m_cuChromaQpOffsetEnabled;  }
+  void      setCuChromaQpOffsetEnabled      ( bool value )   { m_cuChromaQpOffsetEnabled = value; }
+  void      setCuChromaQpOffsetList         (const std::vector<ChromaQpAdj> &list) { m_cuChromaQpOffsetList = list; }
 
   void      setChromaCbQpOffset             ( int   i )      { m_chromaCbQpOffset = i; }
   void      setChromaCrQpOffset             ( int   i )      { m_chromaCrQpOffset = i; }
@@ -1268,10 +1425,47 @@ public:
   void      setLumaLevelToDeltaQPControls( const LumaLevelToDeltaQPMapping &lumaLevelToDeltaQPMapping ) { m_lumaLevelToDeltaQPMapping=lumaLevelToDeltaQPMapping; }
   const LumaLevelToDeltaQPMapping& getLumaLevelToDeltaQPMapping() const { return m_lumaLevelToDeltaQPMapping; }
 #endif
-
+  bool      getSmoothQPReductionEnable()                  const { return m_smoothQPReductionEnable; }
+  void      setSmoothQPReductionEnable(bool value)        { m_smoothQPReductionEnable = value; }
+#if JVET_W0043
+  int       getSmoothQPReductionPeriodicity()                 const { return m_smoothQPReductionPeriodicity; }
+  void      setSmoothQPReductionPeriodicity(int value)        { m_smoothQPReductionPeriodicity = value; }
+  double    getSmoothQPReductionThresholdIntra()              const { return m_smoothQPReductionThresholdIntra; }
+  void      setSmoothQPReductionThresholdIntra(double value)  { m_smoothQPReductionThresholdIntra = value; }
+  double    getSmoothQPReductionModelScaleIntra()              const { return m_smoothQPReductionModelScaleIntra; }
+  void      setSmoothQPReductionModelScaleIntra(double value) { m_smoothQPReductionModelScaleIntra = value; }
+  double    getSmoothQPReductionModelOffsetIntra()             const { return m_smoothQPReductionModelOffsetIntra; }
+  void      setSmoothQPReductionModelOffsetIntra(double value) { m_smoothQPReductionModelOffsetIntra = value; }
+  int       getSmoothQPReductionLimitIntra()                   const { return m_smoothQPReductionLimitIntra; }
+  void      setSmoothQPReductionLimitIntra(int value)          { m_smoothQPReductionLimitIntra = value; }
+  double    getSmoothQPReductionThresholdInter()               const { return m_smoothQPReductionThresholdInter; }
+  void      setSmoothQPReductionThresholdInter(double value)   { m_smoothQPReductionThresholdInter = value; }
+  double    getSmoothQPReductionModelScaleInter()              const { return m_smoothQPReductionModelScaleInter; }
+  void      setSmoothQPReductionModelScaleInter(double value) { m_smoothQPReductionModelScaleInter = value; }
+  double    getSmoothQPReductionModelOffsetInter()             const { return m_smoothQPReductionModelOffsetInter; }
+  void      setSmoothQPReductionModelOffsetInter(double value) { m_smoothQPReductionModelOffsetInter = value; }
+  int       getSmoothQPReductionLimitInter()                   const { return m_smoothQPReductionLimitInter; }
+  void      setSmoothQPReductionLimitInter(int value)          { m_smoothQPReductionLimitInter = value; }
+#else
+  double    getSmoothQPReductionThreshold()               const { return m_smoothQPReductionThreshold; }
+  void      setSmoothQPReductionThreshold(double value)   { m_smoothQPReductionThreshold = value; }
+  double    getSmoothQPReductionModelScale()              const { return m_smoothQPReductionModelScale; }
+  void      setSmoothQPReductionModelScale(double value)  { m_smoothQPReductionModelScale = value; }
+  double    getSmoothQPReductionModelOffset()             const { return m_smoothQPReductionModelOffset; }
+  void      setSmoothQPReductionModelOffset(double value) { m_smoothQPReductionModelOffset = value; }
+  int       getSmoothQPReductionPeriodicity()             const { return m_smoothQPReductionPeriodicity; }
+  void      setSmoothQPReductionPeriodicity(int value)    { m_smoothQPReductionPeriodicity = value; }
+  int       getSmoothQPReductionLimit()                   const { return m_smoothQPReductionLimit; }
+  void      setSmoothQPReductionLimit(int value)          { m_smoothQPReductionLimit = value; }
+#endif
   bool      getExtendedPrecisionProcessingFlag         ()         const { return m_extendedPrecisionProcessingFlag;  }
   void      setExtendedPrecisionProcessingFlag         (bool value)     { m_extendedPrecisionProcessingFlag = value; }
-
+  bool      getTSRCRicePresentFlag         ()         const { return m_tsrcRicePresentFlag;  }
+  void      setTSRCRicePresentFlag         (bool value)     { m_tsrcRicePresentFlag = value; }
+#if JVET_W0046_RLSCP
+  bool      getReverseLastSigCoeffEnabledFlag         ()         const { return m_reverseLastSigCoeffEnabledFlag;  }
+  void      setReverseLastSigCoeffEnabledFlag         (bool value)     { m_reverseLastSigCoeffEnabledFlag = value; }
+#endif
   bool      getHighPrecisionOffsetsEnabledFlag() const { return m_highPrecisionOffsetsEnabledFlag; }
   void      setHighPrecisionOffsetsEnabledFlag(bool value) { m_highPrecisionOffsetsEnabledFlag = value; }
 
@@ -1286,8 +1480,8 @@ public:
   int       getFrameRate                    () const     { return  m_iFrameRate; }
   uint32_t      getFrameSkip                    () const     { return  m_FrameSkip; }
   uint32_t      getTemporalSubsampleRatio       () const     { return  m_temporalSubsampleRatio; }
-  int       getSourceWidth                  () const     { return  m_iSourceWidth; }
-  int       getSourceHeight                 () const     { return  m_iSourceHeight; }
+  int       getSourceWidth                  () const     { return  m_sourceWidth; }
+  int       getSourceHeight                 () const     { return  m_sourceHeight; }
   int       getFramesToBeEncoded            () const     { return  m_framesToBeEncoded; }
 
   //====== Lambda Modifiers ========
@@ -1306,6 +1500,7 @@ public:
   int       getMaxDecPicBuffering           (uint32_t tlayer) { return m_maxDecPicBuffering[tlayer]; }
   int       getMaxNumReorderPics            (uint32_t tlayer) { return m_maxNumReorderPics[tlayer]; }
   int       getDrapPeriod                   ()     { return m_drapPeriod; }
+  int       getEdrapPeriod                  () const { return m_edrapPeriod; } // plain read of m_edrapPeriod
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
   int       getIntraQPOffset                () const    { return  m_intraQPOffset; }
   int       getLambdaFromQPEnable           () const    { return  m_lambdaFromQPEnable; }
@@ -1315,7 +1510,7 @@ public:
 #else
   int       getBaseQP                       ()       { return  m_iQP; }
 #endif
-  int       getPad                          ( int i )      { CHECK(i >= 2, "Invalid index");                      return  m_aiPad[i]; }
+  int       getSourcePadding                ( int i ) const { CHECK(i < 0 || i >= 2, "Invalid index"); return m_sourcePadding[i]; } // i must be 0 or 1; negative indices are checked too
 
   bool      getAccessUnitDelimiter() const  { return m_AccessUnitDelimiter; }
   void      setAccessUnitDelimiter(bool val){ m_AccessUnitDelimiter = val; }
@@ -1323,14 +1518,14 @@ public:
   void      setEnablePictureHeaderInSliceHeader(bool val) { m_enablePictureHeaderInSliceHeader = val; }
 
   //==== Loop/Deblock Filter ========
-  bool      getLoopFilterDisable            ()      { return  m_bLoopFilterDisable;       }
-  bool      getLoopFilterOffsetInPPS        ()      { return m_loopFilterOffsetInPPS; }
-  int       getLoopFilterBetaOffset         ()      { return m_loopFilterBetaOffsetDiv2; }
-  int       getLoopFilterTcOffset           ()      { return m_loopFilterTcOffsetDiv2; }
-  int       getLoopFilterCbBetaOffset       ()      { return m_loopFilterCbBetaOffsetDiv2; }
-  int       getLoopFilterCbTcOffset         ()      { return m_loopFilterCbTcOffsetDiv2;   }
-  int       getLoopFilterCrBetaOffset       ()      { return m_loopFilterCrBetaOffsetDiv2; }
-  int       getLoopFilterCrTcOffset         ()      { return m_loopFilterCrTcOffsetDiv2;   }
+  bool      getDeblockingFilterDisable            ()      { return m_deblockingFilterDisable;          }
+  bool      getDeblockingFilterOffsetInPPS        ()      { return m_deblockingFilterOffsetInPPS;      }
+  int       getDeblockingFilterBetaOffset         ()      { return m_deblockingFilterBetaOffsetDiv2;   }
+  int       getDeblockingFilterTcOffset           ()      { return m_deblockingFilterTcOffsetDiv2;     }
+  int       getDeblockingFilterCbBetaOffset       ()      { return m_deblockingFilterCbBetaOffsetDiv2; }
+  int       getDeblockingFilterCbTcOffset         ()      { return m_deblockingFilterCbTcOffsetDiv2;   }
+  int       getDeblockingFilterCrBetaOffset       ()      { return m_deblockingFilterCrBetaOffsetDiv2; }
+  int       getDeblockingFilterCrTcOffset         ()      { return m_deblockingFilterCrTcOffsetDiv2;   }
 #if W0038_DB_OPT
   int       getDeblockingFilterMetric       ()      { return m_deblockingFilterMetric; }
 #else
@@ -1414,6 +1609,8 @@ public:
   void setUseBDPCM                                     ( bool b ) { m_useBDPCM  = b;   }
   bool getUseJointCbCr                                 ()         { return m_JointCbCrMode; }
   void setUseJointCbCr                                 (bool b)   { m_JointCbCrMode = b; }
+  bool getRrcRiceExtensionEnableFlag()                 const { return m_rrcRiceExtensionEnableFlag; }
+  void setRrcRiceExtensionEnableFlag(const bool value) { m_rrcRiceExtensionEnableFlag = value; }
   bool getPersistentRiceAdaptationEnabledFlag          ()                 const { return m_persistentRiceAdaptationEnabledFlag;  }
   void setPersistentRiceAdaptationEnabledFlag          (const bool value)       { m_persistentRiceAdaptationEnabledFlag = value; }
   bool getCabacBypassAlignmentEnabledFlag              ()       const      { return m_cabacBypassAlignmentEnabledFlag;  }
@@ -1460,6 +1657,10 @@ public:
   bool      getSingleSlicePerSubPicFlagFlag( )                       { return m_singleSlicePerSubPicFlag;    }
   void      setUseSAO                  (bool bVal)                   { m_bUseSAO = bVal; }
   bool      getUseSAO                  ()                            { return m_bUseSAO; }
+#if JVET_W0129_ENABLE_ALF_TRUEORG
+  void      setSaoTrueOrg              (bool b)                      { m_saoTrueOrg = b; }
+  bool      getSaoTrueOrg              () const                      { return m_saoTrueOrg; }
+#endif
   void  setTestSAODisableAtPictureLevel (bool bVal)                  { m_bTestSAODisableAtPictureLevel = bVal; }
   bool  getTestSAODisableAtPictureLevel ( ) const                    { return m_bTestSAODisableAtPictureLevel; }
 
@@ -1490,6 +1691,8 @@ public:
   bool  getFrameFieldInfoSEIEnabled() const                           { return m_frameFieldInfoSEIEnabled; }
   void  setDependentRAPIndicationSEIEnabled(bool b)                  { m_dependentRAPIndicationSEIEnabled = b; }
   int   getDependentRAPIndicationSEIEnabled() const                  { return m_dependentRAPIndicationSEIEnabled; }
+  void  setEdrapIndicationSEIEnabled(bool b)                         { m_edrapIndicationSEIEnabled = b; }
+  bool  getEdrapIndicationSEIEnabled() const                         { return m_edrapIndicationSEIEnabled; } // bool, matching the member and its setter
   void  setFramePackingArrangementSEIEnabled(bool b)                 { m_framePackingSEIEnabled = b; }
   bool  getFramePackingArrangementSEIEnabled() const                 { return m_framePackingSEIEnabled; }
   void  setFramePackingArrangementSEIType(int b)                     { m_framePackingSEIType = b; }
@@ -1500,6 +1703,14 @@ public:
   int   getFramePackingArrangementSEIQuincunx()                      { return m_framePackingSEIQuincunx; }
   void  setFramePackingArrangementSEIInterpretation(int b)           { m_framePackingSEIInterpretation = b; }
   int   getFramePackingArrangementSEIInterpretation()                { return m_framePackingSEIInterpretation; }
+  void  setDoSEIEnabled(bool b)                                      { m_doSEIEnabled = b; }
+  bool  getDoSEIEnabled() const                                      { return m_doSEIEnabled; }
+  void  setDoSEICancelFlag(bool b)                                   { m_doSEICancelFlag = b; }
+  bool  getDoSEICancelFlag() const                                   { return m_doSEICancelFlag; } // plain read of m_doSEICancelFlag
+  void  setDoSEIPersistenceFlag(bool b)                              { m_doSEIPersistenceFlag = b; }
+  bool  getDoSEIPersistenceFlag() const                              { return m_doSEIPersistenceFlag; } // plain read of m_doSEIPersistenceFlag
+  void  setDoSEITransformType(const int type)                        { m_doSEITransformType = type; }
+  int   getDOSEITransformType() const                                { return m_doSEITransformType; } // NOTE(review): spelled "DOSEI" here but "DoSEI" in setDoSEITransformType; callers must use this exact name
   void  setParameterSetsInclusionIndicationSEIEnabled(bool b)        { m_parameterSetsInclusionIndicationSEIEnabled = b; }
   bool  getParameterSetsInclusionIndicationSEIEnabled() const        { return m_parameterSetsInclusionIndicationSEIEnabled; }
   void  setSelfContainedClvsFlag(bool b)                             { m_selfContainedClvsFlag = b; }
@@ -1557,6 +1768,8 @@ public:
   uint32_t  getOmniViewportSEIHorRange(int idx)                      { return m_omniViewportSEIHorRange[idx]; }
   void  setOmniViewportSEIVerRange(const std::vector<uint32_t>& vi)  { m_omniViewportSEIVerRange = vi; }
   uint32_t  getOmniViewportSEIVerRange(int idx)                      { return m_omniViewportSEIVerRange[idx]; }
+  void  setAnnotatedRegionSEIFileRoot(const std::string &s)          { m_arSEIFileRoot = s; m_arObjects.clear();}   // besides storing s, this also empties m_arObjects
+  const std::string &getAnnotatedRegionSEIFileRoot() const           { return m_arSEIFileRoot; }
   void     setRwpSEIEnabled(bool b)                                                                     { m_rwpSEIEnabled = b; }
   bool     getRwpSEIEnabled()                                                                           { return m_rwpSEIEnabled; }
   void     setRwpSEIRwpCancelFlag(bool b)                                                               { m_rwpSEIRwpCancelFlag = b; }
@@ -1694,6 +1907,31 @@ public:
   uint16_t getAmbientViewingEnvironmentSEIAmbientLightX()            { return m_aveSEIAmbientLightX; }
   void  setAmbientViewingEnvironmentSEIAmbientLightY( uint16_t v )   { m_aveSEIAmbientLightY = v; }
   uint16_t getAmbientViewingEnvironmentSEIAmbientLightY()            { return m_aveSEIAmbientLightY; }
+  // colour transform information SEI
+  void      setCtiSEIEnabled(bool b) { m_ctiSEIEnabled = b; }
+  bool      getCtiSEIEnabled() const { return m_ctiSEIEnabled; }
+  void      setCtiSEIId(uint32_t b) { m_ctiSEIId = b; }
+  uint32_t  getCtiSEIId() const { return m_ctiSEIId; }
+  void      setCtiSEISignalInfoFlag(bool b) { m_ctiSEISignalInfoFlag = b; }
+  bool      getCtiSEISignalInfoFlag() const { return m_ctiSEISignalInfoFlag; }
+  void      setCtiSEIFullRangeFlag(bool b) { m_ctiSEIFullRangeFlag = b; }
+  bool      getCtiSEIFullRangeFlag() const { return m_ctiSEIFullRangeFlag; }
+  uint32_t  getCtiSEIPrimaries() const { return m_ctiSEIPrimaries; }
+  void      setCtiSEIPrimaries(uint32_t v) { m_ctiSEIPrimaries = v; }
+  uint32_t  getCtiSEITransferFunction() const { return m_ctiSEITransferFunction; }
+  void      setCtiSEITransferFunction(uint32_t v) { m_ctiSEITransferFunction = v; }
+  uint32_t  getCtiSEIMatrixCoefs() const { return m_ctiSEIMatrixCoefs; }
+  void      setCtiSEIMatrixCoefs(uint32_t v) { m_ctiSEIMatrixCoefs = v; }
+  void      setCtiSEICrossComponentFlag(bool b) { m_ctiSEICrossComponentFlag = b; }
+  bool      getCtiSEICrossComponentFlag() const { return m_ctiSEICrossComponentFlag; }
+  void      setCtiSEICrossComponentInferred(bool b) { m_ctiSEICrossComponentInferred = b; }
+  bool      getCtiSEICrossComponentInferred() const { return m_ctiSEICrossComponentInferred; }
+  uint32_t  getCtiSEINbChromaLut() const { return m_ctiSEINumberChromaLut; }
+  void      setCtiSEINbChromaLut(uint32_t v) { m_ctiSEINumberChromaLut = v; }
+  int       getCtiSEIChromaOffset() const { return m_ctiSEIChromaOffset; }
+  void      setCtiSEIChromaOffset(int v) { m_ctiSEIChromaOffset = v; }
+  const LutModel&  getCtiSEILut(int idx) const { return m_ctiSEILut[idx]; }   // no range check on idx is done here
+  void      setCtiSEILut(const LutModel& cmp, int idx) { m_ctiSEILut[idx] = cmp; }   // copies cmp into slot idx; no range check on idx is done here
   // ccv SEI
   void     setCcvSEIEnabled(bool b)                                  { m_ccvSEIEnabled = b; }
   bool     getCcvSEIEnabled()                                        { return m_ccvSEIEnabled; }
@@ -1719,6 +1957,140 @@ public:
   double   getCcvSEIMaxLuminanceValue  ()                            { return m_ccvSEIMaxLuminanceValue;  }
   void     setCcvSEIAvgLuminanceValue  (double dValue)               { m_ccvSEIAvgLuminanceValue = dValue; }
   double   getCcvSEIAvgLuminanceValue  ()                            { return m_ccvSEIAvgLuminanceValue;  }
+  // scalability dimension information SEI (setters store values as given; the indexed getters do no range check on idx)
+  void     setSdiSEIEnabled(bool b)                                  { m_sdiSEIEnabled = b; }
+  bool     getSdiSEIEnabled() const                                  { return m_sdiSEIEnabled; }
+  void     setSdiSEIMaxLayersMinus1(int i)                           { m_sdiSEIMaxLayersMinus1 = i; }
+  int      getSdiSEIMaxLayersMinus1() const                          { return m_sdiSEIMaxLayersMinus1; }
+  void     setSdiSEIMultiviewInfoFlag(bool b)                        { m_sdiSEIMultiviewInfoFlag = b; }
+  bool     getSdiSEIMultiviewInfoFlag() const                        { return m_sdiSEIMultiviewInfoFlag; }
+  void     setSdiSEIAuxiliaryInfoFlag(bool b)                        { m_sdiSEIAuxiliaryInfoFlag = b; }
+  bool     getSdiSEIAuxiliaryInfoFlag() const                        { return m_sdiSEIAuxiliaryInfoFlag; }
+  void     setSdiSEIViewIdLenMinus1(int i)                           { m_sdiSEIViewIdLenMinus1 = i; }
+  int      getSdiSEIViewIdLenMinus1() const                          { return m_sdiSEIViewIdLenMinus1; }
+  void     setSdiSEILayerId(const std::vector<uint32_t>& sdiSEILayerId)   { m_sdiSEILayerId = sdiSEILayerId; }
+  uint32_t getSdiSEILayerId(int idx) const                           { return m_sdiSEILayerId[idx]; }
+  void     setSdiSEIViewIdVal(const std::vector<uint32_t>& sdiSEIViewIdVal)   { m_sdiSEIViewIdVal = sdiSEIViewIdVal; }
+  uint32_t getSdiSEIViewIdVal(int idx) const                         { return m_sdiSEIViewIdVal[idx]; }
+  void     setSdiSEIAuxId(const std::vector<uint32_t>& sdiSEIAuxId)       { m_sdiSEIAuxId = sdiSEIAuxId; }
+  uint32_t getSdiSEIAuxId(int idx) const                             { return m_sdiSEIAuxId[idx]; }
+  void     setSdiSEINumAssociatedPrimaryLayersMinus1(const std::vector<uint32_t>& sdiSEINumAssociatedPrimaryLayersMinus1)   { m_sdiSEINumAssociatedPrimaryLayersMinus1 = sdiSEINumAssociatedPrimaryLayersMinus1; }
+  uint32_t getSdiSEINumAssociatedPrimaryLayersMinus1(int idx) const  { return m_sdiSEINumAssociatedPrimaryLayersMinus1[idx]; }
+  // multiview acquisition information SEI (setters store values as given; the indexed getters do no range check on idx)
+  void     setMaiSEIEnabled(bool b)                                  { m_maiSEIEnabled = b; }
+  bool     getMaiSEIEnabled() const                                  { return m_maiSEIEnabled; }
+  void     setMaiSEIIntrinsicParamFlag(bool b)                       { m_maiSEIIntrinsicParamFlag = b; }
+  bool     getMaiSEIIntrinsicParamFlag() const                       { return m_maiSEIIntrinsicParamFlag; }
+  void     setMaiSEIExtrinsicParamFlag(bool b)                       { m_maiSEIExtrinsicParamFlag = b; }
+  bool     getMaiSEIExtrinsicParamFlag() const                       { return m_maiSEIExtrinsicParamFlag; }
+  void     setMaiSEINumViewsMinus1(int i)                            { m_maiSEINumViewsMinus1 = i; }
+  int      getMaiSEINumViewsMinus1() const                           { return m_maiSEINumViewsMinus1; }
+  void     setMaiSEIIntrinsicParamsEqualFlag(bool b)                 { m_maiSEIIntrinsicParamsEqualFlag = b; }
+  bool     getMaiSEIIntrinsicParamsEqualFlag() const                 { return m_maiSEIIntrinsicParamsEqualFlag; }
+  void     setMaiSEIPrecFocalLength(int i)                           { m_maiSEIPrecFocalLength= i; }
+  int      getMaiSEIPrecFocalLength() const                          { return m_maiSEIPrecFocalLength; }
+  void     setMaiSEIPrecPrincipalPoint(int i)                        { m_maiSEIPrecPrincipalPoint = i; }
+  int      getMaiSEIPrecPrincipalPoint() const                       { return m_maiSEIPrecPrincipalPoint; }
+  void     setMaiSEIPrecSkewFactor(int i)                            { m_maiSEIPrecSkewFactor = i; }
+  int      getMaiSEIPrecSkewFactor() const                           { return m_maiSEIPrecSkewFactor; }
+  void     setMaiSEISignFocalLengthX(const std::vector<bool>& maiSEISignFocalLengthX) { m_maiSEISignFocalLengthX = maiSEISignFocalLengthX; }
+  bool     getMaiSEISignFocalLengthX(int idx) const                  { return m_maiSEISignFocalLengthX[idx]; }
+  void     setMaiSEIExponentFocalLengthX(const std::vector<uint32_t>& maiSEIExponentFocalLengthX) { m_maiSEIExponentFocalLengthX = maiSEIExponentFocalLengthX; }
+  uint32_t      getMaiSEIExponentFocalLengthX(int idx) const              { return m_maiSEIExponentFocalLengthX[idx]; }
+  void     setMaiSEIMantissaFocalLengthX(const std::vector<uint32_t>& maiSEIMantissaFocalLengthX) { m_maiSEIMantissaFocalLengthX = maiSEIMantissaFocalLengthX; }
+  uint32_t      getMaiSEIMantissaFocalLengthX(int idx) const              { return m_maiSEIMantissaFocalLengthX[idx]; }
+  void     setMaiSEISignFocalLengthY(const std::vector<bool>& maiSEISignFocalLengthY) { m_maiSEISignFocalLengthY = maiSEISignFocalLengthY; }
+  bool     getMaiSEISignFocalLengthY(int idx) const                  { return m_maiSEISignFocalLengthY[idx]; }
+  void     setMaiSEIExponentFocalLengthY(const std::vector<uint32_t>& maiSEIExponentFocalLengthY) { m_maiSEIExponentFocalLengthY = maiSEIExponentFocalLengthY; }
+  uint32_t      getMaiSEIExponentFocalLengthY(int idx) const              { return m_maiSEIExponentFocalLengthY[idx]; }
+  void     setMaiSEIMantissaFocalLengthY(const std::vector<uint32_t>& maiSEIMantissaFocalLengthY) { m_maiSEIMantissaFocalLengthY = maiSEIMantissaFocalLengthY; }
+  uint32_t      getMaiSEIMantissaFocalLengthY(int idx) const              { return m_maiSEIMantissaFocalLengthY[idx]; }
+  void     setMaiSEISignPrincipalPointX(const std::vector<bool>& maiSEISignPrincipalPointX) { m_maiSEISignPrincipalPointX = maiSEISignPrincipalPointX; }
+  bool     getMaiSEISignPrincipalPointX(int idx) const               { return m_maiSEISignPrincipalPointX[idx]; }
+  void     setMaiSEIExponentPrincipalPointX(const std::vector<uint32_t>& maiSEIExponentPrincipalPointX) { m_maiSEIExponentPrincipalPointX = maiSEIExponentPrincipalPointX; }
+  uint32_t      getMaiSEIExponentPrincipalPointX(int idx) const           { return m_maiSEIExponentPrincipalPointX[idx]; }
+  void     setMaiSEIMantissaPrincipalPointX(const std::vector<uint32_t>& maiSEIMantissaPrincipalPointX) { m_maiSEIMantissaPrincipalPointX = maiSEIMantissaPrincipalPointX; }
+  uint32_t      getMaiSEIMantissaPrincipalPointX(int idx) const           { return m_maiSEIMantissaPrincipalPointX[idx]; }
+  void     setMaiSEISignPrincipalPointY(const std::vector<bool>& maiSEISignPrincipalPointY) { m_maiSEISignPrincipalPointY = maiSEISignPrincipalPointY; }
+  bool     getMaiSEISignPrincipalPointY(int idx) const               { return m_maiSEISignPrincipalPointY[idx]; }
+  void     setMaiSEIExponentPrincipalPointY(const std::vector<uint32_t>& maiSEIExponentPrincipalPointY) { m_maiSEIExponentPrincipalPointY = maiSEIExponentPrincipalPointY; }
+  uint32_t      getMaiSEIExponentPrincipalPointY(int idx) const           { return m_maiSEIExponentPrincipalPointY[idx]; }
+  void     setMaiSEIMantissaPrincipalPointY(const std::vector<uint32_t>& maiSEIMantissaPrincipalPointY) { m_maiSEIMantissaPrincipalPointY = maiSEIMantissaPrincipalPointY; }
+  uint32_t      getMaiSEIMantissaPrincipalPointY(int idx) const           { return m_maiSEIMantissaPrincipalPointY[idx]; }
+  void     setMaiSEISignSkewFactor(const std::vector<bool>& maiSEISignSkewFactor) { m_maiSEISignSkewFactor = maiSEISignSkewFactor; }
+  bool     getMaiSEISignSkewFactor(int idx) const                    { return m_maiSEISignSkewFactor[idx]; }
+  void     setMaiSEIExponentSkewFactor(const std::vector<uint32_t>& maiSEIExponentSkewFactor) { m_maiSEIExponentSkewFactor = maiSEIExponentSkewFactor; }
+  uint32_t      getMaiSEIExponentSkewFactor(int idx) const                { return m_maiSEIExponentSkewFactor[idx]; }
+  void     setMaiSEIMantissaSkewFactor(const std::vector<uint32_t>& maiSEIMantissaSkewFactor) { m_maiSEIMantissaSkewFactor = maiSEIMantissaSkewFactor; }
+  uint32_t      getMaiSEIMantissaSkewFactor(int idx) const                { return m_maiSEIMantissaSkewFactor[idx]; }
+  void     setMaiSEIPrecRotationParam(int i)                         { m_maiSEIPrecRotationParam = i; }
+  int      getMaiSEIPrecRotationParam() const                        { return m_maiSEIPrecRotationParam; }
+  void     setMaiSEIPrecTranslationParam(int i)                      { m_maiSEIPrecTranslationParam = i; }
+  int      getMaiSEIPrecTranslationParam() const                     { return m_maiSEIPrecTranslationParam; }
+#if JVET_W0078_MVP_SEI
+  // multiview view position SEI (compiled only when JVET_W0078_MVP_SEI is non-zero)
+  void     setMvpSEIEnabled(bool b) { m_mvpSEIEnabled = b; }
+  bool     getMvpSEIEnabled() const { return m_mvpSEIEnabled; }
+  void     setMvpSEINumViewsMinus1(int i) { m_mvpSEINumViewsMinus1 = i; }
+  int      getMvpSEINumViewsMinus1() const { return m_mvpSEINumViewsMinus1; }
+  void     setMvpSEIViewPosition(const std::vector<uint32_t>& mvpSEIViewPosition) { m_mvpSEIViewPosition = mvpSEIViewPosition; }
+  uint32_t getMvpSEIViewPosition(int idx) const { return m_mvpSEIViewPosition[idx]; }   // no range check on idx is done here
+#endif // JVET_W0078_MVP_SEI
+  // alpha channel information SEI (setters store values as given, without validation)
+  void     setAciSEIEnabled(bool b)                                  { m_aciSEIEnabled = b; }
+  bool     getAciSEIEnabled() const                                  { return m_aciSEIEnabled; }
+  void     setAciSEICancelFlag(bool b)                               { m_aciSEICancelFlag = b; }
+  bool     getAciSEICancelFlag() const                               { return m_aciSEICancelFlag; }
+  void     setAciSEIUseIdc(int value)                                { m_aciSEIUseIdc = value; }
+  int      getAciSEIUseIdc() const                                   { return m_aciSEIUseIdc; }
+  void     setAciSEIBitDepthMinus8(int value)                        { m_aciSEIBitDepthMinus8 = value; }
+  int      getAciSEIBitDepthMinus8() const                           { return m_aciSEIBitDepthMinus8; }
+  void     setAciSEITransparentValue(int value)                      { m_aciSEITransparentValue = value; }
+  int      getAciSEITransparentValue() const                         { return m_aciSEITransparentValue; }
+  void     setAciSEIOpaqueValue(int value)                           { m_aciSEIOpaqueValue = value; }
+  int      getAciSEIOpaqueValue() const                              { return m_aciSEIOpaqueValue; }
+  void     setAciSEIIncrFlag(bool b)                                 { m_aciSEIIncrFlag = b; }
+  bool     getAciSEIIncrFlag() const                                 { return m_aciSEIIncrFlag; }
+  void     setAciSEIClipFlag(bool b)                                 { m_aciSEIClipFlag = b; }
+  bool     getAciSEIClipFlag() const                                 { return m_aciSEIClipFlag; }
+  void     setAciSEIClipTypeFlag(bool b)                             { m_aciSEIClipTypeFlag = b; }
+  bool     getAciSEIClipTypeFlag() const                             { return m_aciSEIClipTypeFlag; }
+  // depth representation information SEI (setters store values as given; getDriSEINonlinearModel does no range check on idx)
+  void     setDriSEIEnabled(bool b)                                  { m_driSEIEnabled = b; }
+  bool     getDriSEIEnabled() const                                  { return m_driSEIEnabled; }
+  void     setDriSEIZNearFlag(bool b)                                { m_driSEIZNearFlag = b; }
+  bool     getDriSEIZNearFlag() const                                { return m_driSEIZNearFlag; }
+  void     setDriSEIZFarFlag(bool b)                                 { m_driSEIZFarFlag = b; }
+  bool     getDriSEIZFarFlag() const                                 { return m_driSEIZFarFlag; }
+  void     setDriSEIDMinFlag(bool b)                                 { m_driSEIDMinFlag = b; }
+  bool     getDriSEIDMinFlag() const                                 { return m_driSEIDMinFlag; }
+  void     setDriSEIDMaxFlag(bool b)                                 { m_driSEIDMaxFlag = b; }
+  bool     getDriSEIDMaxFlag() const                                 { return m_driSEIDMaxFlag; }
+  void     setDriSEIZNear(double d)                                  { m_driSEIZNear = d; }
+  double   getDriSEIZNear() const                                    { return m_driSEIZNear; }
+  void     setDriSEIZFar(double d)                                   { m_driSEIZFar = d; }
+  double   getDriSEIZFar() const                                     { return m_driSEIZFar; }
+  void     setDriSEIDMin(double d)                                   { m_driSEIDMin = d; }
+  double   getDriSEIDMin() const                                     { return m_driSEIDMin; }
+  void     setDriSEIDMax(double d)                                   { m_driSEIDMax = d; }
+  double   getDriSEIDMax() const                                     { return m_driSEIDMax; }
+  void     setDriSEIDepthRepresentationType(int i)                   { m_driSEIDepthRepresentationType = i; }
+  int      getDriSEIDepthRepresentationType() const                  { return m_driSEIDepthRepresentationType; }
+  void     setDriSEIDisparityRefViewId(int i)                        { m_driSEIDisparityRefViewId = i; }
+  int      getDriSEIDisparityRefViewId() const                       { return m_driSEIDisparityRefViewId; }
+  void     setDriSEINonlinearNumMinus1(int i)                        { m_driSEINonlinearNumMinus1 = i; }
+  int      getDriSEINonlinearNumMinus1() const                       { return m_driSEINonlinearNumMinus1; }
+  void     setDriSEINonlinearModel(const std::vector<uint32_t>& driSEINonLinearModel) { m_driSEINonlinearModel = driSEINonLinearModel; }
+  uint32_t getDriSEINonlinearModel(int idx) const                                                    { return m_driSEINonlinearModel[idx]; }
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING   // accessors below are compiled only when this macro is non-zero
+  void     setConstrainedRaslencoding(bool b)                        { m_constrainedRaslEncoding = b; }
+  bool     getConstrainedRaslencoding() const                        { return m_constrainedRaslEncoding; }
+  void     setCraAPSreset(bool b)                                    { m_craAPSreset = b; }
+  bool     getCraAPSreset()                                    const { return m_craAPSreset; }
+  void     setRprRASLtoolSwitch(bool b)                              { m_rprRASLtoolSwitch = b; }
+  bool     getRprRASLtoolSwitch() const                              { return m_rprRASLtoolSwitch; }
+#endif // JVET_W0133_CONSTRAINED_RASL_ENCODING
+
   void         setUseWP               ( bool b )                     { m_useWeightedPred   = b;    }
   void         setWPBiPred            ( bool b )                     { m_useWeightedBiPred = b;    }
   bool         getUseWP               ()                             { return m_useWeightedPred;   }
@@ -1790,28 +2162,27 @@ public:
   bool         getTSRCdisableLL       ()                             { return m_TSRCdisableLL;         }
   void         setTSRCdisableLL       ( bool b )                     { m_TSRCdisableLL = b;            }
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   void         setOPI(OPI *p)                                        { m_opi = *p; }
   OPI*         getOPI()                                              { return &m_opi; }
-#endif
 
   void         setDCI(DCI *p)                                        { m_dci = *p; }
   DCI*         getDCI()                                              { return &m_dci; }
   void         setUseRecalculateQPAccordingToLambda (bool b)         { m_recalculateQPAccordingToLambda = b;    }
   bool         getUseRecalculateQPAccordingToLambda ()               { return m_recalculateQPAccordingToLambda; }
 
-  void         setEfficientFieldIRAPEnabled( bool b )                { m_bEfficientFieldIRAPEnabled = b; }
-  bool         getEfficientFieldIRAPEnabled( ) const                 { return m_bEfficientFieldIRAPEnabled; }
+  void setFieldSeqFlag(const bool b) { m_fieldSeqFlag = b; }
+  bool getFieldSeqFlag() const { return m_fieldSeqFlag; }
 
-  void         setHarmonizeGopFirstFieldCoupleEnabled( bool b )      { m_bHarmonizeGopFirstFieldCoupleEnabled = b; }
-  bool         getHarmonizeGopFirstFieldCoupleEnabled( ) const       { return m_bHarmonizeGopFirstFieldCoupleEnabled; }
+  void setEfficientFieldIRAPEnabled(const bool b) { m_efficientFieldIRAPEnabled = b; }
+  bool getEfficientFieldIRAPEnabled() const { return m_efficientFieldIRAPEnabled; }
+
+  void setHarmonizeGopFirstFieldCoupleEnabled(const bool b) { m_harmonizeGopFirstFieldCoupleEnabled = b; }
+  bool getHarmonizeGopFirstFieldCoupleEnabled() const { return m_harmonizeGopFirstFieldCoupleEnabled; }
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   bool         getOPIEnabled()                      { return m_OPIEnabled; }
   void         setOPIEnabled(bool i)                { m_OPIEnabled = i; }
   void         setHtidPlus1(int HTid)               { m_opi.setHtidInfoPresentFlag(true); m_opi.setOpiHtidPlus1(HTid); }
   void         setTargetOlsIdx(int TOlsIdx)         { m_opi.setOlsInfoPresentFlag(true); m_opi.setOpiOlsIdx(TOlsIdx); }
-#endif
 
   bool         getDCIEnabled()                      { return m_DCIEnabled; }
   void         setDCIEnabled(bool i)                { m_DCIEnabled = i; }
@@ -1920,22 +2291,29 @@ public:
   void         setDebugCTU( int i )                                  { m_debugCTU = i; }
   int          getDebugCTU()                                   const { return m_debugCTU; }
 
-#if ENABLE_SPLIT_PARALLELISM
-  void         setNumSplitThreads( int n )                           { m_numSplitThreads = n; }
-  int          getNumSplitThreads()                            const { return m_numSplitThreads; }
-  void         setForceSingleSplitThread( bool b )                   { m_forceSingleSplitThread = b; }
-  int          getForceSingleSplitThread()                     const { return m_forceSingleSplitThread; }
-#endif
   void         setUseALF( bool b ) { m_alf = b; }
   bool         getUseALF()                                      const { return m_alf; }
-#if JVET_T0064
-  void         setALFStrength( double s)                        { m_alfStrength = s; }
-  double       getALFStrength()                                 const { return m_alfStrength; }
+#if JVET_W0129_ENABLE_ALF_TRUEORG
+  void         setAlfTrueOrg( bool b )                                { m_alfTrueOrg = b; }
+  bool         getAlfTrueOrg()                                  const { return m_alfTrueOrg; }
+#else // !JVET_W0129_ENABLE_ALF_TRUEORG: the accessors are named AlfSaoTrueOrg and use m_alfSaoTrueOrg instead
+  void         setAlfSaoTrueOrg( bool b )                             { m_alfSaoTrueOrg = b; }
+  bool         getAlfSaoTrueOrg()                               const { return m_alfSaoTrueOrg; }
+#endif // JVET_W0129_ENABLE_ALF_TRUEORG
+  void         setALFStrengthLuma(double s)                     { m_alfStrengthLuma = s; }
+  double       getALFStrengthLuma()                             const { return m_alfStrengthLuma; }
   void         setALFAllowPredefinedFilters(bool b)             { m_alfAllowPredefinedFilters = b; }
   bool         getALFAllowPredefinedFilters()                   const { return m_alfAllowPredefinedFilters; }
   void         setCCALFStrength(double s)                       { m_ccalfStrength = s; }
   double       getCCALFStrength()                               const { return m_ccalfStrength; }
-#endif
+  void         setALFStrengthChroma(double s)                  { m_alfStrengthChroma = s; }
+  double       getALFStrengthChroma()                          const { return m_alfStrengthChroma; }
+  void         setALFStrengthTargetLuma(double s)              { m_alfStrengthTargetLuma = s; }
+  double       getALFStrengthTargetLuma()                      const { return m_alfStrengthTargetLuma; }
+  void         setALFStrengthTargetChroma(double s)            { m_alfStrengthTargetChroma = s; }
+  double       getALFStrengthTargetChroma()                    const { return m_alfStrengthTargetChroma; }
+  void         setCCALFStrengthTarget(double s)                { m_ccalfStrengthTarget = s; }
+  double       getCCALFStrengthTarget()                        const { return m_ccalfStrengthTarget; }
   void         setUseCCALF( bool b )                                  { m_ccalf = b; }
   bool         getUseCCALF()                                    const { return m_ccalf; }
   void         setCCALFQpThreshold( int b )                           { m_ccalfQpThreshold = b; }
diff --git a/source/Lib/EncoderLib/EncCfgParam.h b/source/Lib/EncoderLib/EncCfgParam.h
index f15644b04519af6de5eb5701c9c00927c04c10fd..f83a651f2fac471fee0212fba05289acb2884eea 100644
--- a/source/Lib/EncoderLib/EncCfgParam.h
+++ b/source/Lib/EncoderLib/EncCfgParam.h
@@ -4,7 +4,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -48,18 +48,11 @@ class CfgVPSParameters
 {
 public:
   CfgVPSParameters()
-#if !JVET_R0193
-  : m_maxTidILRefPicsPlus1(-1)
-#endif
   {}
 
   virtual ~CfgVPSParameters(){}
 
-#if JVET_R0193
   std::vector<std::vector<uint32_t>> m_maxTidILRefPicsPlus1;
-#else
-  int m_maxTidILRefPicsPlus1;
-#endif
 };
 
 class CfgSEISubpictureLevel
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index b875909a93575deba535718a6210149211092cc9..43a65f84b56fe6f36654552aadfe1e34a35ba5cd 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -155,14 +155,26 @@ void EncCu::destroy()
   {
     for( unsigned h = 0; h < numHeights; h++ )
     {
-      if( m_pBestCS[w][h] ) m_pBestCS[w][h]->destroy();
-      if( m_pTempCS[w][h] ) m_pTempCS[w][h]->destroy();
+      if (m_pBestCS[w][h])
+      {
+        m_pBestCS[w][h]->destroy();
+      }
+      if (m_pTempCS[w][h])
+      {
+        m_pTempCS[w][h]->destroy();
+      }
 
       delete m_pBestCS[w][h];
       delete m_pTempCS[w][h];
 
-      if( m_pBestCS2[w][h] ) m_pBestCS2[w][h]->destroy();
-      if( m_pTempCS2[w][h] ) m_pTempCS2[w][h]->destroy();
+      if (m_pBestCS2[w][h])
+      {
+        m_pBestCS2[w][h]->destroy();
+      }
+      if (m_pTempCS2[w][h])
+      {
+        m_pTempCS2[w][h]->destroy();
+      }
 
       delete m_pBestCS2[w][h];
       delete m_pTempCS2[w][h];
@@ -209,33 +221,25 @@ void EncCu::destroy()
   }
 }
 
-
-
 EncCu::~EncCu()
 {
 }
 
-
-
 /** \param    pcEncLib      pointer of encoder class
  */
-void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) )
+void EncCu::init( EncLib* pcEncLib, const SPS& sps )
 {
   m_pcEncCfg           = pcEncLib;
-  m_pcIntraSearch      = pcEncLib->getIntraSearch( PARL_PARAM0( tId ) );
-  m_pcInterSearch      = pcEncLib->getInterSearch( PARL_PARAM0( tId ) );
-  m_pcTrQuant          = pcEncLib->getTrQuant( PARL_PARAM0( tId ) );
-  m_pcRdCost           = pcEncLib->getRdCost ( PARL_PARAM0( tId ) );
-  m_CABACEstimator     = pcEncLib->getCABACEncoder( PARL_PARAM0( tId ) )->getCABACEstimator( &sps );
+  m_pcIntraSearch      = pcEncLib->getIntraSearch();
+  m_pcInterSearch      = pcEncLib->getInterSearch();
+  m_pcTrQuant          = pcEncLib->getTrQuant();
+  m_pcRdCost           = pcEncLib->getRdCost ();
+  m_CABACEstimator     = pcEncLib->getCABACEncoder()->getCABACEstimator( &sps );
   m_CABACEstimator->setEncCu(this);
-  m_CtxCache           = pcEncLib->getCtxCache( PARL_PARAM0( tId ) );
+  m_CtxCache           = pcEncLib->getCtxCache();
   m_pcRateCtrl         = pcEncLib->getRateCtrl();
   m_pcSliceEncoder     = pcEncLib->getSliceEncoder();
-#if ENABLE_SPLIT_PARALLELISM
-  m_pcEncLib           = pcEncLib;
-  m_dataId             = tId;
-#endif
-  m_pcLoopFilter       = pcEncLib->getLoopFilter();
+  m_deblockingFilter   = pcEncLib->getDeblockingFilter();
   m_GeoCostList.init(GEO_NUM_PARTITION_MODE, m_pcEncCfg->getMaxNumGeoCand());
   m_AFFBestSATDCost = MAX_DOUBLE;
 
@@ -246,7 +250,6 @@ void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) )
   m_pcInterSearch->setModeCtrl( m_modeCtrl );
   m_modeCtrl->setInterSearch(m_pcInterSearch);
   m_pcIntraSearch->setModeCtrl( m_modeCtrl );
-
 }
 
 // ====================================================================================================================
@@ -260,39 +263,6 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign
 
   cs.slice->m_mapPltCost[0].clear();
   cs.slice->m_mapPltCost[1].clear();
-#if ENABLE_SPLIT_PARALLELISM
-  if( m_pcEncCfg->getNumSplitThreads() > 1 )
-  {
-    for( int jId = 1; jId < NUM_RESERVERD_SPLIT_JOBS; jId++ )
-    {
-      EncCu*            jobEncCu  = m_pcEncLib->getCuEncoder( cs.picture->scheduler.getSplitDataId( jId ) );
-      CacheBlkInfoCtrl* cacheCtrl = dynamic_cast< CacheBlkInfoCtrl* >( jobEncCu->m_modeCtrl );
-#if REUSE_CU_RESULTS
-      BestEncInfoCache* bestCache = dynamic_cast< BestEncInfoCache* >( jobEncCu->m_modeCtrl );
-#endif
-      SaveLoadEncInfoSbt *sbtCache = dynamic_cast< SaveLoadEncInfoSbt* >( jobEncCu->m_modeCtrl );
-      if( cacheCtrl )
-      {
-        cacheCtrl->init( *cs.slice );
-      }
-#if REUSE_CU_RESULTS
-      if (bestCache)
-      {
-        bestCache->init(*cs.slice);
-      }
-#endif
-      if (sbtCache)
-      {
-        sbtCache->init(*cs.slice);
-      }
-    }
-  }
-
-#if REUSE_CU_RESULTS
-  if( auto* cacheCtrl = dynamic_cast<BestEncInfoCache*>( m_modeCtrl ) ) { cacheCtrl->tick(); }
-#endif
-  if( auto* cacheCtrl = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl ) ) { cacheCtrl->tick(); }
-#endif
   // init the partitioning manager
   QTBTPartitioner partitioner;
   partitioner.initCtu(area, CH_L, *cs.slice);
@@ -512,7 +482,6 @@ bool EncCu::xCheckBestMode( CodingStructure *&tempCS, CodingStructure *&bestCS,
 
     if( m_modeCtrl->useModeResult( encTestMode, tempCS, partitioner ) )
     {
-
       std::swap( tempCS, bestCS );
       // store temp best CI for next CU coding
       m_CurrCtx->best = m_CABACEstimator->getCtx();
@@ -524,26 +493,12 @@ bool EncCu::xCheckBestMode( CodingStructure *&tempCS, CodingStructure *&bestCS,
   // reset context states
   m_CABACEstimator->getCtx() = m_CurrCtx->start;
   return bestCSUpdated;
-
 }
 
 void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& partitioner, double maxCostAllowed )
 {
   CHECK(maxCostAllowed < 0, "Wrong value of maxCostAllowed!");
-#if ENABLE_SPLIT_PARALLELISM
-  CHECK( m_dataId != tempCS->picture->scheduler.getDataId(), "Working in the wrong dataId!" );
-
-  if( m_pcEncCfg->getNumSplitThreads() != 1 && tempCS->picture->scheduler.getSplitJobId() == 0 )
-  {
-    if( m_modeCtrl->isParallelSplit( *tempCS, partitioner ) )
-    {
-      m_modeCtrl->setParallelSplit( true );
-      xCompressCUParallel( tempCS, bestCS, partitioner );
-      return;
-    }
-  }
 
-#endif
   uint32_t compBegin;
   uint32_t numComp;
   bool jointPLT = false;
@@ -557,16 +512,16 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par
     }
     else
     {
-    if (isLuma(partitioner.chType))
-    {
-      compBegin = COMPONENT_Y;
-      numComp = 1;
-    }
-    else
-    {
-      compBegin = COMPONENT_Cb;
-      numComp = 2;
-    }
+      if (isLuma(partitioner.chType))
+      {
+        compBegin = COMPONENT_Y;
+        numComp   = 1;
+      }
+      else
+      {
+        compBegin = COMPONENT_Cb;
+        numComp   = 2;
+      }
     }
   }
   else
@@ -600,39 +555,115 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par
   const UnitArea currCsArea = clipArea( CS::getArea( *bestCS, bestCS->area, partitioner.chType ), *tempCS->picture );
 
   m_modeCtrl->initCULevel( partitioner, *tempCS );
-  if( partitioner.currQtDepth == 0 && partitioner.currMtDepth == 0 && !tempCS->slice->isIntra() && ( sps.getUseSBT() || sps.getUseInterMTS() ) )
+#if GDR_ENABLED
+  if (m_pcEncCfg->getGdrEnabled())
   {
-    auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl );
-    int maxSLSize = sps.getUseSBT() ? tempCS->slice->getSPS()->getMaxTbSize() : MTS_INTER_MAX_CU_SIZE;
-    slsSbt->resetSaveloadSbt( maxSLSize );
-#if ENABLE_SPLIT_PARALLELISM
-    CHECK( tempCS->picture->scheduler.getSplitJobId() != 0, "The SBT search reset need to happen in sequential region." );
-    if (m_pcEncCfg->getNumSplitThreads() > 1)
+    bool isInGdrInterval = slice.getPicHeader()->getInGdrInterval();
+
+    // 1.0 applicable to inter picture only
+    if (isInGdrInterval)
     {
-      for (int jId = 1; jId < NUM_RESERVERD_SPLIT_JOBS; jId++)
+      int gdrPocStart = m_pcEncCfg->getGdrPocStart();
+      int gdrInterval = m_pcEncCfg->getGdrInterval();
+
+      int picWidth = slice.getPPS()->getPicWidthInLumaSamples();
+      int m1, m2, n1;
+
+      int curPoc = slice.getPOC();
+      int gdrPoc = (curPoc - gdrPocStart) % gdrInterval;
+
+      int begGdrX = 0;
+      int endGdrX = 0;
+
+      double dd = (picWidth / (double)gdrInterval);
+      int mm = (int)((picWidth / (double)gdrInterval) + 0.49999);
+      m1 = ((mm + 7) >> 3) << 3;
+      m2 = ((mm + 0) >> 3) << 3;
+
+      if (dd > mm && m1 == m2)
+      {
+        m1 = m1 + 8;
+      }
+
+      n1 = (picWidth - m2 * gdrInterval) / 8;
+
+      if (gdrPoc < n1)
+      {
+        begGdrX = m1 * gdrPoc;
+        endGdrX = begGdrX + m1;
+      }
+      else
+      {
+        begGdrX = m1 * n1 + m2 * (gdrPoc - n1);
+        endGdrX = begGdrX + m2;
+        if (picWidth <= endGdrX)
+        {
+          begGdrX = picWidth;
+          endGdrX = picWidth;
+        }
+      }
+
+      bool isInRefreshArea = tempCS->withinRefresh(begGdrX, endGdrX);
+
+      if (isInRefreshArea)
+      {
+        m_modeCtrl->forceIntraMode();
+      }
+      else if (tempCS->containRefresh(begGdrX, endGdrX) || tempCS->overlapRefresh(begGdrX, endGdrX))
+      {
+        // 1.3.1 enable only vertical splits (QT, BT_V, TT_V)
+        m_modeCtrl->forceVerSplitOnly();
+
+        // 1.3.2 remove TT_V if it does not satisfy the condition
+        if (tempCS->refreshCrossTTV(begGdrX, endGdrX))
+        {
+          m_modeCtrl->forceRemoveTTV();
+        }
+      }
+
+      if (tempCS->area.lwidth() != tempCS->area.lheight())
+      {
+        m_modeCtrl->forceRemoveQT();
+      }
+
+      if (!m_modeCtrl->anyPredModeLeft())
+      {
+        m_modeCtrl->forceRemoveDontSplit();
+      }
+
+      if (isInRefreshArea && !m_modeCtrl->anyIntraIBCMode() && (tempCS->area.lwidth() == 4 || tempCS->area.lheight() == 4))
       {
-        auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt *>(m_pcEncLib->getCuEncoder(jId)->m_modeCtrl);
-        slsSbt->resetSaveloadSbt(maxSLSize);
+        m_modeCtrl->finishCULevel(partitioner);
+        return;
       }
     }
+  }
 #endif
+
+  if( partitioner.currQtDepth == 0 && partitioner.currMtDepth == 0 && !tempCS->slice->isIntra() && ( sps.getUseSBT() || sps.getUseInterMTS() ) )
+  {
+    auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl );
+    int maxSLSize = sps.getUseSBT() ? tempCS->slice->getSPS()->getMaxTbSize() : MTS_INTER_MAX_CU_SIZE;
+    slsSbt->resetSaveloadSbt( maxSLSize );
   }
   m_sbtCostSave[0] = m_sbtCostSave[1] = MAX_DOUBLE;
 
   m_CurrCtx->start = m_CABACEstimator->getCtx();
 
-  m_cuChromaQpOffsetIdxPlus1 = 0;
-
   if( slice.getUseChromaQpAdj() )
   {
     // TODO M0133 : double check encoder decisions with respect to chroma QG detection and actual encode
     int lgMinCuSize = sps.getLog2MinCodingBlockSize() +
-      std::max<int>(0, floorLog2(sps.getCTUSize()) - sps.getLog2MinCodingBlockSize() - int(slice.getCuChromaQpOffsetSubdiv() / 2));
+      std::max<int>(0, floorLog2(sps.getCTUSize()) - sps.getLog2MinCodingBlockSize() - int((slice.getCuChromaQpOffsetSubdiv()+1) / 2));
     if( partitioner.currQgChromaEnable() )
     {
       m_cuChromaQpOffsetIdxPlus1 = ( ( uiLPelX >> lgMinCuSize ) + ( uiTPelY >> lgMinCuSize ) ) % ( pps.getChromaQpOffsetListLen() + 1 );
     }
   }
+  else
+  {
+    m_cuChromaQpOffsetIdxPlus1 = 0;
+  }
 
   if( !m_modeCtrl->anyMode() )
   {
@@ -708,6 +739,7 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par
 #if SHARP_LUMA_DELTA_QP
         (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()) ||
 #endif
+        (m_pcEncCfg->getSmoothQPReductionEnable()) ||
 #if ENABLE_QPA_SUB_CTU
         (m_pcEncCfg->getUsePerceptQPA() && !m_pcEncCfg->getUseRateCtrl() && pps.getUseDQP())
 #else
@@ -715,9 +747,6 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par
 #endif
       ))
     {
-#if ENABLE_SPLIT_PARALLELISM
-      CHECK( tempCS->picture->scheduler.getSplitJobId() > 0, "Changing lambda is only allowed in the master thread!" );
-#endif
       if (currTestMode.qp >= 0)
       {
         updateLambda (&slice, currTestMode.qp,
@@ -768,7 +797,9 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par
       xCheckRDCostMerge2Nx2N( tempCS, bestCS, partitioner, currTestMode );
       CodingUnit* cu = bestCS->getCU(partitioner.chType);
       if (cu)
-      cu->mmvdSkip = cu->skip == false ? false : cu->mmvdSkip;
+      {
+        cu->mmvdSkip = cu->skip == false ? false : cu->mmvdSkip;
+      }
     }
     else if( currTestMode.type == ETM_MERGE_GEO )
     {
@@ -884,7 +915,11 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par
           break;
         }
       }
+#if GDR_ENABLED
+      if (bestCS->cus.size() > 0 && splitmode != bestCS->cus[0]->splitSeries)
+#else
       if (splitmode != bestCS->cus[0]->splitSeries)
+#endif
       {
         splitmode = bestCS->cus[0]->splitSeries;
         const CodingUnit&     cu = *bestCS->cus.front();
@@ -906,17 +941,6 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par
 
   //////////////////////////////////////////////////////////////////////////
   // Finishing CU
-#if ENABLE_SPLIT_PARALLELISM
-  if( bestCS->cus.empty() )
-  {
-    CHECK( bestCS->cost != MAX_DOUBLE, "Cost should be maximal if no encoding found" );
-    CHECK( bestCS->picture->scheduler.getSplitJobId() == 0, "Should always get a result in serial case" );
-
-    m_modeCtrl->finishCULevel( partitioner );
-    return;
-  }
-
-#endif
   if( tempCS->cost == MAX_DOUBLE && bestCS->cost == MAX_DOUBLE )
   {
     //although some coding modes were planned to be tried in RDO, no coding mode actually finished encoding due to early termination
@@ -959,13 +983,6 @@ void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Par
     m_pcIntraSearch->saveCuAreaCostInSCIPU( Area( partitioner.currArea().lumaPos(), partitioner.currArea().lumaSize() ), bestCS->cost );
   }
 
-#if ENABLE_SPLIT_PARALLELISM
-  if( tempCS->picture->scheduler.getSplitJobId() == 0 && m_pcEncCfg->getNumSplitThreads() != 1 )
-  {
-    tempCS->picture->finishParallelPart( currCsArea );
-  }
-
-#endif
   if (bestCS->cus.size() == 1) // no partition
   {
     CHECK(bestCS->cus[0]->tileIdx != bestCS->pps->getTileIdx(bestCS->area.lumaPos()), "Wrong tile index!");
@@ -1045,7 +1062,11 @@ void EncCu::updateLambda (Slice* slice, const int dQP,
   {
     m_pcRdCost->setLambda (newLambda, slice->getSPS()->getBitDepths());
 #if WCG_EXT
+#if !JVET_W0043
+    if (!(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || m_pcEncCfg->getSmoothQPReductionEnable()))
+#else
     if (!m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())
+#endif
     {
       m_pcRdCost->saveUnadjustedLambda();
     }
@@ -1054,164 +1075,6 @@ void EncCu::updateLambda (Slice* slice, const int dQP,
 }
 #endif // SHARP_LUMA_DELTA_QP || ENABLE_QPA_SUB_CTU
 
-#if ENABLE_SPLIT_PARALLELISM
-//#undef DEBUG_PARALLEL_TIMINGS
-//#define DEBUG_PARALLEL_TIMINGS 1
-void EncCu::xCompressCUParallel( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner )
-{
-  const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth() );
-  const unsigned hIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lheight() );
-
-  Picture* picture = tempCS->picture;
-
-  int numJobs = m_modeCtrl->getNumParallelJobs( *bestCS, partitioner );
-
-  bool    jobUsed                            [NUM_RESERVERD_SPLIT_JOBS];
-  std::fill( jobUsed, jobUsed + NUM_RESERVERD_SPLIT_JOBS, false );
-
-  const UnitArea currArea = CS::getArea( *tempCS, partitioner.currArea(), partitioner.chType );
-  const bool doParallel   = !m_pcEncCfg->getForceSingleSplitThread();
-  omp_set_num_threads( m_pcEncCfg->getNumSplitThreads() );
-
-#pragma omp parallel for schedule(dynamic,1) if(doParallel)
-  for( int jId = 1; jId <= numJobs; jId++ )
-  {
-    // thread start
-    picture->scheduler.setSplitThreadId();
-    picture->scheduler.setSplitJobId( jId );
-
-    QTBTPartitioner jobPartitioner;
-    EncCu*       jobCuEnc       = m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) );
-    auto*        jobBlkCache    = dynamic_cast<CacheBlkInfoCtrl*>( jobCuEnc->m_modeCtrl );
-#if REUSE_CU_RESULTS
-    auto*        jobBestCache   = dynamic_cast<BestEncInfoCache*>( jobCuEnc->m_modeCtrl );
-#endif
-
-    jobPartitioner.copyState( partitioner );
-    jobCuEnc      ->copyState( this, jobPartitioner, currArea, true );
-
-    if( jobBlkCache  ) { jobBlkCache ->tick(); }
-#if REUSE_CU_RESULTS
-    if( jobBestCache ) { jobBestCache->tick(); }
-
-#endif
-    CodingStructure *&jobBest = jobCuEnc->m_pBestCS[wIdx][hIdx];
-    CodingStructure *&jobTemp = jobCuEnc->m_pTempCS[wIdx][hIdx];
-
-    jobUsed[jId] = true;
-
-    jobCuEnc->xCompressCU( jobTemp, jobBest, jobPartitioner );
-
-    picture->scheduler.setSplitJobId( 0 );
-    // thread stop
-  }
-  picture->scheduler.setSplitThreadId( 0 );
-
-  int    bestJId  = 0;
-  double bestCost = bestCS->cost;
-  for( int jId = 1; jId <= numJobs; jId++ )
-  {
-    EncCu* jobCuEnc = m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) );
-
-    if( jobUsed[jId] && jobCuEnc->m_pBestCS[wIdx][hIdx]->cost < bestCost )
-    {
-      bestCost = jobCuEnc->m_pBestCS[wIdx][hIdx]->cost;
-      bestJId  = jId;
-    }
-  }
-
-  if( bestJId > 0 )
-  {
-    copyState( m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( bestJId ) ), partitioner, currArea, false );
-    m_CurrCtx->best = m_CABACEstimator->getCtx();
-
-    tempCS = m_pTempCS[wIdx][hIdx];
-    bestCS = m_pBestCS[wIdx][hIdx];
-  }
-
-  const int      bitDepthY = tempCS->sps->getBitDepth( CH_L );
-  const UnitArea clipdArea = clipArea( currArea, *picture );
-
-  CHECK( calcCheckSum( picture->getRecoBuf( clipdArea.Y() ), bitDepthY ) != calcCheckSum( bestCS->getRecoBuf( clipdArea.Y() ), bitDepthY ), "Data copied incorrectly!" );
-
-  picture->finishParallelPart( currArea );
-
-  if( auto *blkCache = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl ) )
-  {
-    for( int jId = 1; jId <= numJobs; jId++ )
-    {
-      if( !jobUsed[jId] || jId == bestJId ) continue;
-
-      auto *jobBlkCache = dynamic_cast<CacheBlkInfoCtrl*>( m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) )->m_modeCtrl );
-      CHECK( !jobBlkCache, "If own mode controller has blk info cache capability so should all other mode controllers!" );
-      blkCache->CacheBlkInfoCtrl::copyState( *jobBlkCache, partitioner.currArea() );
-    }
-
-    blkCache->tick();
-  }
-#if REUSE_CU_RESULTS
-
-  if( auto *blkCache = dynamic_cast<BestEncInfoCache*>( m_modeCtrl ) )
-  {
-    for( int jId = 1; jId <= numJobs; jId++ )
-    {
-      if( !jobUsed[jId] || jId == bestJId ) continue;
-
-      auto *jobBlkCache = dynamic_cast<BestEncInfoCache*>( m_pcEncLib->getCuEncoder( picture->scheduler.getSplitDataId( jId ) )->m_modeCtrl );
-      CHECK( !jobBlkCache, "If own mode controller has blk info cache capability so should all other mode controllers!" );
-      blkCache->BestEncInfoCache::copyState( *jobBlkCache, partitioner.currArea() );
-    }
-
-    blkCache->tick();
-  }
-#endif
-}
-
-void EncCu::copyState( EncCu* other, Partitioner& partitioner, const UnitArea& currArea, const bool isDist )
-{
-  const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth () );
-  const unsigned hIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lheight() );
-
-  if( isDist )
-  {
-    other->m_pBestCS[wIdx][hIdx]->initSubStructure( *m_pBestCS[wIdx][hIdx], partitioner.chType, partitioner.currArea(), false );
-    other->m_pTempCS[wIdx][hIdx]->initSubStructure( *m_pTempCS[wIdx][hIdx], partitioner.chType, partitioner.currArea(), false );
-  }
-  else
-  {
-          CodingStructure* dst =        m_pBestCS[wIdx][hIdx];
-    const CodingStructure* src = other->m_pBestCS[wIdx][hIdx];
-    bool keepResi = KEEP_PRED_AND_RESI_SIGNALS;
-    bool keepPred = true;
-
-    dst->useSubStructure( *src, partitioner.chType, currArea, keepPred, true, keepResi, keepResi, true );
-
-    dst->cost           =  src->cost;
-    dst->dist           =  src->dist;
-    dst->fracBits       =  src->fracBits;
-    dst->features       =  src->features;
-  }
-
-  if( isDist )
-  {
-    m_CurrCtx = m_CtxBuffer.data();
-  }
-
-  m_pcInterSearch->copyState( *other->m_pcInterSearch );
-  m_modeCtrl     ->copyState( *other->m_modeCtrl, partitioner.currArea() );
-  m_pcRdCost     ->copyState( *other->m_pcRdCost );
-  m_pcTrQuant    ->copyState( *other->m_pcTrQuant );
-  if( m_pcEncCfg->getLmcs() )
-  {
-    EncReshape *encReshapeThis  = dynamic_cast<EncReshape*>(       m_pcReshape);
-    EncReshape *encReshapeOther = dynamic_cast<EncReshape*>(other->m_pcReshape);
-    encReshapeThis->copyState( *encReshapeOther );
-  }
-
-  m_CABACEstimator->getCtx() = other->m_CABACEstimator->getCtx();
-}
-#endif
-
 void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, const ModeType modeTypeParent, bool &skipInterPass )
 {
   const int qp                = encTestMode.qp;
@@ -1286,6 +1149,7 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
 
   partitioner.splitCurrArea( split, *tempCS );
   bool qgEnableChildren = partitioner.currQgEnable(); // QG possible at children level
+  bool qgChromaEnableChildren = partitioner.currQgChromaEnable(); // Chroma QG possible at children level
 
   m_CurrCtx++;
 
@@ -1294,7 +1158,12 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
   tempCS->getPredBuf().fill(0);
   AffineMVInfo tmpMVInfo;
   bool isAffMVInfoSaved;
+#if GDR_ENABLED
+  AffineMVInfoSolid tmpMVInfoSolid;
+  m_pcInterSearch->savePrevAffMVInfo(0, tmpMVInfo, tmpMVInfoSolid, isAffMVInfoSaved);
+#else
   m_pcInterSearch->savePrevAffMVInfo(0, tmpMVInfo, isAffMVInfoSaved);
+#endif
   BlkUniMvInfo tmpUniMvInfo;
   bool         isUniMvInfoSaved = false;
   if (!tempCS->slice->isIntra())
@@ -1457,30 +1326,32 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
 
     if (isChromaEnabled(tempCS->pcv->chrFormat))
     {
-    partitioner.chType = CHANNEL_TYPE_CHROMA;
-    tempCS->treeType = partitioner.treeType = TREE_C;
-
-    m_CurrCtx++;
-
-    const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth() );
-    const unsigned hIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lheight() );
-    CodingStructure *tempCSChroma = m_pTempCS2[wIdx][hIdx];
-    CodingStructure *bestCSChroma = m_pBestCS2[wIdx][hIdx];
-    tempCS->initSubStructure( *tempCSChroma, partitioner.chType, partitioner.currArea(), false );
-    tempCS->initSubStructure( *bestCSChroma, partitioner.chType, partitioner.currArea(), false );
-    tempCS->treeType = TREE_D;
-    xCompressCU( tempCSChroma, bestCSChroma, partitioner );
-
-    //attach chromaCS to luma CS and update cost
-    bool keepResi = KEEP_PRED_AND_RESI_SIGNALS;
-    //bestCSChroma->treeType = tempCSChroma->treeType = TREE_C;
-    CHECK( bestCSChroma->treeType != TREE_C || tempCSChroma->treeType != TREE_C, "wrong treeType for chroma CS" );
-    tempCS->useSubStructure( *bestCSChroma, partitioner.chType, CS::getArea( *bestCSChroma, partitioner.currArea(), partitioner.chType ), KEEP_PRED_AND_RESI_SIGNALS, true, keepResi, true, true );
-
-    //release tmp resource
-    tempCSChroma->releaseIntermediateData();
-    bestCSChroma->releaseIntermediateData();
-    //tempCS->picture->cs->releaseIntermediateData();
+      partitioner.chType = CHANNEL_TYPE_CHROMA;
+      tempCS->treeType = partitioner.treeType = TREE_C;
+
+      m_CurrCtx++;
+
+      const unsigned   wIdx         = gp_sizeIdxInfo->idxFrom(partitioner.currArea().lwidth());
+      const unsigned   hIdx         = gp_sizeIdxInfo->idxFrom(partitioner.currArea().lheight());
+      CodingStructure *tempCSChroma = m_pTempCS2[wIdx][hIdx];
+      CodingStructure *bestCSChroma = m_pBestCS2[wIdx][hIdx];
+      tempCS->initSubStructure(*tempCSChroma, partitioner.chType, partitioner.currArea(), false);
+      tempCS->initSubStructure(*bestCSChroma, partitioner.chType, partitioner.currArea(), false);
+      tempCS->treeType = TREE_D;
+      xCompressCU(tempCSChroma, bestCSChroma, partitioner);
+
+      // attach chromaCS to luma CS and update cost
+      bool keepResi = KEEP_PRED_AND_RESI_SIGNALS;
+      // bestCSChroma->treeType = tempCSChroma->treeType = TREE_C;
+      CHECK(bestCSChroma->treeType != TREE_C || tempCSChroma->treeType != TREE_C, "wrong treeType for chroma CS");
+      tempCS->useSubStructure(*bestCSChroma, partitioner.chType,
+                              CS::getArea(*bestCSChroma, partitioner.currArea(), partitioner.chType),
+                              KEEP_PRED_AND_RESI_SIGNALS, true, keepResi, true, true);
+
+      // release tmp resource
+      tempCSChroma->releaseIntermediateData();
+      bestCSChroma->releaseIntermediateData();
+      // tempCS->picture->cs->releaseIntermediateData();
       m_CurrCtx--;
     }
     tempCS->picture->cs->clearCuPuTuIdxMap( partitioner.currArea(), numCuPuTu[0], numCuPuTu[1], numCuPuTu[2], numCuPuTu + 3 );
@@ -1491,6 +1362,13 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
     partitioner.treeType = TREE_D;
     partitioner.modeType = MODE_TYPE_ALL;
   }
+  else
+  {
+    if (!qgChromaEnableChildren) // check at deepest cQG level only
+    {
+      xCheckChromaQPOffset( *tempCS, partitioner );
+    }
+  }
 
   // Finally, generate split-signaling bits for RD-cost check
   const PartSplit implicitSplit = partitioner.getImplicitSplit( *tempCS );
@@ -1531,7 +1409,9 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
 
   // Check Delta QP bits for splitted structure
   if( !qgEnableChildren ) // check at deepest QG level only
-  xCheckDQP( *tempCS, partitioner, true );
+  {
+    xCheckDQP(*tempCS, partitioner, true);
+  }
 
   // If the configuration being tested exceeds the maximum number of bytes for a slice / slice-segment, then
   // a proper RD evaluation cannot be performed. Therefore, termination of the
@@ -1562,8 +1442,18 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
   // RD check for sub partitioned coding structure.
   xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
 
+#if GDR_ENABLED
+  if (isAffMVInfoSaved)
+  {
+    m_pcInterSearch->addAffMVInfo(tmpMVInfo, tmpMVInfoSolid);
+  }
+#else
   if (isAffMVInfoSaved)
+  {
     m_pcInterSearch->addAffMVInfo(tmpMVInfo);
+  }
+#endif
+
   if (!tempCS->slice->isIntra() && isUniMvInfoSaved)
   {
     m_pcInterSearch->addUniMvInfo(tmpUniMvInfo);
@@ -1832,7 +1722,10 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
             m_modeCtrl->setBestCostWithoutSplitFlags( tmpCostWithoutSplitFlags );
           }
 
-          if( !mtsFlag ) static_cast< double& >( costSize2Nx2NmtsFirstPass ) = tempCS->cost;
+          if (!mtsFlag)
+          {
+            static_cast<double &>(costSize2Nx2NmtsFirstPass) = tempCS->cost;
+          }
 
           if( sps.getUseLFNST() && !tempCS->cus.empty() )
           {
@@ -1938,7 +1831,9 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
     }
   } //trGrpIdx
   if(!adaptiveColorTrans)
-  m_modeCtrl->setBestNonDCT2Cost(bestNonDCT2Cost);
+  {
+    m_modeCtrl->setBestNonDCT2Cost(bestNonDCT2Cost);
+  }
   return foundZeroRootCbf;
 }
 
@@ -1946,7 +1841,7 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
 void EncCu::xCheckPLT(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
 {
   if (((partitioner.currArea().lumaSize().width * partitioner.currArea().lumaSize().height <= 16) && (isLuma(partitioner.chType)) )
-        || ((partitioner.currArea().chromaSize().width * partitioner.currArea().chromaSize().height <= 16) && (!isLuma(partitioner.chType)) && partitioner.isSepTree(*tempCS) ) 
+        || ((partitioner.currArea().chromaSize().width * partitioner.currArea().chromaSize().height <= 16) && (!isLuma(partitioner.chType)) && partitioner.isSepTree(*tempCS) )
       || (partitioner.isLocalSepTree(*tempCS)  && (!isLuma(partitioner.chType))  )  )
   {
     return;
@@ -2087,7 +1982,10 @@ void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeep
   if( hasResidual )
   {
     TempCtx ctxTemp( m_CtxCache );
-    if( !bKeepCtx ) ctxTemp = SubCtx( Ctx::DeltaQP, m_CABACEstimator->getCtx() );
+    if (!bKeepCtx)
+    {
+      ctxTemp = SubCtx(Ctx::DeltaQP, m_CABACEstimator->getCtx());
+    }
 
     m_CABACEstimator->resetBits();
     m_CABACEstimator->cu_qp_delta( *cuFirst, predQP, cuFirst->qp );
@@ -2095,8 +1993,10 @@ void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeep
     cs.fracBits += m_CABACEstimator->getEstFracBits(); // dQP bits
     cs.cost      = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);
 
-
-    if( !bKeepCtx ) m_CABACEstimator->getCtx() = SubCtx( Ctx::DeltaQP, ctxTemp );
+    if (!bKeepCtx)
+    {
+      m_CABACEstimator->getCtx() = SubCtx(Ctx::DeltaQP, ctxTemp);
+    }
 
     // NOTE: reset QPs for CUs without residuals up to first coded CU
     for( const auto &cu : cs.cus )
@@ -2133,74 +2033,50 @@ void EncCu::xCheckChromaQPOffset( CodingStructure& cs, Partitioner& partitioner
     return;
   }
 
-  // not needed after the first coded TU in the chroma QG
+  // check cost only at cQG top-level (everything below shall not be influenced by adj coding: it occurs only once)
   if( !partitioner.currQgChromaEnable() )
   {
     return;
   }
 
-  CodingUnit& cu = *cs.getCU( partitioner.chType );
-
   // check if chroma is coded or not
-  bool hasResidual = false;
-  for( const TransformUnit &tu : CU::traverseTUs(cu) )
-  {
-    if( tu.cbf[COMPONENT_Cb] || tu.cbf[COMPONENT_Cr] )
-    {
-      hasResidual = true;
-      break;
-    }
-  }
-
-  if( hasResidual )
-  {
-    // estimate cost for coding cu_chroma_qp_offset
-    TempCtx ctxTempAdjFlag( m_CtxCache );
-    TempCtx ctxTempAdjIdc( m_CtxCache );
-    ctxTempAdjFlag = SubCtx( Ctx::ChromaQpAdjFlag, m_CABACEstimator->getCtx() );
-    ctxTempAdjIdc = SubCtx( Ctx::ChromaQpAdjIdc,   m_CABACEstimator->getCtx() );
-    m_CABACEstimator->resetBits();
-    m_CABACEstimator->cu_chroma_qp_offset( cu );
-    cs.fracBits += m_CABACEstimator->getEstFracBits();
-    cs.cost      = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);
-    m_CABACEstimator->getCtx() = SubCtx( Ctx::ChromaQpAdjFlag, ctxTempAdjFlag );
-    m_CABACEstimator->getCtx() = SubCtx( Ctx::ChromaQpAdjIdc,  ctxTempAdjIdc  );
-  }
-  else
+  bool isCoded = false;
+  for( auto &cu : cs.cus )
   {
-    // reset chroma QP offset to 0 if it will not be coded
-    cu.chromaQpAdj = 0;
-  }
-}
-
-void EncCu::xFillPCMBuffer( CodingUnit &cu )
-{
-  const ChromaFormat format        = cu.chromaFormat;
-  const uint32_t numberValidComponents = getNumberValidComponents(format);
+    SizeType channelWidth = !cu->isSepTree() ? cu->lwidth() : cu->chromaSize().width;
+    SizeType channelHeight = !cu->isSepTree() ? cu->lheight() : cu->chromaSize().height;
 
-  for( auto &tu : CU::traverseTUs( cu ) )
-  {
-    for( uint32_t ch = 0; ch < numberValidComponents; ch++ )
+    for( const TransformUnit &tu : CU::traverseTUs(*cu) )
     {
-      const ComponentID compID = ComponentID( ch );
-
-      const CompArea &compArea = tu.blocks[ compID ];
-
-      const CPelBuf source      = tu.cs->getOrgBuf( compArea );
-             PelBuf destination = tu.getPcmbuf( compID );
-      if (tu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && compID == COMPONENT_Y)
+      if( tu.cbf[COMPONENT_Cb] || tu.cbf[COMPONENT_Cr] || channelWidth > 64 || channelHeight > 64)
       {
-        CompArea    tmpArea(COMPONENT_Y, compArea.chromaFormat, Position(0, 0), compArea.size());
-        PelBuf tempOrgBuf = m_tmpStorageLCU->getBuf(tmpArea);
-        tempOrgBuf.copyFrom(source);
-        tempOrgBuf.rspSignal(m_pcReshape->getFwdLUT());
-        destination.copyFrom(tempOrgBuf);
+        isCoded = true;
+        break;
       }
-      else
-        destination.copyFrom( source );
+    }
+    if (isCoded)
+    {
+      // estimate cost for coding cu_chroma_qp_offset
+      TempCtx ctxTempAdjFlag( m_CtxCache );
+      TempCtx ctxTempAdjIdc( m_CtxCache );
+      ctxTempAdjFlag = SubCtx( Ctx::ChromaQpAdjFlag, m_CABACEstimator->getCtx() );
+      ctxTempAdjIdc = SubCtx( Ctx::ChromaQpAdjIdc,   m_CABACEstimator->getCtx() );
+      m_CABACEstimator->resetBits();
+      m_CABACEstimator->cu_chroma_qp_offset( *cu );
+      cs.fracBits += m_CABACEstimator->getEstFracBits();
+      cs.cost      = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);
+      m_CABACEstimator->getCtx() = SubCtx( Ctx::ChromaQpAdjFlag, ctxTempAdjFlag );
+      m_CABACEstimator->getCtx() = SubCtx( Ctx::ChromaQpAdjIdc,  ctxTempAdjIdc  );
+      break;
+    }
+    else
+    {
+      // chroma QP adj is forced to 0 for leading uncoded CUs
+      cu->chromaQpAdj = 0;
     }
   }
 }
+
 void EncCu::xCheckRDCostHashInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
 {
   bool isPerfectMatch = false;
@@ -2227,10 +2103,7 @@ void EncCu::xCheckRDCostHashInter( CodingStructure *&tempCS, CodingStructure *&b
 
     m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
 
-    xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0
-      , 0
-      , &equBcwCost
-    );
+    xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, 0, &equBcwCost);
 
     if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
     {
@@ -2257,6 +2130,10 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
   MergeCtx mergeCtx;
   const SPS &sps = *tempCS->sps;
 
+#if GDR_ENABLED
+  bool isEncodeGdrClean = false;
+  CodingStructure *cs;
+#endif
   if (sps.getSbTMVPEnabledFlag())
   {
     Size bufSize = g_miScaling.scale( tempCS->area.lumaSize() );
@@ -2277,11 +2154,13 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
     PredictionUnit pu( tempCS->area );
     pu.cu = &cu;
     pu.cs = tempCS;
-    PU::getInterMergeCandidates(pu, mergeCtx
-      , 0
-    );
+    PU::getInterMergeCandidates(pu, mergeCtx, 0);
     PU::getInterMMVDMergeCandidates(pu, mergeCtx);
     pu.regularMergeFlag = true;
+#if GDR_ENABLED
+    cs = pu.cs;
+    isEncodeGdrClean = cs->sps->getGDREnabledFlag() && cs->pcv->isEncoder && ((cs->picHeader->getInGdrInterval() && cs->isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs->picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
   }
   bool candHasNoResidual[MRG_MAX_NUM_CANDS + MMVD_ADD_NUM];
   for (uint32_t ui = 0; ui < MRG_MAX_NUM_CANDS + MMVD_ADD_NUM; ui++)
@@ -2289,15 +2168,15 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
     candHasNoResidual[ui] = false;
   }
 
-  bool                                        bestIsSkip = false;
-  bool                                        bestIsMMVDSkip = true;
-  PelUnitBuf                                  acMergeBuffer[MRG_MAX_NUM_CANDS];
-  PelUnitBuf                                  acMergeTmpBuffer[MRG_MAX_NUM_CANDS];
-  PelUnitBuf                                  acMergeRealBuffer[MMVD_MRG_MAX_RD_BUF_NUM];
-  PelUnitBuf *                                acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM];
-  PelUnitBuf *                                singleMergeTempBuffer;
-  int                                         insertPos;
-  unsigned                                    uiNumMrgSATDCand = mergeCtx.numValidMergeCand + MMVD_ADD_NUM;
+  bool        bestIsSkip     = false;
+  bool        bestIsMMVDSkip = true;
+  PelUnitBuf  acMergeBuffer[MRG_MAX_NUM_CANDS];
+  PelUnitBuf  acMergeTmpBuffer[MRG_MAX_NUM_CANDS];
+  PelUnitBuf  acMergeRealBuffer[MMVD_MRG_MAX_RD_BUF_NUM];
+  PelUnitBuf *acMergeTempBuffer[MMVD_MRG_MAX_RD_NUM];
+  PelUnitBuf *singleMergeTempBuffer;
+  int         insertPos;
+  unsigned    uiNumMrgSATDCand = mergeCtx.numValidMergeCand + MMVD_ADD_NUM;
 
   struct ModeInfo
   {
@@ -2446,6 +2325,42 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
         uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
         double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;
         insertPos = -1;
+
+#if GDR_ENABLED
+        // Non-RD cost for regular merge
+        if (isEncodeGdrClean)
+        {
+          bool isSolid = true;
+          bool isValid = true;
+
+          if (mergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0].refIdx >= 0)
+          {
+            Mv mv = mergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0].mv;
+            int ridx = mergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 0].refIdx;
+
+            mergeCtx.mvValid[(uiMergeCand << 1) + 0] = cs->isClean(pu.Y().bottomRight(), mv, REF_PIC_LIST_0, ridx);
+
+            isSolid = isSolid && mergeCtx.mvSolid[(uiMergeCand << 1) + 0];
+            isValid = isValid && mergeCtx.mvValid[(uiMergeCand << 1) + 0];
+          }
+
+          if (mergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1].refIdx >= 0)
+          {
+            Mv mv = mergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1].mv;
+            int ridx = mergeCtx.mvFieldNeighbours[(uiMergeCand << 1) + 1].refIdx;
+
+            mergeCtx.mvValid[(uiMergeCand << 1) + 1] = cs->isClean(pu.Y().bottomRight(), mv, REF_PIC_LIST_1, ridx);
+
+            isSolid = isSolid && mergeCtx.mvSolid[(uiMergeCand << 1) + 1];
+            isValid = isValid && mergeCtx.mvValid[(uiMergeCand << 1) + 1];
+          }
+
+          if (!isValid || !isSolid)
+          {
+            cost = MAX_DOUBLE;
+          }
+        }
+#endif
         updateCandList(ModeInfo(uiMergeCand, true, false, false), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
         if (insertPos != -1)
         {
@@ -2462,7 +2377,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
             swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
           }
         }
+#if !GDR_ENABLED
         CHECK(std::min(uiMergeCand + 1, uiNumMrgSATDCand) != RdModeList.size(), "");
+#endif
       }
 
       if (isIntrainterEnabled)
@@ -2516,6 +2433,42 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
           pu.regularMergeFlag = false;
           uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
           double cost = (double)sadValue + (double)fracBits * sqrtLambdaForFirstPassIntra;
+#if GDR_ENABLED
+          // Non-RD cost for CIIP merge
+          if (isEncodeGdrClean)
+          {
+            bool isSolid = true;
+            bool isValid = true;
+
+            if (mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx >= 0)
+            {
+              Mv mv = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].mv;
+              int ridx = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx;
+
+              mergeCtx.mvValid[(mergeCand << 1) + 0] = cs->isClean(pu.Y().bottomRight(), mv, REF_PIC_LIST_0, ridx);
+
+              isSolid = isSolid && mergeCtx.mvSolid[(mergeCand << 1) + 0];
+              isValid = isValid && mergeCtx.mvValid[(mergeCand << 1) + 0];
+            }
+
+            if (mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 1].refIdx >= 0)
+            {
+              Mv mv = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 1].mv;
+              int ridx = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 1].refIdx;
+
+              mergeCtx.mvValid[(mergeCand << 1) + 1] = cs->isClean(pu.Y().bottomRight(), mv, REF_PIC_LIST_1, ridx);
+
+              isSolid = isSolid && mergeCtx.mvSolid[(mergeCand << 1) + 1];
+              isValid = isValid && mergeCtx.mvValid[(mergeCand << 1) + 1];
+            }
+
+            if (!isValid || !isSolid)
+            {
+              cost = MAX_DOUBLE;
+            }
+          }
+#endif
+
           insertPos = -1;
           updateCandList(ModeInfo(mergeCand, false, false, true), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
           if (insertPos != -1)
@@ -2539,7 +2492,19 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
           int baseIdx = mmvdMergeCand / MMVD_MAX_REFINE_NUM;
           int refineStep = (mmvdMergeCand - (baseIdx * MMVD_MAX_REFINE_NUM)) / 4;
           if (refineStep >= m_pcEncCfg->getMmvdDisNum())
+          {
             continue;
+          }
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            pu.mvSolid[REF_PIC_LIST_0] = true;
+            pu.mvSolid[REF_PIC_LIST_1] = true;
+
+            pu.mvValid[REF_PIC_LIST_0] = true;
+            pu.mvValid[REF_PIC_LIST_1] = true;
+          }
+#endif
           mergeCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCand);
 
           PU::spanMotionInfo(pu, mergeCtx);
@@ -2557,6 +2522,31 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
           uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu);
           double cost = (double)uiSad + (double)fracBits * sqrtLambdaForFirstPassIntra;
           insertPos = -1;
+
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            bool isSolid = true;
+            bool isValid = true;
+
+            if (pu.refIdx[0] >= 0)
+            {
+              isSolid = isSolid && pu.mvSolid[0];
+              isValid = isValid && pu.mvValid[0];
+            }
+
+            if (pu.refIdx[1] >= 0)
+            {
+              isSolid = isSolid && pu.mvSolid[1];
+              isValid = isValid && pu.mvValid[1];
+            }
+
+            if (!isSolid || !isValid)
+            {
+              cost = MAX_DOUBLE;
+            }
+          }
+#endif
           updateCandList(ModeInfo(mmvdMergeCand, false, true, false), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos);
           if (insertPos != -1)
           {
@@ -2590,7 +2580,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
             pu.intraDir[0] = PLANAR_IDX;
             pu.intraDir[1] = DM_CHROMA_IDX;
             if (pu.chromaSize().width == 2)
+            {
               continue;
+            }
             uint32_t bufIdx = 0;
             m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Cb());
             m_pcIntraSearch->predIntraAng(COMPONENT_Cb, pu.cs->getPredBuf(pu).Cb(), pu);
@@ -2723,22 +2715,24 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
           m_pcIntraSearch->geneWeightedPred(COMPONENT_Y, tmpBuf, pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, bufIdx));
           if (isChromaEnabled(pu.chromaFormat))
           {
-          if (pu.chromaSize().width > 2)
-          {
-          tmpBuf = tempCS->getPredBuf(pu).Cb();
-          tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cb());
-          m_pcIntraSearch->geneWeightedPred(COMPONENT_Cb, tmpBuf, pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cb, bufIdx));
-          tmpBuf = tempCS->getPredBuf(pu).Cr();
-          tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cr());
-          m_pcIntraSearch->geneWeightedPred(COMPONENT_Cr, tmpBuf, pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cr, bufIdx));
-          }
-          else
-          {
-            tmpBuf = tempCS->getPredBuf(pu).Cb();
-            tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cb());
-            tmpBuf = tempCS->getPredBuf(pu).Cr();
-            tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cr());
-          }
+            if (pu.chromaSize().width > 2)
+            {
+              tmpBuf = tempCS->getPredBuf(pu).Cb();
+              tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cb());
+              m_pcIntraSearch->geneWeightedPred(COMPONENT_Cb, tmpBuf, pu,
+                                                m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cb, bufIdx));
+              tmpBuf = tempCS->getPredBuf(pu).Cr();
+              tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cr());
+              m_pcIntraSearch->geneWeightedPred(COMPONENT_Cr, tmpBuf, pu,
+                                                m_pcIntraSearch->getPredictorPtr2(COMPONENT_Cr, bufIdx));
+            }
+            else
+            {
+              tmpBuf = tempCS->getPredBuf(pu).Cb();
+              tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cb());
+              tmpBuf = tempCS->getPredBuf(pu).Cr();
+              tmpBuf.copyFrom(acMergeTmpBuffer[uiMergeCand].Cr());
+            }
           }
         }
         else
@@ -2770,7 +2764,36 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
         isTestSkipMerge[uiMergeCand] = true;
       }
 
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        bool isSolid = true;
+        bool isValid = true;
+
+        if (pu.refIdx[0] >= 0)
+        {
+          isSolid = isSolid && pu.mvSolid[0];
+          isValid = isValid && pu.mvValid[0];
+        }
+
+        if (pu.refIdx[1] >= 0)
+        {
+          isSolid = isSolid && pu.mvSolid[1];
+          isValid = isValid && pu.mvValid[1];
+        }
+
+        if (isSolid && isValid)
+        {
+          xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL);
+        }
+      }
+      else
+      {
+        xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL);
+      }
+#else
       xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL );
+#endif
 
       if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip && !pu.ciipFlag)
       {
@@ -2837,6 +2860,7 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure
   cu.predMode = MODE_INTER;
   cu.slice = tempCS->slice;
   cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
+  cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
   cu.qp = encTestMode.qp;
   cu.affine = false;
   cu.mtsFlag = false;
@@ -2849,6 +2873,11 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure
   cu.bdpcmMode = 0;
 
   PredictionUnit &pu = tempCS->addPU(cu, pm.chType);
+#if GDR_ENABLED
+  CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
+
   pu.mergeFlag = true;
   pu.regularMergeFlag = false;
   PU::getGeoMergeCandidates(pu, mergeCtx);
@@ -2867,16 +2896,27 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure
   m_pcRdCost->setDistParam(distParamWholeBlk, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y().buf, m_acMergeBuffer[0].Y().stride, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
   Distortion bestWholeBlkSad = MAX_UINT64;
   double bestWholeBlkCost = MAX_DOUBLE;
-  Distortion *sadWholeBlk;
-  sadWholeBlk = new Distortion[maxNumMergeCandidates];
-  int *pocMrg;
-  Mv *MrgMv;
-  bool *isSkipThisCand;
-  pocMrg = new int[maxNumMergeCandidates];
-  MrgMv = new Mv[maxNumMergeCandidates];
-  isSkipThisCand = new bool[maxNumMergeCandidates];
+  Distortion sadWholeBlk[GEO_MAX_NUM_UNI_CANDS];
+  int        pocMrg[GEO_MAX_NUM_UNI_CANDS];
+  Mv         MrgMv[GEO_MAX_NUM_UNI_CANDS];
+  bool       isSkipThisCand[GEO_MAX_NUM_UNI_CANDS];
+#if GDR_ENABLED
+  bool MrgSolid[GEO_MAX_NUM_UNI_CANDS];
+  bool MrgValid[GEO_MAX_NUM_UNI_CANDS];
+
+  if (isEncodeGdrClean)
+  {
+    for (int i = 0; i < maxNumMergeCandidates; i++)
+    {
+      MrgSolid[i] = true;
+      MrgValid[i] = true;
+    }
+  }
+#endif
   for (int i = 0; i < maxNumMergeCandidates; i++)
+  {
     isSkipThisCand[i] = false;
+  }
   for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++)
   {
     geoBuffer[mergeCand] = m_acMergeBuffer[mergeCand].getBuf(localUnitArea);
@@ -2886,6 +2926,13 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure
     int MrgrefIdx = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + MrgList].refIdx;
     pocMrg[mergeCand] = tempCS->slice->getRefPic(MrgeRefPicList, MrgrefIdx)->getPOC();
     MrgMv[mergeCand] = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + MrgList].mv;
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      MrgSolid[mergeCand] = mergeCtx.mvSolid[(mergeCand << 1) + MrgList];
+      MrgValid[mergeCand] = mergeCtx.mvValid[(mergeCand << 1) + MrgList];
+    }
+#endif
     if (mergeCand)
     {
       for (int i = 0; i < mergeCand; i++)
@@ -2910,7 +2957,20 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure
     distParamWholeBlk.cur.buf = geoTempBuf[mergeCand].Y().buf;
     distParamWholeBlk.cur.stride = geoTempBuf[mergeCand].Y().stride;
     sadWholeBlk[mergeCand] = distParamWholeBlk.distFunc(distParamWholeBlk);
+#if GDR_ENABLED
+    bool allOk = (sadWholeBlk[mergeCand] < bestWholeBlkSad);
+    if (isEncodeGdrClean)
+    {
+      bool isSolid = mergeCtx.mvSolid[(mergeCand << 1) + MrgList];
+      bool isValid = mergeCtx.mvValid[(mergeCand << 1) + MrgList];
+      allOk = allOk && isSolid && isValid;
+    }
+#endif
+#if GDR_ENABLED
+    if (allOk)
+#else
     if (sadWholeBlk[mergeCand] < bestWholeBlkSad)
+#endif
     {
       bestWholeBlkSad = sadWholeBlk[mergeCand];
       int bitsCand = mergeCand + 1;
@@ -2958,18 +3018,46 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure
     for (uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++)
     {
       int bitsCand = mergeCand + 1;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        double cost0, cost1;
+
+        m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoTempBuf[mergeCand].Y().buf, geoTempBuf[mergeCand].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
+        sadLarge = distParam.distFunc(distParam);
+        sadSmall = sadWholeBlk[mergeCand] - sadLarge;
 
+        if (MrgSolid[mergeCand] && MrgValid[mergeCand])
+        {
+          cost0 = (double)sadLarge + (double)bitsCand * sqrtLambdaForFirstPass;
+          cost1 = (double)sadSmall + (double)bitsCand * sqrtLambdaForFirstPass;
+        }
+        else
+        {
+          cost0 = MAX_DOUBLE;
+          cost1 = MAX_DOUBLE;
+        }
+
+        m_GeoCostList.insert(splitDir, 0, mergeCand, cost0);
+        m_GeoCostList.insert(splitDir, 1, mergeCand, cost1);
+      }
+      else
+      {
+        m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoTempBuf[mergeCand].Y().buf, geoTempBuf[mergeCand].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
+        sadLarge = distParam.distFunc(distParam);
+        m_GeoCostList.insert(splitDir, 0, mergeCand, (double)sadLarge + (double)bitsCand * sqrtLambdaForFirstPass);
+        sadSmall = sadWholeBlk[mergeCand] - sadLarge;
+        m_GeoCostList.insert(splitDir, 1, mergeCand, (double)sadSmall + (double)bitsCand * sqrtLambdaForFirstPass);
+      }
+#else
       m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), geoTempBuf[mergeCand].Y().buf, geoTempBuf[mergeCand].Y().stride, SADmask, maskStride, stepX, maskStride2, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y);
       sadLarge = distParam.distFunc(distParam);
       m_GeoCostList.insert(splitDir, 0, mergeCand, (double)sadLarge + (double)bitsCand * sqrtLambdaForFirstPass);
       sadSmall = sadWholeBlk[mergeCand] - sadLarge;
       m_GeoCostList.insert(splitDir, 1, mergeCand, (double)sadSmall + (double)bitsCand * sqrtLambdaForFirstPass);
+#endif
     }
   }
-  delete[] sadWholeBlk;
-  delete[] pocMrg;
-  delete[] MrgMv;
-  delete[] isSkipThisCand;
 
   for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
   {
@@ -2979,13 +3067,33 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure
       unsigned int mergeCand1 = m_GeoModeTest[GeoMotionIdx].m_candIdx1;
       double tempCost = m_GeoCostList.singleDistList[0][splitDir][mergeCand0].cost + m_GeoCostList.singleDistList[1][splitDir][mergeCand1].cost;
       if (tempCost > bestWholeBlkCost)
+      {
         continue;
+      }
       tempCost = tempCost + (double)bitsCandTB * sqrtLambdaForFirstPass;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        int idx0 = mergeCand0;
+        int idx1 = mergeCand1;
+        bool isSolid0 = mergeCtx.mvSolid[(idx0 << 1) + 0] && mergeCtx.mvSolid[(idx0 << 1) + 1];
+        bool isSolid1 = mergeCtx.mvSolid[(idx1 << 1) + 0] && mergeCtx.mvSolid[(idx1 << 1) + 1];
+        bool isValid0 = mergeCtx.mvValid[(idx0 << 1) + 0] && mergeCtx.mvValid[(idx0 << 1) + 1];
+        bool isValid1 = mergeCtx.mvValid[(idx1 << 1) + 0] && mergeCtx.mvValid[(idx1 << 1) + 1];
+
+        if (!isSolid0 || !isSolid1 || !isValid0 || !isValid1)
+        {
+          tempCost = MAX_DOUBLE;
+        }
+      }
+#endif
       comboList.list.push_back(GeoMergeCombo(splitDir, mergeCand0, mergeCand1, tempCost));
     }
   }
   if (comboList.list.empty())
+  {
     return;
+  }
   comboList.sortByCost();
   bool geocandHasNoResidual[GEO_MAX_TRY_WEIGHTED_SAD];
   for (int mergeCand = 0; mergeCand < GEO_MAX_TRY_WEIGHTED_SAD; mergeCand++)
@@ -3017,6 +3125,22 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure
     mvBits += mergeCand0;
     mvBits += mergeCand1;
     double updateCost = (double)sad + (double)(bitsCandTB + mvBits) * sqrtLambdaForFirstPass;
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      int idx0 = mergeCand0;
+      int idx1 = mergeCand1;
+      bool isSolid0 = mergeCtx.mvSolid[(idx0 << 1) + 0] && mergeCtx.mvSolid[(idx0 << 1) + 1];
+      bool isSolid1 = mergeCtx.mvSolid[(idx1 << 1) + 0] && mergeCtx.mvSolid[(idx1 << 1) + 1];
+      bool isValid0 = mergeCtx.mvValid[(idx0 << 1) + 0] && mergeCtx.mvValid[(idx0 << 1) + 1];
+      bool isValid1 = mergeCtx.mvValid[(idx1 << 1) + 0] && mergeCtx.mvValid[(idx1 << 1) + 1];
+
+      if (!isSolid0 || !isSolid1 || !isValid0 || !isValid1)
+      {
+        updateCost = MAX_DOUBLE;
+      }
+    }
+#endif
     comboList.list[candidateIdx].cost = updateCost;
     updateCandList(candidateIdx, updateCost, geoRdModeList, geocandCostList, geoNumMrgSATDCand);
   }
@@ -3058,6 +3182,7 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure
       cu.predMode = MODE_INTER;
       cu.slice = tempCS->slice;
       cu.tileIdx = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
+      cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
       cu.qp = encTestMode.qp;
       cu.affine = false;
       cu.mtsFlag = false;
@@ -3080,7 +3205,28 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure
       PU::spanGeoMotionInfo(pu, mergeCtx, pu.geoSplitDir, pu.geoMergeIdx0, pu.geoMergeIdx1);
       tempCS->getPredBuf().copyFrom(geoCombinations[candidateIdx]);
 
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        int idx0 = pu.geoMergeIdx0;
+        int idx1 = pu.geoMergeIdx1;
+        bool isSolid0 = mergeCtx.mvSolid[(idx0 << 1) + 0] && mergeCtx.mvSolid[(idx0 << 1) + 1];
+        bool isSolid1 = mergeCtx.mvSolid[(idx1 << 1) + 0] && mergeCtx.mvSolid[(idx1 << 1) + 1];
+        bool isValid0 = mergeCtx.mvValid[(idx0 << 1) + 0] && mergeCtx.mvValid[(idx0 << 1) + 1];
+        bool isValid1 = mergeCtx.mvValid[(idx1 << 1) + 0] && mergeCtx.mvValid[(idx1 << 1) + 1];
+
+        if (isSolid0 && isSolid1 && isValid0 && isValid1)
+        {
+          xEncodeInterResidual(tempCS, bestCS, pm, encTestMode, noResidualPass, (noResidualPass == 0 ? &geocandHasNoResidual[candidateIdx] : NULL));
+        }
+      }
+      else
+      {
+        xEncodeInterResidual(tempCS, bestCS, pm, encTestMode, noResidualPass, (noResidualPass == 0 ? &geocandHasNoResidual[candidateIdx] : NULL));
+      }
+#else
       xEncodeInterResidual(tempCS, bestCS, pm, encTestMode, noResidualPass, (noResidualPass == 0 ? &geocandHasNoResidual[candidateIdx] : NULL));
+#endif
 
       if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
       {
@@ -3106,6 +3252,10 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
   {
     return;
   }
+#if GDR_ENABLED
+  CodingStructure *cs;
+  bool isEncodeGdrClean = false;
+#endif
   m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
   const Slice &slice = *tempCS->slice;
 
@@ -3143,6 +3293,10 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
     pu.cu = &cu;
     pu.cs = tempCS;
     pu.regularMergeFlag = false;
+#if GDR_ENABLED
+    cs = pu.cs;
+    isEncodeGdrClean = cs->sps->getGDREnabledFlag() && cs->pcv->isEncoder && ((cs->picHeader->getInGdrInterval() && cs->isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs->picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
     PU::getAffineMergeCand( pu, affineMergeCtx );
 
     if ( affineMergeCtx.numValidMergeCand <= 0 )
@@ -3233,6 +3387,19 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
           PU::spanMotionInfo( pu );
         }
 
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          Mv zero = Mv(0, 0);
+          bool isValid = cs->isSubPuClean(pu, &zero);
+          affineMergeCtx.mvValid[(uiMergeCand << 1) + 0][0] = isValid;
+          affineMergeCtx.mvValid[(uiMergeCand << 1) + 0][1] = isValid;
+          affineMergeCtx.mvValid[(uiMergeCand << 1) + 0][2] = isValid;
+          affineMergeCtx.mvValid[(uiMergeCand << 1) + 1][0] = isValid;
+          affineMergeCtx.mvValid[(uiMergeCand << 1) + 1][1] = isValid;
+          affineMergeCtx.mvValid[(uiMergeCand << 1) + 1][2] = isValid;
+        }
+#endif
         distParam.cur = acMergeBuffer[uiMergeCand].Y();
 
         m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand], REF_PIC_LIST_X, true, false );
@@ -3244,6 +3411,23 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
           uiBitsCand--;
         }
         double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass;
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          bool isSolid0 = affineMergeCtx.mvSolid[(uiMergeCand << 1) + 0][0] && affineMergeCtx.mvSolid[(uiMergeCand << 1) + 0][1] && affineMergeCtx.mvSolid[(uiMergeCand << 1) + 0][2];
+          bool isSolid1 = affineMergeCtx.mvSolid[(uiMergeCand << 1) + 1][0] && affineMergeCtx.mvSolid[(uiMergeCand << 1) + 1][1] && affineMergeCtx.mvSolid[(uiMergeCand << 1) + 1][2];
+          bool isValid0 = affineMergeCtx.mvValid[(uiMergeCand << 1) + 0][0] && affineMergeCtx.mvValid[(uiMergeCand << 1) + 0][1] && affineMergeCtx.mvValid[(uiMergeCand << 1) + 0][2];
+          bool isValid1 = affineMergeCtx.mvValid[(uiMergeCand << 1) + 1][0] && affineMergeCtx.mvValid[(uiMergeCand << 1) + 1][1] && affineMergeCtx.mvValid[(uiMergeCand << 1) + 1][2];
+
+          bool isSolid = isSolid0 && isSolid1;
+          bool isValid = isValid0 && isValid1;
+
+          if (!isSolid || !isValid)
+          {
+            cost = MAX_DOUBLE;
+          }
+        }
+#endif
         updateCandList( uiMergeCand, cost, RdModeList, candCostList
           , uiNumMrgSATDCand );
 
@@ -3336,11 +3520,59 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
         m_pcInterSearch->motionCompensation( pu );
       }
 
+#if GDR_ENABLED
+      bool isSolid = true;
+      bool isValid = true;
+
+      if (isEncodeGdrClean)
+      {
+        if (bestIsSkip)
+        {
+          Mv zero = Mv(0, 0);
+          bool isSubPuValid = cs->isSubPuClean(pu, &zero);   // distinct name: must not shadow the outer isValid
+          affineMergeCtx.mvValid[(uiMergeCand << 1) + 0][0] = isSubPuValid;
+          affineMergeCtx.mvValid[(uiMergeCand << 1) + 0][1] = isSubPuValid;
+          affineMergeCtx.mvValid[(uiMergeCand << 1) + 0][2] = isSubPuValid;
+          affineMergeCtx.mvValid[(uiMergeCand << 1) + 1][0] = isSubPuValid;
+          affineMergeCtx.mvValid[(uiMergeCand << 1) + 1][1] = isSubPuValid;
+          affineMergeCtx.mvValid[(uiMergeCand << 1) + 1][2] = isSubPuValid;
+        }
+
+        bool isSolid0 = affineMergeCtx.mvSolid[(uiMergeCand << 1) + 0][0] && affineMergeCtx.mvSolid[(uiMergeCand << 1) + 0][1] && affineMergeCtx.mvSolid[(uiMergeCand << 1) + 0][2];
+        bool isSolid1 = affineMergeCtx.mvSolid[(uiMergeCand << 1) + 1][0] && affineMergeCtx.mvSolid[(uiMergeCand << 1) + 1][1] && affineMergeCtx.mvSolid[(uiMergeCand << 1) + 1][2];
+        bool isValid0 = affineMergeCtx.mvValid[(uiMergeCand << 1) + 0][0] && affineMergeCtx.mvValid[(uiMergeCand << 1) + 0][1] && affineMergeCtx.mvValid[(uiMergeCand << 1) + 0][2];
+        bool isValid1 = affineMergeCtx.mvValid[(uiMergeCand << 1) + 1][0] && affineMergeCtx.mvValid[(uiMergeCand << 1) + 1][1] && affineMergeCtx.mvValid[(uiMergeCand << 1) + 1][2];
+
+        isSolid = isSolid0 && isSolid1;
+        isValid = isValid0 && isValid1;
+
+        if (isSolid && isValid)
+        {
+          xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, (uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL));
+        }
+      }
+      else
+      {
+        xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, (uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL));
+      }
+#else
       xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, ( uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL ) );
+#endif
 
       if ( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip )
       {
-        bestIsSkip = bestCS->getCU( partitioner.chType )->rootCbf == 0;
+#if GDR_ENABLED
+        if (bestCS->getCU(partitioner.chType))
+        {
+          bestIsSkip = bestCS->getCU(partitioner.chType)->rootCbf == 0;
+        }
+        else
+        {
+          bestIsSkip = false;
+        }
+#else
+        bestIsSkip = bestCS->getCU(partitioner.chType)->rootCbf == 0;
+#endif
       }
       tempCS->initStructData( encTestMode.qp );
     }// end loop uiMrgHADIdx
@@ -3401,6 +3633,9 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
     mergeCtx.subPuMvpMiBuf = MotionBuf(m_SubPuMiBuf, bufSize);
   }
 
+#if GDR_ENABLED
+  bool gdrClean = true;
+#endif
   {
     // first get merge candidates
     CodingUnit cu(tempCS->area);
@@ -3416,7 +3651,15 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
     pu.regularMergeFlag = false;
     cu.geoFlag = false;
     PU::getIBCMergeCandidates(pu, mergeCtx);
+#if GDR_ENABLED
+    gdrClean = tempCS->isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA) || tempCS->picHeader->getNumVerVirtualBoundaries() == 0;
+#endif
   }
+#if GDR_ENABLED
+  const bool isEncodeGdrClean = tempCS->sps->getGDREnabledFlag() && tempCS->pcv->isEncoder && tempCS->picHeader->getInGdrInterval() && gdrClean;
+  bool *MrgSolid = nullptr;
+  bool *MrgValid = nullptr;
+#endif
 
   int candHasNoResidual[MRG_MAX_NUM_CANDS];
   for (unsigned int ui = 0; ui < mergeCtx.numValidMergeCand; ui++)
@@ -3424,6 +3667,19 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
     candHasNoResidual[ui] = 0;
   }
 
+#if GDR_ENABLED
+  if (isEncodeGdrClean)
+  {
+    MrgSolid = new bool[MRG_MAX_NUM_CANDS];
+    MrgValid = new bool[MRG_MAX_NUM_CANDS];
+    for (int i = 0; i < MRG_MAX_NUM_CANDS; i++)
+    {
+      MrgSolid[i] = false;
+      MrgValid[i] = false;
+    }
+  }
+#endif
+
   bool                                        bestIsSkip = false;
   unsigned                                    numMrgSATDCand = mergeCtx.numValidMergeCand;
   static_vector<unsigned, MRG_MAX_NUM_CANDS>  RdModeList(MRG_MAX_NUM_CANDS);
@@ -3432,106 +3688,142 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
     RdModeList[i] = i;
   }
 
-  //{
-    static_vector<double, MRG_MAX_NUM_CANDS>  candCostList(MRG_MAX_NUM_CANDS, MAX_DOUBLE);
-    // 1. Pass: get SATD-cost for selected candidates and reduce their count
+  static_vector<double, MRG_MAX_NUM_CANDS> candCostList(MRG_MAX_NUM_CANDS, MAX_DOUBLE);
+  // 1. Pass: get SATD-cost for selected candidates and reduce their count
+  {
+    const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda();
+
+    CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, (const ChannelType) partitioner.chType),
+                                   (const ChannelType) partitioner.chType);
+
+    partitioner.setCUData(cu);
+    cu.slice       = tempCS->slice;
+    cu.tileIdx     = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
+    cu.skip        = false;
+    cu.predMode    = MODE_IBC;
+    cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
+    cu.qp          = encTestMode.qp;
+    cu.mmvdSkip    = false;
+    cu.geoFlag     = false;
+    DistParam       distParam;
+    const bool      bUseHadamard = !cu.slice->getDisableSATDForRD();
+    PredictionUnit &pu           = tempCS->addPU(cu, partitioner.chType);   // tempCS->addPU(cu);
+    pu.mmvdMergeFlag             = false;
+    pu.regularMergeFlag          = false;
+    Picture *     refPic         = pu.cu->slice->getPic();
+    const CPelBuf refBuf         = refPic->getRecoBuf(pu.blocks[COMPONENT_Y]);
+    const Pel *   piRefSrch      = refBuf.buf;
+    if (tempCS->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
+    {
+      const CompArea &area = cu.blocks[COMPONENT_Y];
+      CompArea        tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+      PelBuf          tmpLuma = m_tmpStorageLCU->getBuf(tmpArea);
+      tmpLuma.copyFrom(tempCS->getOrgBuf().Y());
+      tmpLuma.rspSignal(m_pcReshape->getFwdLUT());
+      m_pcRdCost->setDistParam(distParam, tmpLuma, refBuf, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y,
+                               bUseHadamard);
+    }
+    else
     {
-      const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( );
+      m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), refBuf, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);
+    }
+    int            refStride = refBuf.stride;
+    const UnitArea localUnitArea(tempCS->area.chromaFormat,
+                                 Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));
+    int            numValidBv = mergeCtx.numValidMergeCand;
+    for (unsigned int mergeCand = 0; mergeCand < mergeCtx.numValidMergeCand; mergeCand++)
+    {
+      mergeCtx.setMergeInfo(pu, mergeCand);   // set bv info in merge mode
+      const int          cuPelX    = pu.Y().x;
+      const int          cuPelY    = pu.Y().y;
+      int                roiWidth  = pu.lwidth();
+      int                roiHeight = pu.lheight();
+      const int          picWidth  = pu.cs->slice->getPPS()->getPicWidthInLumaSamples();
+      const int          picHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples();
+      const unsigned int lcuWidth  = pu.cs->slice->getSPS()->getMaxCUWidth();
+      int                xPred     = pu.bv.getHor();
+      int                yPred     = pu.bv.getVer();
+
+      if (!m_pcInterSearch->searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred,
+                                     lcuWidth))   // not valid bv derived
+      {
+        numValidBv--;
+        continue;
+      }
+      PU::spanMotionInfo(pu, mergeCtx);
 
-      CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, (const ChannelType)partitioner.chType), (const ChannelType)partitioner.chType);
+      distParam.cur.buf = piRefSrch + refStride * yPred + xPred;
 
-      partitioner.setCUData(cu);
-      cu.slice = tempCS->slice;
-      cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
-      cu.skip = false;
-      cu.predMode = MODE_IBC;
-      cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
-      cu.qp = encTestMode.qp;
-      cu.mmvdSkip = false;
-      cu.geoFlag = false;
-      DistParam distParam;
-      const bool bUseHadamard = !cu.slice->getDisableSATDForRD();
-      PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType); //tempCS->addPU(cu);
-      pu.mmvdMergeFlag = false;
-      pu.regularMergeFlag = false;
-      Picture* refPic = pu.cu->slice->getPic();
-      const CPelBuf refBuf = refPic->getRecoBuf(pu.blocks[COMPONENT_Y]);
-      const Pel*        piRefSrch = refBuf.buf;
-      if (tempCS->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
-      {
-        const CompArea &area = cu.blocks[COMPONENT_Y];
-        CompArea    tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
-        PelBuf tmpLuma = m_tmpStorageLCU->getBuf(tmpArea);
-        tmpLuma.copyFrom(tempCS->getOrgBuf().Y());
-        tmpLuma.rspSignal(m_pcReshape->getFwdLUT());
-        m_pcRdCost->setDistParam(distParam, tmpLuma, refBuf, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);
+      Distortion   sad      = distParam.distFunc(distParam);
+      unsigned int bitsCand = mergeCand + 1;
+      if (mergeCand == tempCS->sps->getMaxNumMergeCand() - 1)
+      {
+        bitsCand--;
       }
-      else
-      m_pcRdCost->setDistParam(distParam, tempCS->getOrgBuf().Y(), refBuf, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);
-      int refStride = refBuf.stride;
-      const UnitArea localUnitArea(tempCS->area.chromaFormat, Area(0, 0, tempCS->area.Y().width, tempCS->area.Y().height));
-      int numValidBv = mergeCtx.numValidMergeCand;
-      for (unsigned int mergeCand = 0; mergeCand < mergeCtx.numValidMergeCand; mergeCand++)
-      {
-        mergeCtx.setMergeInfo(pu, mergeCand); // set bv info in merge mode
-        const int cuPelX = pu.Y().x;
-        const int cuPelY = pu.Y().y;
-        int roiWidth = pu.lwidth();
-        int roiHeight = pu.lheight();
-        const int picWidth = pu.cs->slice->getPPS()->getPicWidthInLumaSamples();
-        const int picHeight = pu.cs->slice->getPPS()->getPicHeightInLumaSamples();
-        const unsigned int  lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
-        int xPred = pu.bv.getHor();
-        int yPred = pu.bv.getVer();
-
-        if (!m_pcInterSearch->searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth)) // not valid bv derived
+      double cost = (double) sad + (double) bitsCand * sqrtLambdaForFirstPass;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        bool isSolid = true;
+        bool isValid = true;
+
+        if (mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx >= 0)
         {
-          numValidBv--;
-          continue;
-        }
-        PU::spanMotionInfo(pu, mergeCtx);
+          Mv mv = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].mv;
+          int ridx = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx;
 
-        distParam.cur.buf = piRefSrch + refStride * yPred + xPred;
+          mergeCtx.mvValid[(mergeCand << 1) + 0] = tempCS->isClean(pu.Y().bottomRight(), mv, REF_PIC_LIST_0, ridx, true);
 
-        Distortion sad = distParam.distFunc(distParam);
-        unsigned int bitsCand = mergeCand + 1;
-        if (mergeCand == tempCS->sps->getMaxNumMergeCand() - 1)
+          isSolid = isSolid && mergeCtx.mvSolid[(mergeCand << 1) + 0];
+          isValid = isValid && mergeCtx.mvValid[(mergeCand << 1) + 0];
+        }
+
+        if (mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 1].refIdx >= 0)
         {
-          bitsCand--;
+          Mv mv = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 1].mv;
+          int ridx = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 1].refIdx;
+
+          mergeCtx.mvValid[(mergeCand << 1) + 1] = tempCS->isClean(pu.Y().bottomRight(), mv, REF_PIC_LIST_1, ridx, true);
+
+          isSolid = isSolid && mergeCtx.mvSolid[(mergeCand << 1) + 1];
+          isValid = isValid && mergeCtx.mvValid[(mergeCand << 1) + 1];
         }
-        double cost = (double)sad + (double)bitsCand * sqrtLambdaForFirstPass;
 
-        updateCandList(mergeCand, cost, RdModeList, candCostList
-         , numMrgSATDCand);
+        if (!isValid || !isSolid)
+        {
+          cost = MAX_DOUBLE;
+          numValidBv--;
+        }
       }
+#endif
+      updateCandList(mergeCand, cost, RdModeList, candCostList, numMrgSATDCand);
+    }
 
-      // Try to limit number of candidates using SATD-costs
-      if (numValidBv)
+    // Try to limit number of candidates using SATD-costs
+    if (numValidBv)
+    {
+      numMrgSATDCand = numValidBv;
+      for (unsigned int i = 1; i < numValidBv; i++)
       {
-        numMrgSATDCand = numValidBv;
-        for (unsigned int i = 1; i < numValidBv; i++)
+        if (candCostList[i] > MRG_FAST_RATIO * candCostList[0])
         {
-          if (candCostList[i] > MRG_FAST_RATIO*candCostList[0])
-          {
-            numMrgSATDCand = i;
-            break;
-          }
+          numMrgSATDCand = i;
+          break;
         }
       }
-      else
-      {
-        tempCS->dist = 0;
-        tempCS->fracBits = 0;
-        tempCS->cost = MAX_DOUBLE;
-        tempCS->costDbOffset = 0;
-        tempCS->initStructData(encTestMode.qp);
-        return;
-      }
-
+    }
+    else   // numValidBv == 0: mark tempCS with MAX_DOUBLE cost, reset it, and leave the function
+    {
+      tempCS->dist         = 0;
+      tempCS->fracBits     = 0;
+      tempCS->cost         = MAX_DOUBLE;
+      tempCS->costDbOffset = 0;
       tempCS->initStructData(encTestMode.qp);
+      return;   // NOTE(review): with GDR_ENABLED and isEncodeGdrClean, MrgSolid/MrgValid were new[]-allocated above and no delete[] is visible before this return - confirm they are not leaked
     }
-  //}
 
+    tempCS->initStructData(encTestMode.qp);
+  }
 
   const unsigned int iteration = 2;
   m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
@@ -3546,7 +3838,6 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
         if (!(bestIsSkip && (numResidualPass == 0)))
         {
           {
-
             // first get merge candidates
             CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, (const ChannelType)partitioner.chType), (const ChannelType)partitioner.chType);
 
@@ -3571,12 +3862,62 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
 
             assert(mergeCtx.mrgTypeNeighbours[mergeCand] == MRG_TYPE_IBC); //  should be IBC candidate at this round
             const bool chroma = !pu.cu->isSepTree();
+#if GDR_ENABLED
+            // redo validation again for Skip: refresh mergeCtx.mvValid for each list entry of this candidate that has a reference
+            {
+              CodingStructure &cs = *pu.cs;   // the isClean() queries below go through the PU's own coding structure
+
+              if (isEncodeGdrClean)   // nothing is re-validated unless the GDR clean-area condition holds
+              {
+                if (mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx >= 0)   // entry 2*mergeCand + 0 is checked against REF_PIC_LIST_0
+                {
+                  Mv mv = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].mv;
+                  int ridx = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx;
+
+                  mergeCtx.mvValid[(mergeCand << 1) + 0] = cs.isClean(pu.Y().bottomRight(), mv, REF_PIC_LIST_0, ridx, true);   // queried at the luma block's bottom-right position
+                }
 
+                if (mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 1].refIdx >= 0)   // entry 2*mergeCand + 1 is checked against REF_PIC_LIST_1
+                {
+                  Mv mv = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 1].mv;
+                  int ridx = mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 1].refIdx;
+                  mergeCtx.mvValid[(mergeCand << 1) + 1] = cs.isClean(pu.Y().bottomRight(), mv, REF_PIC_LIST_1, ridx, true);   // same query as above, for the second entry
+                }
+              }
+            }
+#endif
             //  MC
             m_pcInterSearch->motionCompensation(pu,REF_PIC_LIST_0, true, chroma);
             m_CABACEstimator->getCtx() = m_CurrCtx->start;
 
+#if GDR_ENABLED
+            if (isEncodeGdrClean)   // GDR clean-area condition: only encode the residual for candidates whose MVs are flagged solid and valid
+            {
+              bool mvSolid = true;   // AND of mergeCtx.mvSolid over the entries that have a reference
+              bool mvValid = true;   // AND of mergeCtx.mvValid over the entries that have a reference
+              if (mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 0].refIdx >= 0)   // entries with refIdx < 0 do not influence the decision
+              {
+                mvSolid = mvSolid && mergeCtx.mvSolid[(mergeCand << 1) + 0];
+                mvValid = mvValid && mergeCtx.mvValid[(mergeCand << 1) + 0];
+              }
+              if (mergeCtx.mvFieldNeighbours[(mergeCand << 1) + 1].refIdx >= 0)
+              {
+                mvSolid = mvSolid && mergeCtx.mvSolid[(mergeCand << 1) + 1];
+                mvValid = mvValid && mergeCtx.mvValid[(mergeCand << 1) + 1];
+              }
+
+              if (mvSolid && mvValid)   // NOTE(review): when false the call is skipped but the code after #endif still runs - confirm tempCS cannot be picked as best in that case
+              {
+                m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, (numResidualPass != 0), true, chroma);
+              }
+            }
+            else   // condition not active: same unconditional call as the non-GDR build
+            {
+              m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, (numResidualPass != 0), true, chroma);
+            }
+#else
             m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, (numResidualPass != 0), true, chroma);
+#endif
             if (tempCS->slice->getSPS()->getUseColorTrans())
             {
               bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost;
@@ -3602,13 +3943,17 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
             tempCS->initStructData(encTestMode.qp);
           }
 
-            if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
+          if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
+          {
+            if (bestCS->getCU(partitioner.chType) == NULL)
+            {
+              bestIsSkip = 0;
+            }
+            else
             {
-              if (bestCS->getCU(partitioner.chType) == NULL)
-                bestIsSkip = 0;
-              else
               bestIsSkip = bestCS->getCU(partitioner.chType)->rootCbf == 0;
             }
+          }
         }
       }
     }
@@ -3617,6 +3962,13 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
   {
     xCalDebCost( *bestCS, partitioner );
   }
+#if GDR_ENABLED
+  if (isEncodeGdrClean)   // release the per-candidate GDR flag arrays; they are only freed when the GDR clean-area condition holds
+  {
+    delete[] MrgSolid;   // NOTE(review): the earlier "no valid BV" path returns before reaching this cleanup -
+    delete[] MrgValid;   // confirm both arrays are not yet allocated at that point, otherwise they leak there
+  }
+#endif
 }
 
 void EncCu::xCheckRDCostIBCMode(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
@@ -3626,86 +3978,82 @@ void EncCu::xCheckRDCostIBCMode(CodingStructure *&tempCS, CodingStructure *&best
     return;
   }
 
-    tempCS->initStructData(encTestMode.qp);
-
-    m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
+  tempCS->initStructData(encTestMode.qp);
 
-    CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, partitioner.chType), partitioner.chType);
+  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
 
-    partitioner.setCUData(cu);
-    cu.slice = tempCS->slice;
-    cu.tileIdx = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
-    cu.skip = false;
-    cu.predMode = MODE_IBC;
-    cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
-    cu.qp = encTestMode.qp;
-    cu.imv = 0;
-    cu.sbtInfo = 0;
+  CodingUnit &cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, partitioner.chType), partitioner.chType);
 
-    CU::addPUs(cu);
+  partitioner.setCUData(cu);
+  cu.slice       = tempCS->slice;
+  cu.tileIdx     = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
+  cu.skip        = false;
+  cu.predMode    = MODE_IBC;
+  cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
+  cu.qp          = encTestMode.qp;
+  cu.imv         = 0;
+  cu.sbtInfo     = 0;
 
-    m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
+  CU::addPUs(cu);
 
-    PredictionUnit& pu = *cu.firstPU;
-    cu.mmvdSkip = false;
-    pu.mmvdMergeFlag = false;
-    pu.regularMergeFlag = false;
+  m_bestModeUpdated = tempCS->useDbCost = bestCS->useDbCost = false;
 
-    pu.intraDir[0] = DC_IDX; // set intra pred for ibc block
-    pu.intraDir[1] = PLANAR_IDX; // set intra pred for ibc block
+  PredictionUnit &pu  = *cu.firstPU;
+  cu.mmvdSkip         = false;
+  pu.mmvdMergeFlag    = false;
+  pu.regularMergeFlag = false;
 
-    pu.interDir = 1; // use list 0 for IBC mode
-    pu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF; // last idx in the list
-      bool bValid = m_pcInterSearch->predIBCSearch(cu, partitioner, m_ctuIbcSearchRangeX, m_ctuIbcSearchRangeY, m_ibcHashMap);
+  pu.intraDir[0] = DC_IDX;       // set intra pred for ibc block
+  pu.intraDir[1] = PLANAR_IDX;   // set intra pred for ibc block
 
-      if (bValid)
-      {
-        PU::spanMotionInfo(pu);
-        const bool chroma = !pu.cu->isSepTree();
-        //  MC
-        m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_0, true, chroma);
+  pu.interDir               = 1;             // use list 0 for IBC mode
+  pu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF;   // last idx in the list
+  bool bValid =
+    m_pcInterSearch->predIBCSearch(cu, partitioner, m_ctuIbcSearchRangeX, m_ctuIbcSearchRangeY, m_ibcHashMap);
 
-        {
+  if (bValid)
+  {
+    PU::spanMotionInfo(pu);
+    const bool chroma = !pu.cu->isSepTree();
+    //  MC
+    m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_0, true, chroma);
 
-          m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, false, true, chroma);
-          if (tempCS->slice->getSPS()->getUseColorTrans())
-          {
-            bestCS->tmpColorSpaceCost = tempCS->tmpColorSpaceCost;
-            bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
-          }
+    m_pcInterSearch->encodeResAndCalcRdInterCU(*tempCS, partitioner, false, true, chroma);
+    if (tempCS->slice->getSPS()->getUseColorTrans())
+    {
+      bestCS->tmpColorSpaceCost       = tempCS->tmpColorSpaceCost;
+      bestCS->firstColorSpaceSelected = tempCS->firstColorSpaceSelected;
+    }
 
-          xEncodeDontSplit(*tempCS, partitioner);
+    xEncodeDontSplit(*tempCS, partitioner);
 
 #if ENABLE_QPA_SUB_CTU
-          xCheckDQP (*tempCS, partitioner);
+    xCheckDQP(*tempCS, partitioner);
 #else
-          // this if-check is redundant
-          if (tempCS->pps->getUseDQP() && partitioner.currQgEnable())
-          {
-            xCheckDQP(*tempCS, partitioner);
-          }
+    // this if-check is redundant
+    if (tempCS->pps->getUseDQP() && partitioner.currQgEnable())
+    {
+      xCheckDQP(*tempCS, partitioner);
+    }
 #endif
-          xCheckChromaQPOffset( *tempCS, partitioner );
+    xCheckChromaQPOffset(*tempCS, partitioner);
 
-          tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
-          if ( m_bestModeUpdated )
-          {
-            xCalDebCost( *tempCS, partitioner );
-          }
-
-          DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda());
-          xCheckBestMode(tempCS, bestCS, partitioner, encTestMode);
-
-        }
+    tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
+    if (m_bestModeUpdated)
+    {
+      xCalDebCost(*tempCS, partitioner);
+    }
 
-      } // bValid
-      else
-      {
-        tempCS->dist = 0;
-        tempCS->fracBits = 0;
-        tempCS->cost = MAX_DOUBLE;
-        tempCS->costDbOffset = 0;
-      }
+    DTRACE_MODE_COST(*tempCS, m_pcRdCost->getLambda());
+    xCheckBestMode(tempCS, bestCS, partitioner, encTestMode);
+  }   // bValid
+  else
+  {
+    tempCS->dist         = 0;
+    tempCS->fracBits     = 0;
+    tempCS->cost         = MAX_DOUBLE;
+    tempCS->costDbOffset = 0;
+  }
 }
   // check ibc mode in encoder RD
   //////////////////////////////////////////////////////////////////////////////////////////////
@@ -3756,80 +4104,165 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC
       }
     }
 
-  CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );
+    CodingUnit &cu = tempCS->addCU(tempCS->area, partitioner.chType);
 
-  partitioner.setCUData( cu );
-  cu.slice            = tempCS->slice;
-  cu.tileIdx          = tempCS->pps->getTileIdx( tempCS->area.lumaPos() );
-  cu.skip             = false;
-  cu.mmvdSkip = false;
-//cu.affine
-  cu.predMode         = MODE_INTER;
-  cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
-  cu.qp               = encTestMode.qp;
-  CU::addPUs( cu );
+    partitioner.setCUData(cu);
+    cu.slice    = tempCS->slice;
+    cu.tileIdx  = tempCS->pps->getTileIdx(tempCS->area.lumaPos());
+    cu.skip     = false;
+    cu.mmvdSkip = false;
+    // cu.affine
+    cu.predMode    = MODE_INTER;
+    cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
+    cu.qp          = encTestMode.qp;
+    CU::addPUs(cu);
 
-  cu.BcwIdx = g_BcwSearchOrder[bcwLoopIdx];
-  uint8_t bcwIdx = cu.BcwIdx;
-  bool  testBcw = (bcwIdx != BCW_DEFAULT);
+    cu.BcwIdx       = g_BcwSearchOrder[bcwLoopIdx];
+    uint8_t bcwIdx  = cu.BcwIdx;
+    bool    testBcw = (bcwIdx != BCW_DEFAULT);
 
-  m_pcInterSearch->predInterSearch( cu, partitioner );
+#if GDR_ENABLED
+  const bool isEncodeGdrClean = tempCS->sps->getGDREnabledFlag() && tempCS->pcv->isEncoder && ((tempCS->picHeader->getInGdrInterval() && tempCS->isClean(cu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (tempCS->picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
+    m_pcInterSearch->predInterSearch(cu, partitioner);
 
-  bcwIdx = CU::getValidBcwIdx(cu);
-  if( testBcw && bcwIdx == BCW_DEFAULT ) // Enabled Bcw but the search results is uni.
-  {
-    tempCS->initStructData(encTestMode.qp);
-    continue;
-  }
-  CHECK(!(testBcw || (!testBcw && bcwIdx == BCW_DEFAULT)), " !( bTestBcw || (!bTestBcw && bcwIdx == BCW_DEFAULT ) )");
+    bcwIdx = CU::getValidBcwIdx(cu);
+    if (testBcw && bcwIdx == BCW_DEFAULT)   // Enabled Bcw but the search results is uni.
+    {
+      tempCS->initStructData(encTestMode.qp);
+      continue;
+    }
+    CHECK(!(testBcw || (!testBcw && bcwIdx == BCW_DEFAULT)), " !( bTestBcw || (!bTestBcw && bcwIdx == BCW_DEFAULT ) )");
 
-  bool isEqualUni = false;
-  if( m_pcEncCfg->getUseBcwFast() )
-  {
-    if( cu.firstPU->interDir != 3 && testBcw == 0 )
+    bool isEqualUni = false;
+    if (m_pcEncCfg->getUseBcwFast())
     {
-      isEqualUni = true;
+      if (cu.firstPU->interDir != 3 && testBcw == 0)
+      {
+        isEqualUni = true;
+      }
     }
-  }
 
-  xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0
-                        , 0
-                        , &equBcwCost
-  );
+#if GDR_ENABLED
+    // 2.0 xCheckRDCostInter: check residual (compare with bestCS), but under GDR only when the MVs selected by interDir are flagged solid and valid
+    if (isEncodeGdrClean)
+    {
+      bool isClean = true;   // cleared below when a flag required by interDir is false
+
+      if (cu.affine && cu.firstPU)   // affine CU: use the mvAffiSolid / mvAffiValid flag arrays
+      {
+        bool L0ok = true, L1ok = true, L3ok = true;   // L0ok / L1ok: first / second list flags, L3ok: both
+
+        L0ok = L0ok && cu.firstPU->mvAffiSolid[0][0] && cu.firstPU->mvAffiSolid[0][1] && cu.firstPU->mvAffiSolid[0][2];   // entries [0..2] must all be set (presumably the affine control points - confirm)
+        L0ok = L0ok && cu.firstPU->mvAffiValid[0][0] && cu.firstPU->mvAffiValid[0][1] && cu.firstPU->mvAffiValid[0][2];
+
+        L1ok = L1ok && cu.firstPU->mvAffiSolid[1][0] && cu.firstPU->mvAffiSolid[1][1] && cu.firstPU->mvAffiSolid[1][2];
+        L1ok = L1ok && cu.firstPU->mvAffiValid[1][0] && cu.firstPU->mvAffiValid[1][1] && cu.firstPU->mvAffiValid[1][2];
 
-  if( g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT )
+        L3ok = L0ok && L1ok;
+
+        if (cu.firstPU->interDir == 1 && !L0ok)   // interDir 1: only the index-0 flags are required
+        {
+          isClean = false;
+        }
+        if (cu.firstPU->interDir == 2 && !L1ok)   // interDir 2: only the index-1 flags are required
+        {
+          isClean = false;
+        }
+        if (cu.firstPU->interDir == 3 && !L3ok)   // interDir 3: both sets are required
+        {
+          isClean = false;
+        }
+      }
+      else if (cu.firstPU)   // non-affine CU: same decision using the per-list mvSolid / mvValid flags
+      {
+        bool L0ok = true;
+        bool L1ok = true;
+        bool L3ok = true;
+
+        L0ok = L0ok && cu.firstPU->mvSolid[0];
+        L0ok = L0ok && cu.firstPU->mvValid[0];
+
+        L1ok = L1ok && cu.firstPU->mvSolid[1];
+        L1ok = L1ok && cu.firstPU->mvValid[1];
+
+        L3ok = L0ok && L1ok;
+
+        if (cu.firstPU->interDir == 1 && !L0ok)
+        {
+          isClean = false;
+        }
+        if (cu.firstPU->interDir == 2 && !L1ok)
+        {
+          isClean = false;
+        }
+        if (cu.firstPU->interDir == 3 && !L3ok)
+        {
+          isClean = false;
+        }
+      }
+      else   // no PU attached to the CU: treated as not clean
+      {
+        isClean = false;
+      }
+
+      if (isClean)   // otherwise the residual check is skipped for this bcwLoopIdx iteration
+      {
+        xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0
+          , 0
+          , &equBcwCost
+        );
+      }
+    }
+    else   // GDR clean-area condition not active: always run the residual check
+    {
+      xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0
+        , 0
+        , &equBcwCost
+      );
+    }
+#else
+    xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, 0, &equBcwCost);
+#endif
+
+#if GDR_ENABLED
+  if (g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT && bestCS->cus.size() > 0)
     m_pcInterSearch->setAffineModeSelected((bestCS->cus.front()->affine && !(bestCS->cus.front()->firstPU->mergeFlag)));
+#else
+    if (g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT)
+    {
+      m_pcInterSearch->setAffineModeSelected(
+        (bestCS->cus.front()->affine && !(bestCS->cus.front()->firstPU->mergeFlag)));
+    }
+#endif
 
-  tempCS->initStructData(encTestMode.qp);
+    tempCS->initStructData(encTestMode.qp);
 
-  double skipTH = MAX_DOUBLE;
-  skipTH = (m_pcEncCfg->getUseBcwFast() ? 1.05 : MAX_DOUBLE);
-  if( equBcwCost > curBestCost * skipTH )
-  {
-    break;
-  }
+    double skipTH = MAX_DOUBLE;
+    skipTH        = (m_pcEncCfg->getUseBcwFast() ? 1.05 : MAX_DOUBLE);
+    if (equBcwCost > curBestCost * skipTH)
+    {
+      break;
+    }
 
-  if( m_pcEncCfg->getUseBcwFast() )
-  {
-    if( isEqualUni == true && m_pcEncCfg->getIntraPeriod() == -1 )
+    if (m_pcEncCfg->getUseBcwFast())
+    {
+      if (isEqualUni == true && m_pcEncCfg->getIntraPeriod() == -1)
+      {
+        break;
+      }
+    }
+    if (g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT && xIsBcwSkip(cu) && m_pcEncCfg->getUseBcwFast())
     {
       break;
     }
-  }
-  if( g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT && xIsBcwSkip(cu) && m_pcEncCfg->getUseBcwFast() )
-  {
-    break;
-  }
- }  // for( UChar bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ )
+  }   // for( UChar bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ )
   if ( m_bestModeUpdated && bestCS->cost != MAX_DOUBLE )
   {
     xCalDebCost( *bestCS, partitioner );
   }
 }
 
-
-
-
 bool EncCu::xCheckRDCostInterIMV(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, double &bestIntPelCost)
 {
   int iIMV = int( ( encTestMode.opts & ETO_IMV ) >> ETO_IMV_SHIFT );
@@ -3906,6 +4339,9 @@ bool EncCu::xCheckRDCostInterIMV(CodingStructure *&tempCS, CodingStructure *&bes
 
   CU::addPUs( cu );
 
+#if GDR_ENABLED
+    const bool isEncodeGdrClean = tempCS->sps->getGDREnabledFlag() && tempCS->pcv->isEncoder && ((tempCS->picHeader->getInGdrInterval() && tempCS->isClean(cu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (tempCS->picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
   if (testAltHpelFilter)
   {
     cu.imv = IMV_HPEL;
@@ -3977,10 +4413,79 @@ bool EncCu::xCheckRDCostInterIMV(CodingStructure *&tempCS, CodingStructure *&bes
     }
   }
 
-  xEncodeInterResidual( tempCS, bestCS, partitioner, encTestModeBase, 0
-                        , 0
-                        , &equBcwCost
-  );
+#if GDR_ENABLED
+  // GDR: check residual (compare with bestCS) only when the MVs selected by interDir are flagged solid and valid
+  if (isEncodeGdrClean)
+  {
+    bool isClean = true;   // cleared below when a flag required by interDir is false
+
+    if (cu.affine && cu.firstPU)   // affine CU: use the mvAffiSolid / mvAffiValid flag arrays
+    {
+      bool L0ok = true, L1ok = true, L3ok = true;   // L0ok / L1ok: first / second list flags, L3ok: both
+
+      L0ok = L0ok && cu.firstPU->mvAffiSolid[0][0] && cu.firstPU->mvAffiSolid[0][1] && cu.firstPU->mvAffiSolid[0][2];
+      L0ok = L0ok && cu.firstPU->mvAffiValid[0][0] && cu.firstPU->mvAffiValid[0][1] && cu.firstPU->mvAffiValid[0][2];
+
+      L1ok = L1ok && cu.firstPU->mvAffiSolid[1][0] && cu.firstPU->mvAffiSolid[1][1] && cu.firstPU->mvAffiSolid[1][2];
+      L1ok = L1ok && cu.firstPU->mvAffiValid[1][0] && cu.firstPU->mvAffiValid[1][1] && cu.firstPU->mvAffiValid[1][2];
+
+      L3ok = L0ok && L1ok;
+
+      if (cu.firstPU->interDir == 1 && !L0ok)   // interDir 1: only the index-0 flags are required
+      {
+        isClean = false;
+      }
+      if (cu.firstPU->interDir == 2 && !L1ok)   // interDir 2: only the index-1 flags are required
+      {
+        isClean = false;
+      }
+      if (cu.firstPU->interDir == 3 && !L3ok)   // interDir 3: both sets are required
+      {
+        isClean = false;
+      }
+    }
+    else if (cu.firstPU)   // non-affine CU: same decision using the per-list mvSolid / mvValid flags
+    {
+      bool L0ok = cu.firstPU->mvSolid[0] && cu.firstPU->mvValid[0];
+      bool L1ok = cu.firstPU->mvSolid[1] && cu.firstPU->mvValid[1];
+      bool L3ok = L0ok && L1ok;
+
+      if (cu.firstPU->interDir == 1 && !L0ok)
+      {
+        isClean = false;
+      }
+      if (cu.firstPU->interDir == 2 && !L1ok)
+      {
+        isClean = false;
+      }
+      if (cu.firstPU->interDir == 3 && !L3ok)
+      {
+        isClean = false;
+      }
+    }
+    else   // no PU attached to the CU: treated as not clean
+    {
+      isClean = false;
+    }
+
+    if (isClean)   // otherwise the residual check is skipped
+    {
+      xEncodeInterResidual(tempCS, bestCS, partitioner, encTestModeBase, 0   // encTestModeBase, matching the non-GDR call below
+        , 0
+        , &equBcwCost
+      );
+    }
+  }
+  else   // GDR clean-area condition not active: must behave exactly like the non-GDR build
+  {
+    xEncodeInterResidual(tempCS, bestCS, partitioner, encTestModeBase, 0
+      , 0
+      , &equBcwCost
+    );
+  }
+#else
+  xEncodeInterResidual(tempCS, bestCS, partitioner, encTestModeBase, 0, 0, &equBcwCost);
+#endif
 
   if( cu.imv == IMV_FPEL && tempCS->cost < bestIntPelCost )
   {
@@ -4029,7 +4534,7 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
     return;
   }
 
-  m_pcLoopFilter->setEnc(true);
+  m_deblockingFilter->setEnc(true);
   const ChromaFormat format = cs.area.chromaFormat;
   CodingUnit*                cu = cs.getCU(partitioner.chType);
   const Position lumaPos = cu->Y().valid() ? cu->Y().pos() : recalcPosition( format, cu->chType, CHANNEL_TYPE_LUMA, cu->blocks[cu->chType].pos() );
@@ -4040,7 +4545,6 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
 
   if ( calDist )
   {
-    const UnitArea currCsArea = clipArea( CS::getArea( cs, cs.area, partitioner.chType ), *cs.picture );
     ComponentID compStr = ( cu->isSepTree() && !isLuma( partitioner.chType ) ) ? COMPONENT_Cb : COMPONENT_Y;
     ComponentID compEnd = ( cu->isSepTree() && isLuma( partitioner.chType ) ) ? COMPONENT_Y : COMPONENT_Cr;
     Distortion finalDistortion = 0;
@@ -4049,7 +4553,7 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
       const ComponentID compID = ComponentID( comp );
       CPelBuf org = cs.getOrgBuf( compID );
       CPelBuf reco = cs.getRecoBuf( compID );
-      finalDistortion += getDistortionDb( cs, org, reco, compID, currCsArea.block( compID ), false );
+      finalDistortion += getDistortionDb(cs, org, reco, compID, cs.area.block(COMPONENT_Y), false);
     }
     //updated distortion
     cs.dist = finalDistortion;
@@ -4060,9 +4564,9 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
     ComponentID compStr = ( cu->isSepTree() && !isLuma( partitioner.chType ) ) ? COMPONENT_Cb : COMPONENT_Y;
     ComponentID compEnd = ( cu->isSepTree() &&  isLuma( partitioner.chType ) ) ? COMPONENT_Y : COMPONENT_Cr;
 
-    const UnitArea currCsArea = clipArea( CS::getArea( cs, cs.area, partitioner.chType ), *cs.picture );
+    const UnitArea currCsArea = clipArea(cs.area, *cs.picture);
 
-    PelStorage&          picDbBuf = m_pcLoopFilter->getDbEncPicYuvBuffer();
+    PelStorage&          picDbBuf = m_deblockingFilter->getDbEncPicYuvBuffer();
 
     //deblock neighbour pixels
     const Size     lumaSize = cu->Y().valid() ? cu->Y().size() : recalcSize( format, cu->chType, CHANNEL_TYPE_LUMA, cu->blocks[cu->chType].size() );
@@ -4108,14 +4612,14 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
     //deblock
     if ( leftEdgeAvai )
     {
-      m_pcLoopFilter->resetFilterLengths();
-      m_pcLoopFilter->xDeblockCU( *cu, EDGE_VER );
+      m_deblockingFilter->resetFilterLengths();
+      m_deblockingFilter->xDeblockCU( *cu, EDGE_VER );
     }
 
     if (topEdgeAvai)
     {
-      m_pcLoopFilter->resetFilterLengths();
-      m_pcLoopFilter->xDeblockCU( *cu, EDGE_HOR );
+      m_deblockingFilter->resetFilterLengths();
+      m_deblockingFilter->xDeblockCU( *cu, EDGE_HOR );
     }
 
     //update current CU SSE
@@ -4125,7 +4629,7 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
       ComponentID compId = (ComponentID)compIdx;
       CPelBuf reco = picDbBuf.getBuf( currCsArea.block( compId ) );
       CPelBuf org = cs.getOrgBuf( compId );
-      distCur += getDistortionDb( cs, org, reco, compId, currCsArea.block( compId ), true );
+      distCur += getDistortionDb(cs, org, reco, compId, currCsArea.block(COMPONENT_Y), true);
     }
 
     //calculate difference between DB_before_SSE and DB_after_SSE for neighbouring CUs
@@ -4139,8 +4643,8 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
         CPelBuf org = cs.picture->getOrigBuf( compArea );
         CPelBuf reco = cs.picture->getRecoBuf( compArea );
         CPelBuf recoDb = picDbBuf.getBuf( compArea );
-        distBeforeDb += getDistortionDb( cs, org, reco,   compId, compArea, false );
-        distAfterDb  += getDistortionDb( cs, org, recoDb, compId, compArea, true  );
+        distBeforeDb += getDistortionDb(cs, org, reco, compId, areaLeft.block(COMPONENT_Y), false);
+        distAfterDb += getDistortionDb(cs, org, recoDb, compId, areaLeft.block(COMPONENT_Y), true);
       }
       if ( topEdgeAvai )
       {
@@ -4148,8 +4652,8 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
         CPelBuf org = cs.picture->getOrigBuf( compArea );
         CPelBuf reco = cs.picture->getRecoBuf( compArea );
         CPelBuf recoDb = picDbBuf.getBuf( compArea );
-        distBeforeDb += getDistortionDb( cs, org, reco,   compId, compArea, false );
-        distAfterDb  += getDistortionDb( cs, org, recoDb, compId, compArea, true  );
+        distBeforeDb += getDistortionDb(cs, org, reco, compId, areaTop.block(COMPONENT_Y), false);
+        distAfterDb += getDistortionDb(cs, org, recoDb, compId, areaTop.block(COMPONENT_Y), true);
       }
     }
 
@@ -4160,7 +4664,7 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
     cs.costDbOffset = sign * m_pcRdCost->calcRdCost( 0, distTmp );
   }
 
-  m_pcLoopFilter->setEnc( false );
+  m_deblockingFilter->setEnc( false );
 }
 
 Distortion EncCu::getDistortionDb( CodingStructure &cs, CPelBuf org, CPelBuf reco, ComponentID compID, const CompArea& compArea, bool afterDb )
@@ -4168,7 +4672,7 @@ Distortion EncCu::getDistortionDb( CodingStructure &cs, CPelBuf org, CPelBuf rec
   Distortion dist = 0;
 #if WCG_EXT
   m_pcRdCost->setChromaFormat(cs.sps->getChromaFormatIdc());
-  CPelBuf orgLuma = cs.picture->getOrigBuf( cs.area.blocks[COMPONENT_Y] );
+  CPelBuf orgLuma = cs.picture->getOrigBuf(compArea);
   if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
     m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
   {
@@ -4257,7 +4761,9 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
         mvShift = normalShiftTab[cu->imv];
         Mv signaledmvd(pu.mvd[refList].getHor() >> mvShift, pu.mvd[refList].getVer() >> mvShift);
         if (!((signaledmvd.getHor() >= MVD_MIN) && (signaledmvd.getHor() <= MVD_MAX)) || !((signaledmvd.getVer() >= MVD_MIN) && (signaledmvd.getVer() <= MVD_MAX)))
+        {
           return;
+        }
       }
       else
       {
@@ -4266,7 +4772,9 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
           mvShift = affineShiftTab[cu->imv];
           Mv signaledmvd(pu.mvdAffi[refList][ctrlP].getHor() >> mvShift, pu.mvdAffi[refList][ctrlP].getVer() >> mvShift);
           if (!((signaledmvd.getHor() >= MVD_MIN) && (signaledmvd.getHor() <= MVD_MAX)) || !((signaledmvd.getVer() >= MVD_MIN) && (signaledmvd.getVer() <= MVD_MAX)))
+          {
             return;
+          }
         }
       }
     }
@@ -4617,11 +5125,12 @@ void EncCu::xEncodeDontSplit( CodingStructure &cs, Partitioner &partitioner )
 
   m_CABACEstimator->split_cu_mode( CU_DONT_SPLIT, cs, partitioner );
   if( partitioner.treeType == TREE_C )
+  {
     CHECK( m_CABACEstimator->getEstFracBits() != 0, "must be 0 bit" );
+  }
 
   cs.fracBits += m_CABACEstimator->getEstFracBits(); // split bits
   cs.cost      = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist );
-
 }
 
 #if REUSE_CU_RESULTS
@@ -4637,9 +5146,7 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best
     CodingUnit& cu = *tempCS->cus.front();
     partitioner.setCUData( cu );
 
-    if( CU::isIntra( cu )
-    || CU::isPLT(cu)
-    )
+    if (CU::isIntra(cu) || CU::isPLT(cu))
     {
       xReconIntraQT( cu );
     }
@@ -4658,42 +5165,48 @@ void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&best
     }
     else
     {
-    const SPS &sps = *tempCS->sps;
-    const int  numValidComponents = getNumberValidComponents( tempCS->area.chromaFormat );
-
-    for( int comp = 0; comp < numValidComponents; comp++ )
-    {
-      const ComponentID compID = ComponentID( comp );
+      const SPS &sps                = *tempCS->sps;
+      const int  numValidComponents = getNumberValidComponents(tempCS->area.chromaFormat);
 
-      if( partitioner.isSepTree( *tempCS ) && toChannelType( compID ) != partitioner.chType )
+      for (int comp = 0; comp < numValidComponents; comp++)
       {
-        continue;
-      }
+        const ComponentID compID = ComponentID(comp);
 
-      CPelBuf reco = tempCS->getRecoBuf( compID );
-      CPelBuf org  = tempCS->getOrgBuf ( compID );
+        if (partitioner.isSepTree(*tempCS) && toChannelType(compID) != partitioner.chType)
+        {
+          continue;
+        }
+
+        CPelBuf reco = tempCS->getRecoBuf(compID);
+        CPelBuf org  = tempCS->getOrgBuf(compID);
 
 #if WCG_EXT
-      if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
-        m_pcEncCfg->getLmcs() && (tempCS->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
-      {
-        const CPelBuf orgLuma = tempCS->getOrgBuf(tempCS->area.blocks[COMPONENT_Y]);
-        if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
+        if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()
+            || (m_pcEncCfg->getLmcs() && (tempCS->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
         {
-          const CompArea &area = cu.blocks[COMPONENT_Y];
-          CompArea    tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
-          PelBuf tmpRecLuma = m_tmpStorageLCU->getBuf(tmpArea);
-          tmpRecLuma.copyFrom(reco);
-          tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
-          finalDistortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
+          const CPelBuf orgLuma = tempCS->getOrgBuf(tempCS->area.blocks[COMPONENT_Y]);
+          if (compID == COMPONENT_Y && !(m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled()))
+          {
+            const CompArea &area = cu.blocks[COMPONENT_Y];
+            CompArea        tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+            PelBuf          tmpRecLuma = m_tmpStorageLCU->getBuf(tmpArea);
+            tmpRecLuma.copyFrom(reco);
+            tmpRecLuma.rspSignal(m_pcReshape->getInvLUT());
+            finalDistortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID,
+                                                       DF_SSE_WTD, &orgLuma);
+          }
+          else
+          {
+            finalDistortion +=
+              m_pcRdCost->getDistPart(org, reco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
+          }
         }
         else
-        finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE_WTD, &orgLuma );
-      }
-      else
 #endif
-      finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE );
-    }
+        {
+          finalDistortion += m_pcRdCost->getDistPart(org, reco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE);
+        }
+      }
     }
 
     m_CABACEstimator->getCtx() = m_CurrCtx->start;
diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h
index 07d4848fa5423df234e300b69f652635cbd80326..612e212ce28ec0f20624dee6c1db8fb82727eb59 100644
--- a/source/Lib/EncoderLib/EncCu.h
+++ b/source/Lib/EncoderLib/EncCu.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -46,7 +46,7 @@
 #include "CommonLib/Unit.h"
 #include "CommonLib/UnitPartitioner.h"
 #include "CommonLib/IbcHashMap.h"
-#include "CommonLib/LoopFilter.h"
+#include "CommonLib/DeblockingFilter.h"
 
 #include "DecoderLib/DecCu.h"
 
@@ -88,16 +88,18 @@ struct SmallerThanComboCost
 {
   inline bool operator() (const GeoMergeCombo& first, const GeoMergeCombo& second)
   {
-      return (first.cost < second.cost);
+    return (first.cost < second.cost);
   }
 };
+
 class GeoComboCostList
 {
 public:
   GeoComboCostList() {};
   ~GeoComboCostList() {};
-  std::vector<GeoMergeCombo> list;
-  void sortByCost() { std::sort(list.begin(), list.end(), SmallerThanComboCost()); };
+  std::vector<GeoMergeCombo> list;  
+  
+  void sortByCost() { std::stable_sort(list.begin(), list.end(), SmallerThanComboCost()); };
 };
 struct SingleGeoMergeEntry
 {
@@ -161,10 +163,6 @@ private:
   CtxPair*              m_CurrCtx;
   CtxCache*             m_CtxCache;
 
-#if ENABLE_SPLIT_PARALLELISM
-  int                   m_dataId;
-#endif
-
   //  Data : encoder control
   int                   m_cuChromaQpOffsetIdxPlus1; // if 0, then cu_chroma_qp_offset_flag will be 0, otherwise cu_chroma_qp_offset_flag will be 1.
 
@@ -181,7 +179,7 @@ private:
   TrQuant*              m_pcTrQuant;
   RdCost*               m_pcRdCost;
   EncSlice*             m_pcSliceEncoder;
-  LoopFilter*           m_pcLoopFilter;
+  DeblockingFilter*     m_deblockingFilter;
 
   CABACWriter*          m_CABACEstimator;
   RateCtrl*             m_pcRateCtrl;
@@ -199,9 +197,6 @@ private:
 
   int                   m_ctuIbcSearchRangeX;
   int                   m_ctuIbcSearchRangeY;
-#if ENABLE_SPLIT_PARALLELISM
-  EncLib*               m_pcEncLib;
-#endif
   int                   m_bestBcwIdx[2];
   double                m_bestBcwCost[2];
   GeoMotionInfo         m_GeoModeTest[GEO_MAX_NUM_CANDS];
@@ -215,7 +210,7 @@ private:
   double                m_sbtCostSave[2];
 public:
   /// copy parameters from encoder class
-  void  init                ( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int jId = 0 ) );
+  void  init                ( EncLib* pcEncLib, const SPS& sps );
 
   void setDecCuReshaperInEncCU(EncReshape* pcReshape, ChromaFormat chromaFormatIDC) { initDecCuReshaper((Reshape*) pcReshape, chromaFormatIDC); }
   /// create internal buffers
@@ -248,10 +243,6 @@ protected:
   Distortion getDistortionDb  ( CodingStructure &cs, CPelBuf org, CPelBuf reco, ComponentID compID, const CompArea& compArea, bool afterDb );
 
   void xCompressCU            ( CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& pm, double maxCostAllowed = MAX_DOUBLE );
-#if ENABLE_SPLIT_PARALLELISM
-  void xCompressCUParallel    ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm );
-  void copyState              ( EncCu* other, Partitioner& pm, const UnitArea& currArea, const bool isDist );
-#endif
 
   bool
     xCheckBestMode         ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestmode );
@@ -262,7 +253,6 @@ protected:
 
   void xCheckDQP              ( CodingStructure& cs, Partitioner& partitioner, bool bKeepCtx = false);
   void xCheckChromaQPOffset   ( CodingStructure& cs, Partitioner& partitioner);
-  void xFillPCMBuffer         ( CodingUnit &cu);
 
   void xCheckRDCostHashInter  ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode );
   void xCheckRDCostAffineMerge2Nx2N
diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp
index 092c2195b40c23c677ebddd5a8dade3406a2d5ec..dc9b083f8dfbe94098cc7fde1f575df7d8f2b7fd 100644
--- a/source/Lib/EncoderLib/EncGOP.cpp
+++ b/source/Lib/EncoderLib/EncGOP.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -59,8 +59,6 @@
 
 #include "DecoderLib/DecLib.h"
 
-#define ENCODE_SUB_SET 0
-
 using namespace std;
 
 //! \ingroup EncoderLib
@@ -89,7 +87,12 @@ EncGOP::EncGOP()
   m_bFirst              = true;
   m_iLastRecoveryPicPOC = 0;
   m_latestDRAPPOC       = MAX_INT;
+  m_latestEDRAPPOC      = MAX_INT;
+  m_latestEdrapLeadingPicDecodableFlag = false;
   m_lastRasPoc          = MAX_INT;
+  ::memset(m_riceBit, 0, 8 * 2 * sizeof(unsigned));
+  m_preQP[0] = m_preQP[1] = MAX_INT;   // assign per element: memset() fills bytes (0xFF -> -1), it cannot store MAX_INT
+  m_preIPOC             = 0;
 
   m_pcCfg               = NULL;
   m_pcSliceEncoder      = NULL;
@@ -139,6 +142,10 @@ EncGOP::EncGOP()
   m_isUseLTRef = false;
   m_isPrepareLTRef = true;
   m_lastLTRefPoc = 0;
+#if JVET_W0046_RLSCP
+  m_cnt_right_bottom = 0;
+  m_cnt_right_bottom_i = 0;
+#endif
 }
 
 EncGOP::~EncGOP()
@@ -212,7 +219,7 @@ void EncGOP::init ( EncLib* pcEncLib )
   m_pcSliceEncoder       = pcEncLib->getSliceEncoder();
   m_pcListPic            = pcEncLib->getListPic();
   m_HLSWriter            = pcEncLib->getHLSWriter();
-  m_pcLoopFilter         = pcEncLib->getLoopFilter();
+  m_pcLoopFilter         = pcEncLib->getDeblockingFilter();
   m_pcSAO                = pcEncLib->getSAO();
   m_pcALF                = pcEncLib->getALF();
   m_pcRateCtrl           = pcEncLib->getRateCtrl();
@@ -233,6 +240,10 @@ void EncGOP::init ( EncLib* pcEncLib )
     pcEncLib->getRdCost()->setReshapeInfo(RESHAPE_SIGNAL_PQ, m_pcCfg->getBitDepth(CHANNEL_TYPE_LUMA));
     pcEncLib->getRdCost()->initLumaLevelToWeightTableReshape();
   }
+  else if (m_pcCfg->getPrintWPSNR())
+  {
+    pcEncLib->getRdCost()->initLumaLevelToWeightTable(m_pcCfg->getBitDepth(CHANNEL_TYPE_LUMA));
+  }
   pcEncLib->getALF()->getLumaLevelWeightTable() = pcEncLib->getRdCost()->getLumaLevelWeightTable();
   int alfWSSD = 0;
   if (m_pcCfg->getLmcs() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ )
@@ -308,9 +319,11 @@ void EncGOP::init ( EncLib* pcEncLib )
     m_pcTransferFct       = hdrtoolslib::TransferFunction::create(hdrtoolslib::TF_PQ, true, (float) maxSampleValue, 0, 0.0, 1.0, enableTFunctionLUT);
   }
 #endif
+#if GDR_ENABLED
+  m_lastGdrIntervalPoc = -1;
+#endif
 }
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
 int EncGOP::xWriteOPI (AccessUnit &accessUnit, const OPI *opi)
 {
   OutputNALUnit nalu(NAL_UNIT_OPI);
@@ -320,7 +333,6 @@ int EncGOP::xWriteOPI (AccessUnit &accessUnit, const OPI *opi)
   accessUnit.push_back(new NALUnitEBSP(nalu));
   return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8;
 }
-#endif
 
 int EncGOP::xWriteVPS (AccessUnit &accessUnit, const VPS *vps)
 {
@@ -372,6 +384,12 @@ int EncGOP::xWriteAPS( AccessUnit &accessUnit, APS *aps, const int layerId, cons
   nalu.m_temporalId = aps->getTemporalId();
   aps->setLayerId( layerId );
   CHECK( nalu.m_temporalId < accessUnit.temporalId, "TemporalId shall be greater than or equal to the TemporalId of the layer access unit containing the NAL unit" );
+
+#if GDR_ENC_TRACE
+  if (aps)
+    printf("-aps ty:%d id:%d\n", aps->getAPSType(), aps->getAPSId());
+#endif
+
   m_HLSWriter->codeAPS(aps);
   accessUnit.push_back(new NALUnitEBSP(nalu));
   return (int)(accessUnit.back()->m_nalUnitData.str().size()) * 8;
@@ -385,12 +403,10 @@ int EncGOP::xWriteParameterSets(AccessUnit &accessUnit, Slice *slice, const bool
   {
     if (layerIdx == 0)
     {
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
       if (m_pcCfg->getOPIEnabled())
       {
         actualTotalBits += xWriteOPI(accessUnit, m_pcEncLib->getOPI());
       }
-#endif
       if (m_pcCfg->getDCIEnabled())
       {
         actualTotalBits += xWriteDCI(accessUnit, m_pcEncLib->getDCI());
@@ -514,7 +530,6 @@ void EncGOP::xWriteLeadingSEIOrdered (SEIMessages& seiMessages, SEIMessages& duI
 {
   AccessUnit::iterator itNalu = accessUnit.begin();
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   while ((itNalu != accessUnit.end()) &&
     ((*itNalu)->m_nalUnitType == NAL_UNIT_ACCESS_UNIT_DELIMITER
       || (*itNalu)->m_nalUnitType == NAL_UNIT_OPI
@@ -523,15 +538,6 @@ void EncGOP::xWriteLeadingSEIOrdered (SEIMessages& seiMessages, SEIMessages& duI
       || (*itNalu)->m_nalUnitType == NAL_UNIT_SPS
       || (*itNalu)->m_nalUnitType == NAL_UNIT_PPS
       ))
-#else
-  while ((itNalu != accessUnit.end()) &&
-    ((*itNalu)->m_nalUnitType == NAL_UNIT_ACCESS_UNIT_DELIMITER
-      || (*itNalu)->m_nalUnitType == NAL_UNIT_VPS
-      || (*itNalu)->m_nalUnitType == NAL_UNIT_DCI
-      || (*itNalu)->m_nalUnitType == NAL_UNIT_SPS
-      || (*itNalu)->m_nalUnitType == NAL_UNIT_PPS
-      ))
-#endif
   {
     itNalu++;
   }
@@ -770,6 +776,57 @@ void EncGOP::xCreateIRAPLeadingSEIMessages (SEIMessages& seiMessages, const SPS
     m_seiEncoder.initSEIContentColourVolume(seiContentColourVolume);
     seiMessages.push_back(seiContentColourVolume);
   }
+  
+  if (m_pcCfg->getSdiSEIEnabled())
+  {
+    SEIScalabilityDimensionInfo *seiScalabilityDimensionInfo = new SEIScalabilityDimensionInfo;
+    m_seiEncoder.initSEIScalabilityDimensionInfo(seiScalabilityDimensionInfo);
+    seiMessages.push_back(seiScalabilityDimensionInfo);
+  }
+  // multiview acquisition information
+  if (m_pcCfg->getMaiSEIEnabled())
+  {
+    SEIMultiviewAcquisitionInfo *seiMultiviewAcquisitionInfo = new SEIMultiviewAcquisitionInfo;
+    m_seiEncoder.initSEIMultiviewAcquisitionInfo(seiMultiviewAcquisitionInfo);
+    seiMessages.push_back(seiMultiviewAcquisitionInfo);
+  }
+#if JVET_W0078_MVP_SEI 
+  // multiview view position
+  if (m_pcCfg->getMvpSEIEnabled())
+  {
+    SEIMultiviewViewPosition *seiMultiviewViewPosition = new SEIMultiviewViewPosition;
+    m_seiEncoder.initSEIMultiviewViewPosition(seiMultiviewViewPosition);
+    seiMessages.push_back(seiMultiviewViewPosition);
+  }
+#endif
+  // alpha channel information
+  if (m_pcCfg->getAciSEIEnabled())
+  {
+    SEIAlphaChannelInfo *seiAlphaChannelInfo = new SEIAlphaChannelInfo;
+    m_seiEncoder.initSEIAlphaChannelInfo(seiAlphaChannelInfo);
+    seiMessages.push_back(seiAlphaChannelInfo);
+  }
+  // depth representation information
+  if (m_pcCfg->getDriSEIEnabled())
+  {
+    SEIDepthRepresentationInfo *seiDepthRepresentationInfo = new SEIDepthRepresentationInfo;
+    m_seiEncoder.initSEIDepthRepresentationInfo(seiDepthRepresentationInfo);
+    seiMessages.push_back(seiDepthRepresentationInfo);
+  }
+  // colour transform information
+  if (m_pcCfg->getCtiSEIEnabled())
+  {
+    SEIColourTransformInfo* seiCTI = new SEIColourTransformInfo;
+    m_seiEncoder.initSEIColourTransformInfo(seiCTI);
+    seiMessages.push_back(seiCTI);
+  }
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  if (m_pcCfg->getConstrainedRaslencoding())
+  {
+    SEIConstrainedRaslIndication* seiConstrainedRasl = new SEIConstrainedRaslIndication;
+    seiMessages.push_back(seiConstrainedRasl);
+  }
+#endif
 }
 
 void EncGOP::xCreatePerPictureSEIMessages (int picInGOP, SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, Slice *slice)
@@ -791,7 +848,6 @@ void EncGOP::xCreatePerPictureSEIMessages (int picInGOP, SEIMessages& seiMessage
       bufferingPeriodSEI->copyTo(*bufferingPeriodSEIcopy);
       nestedSeiMessages.push_back(bufferingPeriodSEIcopy);
     }
-
   }
 
   if (m_pcEncLib->getDependentRAPIndicationSEIEnabled() && slice->isDRAP())
@@ -801,9 +857,40 @@ void EncGOP::xCreatePerPictureSEIMessages (int picInGOP, SEIMessages& seiMessage
     seiMessages.push_back(dependentRAPIndicationSEI);
   }
 
+  if (m_pcEncLib->getEdrapIndicationSEIEnabled() && slice->getEdrapRapId() > 0)
+  {
+    SEIExtendedDrapIndication *seiExtendedDrapIndication = new SEIExtendedDrapIndication();
+    m_seiEncoder.initSEIExtendedDrapIndication(seiExtendedDrapIndication);
+    // update EDRAP SEI message according to the reference lists of the slice
+    seiExtendedDrapIndication->m_edrapIndicationRapIdMinus1 = slice->getEdrapRapId() - 1;
+    seiExtendedDrapIndication->m_edrapIndicationLeadingPicturesDecodableFlag = slice->getLatestEdrapLeadingPicDecodableFlag();
+    seiExtendedDrapIndication->m_edrapIndicationNumRefRapPicsMinus1 = slice->getEdrapNumRefRapPics() - 1;
+    seiExtendedDrapIndication->m_edrapIndicationRefRapId.resize(seiExtendedDrapIndication->m_edrapIndicationNumRefRapPicsMinus1 + 1);
+    for (int i = 0; i <= seiExtendedDrapIndication->m_edrapIndicationNumRefRapPicsMinus1; i++)
+    {
+      seiExtendedDrapIndication->m_edrapIndicationRefRapId[i] = slice->getEdrapRefRapId(i);
+    }
+    seiMessages.push_back(seiExtendedDrapIndication);
+  }
+
+  // insert one Annotated Region SEI for the picture (if the file exists)
+  if (!m_pcCfg->getAnnotatedRegionSEIFileRoot().empty())
+  {
+    SEIAnnotatedRegions *seiAnnotatedRegions = new SEIAnnotatedRegions();
+    const bool success = m_seiEncoder.initSEIAnnotatedRegions(seiAnnotatedRegions, slice->getPOC());
+
+    if (success)
+    {
+      seiMessages.push_back(seiAnnotatedRegions);
+    }
+    else
+    {
+      delete seiAnnotatedRegions;
+    }
+  }
 }
 
-void EncGOP::xCreateScalableNestingSEI(SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, const std::vector<int> &targetOLSs, const std::vector<int> &targetLayers, const std::vector<uint16_t>& subpicIDs)
+void EncGOP::xCreateScalableNestingSEI(SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, const std::vector<int> &targetOLSs, const std::vector<int> &targetLayers, const std::vector<uint16_t>& subpicIDs, uint16_t maxSubpicIdInPic)
 {
   SEIMessages tmpMessages;
   while (!nestedSeiMessages.empty())
@@ -812,7 +899,7 @@ void EncGOP::xCreateScalableNestingSEI(SEIMessages& seiMessages, SEIMessages& ne
     nestedSeiMessages.pop_front();
     tmpMessages.push_back(sei);
     SEIScalableNesting *nestingSEI = new SEIScalableNesting();
-    m_seiEncoder.initSEIScalableNesting(nestingSEI, tmpMessages, targetOLSs, targetLayers, subpicIDs);
+    m_seiEncoder.initSEIScalableNesting(nestingSEI, tmpMessages, targetOLSs, targetLayers, subpicIDs, maxSubpicIdInPic);
     seiMessages.push_back(nestingSEI);
     tmpMessages.clear();
   }
@@ -850,7 +937,6 @@ void EncGOP::xCreatePictureTimingSEI  (int IRAPGOPid, SEIMessages& seiMessages,
   // update decoding unit parameters
   if ((m_pcCfg->getPictureTimingSEIEnabled() || m_pcCfg->getDecodingUnitInfoSEIEnabled()) && slice->getNalUnitLayerId() == slice->getVPS()->getLayerId(0))
   {
-    int picSptDpbOutputDuDelay = 0;
     SEIPictureTiming *pictureTimingSEI = new SEIPictureTiming();
 
     // DU parameters
@@ -1044,10 +1130,6 @@ void EncGOP::xCreatePictureTimingSEI  (int IRAPGOPid, SEIMessages& seiMessages,
     }
     int factor = hrd->getTickDivisorMinus2() + 2;
     pictureTimingSEI->m_picDpbOutputDuDelay = factor * pictureTimingSEI->m_picDpbOutputDelay;
-    if( m_pcCfg->getDecodingUnitInfoSEIEnabled() )
-    {
-      picSptDpbOutputDuDelay = factor * pictureTimingSEI->m_picDpbOutputDelay;
-    }
     if (m_bufferingPeriodSEIPresentInAU)
     {
       for( int i = temporalId ; i < maxNumSubLayers ; i ++ )
@@ -1080,9 +1162,10 @@ void EncGOP::xCreatePictureTimingSEI  (int IRAPGOPid, SEIMessages& seiMessages,
         SEIDecodingUnitInfo *duInfoSEI = new SEIDecodingUnitInfo();
         duInfoSEI->m_decodingUnitIdx = i;
         for( int j = temporalId; j <= maxNumSubLayers; j++ )
+        {
           duInfoSEI->m_duSptCpbRemovalDelayIncrement[j] = pictureTimingSEI->m_duCpbRemovalDelayMinus1[i*maxNumSubLayers+j] + 1;
+        }
         duInfoSEI->m_dpbOutputDuDelayPresentFlag = false;
-        duInfoSEI->m_picSptDpbOutputDuDelay = picSptDpbOutputDuDelay;
 
         duInfoSeiMessages.push_back(duInfoSEI);
       }
@@ -1150,7 +1233,9 @@ void EncGOP::xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUD
 
     int maxNumSubLayers = sps->getMaxTLayers();
     for( int j = 0; j < maxNumSubLayers - 1; j++ )
+    {
       pictureTimingSEI->m_ptSubLayerDelaysPresentFlag[j] = false;
+    }
 
     for( i = 0; i < numDU; i ++ )
     {
@@ -1190,7 +1275,10 @@ void EncGOP::xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUD
             ui64Tmp = uiPrev + 1;
             flag = 1;
           }
-          else                            ui64Tmp = maxDiff - tmp + 1;
+          else
+          {
+            ui64Tmp = maxDiff - tmp + 1;
+          }
         }
         rDuCpbRemovalDelayMinus1[ i * maxNumSubLayers + maxNumSubLayers - 1 ] = (uint32_t)ui64Tmp - uiPrev - 1;
         if( (int)rDuCpbRemovalDelayMinus1[ i * maxNumSubLayers + maxNumSubLayers - 1 ] < 0 )
@@ -1255,6 +1343,40 @@ validateMinCrRequirements(const ProfileLevelTierFeatures &plt, std::size_t numBy
     }
   }
 }
+
+// Sub-picture variant: warns when a sub-picture's VCL NAL unit byte count exceeds the MinCr-derived threshold.
+static void
+validateMinCrRequirements(const ProfileLevelTierFeatures &plt, std::size_t numBytesInVclNalUnits, const Slice *pSlice, const EncCfg *pCfg, const SEISubpicureLevelInfo &seiSubpic, const int subPicIdx, const int layerId)
+{
+  // Only checked when level/tier and profile features are available and the tier is MAIN.
+  if (!plt.getLevelTierFeatures() || !plt.getProfileFeatures() || plt.getTier() != Level::Tier::MAIN)
+  {
+    return;
+  }
+
+  const uint32_t capabilityFactorx1000 = plt.getProfileFeatures()->formatCapabilityFactorx1000;
+  const uint64_t lumaSampleRate        = plt.getLevelTierFeatures()->maxLumaSr;
+  const double   divisor               = (256 * plt.getMinCr() * pCfg->getFrameRate() * 1000 * 256);
+
+  for (int refIdx = 0; refIdx < seiSubpic.m_numRefLevels; refIdx++)
+  {
+    const Level::Name refLevel = seiSubpic.m_refLevelIdc[refIdx][layerId];
+    if (refLevel == Level::LEVEL15_5)
+    {
+      continue;   // entries at LEVEL15_5 are not checked
+    }
+    const int      layersFraction = seiSubpic.m_nonSubpicLayersFraction[refIdx][layerId];
+    const int      levelFraction  = seiSubpic.m_refLevelFraction[refIdx][subPicIdx][layerId] + 1;   // stored value is sli_ref_level_fraction_minus1
+    const uint32_t fractionx256   = layersFraction * 256 + (256 - layersFraction) * levelFraction;
+    const double   limit          = capabilityFactorx1000 * lumaSampleRate * fractionx256 / divisor;
+    if (numBytesInVclNalUnits > limit)
+    {
+      msg( WARNING, "WARNING: Encoded stream for sub-picture %d does not meet MinCr requirements numBytesInVclNalUnits (%.0f) must be <= %.0f. Try increasing Qp, tier or level\n",
+                subPicIdx, (double) numBytesInVclNalUnits, limit );
+    }
+  }
+}
+
 static std::size_t
 cabac_zero_word_padding(const Slice *const pcSlice,
                         const Picture *const pcPic,
@@ -1306,7 +1428,7 @@ cabac_zero_word_padding(const Slice *const pcSlice,
       return numberOfAdditionalCabacZeroWords;
     }
   }
-      return 0;
+  return 0;
 }
 
 class EfficientFieldIRAPMapping
@@ -1564,7 +1686,10 @@ void trySkipOrDecodePicture( bool& decPic, bool& encPic, const EncCfg& cfg, Pict
     }
 
     encPic |= cfg.getForceDecodeBitstream1() && !decPic;
-    if( cfg.getForceDecodeBitstream1() ) { return; }
+    if (cfg.getForceDecodeBitstream1())
+    {
+      return;
+    }
   }
 
 
@@ -1892,7 +2017,9 @@ void EncGOP::xPicInitLMCS(Picture *pic, PicHeader *picHeader, Slice *slice)
         m_pcReshaper->setCTUFlag(false);
       }
       else
+      {
         m_pcReshaper->setCTUFlag(true);
+      }
 
       m_pcReshaper->getSliceReshaperInfo().setSliceReshapeModelPresentFlag(false);
 
@@ -1903,6 +2030,12 @@ void EncGOP::xPicInitLMCS(Picture *pic, PicHeader *picHeader, Slice *slice)
       else if (m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_SDR || m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_HLG)
       {
         int modIP = pic->getPOC() - pic->getPOC() / m_pcCfg->getReshapeCW().rspFpsToIp * m_pcCfg->getReshapeCW().rspFpsToIp;
+#if GDR_ENABLED
+        if (slice->getSPS()->getGDREnabledFlag() && slice->isInterGDR())
+        {
+          modIP = 0;
+        }
+#endif
         if (m_pcReshaper->getReshapeFlag() && m_pcCfg->getReshapeCW().updateCtrl == 2 && modIP == 0)
         {
           m_pcReshaper->getSliceReshaperInfo().setSliceReshapeModelPresentFlag(true);
@@ -1917,9 +2050,17 @@ void EncGOP::xPicInitLMCS(Picture *pic, PicHeader *picHeader, Slice *slice)
     }
 
     //set all necessary information in LMCS APS and picture header
-    picHeader->setLmcsEnabledFlag(m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper());
+    picHeader->setLmcsEnabledFlag(m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper());    
     slice->setLmcsEnabledFlag(m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper());
-    picHeader->setLmcsChromaResidualScaleFlag(m_pcReshaper->getSliceReshaperInfo().getSliceReshapeChromaAdj() == 1);
+    picHeader->setLmcsChromaResidualScaleFlag(m_pcReshaper->getSliceReshaperInfo().getSliceReshapeChromaAdj() == 1);    
+
+#if GDR_ENABLED
+    if (slice->getSPS()->getGDREnabledFlag() && picHeader->getInGdrInterval())
+    {
+      picHeader->setLmcsChromaResidualScaleFlag(false);
+    }
+#endif
+
     if (m_pcReshaper->getSliceReshaperInfo().getSliceReshapeModelPresentFlag())
     {
       int apsId = std::min<int>( 3, m_pcEncLib->getVPS() == nullptr ? 0 : m_pcEncLib->getVPS()->getGeneralLayerIdx( m_pcEncLib->getLayerId() ) );
@@ -1966,7 +2107,7 @@ void EncGOP::xPicInitLMCS(Picture *pic, PicHeader *picHeader, Slice *slice)
 // ====================================================================================================================
 void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
                           std::list<PelUnitBuf*>& rcListPicYuvRecOut,
-                          bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, 
+                          bool isField, bool isTff, const InputColourSpaceConversion snr_conversion,
                           const bool printFrameMSE, const bool printMSSSIM, bool isEncodeLtRef, const int picIdInGOP)
 {
   // TODO: Split this function up.
@@ -2094,9 +2235,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
     m_pcSliceEncoder->create( picWidth, picHeight, chromaFormatIDC, maxCUWidth, maxCUHeight, maxTotalCUDepth );
 
-#if ENABLE_SPLIT_PARALLELISM
-    pcPic->scheduler.init( pcPic->cs->pcv->heightInCtus, pcPic->cs->pcv->widthInCtus, 1                          , 0                             , m_pcCfg->getNumSplitThreads() );
-#endif
     pcPic->createTempBuffers( pcPic->cs->pps->pcv->maxCUWidth );
     pcPic->cs->createCoeffs((bool)pcPic->cs->sps->getPLTMode());
 
@@ -2105,7 +2243,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     pcPic->allocateNewSlice();
     m_pcSliceEncoder->setSliceSegmentIdx(0);
 
-    m_pcSliceEncoder->initEncSlice(pcPic, iPOCLast, pocCurr, iGOPid, pcSlice, isField, isEncodeLtRef, m_pcEncLib->getLayerId() );
+    m_pcSliceEncoder->initEncSlice(pcPic, iPOCLast, pocCurr, iGOPid, pcSlice, isField, isEncodeLtRef, m_pcEncLib->getLayerId(), getNalUnitType(pocCurr, m_iLastIDR, isField) );
 
     DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "poc", pocCurr ) ) );
     DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "final", 0 ) ) );
@@ -2127,6 +2265,18 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       pcSlice->setSliceType(I_SLICE);
     }
     pcSlice->setTLayer(m_pcCfg->getGOPEntry(iGOPid).m_temporalId);
+#if GDR_ENABLED
+    if (m_pcCfg->getGdrEnabled() && pocCurr >= m_pcCfg->getGdrPocStart() && ((pocCurr - m_pcCfg->getGdrPocStart()) % m_pcCfg->getGdrPeriod() == 0))
+    {
+      pcSlice->setSliceType(B_SLICE);
+    }
+
+    // note : first picture is GDR(I_SLICE)
+    if (m_pcCfg->getGdrEnabled() && pocCurr == 0)
+    {
+      pcSlice->setSliceType(I_SLICE);
+    }
+#endif
 
     // Set the nal unit type
     pcSlice->setNalUnitType(getNalUnitType(pocCurr, m_iLastIDR, isField));
@@ -2149,6 +2299,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       {
         m_associatedIRAPType[pcPic->layerId] = pcSlice->getNalUnitType();
         m_associatedIRAPPOC[pcPic->layerId] = pocCurr;
+        if (m_pcEncLib->getEdrapIndicationSEIEnabled())
+        {
+          m_latestEDRAPPOC = MAX_INT;
+          pcPic->setEdrapRapId(0);
+        }
       }
       pcSlice->setAssociatedIRAPType(m_associatedIRAPType[pcPic->layerId]);
       pcSlice->setAssociatedIRAPPOC(m_associatedIRAPPOC[pcPic->layerId]);
@@ -2187,6 +2342,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       {
         m_associatedIRAPType[pcPic->layerId] = pcSlice->getNalUnitType();
         m_associatedIRAPPOC[pcPic->layerId] = pocCurr;
+        if (m_pcEncLib->getEdrapIndicationSEIEnabled())
+        {
+          m_latestEDRAPPOC = MAX_INT;
+          pcPic->setEdrapRapId(0);
+        }
       }
       pcSlice->setAssociatedIRAPType(m_associatedIRAPType[pcPic->layerId]);
       pcSlice->setAssociatedIRAPPOC(m_associatedIRAPPOC[pcPic->layerId]);
@@ -2221,7 +2381,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         rpcPic = *(iterPic++);
         if ( pcSlice->isDRAP() && rpcPic->getPOC() != pocCurr )
         {
-            rpcPic->precedingDRAP = true;
+          rpcPic->precedingDRAP = true;
         }
         else if ( !pcSlice->isDRAP() && rpcPic->getPOC() == pocCurr )
         {
@@ -2230,8 +2390,60 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       }
     }
 
+    pcSlice->setEnableEdrapSEI(m_pcEncLib->getEdrapIndicationSEIEnabled());
+    if (m_pcEncLib->getEdrapIndicationSEIEnabled())
+    {
+      // Only mark the picture as Extended DRAP if all of the following applies:
+      //  1) Extended DRAP indication SEI messages are enabled
+      //  2) The current picture is not an intra picture
+      //  3) The current picture is in the EDRAP period
+      //  4) The current picture is a trailing picture
+      if (m_pcEncLib->getEdrapIndicationSEIEnabled() && m_pcEncLib->getEdrapPeriod() > 0 && !pcSlice->isIntra() &&
+          pocCurr % m_pcEncLib->getEdrapPeriod() == 0 && pocCurr > pcSlice->getAssociatedIRAPPOC())
+      {
+        pcSlice->setEdrapRapId(pocCurr / m_pcEncLib->getEdrapPeriod());
+        pcSlice->getPic()->setEdrapRapId(pocCurr / m_pcEncLib->getEdrapPeriod());
+      }
+
+      if (pcSlice->getEdrapRapId() > 0)
+      {
+        m_latestEDRAPPOC = pocCurr;
+        m_latestEdrapLeadingPicDecodableFlag = false;
+        pcSlice->setTLayer(0); // Force Extended DRAP picture to have temporal layer 0
+        msg( NOTICE, "Force the temporal sublayer identifier of the EDRAP picture equal to 0.\n");
+      }
+      pcSlice->setLatestEDRAPPOC(m_latestEDRAPPOC);
+      pcSlice->setLatestEdrapLeadingPicDecodableFlag(m_latestEdrapLeadingPicDecodableFlag);
+      pcSlice->setUseLTforEdrap(false); // When set, sets the associated IRAP/EDRAP as long-term in RPL0 at slice level, unless the associated IRAP/EDRAP is already included in RPL0 or RPL1 defined in SPS
+
+      PicList::iterator iterPic = rcListPic.begin();
+      Picture *rpcPic;
+      while (iterPic != rcListPic.end())
+      {
+        rpcPic = *(iterPic++);
+        if ( pcSlice->getEdrapRapId() > 0 && rpcPic->getPOC() != pocCurr && rpcPic->getPOC() >= pcSlice->getAssociatedIRAPPOC() )
+        {
+          if (rpcPic->getEdrapRapId() >= 0 && rpcPic->getPOC() % m_pcEncLib->getEdrapPeriod() == 0)
+          {
+            bool bRefExist = false;
+            for (int i = 0; i < pcSlice->getEdrapNumRefRapPics(); i++)
+            {
+              if (pcSlice->getEdrapRefRapId(i) == rpcPic->getEdrapRapId())
+                bRefExist = true;
+            }
+            if (!bRefExist)
+            {
+              pcSlice->addEdrapRefRapIds(rpcPic->getPOC() / m_pcEncLib->getEdrapPeriod());
+              pcSlice->setEdrapNumRefRapPics(pcSlice->getEdrapNumRefRapPics() + 1);
+            }
+          }
+        }
+      }
+    }
+
     if (pcSlice->checkThatAllRefPicsAreAvailable(rcListPic, pcSlice->getRPL0(), 0, false) != 0 || pcSlice->checkThatAllRefPicsAreAvailable(rcListPic, pcSlice->getRPL1(), 1, false) != 0 ||
-        (m_pcEncLib->getDependentRAPIndicationSEIEnabled() && !pcSlice->isIRAP() && ( pcSlice->isDRAP() || !pcSlice->isPOCInRefPicList(pcSlice->getRPL0(), pcSlice->getAssociatedIRAPPOC())) )
+        (m_pcEncLib->getDependentRAPIndicationSEIEnabled() && !pcSlice->isIRAP() && ( pcSlice->isDRAP() || !pcSlice->isPOCInRefPicList(pcSlice->getRPL0(), pcSlice->getAssociatedIRAPPOC())) ) ||
+        (m_pcEncLib->getEdrapIndicationSEIEnabled() && !pcSlice->isIRAP() && ( pcSlice->getEdrapRapId() > 0 || !pcSlice->isPOCInRefPicList(pcSlice->getRPL0(), pcSlice->getAssociatedIRAPPOC()) ) )
       || ((m_pcEncLib->getAvoidIntraInDepLayer() || !pcSlice->isIRAP()) && pcSlice->getPic()->cs->vps && m_pcEncLib->getNumRefLayers(pcSlice->getPic()->cs->vps->getGeneralLayerIdx(m_pcEncLib->getLayerId())))
       )
     {
@@ -2245,11 +2457,10 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         || pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL)
         )
     {
-    if (pcSlice->isStepwiseTemporalLayerSwitchingPointCandidate(rcListPic))
+      if (pcSlice->isStepwiseTemporalLayerSwitchingPointCandidate(rcListPic))
       {
         bool isSTSA=true;
 
-
         for(int ii=0;(ii<m_pcCfg->getGOPSize() && isSTSA==true);ii++)
         {
           int lTid = m_pcCfg->getRPLEntry(0, ii).m_temporalId;
@@ -2313,7 +2524,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       pcSlice->setNumRefIdx(REF_PIC_LIST_0, (pcSlice->isIntra()) ? 0 : pcSlice->getRPL0()->getNumberOfActivePictures());
       pcSlice->setNumRefIdx(REF_PIC_LIST_1, (!pcSlice->isInterB()) ? 0 : pcSlice->getRPL1()->getNumberOfActivePictures());
     }
-    if (m_pcCfg->getUseCompositeRef() && getPrepareLTRef()) {
+    if (m_pcCfg->getUseCompositeRef() && getPrepareLTRef())
+    {
       arrangeCompositeReference(pcSlice, rcListPic, pocCurr);
     }
     //  Set reference list
@@ -2341,7 +2553,10 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       if (!pcSlice->isIRAP())
       {
         int refLayer = pcSlice->getDepth();
-        if( refLayer > 9 ) refLayer = 9; // Max layer is 10
+        if (refLayer > 9)
+        {
+          refLayer = 9;   // Max layer is 10
+        }
 
         if( m_bInitAMaxBT && pcSlice->getPOC() > m_uiPrevISlicePOC )
         {
@@ -2450,7 +2665,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
     if (pcSlice->getSliceType() == B_SLICE)
     {
-
       bool bLowDelay = true;
       int  iCurrPOC  = pcSlice->getPOC();
       int iRefIdx = 0;
@@ -2481,7 +2695,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     //-------------------------------------------------------------
     pcSlice->setRefPOCList();
 
-
     pcSlice->setList1IdxToList0Idx();
 
     if (m_pcEncLib->getTMVPModeId() == 2)
@@ -2583,7 +2796,14 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       }
     }
 
+#if GDR_ENABLED
+    PicHeader *picHeader = new PicHeader;
+    *picHeader = *pcPic->cs->picHeader;
+    pcSlice->scaleRefPicList(scaledRefPic, picHeader, m_pcEncLib->getApss(), picHeader->getLmcsAPS(), picHeader->getScalingListAPS(), false);
+    picHeader = pcPic->cs->picHeader;
+#else
     pcSlice->scaleRefPicList( scaledRefPic, pcPic->cs->picHeader, m_pcEncLib->getApss(), picHeader->getLmcsAPS(), picHeader->getScalingListAPS(), false );
+#endif
 
     // set adaptive search range for non-intra-slices
     if (m_pcCfg->getUseASR() && !pcSlice->isIntra())
@@ -2697,6 +2917,15 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       pcSlice->setBiDirPred( false, -1, -1 );
     }
 
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+    if( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_RASL && m_pcCfg->getRprRASLtoolSwitch() )
+    {
+      pcSlice->setDisableLmChromaCheck( true );
+      picHeader->setDmvrDisabledFlag( true );
+      xUpdateRPRtmvp( picHeader, pcSlice );
+    }
+#endif
+    
     double lambda            = 0.0;
     int actualHeadBits       = 0;
     int actualTotalBits      = 0;
@@ -2707,9 +2936,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
     uint32_t uiNumSliceSegments = 1;
 
-    {
-      pcSlice->setDefaultClpRng( *pcSlice->getSPS() );
-    }
+    pcSlice->setDefaultClpRng(*pcSlice->getSPS());
 
     // Allocate some coders, now the number of tiles are known.
     const uint32_t numberOfCtusInFrame = pcPic->cs->pcv->sizeInCtus;
@@ -2795,6 +3022,31 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     {
       m_pcSliceEncoder->setJointCbCrModes(*pcPic->cs, Position(0, 0), pcPic->cs->area.lumaSize());
     }
+
+#if JVET_W0046_RLSCP
+    if (!pcSlice->getSPS()->getSpsRangeExtension().getReverseLastSigCoeffEnabledFlag() || pcSlice->getSliceQp() > 12)
+    {
+      pcSlice->setReverseLastSigCoeffFlag(false);
+    }
+    else
+    {
+      /*for RA serial and parallel alignment start*/
+      if (m_pcCfg->getIntraPeriod() > 1)
+      {
+        if (pcSlice->isIntra())
+        {
+          m_cnt_right_bottom = 0;
+        }
+        if ((pocCurr % m_pcCfg->getIntraPeriod()) <= m_pcCfg->getGOPSize() && iGOPid == 0 && !pcSlice->isIntra())
+        {
+          m_cnt_right_bottom = m_cnt_right_bottom_i;
+        }
+      }
+      /*for RA serial and parallel alignment end*/
+      pcSlice->setReverseLastSigCoeffFlag(m_cnt_right_bottom >= 0);
+    }
+#endif
+
     if( encPic )
     // now compress (trial encode) the various slice segments (slices, and dependent slices)
     {
@@ -2809,6 +3061,57 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       for(uint32_t sliceIdx = 0; sliceIdx < pcPic->cs->pps->getNumSlicesInPic(); sliceIdx++ )
       {
         pcSlice->setSliceMap( pcPic->cs->pps->getSliceMap( sliceIdx ) );
+        if (pcSlice->getSPS()->getSpsRangeExtension().getTSRCRicePresentFlag() && (pcPic->cs->pps->getNumSlicesInPic() == 1))
+        {
+          if (!pcSlice->isIntra())
+          {
+            int nextRice = 1;
+
+            if (m_preIPOC < pocCurr)
+            {
+              for (int idx = 0; idx < MAX_TSRC_RICE; idx++)
+              {
+                m_riceBit[idx][0] = m_riceBit[idx][1];
+              }
+              m_preQP[0] = m_preQP[1];
+              m_preIPOC = MAX_INT;
+            }
+
+            if (m_preQP[0] != pcSlice->getSliceQp())
+            {
+              m_riceBit[pcSlice->get_tsrc_index()][0] = (int) (m_riceBit[pcSlice->get_tsrc_index()][0] * 9 / 10);
+            }
+
+            for (int idx = 2; idx < 9; idx++)
+            {
+              if (m_riceBit[idx - 2][0] > m_riceBit[idx - 1][0])
+              {
+                nextRice = idx;
+              }
+              else
+              {
+                m_riceBit[idx - 1][0] = m_riceBit[idx - 2][0];
+              }
+              m_riceBit[idx - 2][0] = 0;
+            }
+            m_riceBit[7][0] = 0;
+            pcSlice->set_tsrc_index(nextRice - 1);
+          }
+          else
+          {
+            m_preIPOC = pocCurr;
+            m_preQP[0] = MAX_INT;          
+            m_preQP[1] = pcSlice->getSliceQp();
+            for (int idx = 0; idx < MAX_TSRC_RICE; idx++)
+            {
+              m_riceBit[idx][0] = 0;
+            }
+          }
+          for (int idx = 0; idx < MAX_TSRC_RICE; idx++)
+          {
+             pcSlice->setRiceBit(idx, m_riceBit[idx][0]);
+          }
+        }
         if( pcPic->cs->pps->getRectSliceFlag() )
         {
           Position firstCtu;
@@ -2840,7 +3143,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
             else
             {
               pcPic->getOrigBuf().copyFrom(pcPic->getTrueOrigBuf());
-            } 
+            }
 
             if (pcSlice->getLmcsEnabledFlag())
             {
@@ -2901,6 +3204,12 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       if (cs.sps->getUseLmcs() && m_pcReshaper->getSliceReshaperInfo().getUseSliceReshaper())
       {
         picHeader->setLmcsEnabledFlag(true);
+#if GDR_ENABLED
+        if (cs.sps->getGDREnabledFlag() && picHeader->getInGdrInterval())
+        {
+          picHeader->setLmcsChromaResidualScaleFlag(false);
+        }
+#endif
         int apsId = std::min<int>(3, m_pcEncLib->getVPS() == nullptr ? 0 : m_pcEncLib->getVPS()->getGeneralLayerIdx(m_pcEncLib->getLayerId()));
         picHeader->setLmcsAPSId(apsId);
 
@@ -2928,7 +3237,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         else
         {
           pcPic->getOrigBuf().copyFrom(pcPic->getTrueOrigBuf());
-        } 
+        }
       }
 
       // create SAO object based on the picture size
@@ -2957,7 +3266,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       // SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas
       if( pcSlice->getSPS()->getSAOEnabledFlag() && m_pcCfg->getSaoCtuBoundary() )
       {
-        m_pcSAO->getPreDBFStatistics( cs );
+#if JVET_W0129_ENABLE_ALF_TRUEORG
+        m_pcSAO->getPreDBFStatistics( cs, m_pcCfg->getSaoTrueOrg() );
+#else
+        m_pcSAO->getPreDBFStatistics( cs, m_pcCfg->getAlfSaoTrueOrg() );
+#endif
       }
 
       //-- Loop filter
@@ -2969,14 +3282,12 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
           applyDeblockingFilterParameterSelection(pcPic, uiNumSliceSegments, iGOPid);
         }
         else
-        {
   #endif
+        {
           applyDeblockingFilterMetric(pcPic, uiNumSliceSegments);
-  #if W0038_DB_OPT
         }
-  #endif
       }
-      if (m_pcCfg->getCostMode() == COST_LOSSLESS_CODING) 
+      if (m_pcCfg->getCostMode() == COST_LOSSLESS_CODING)
       {
         for (int s = 0; s < uiNumSliceSegments; s++)
         {
@@ -2986,7 +3297,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
           }
         }
       }
-      m_pcLoopFilter->loopFilterPic( cs );
+      m_pcLoopFilter->deblockingFilterPic( cs );
 
       CS::setRefinedMotionField(cs);
 
@@ -2994,16 +3305,22 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       {
         bool sliceEnabled[MAX_NUM_COMPONENT];
         m_pcSAO->initCABACEstimator( m_pcEncLib->getCABACEncoder(), m_pcEncLib->getCtxCache(), pcSlice );
-
+#if JVET_W0129_ENABLE_ALF_TRUEORG
         m_pcSAO->SAOProcess( cs, sliceEnabled, pcSlice->getLambdas(),
 #if ENABLE_QPA
-                             (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost (PARL_PARAM0 (0))->getChromaWeight() : 0.0),
+                             (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost ()->getChromaWeight() : 0.0),
+#endif
+                             m_pcCfg->getTestSAODisableAtPictureLevel(), m_pcCfg->getSaoEncodingRate(), m_pcCfg->getSaoEncodingRateChroma(), m_pcCfg->getSaoCtuBoundary(), m_pcCfg->getSaoGreedyMergeEnc(), m_pcCfg->getSaoTrueOrg() );
+#else
+        m_pcSAO->SAOProcess( cs, sliceEnabled, pcSlice->getLambdas(),
+#if ENABLE_QPA
+                             (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost ()->getChromaWeight() : 0.0),
+#endif
+                             m_pcCfg->getTestSAODisableAtPictureLevel(), m_pcCfg->getSaoEncodingRate(), m_pcCfg->getSaoEncodingRateChroma(), m_pcCfg->getSaoCtuBoundary(), m_pcCfg->getSaoGreedyMergeEnc(), m_pcCfg->getAlfSaoTrueOrg() );
 #endif
-                             m_pcCfg->getTestSAODisableAtPictureLevel(), m_pcCfg->getSaoEncodingRate(), m_pcCfg->getSaoEncodingRateChroma(), m_pcCfg->getSaoCtuBoundary(), m_pcCfg->getSaoGreedyMergeEnc() );
         //assign SAO slice header
         for (int s = 0; s < uiNumSliceSegments; s++)
         {
-
           if (pcPic->slices[s]->isLossless() && m_pcCfg->getCostMode() == COST_LOSSLESS_CODING)
           {
             pcPic->slices[s]->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, false);
@@ -3014,9 +3331,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
             pcPic->slices[s]->setSaoEnabledFlag(CHANNEL_TYPE_LUMA, sliceEnabled[COMPONENT_Y]);
             CHECK(!(sliceEnabled[COMPONENT_Cb] == sliceEnabled[COMPONENT_Cr]), "Unspecified error");
             pcPic->slices[s]->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, sliceEnabled[COMPONENT_Cb]);
-
-        }
-
+          }
         }
       }
 
@@ -3027,12 +3342,12 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
         for (int s = 0; s < uiNumSliceSegments; s++)
         {
-          pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Y, false);
+          pcPic->slices[s]->setAlfEnabledFlag(COMPONENT_Y, false);
         }
         m_pcALF->initCABACEstimator(m_pcEncLib->getCABACEncoder(), m_pcEncLib->getCtxCache(), pcSlice, m_pcEncLib->getApsMap());
         m_pcALF->ALFProcess(cs, pcSlice->getLambdas()
 #if ENABLE_QPA
-          , (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost(PARL_PARAM0(0))->getChromaWeight() : 0.0)
+          , (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && pcSlice->getPPS()->getUseDQP() ? m_pcEncLib->getRdCost()->getChromaWeight() : 0.0)
 #endif
           , pcPic, uiNumSliceSegments
         );
@@ -3040,40 +3355,44 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         //assign ALF slice header
         for (int s = 0; s < uiNumSliceSegments; s++)
         {
-           //For the first slice, even if it is lossless, slice level ALF is not disabled and ALF-APS is signaled so that the later lossy slices can use APS of the first slice. 
-           //However, if the first slice is lossless, the ALF process is disabled for all of the CTUs ( m_ctuEnableFlag == 0) of that slice which is implemented in the function void EncAdaptiveLoopFilter::ALFProcess.         
-      
+           //For the first slice, even if it is lossless, slice level ALF is not disabled and ALF-APS is signaled so that the later lossy slices can use APS of the first slice.
+           //However, if the first slice is lossless, the ALF process is disabled for all of the CTUs ( m_ctuEnableFlag == 0) of that slice which is implemented in the function void EncAdaptiveLoopFilter::ALFProcess.
+
           if (pcPic->slices[s]->isLossless() && s && m_pcCfg->getCostMode() == COST_LOSSLESS_CODING)
           {
-            pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Y, false);
-            pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Cb, false);
-            pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Cr, false);
+            pcPic->slices[s]->setAlfEnabledFlag(COMPONENT_Y, false);
+            pcPic->slices[s]->setAlfEnabledFlag(COMPONENT_Cb, false);
+            pcPic->slices[s]->setAlfEnabledFlag(COMPONENT_Cr, false);
           }
           else
           {
-            pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Y, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y));
-            pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Cb, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cb));
-            pcPic->slices[s]->setTileGroupAlfEnabledFlag(COMPONENT_Cr, cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Cr));
+            pcPic->slices[s]->setAlfEnabledFlag(COMPONENT_Y, cs.slice->getAlfEnabledFlag(COMPONENT_Y));
+            pcPic->slices[s]->setAlfEnabledFlag(COMPONENT_Cb, cs.slice->getAlfEnabledFlag(COMPONENT_Cb));
+            pcPic->slices[s]->setAlfEnabledFlag(COMPONENT_Cr, cs.slice->getAlfEnabledFlag(COMPONENT_Cr));
 
           }
-          if (pcPic->slices[s]->getTileGroupAlfEnabledFlag(COMPONENT_Y))
+          if (pcPic->slices[s]->getAlfEnabledFlag(COMPONENT_Y))
           {
-            pcPic->slices[s]->setTileGroupNumAps(cs.slice->getTileGroupNumAps());
-            pcPic->slices[s]->setAlfAPSs(cs.slice->getTileGroupApsIdLuma());
+            pcPic->slices[s]->setNumAlfApsIdsLuma(cs.slice->getNumAlfApsIdsLuma());
+            pcPic->slices[s]->setAlfApsIdsLuma(cs.slice->getAlfApsIdsLuma());
           }
           else
           {
-            pcPic->slices[s]->setTileGroupNumAps(0);
+            pcPic->slices[s]->setNumAlfApsIdsLuma(0);
           }
           pcPic->slices[s]->setAlfAPSs(cs.slice->getAlfAPSs());
-          pcPic->slices[s]->setTileGroupApsIdChroma(cs.slice->getTileGroupApsIdChroma());
-          pcPic->slices[s]->setTileGroupCcAlfCbApsId(cs.slice->getTileGroupCcAlfCbApsId());
-          pcPic->slices[s]->setTileGroupCcAlfCrApsId(cs.slice->getTileGroupCcAlfCrApsId());
+          pcPic->slices[s]->setAlfApsIdChroma(cs.slice->getAlfApsIdChroma());
+          pcPic->slices[s]->setCcAlfCbApsId(cs.slice->getCcAlfCbApsId());
+          pcPic->slices[s]->setCcAlfCrApsId(cs.slice->getCcAlfCrApsId());
           pcPic->slices[s]->m_ccAlfFilterParam      = m_pcALF->getCcAlfFilterParam();
           pcPic->slices[s]->m_ccAlfFilterControl[0] = m_pcALF->getCcAlfControlIdc(COMPONENT_Cb);
           pcPic->slices[s]->m_ccAlfFilterControl[1] = m_pcALF->getCcAlfControlIdc(COMPONENT_Cr);
         }
       }
+      else if (!layerIdx && (cs.slice->getPendingRasInit() || cs.slice->isIDRorBLA()))
+      {
+        m_pcALF->setApsIdStart(ALF_CTB_MAX_NUM_APS);
+      }
       DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "final", 1 ) ) );
       if (m_pcCfg->getUseCompositeRef() && getPrepareLTRef())
       {
@@ -3099,7 +3418,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       {
         m_pcSAO->disabledRate( *pcPic->cs, pcPic->getSAO(1), m_pcCfg->getSaoEncodingRate(), m_pcCfg->getSaoEncodingRateChroma());
       }
-      if (pcSlice->getSPS()->getALFEnabledFlag() && (pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y) || pcSlice->getTileGroupCcAlfCbEnabledFlag() || pcSlice->getTileGroupCcAlfCrEnabledFlag()))
+      if (pcSlice->getSPS()->getALFEnabledFlag() && (pcSlice->getAlfEnabledFlag(COMPONENT_Y) || pcSlice->getCcAlfCbEnabledFlag() || pcSlice->getCcAlfCrEnabledFlag()))
       {
         // IRAP AU: reset APS map
         {
@@ -3108,7 +3427,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
           {
             // We have to reset all APS on IRAP, but in not encoding case we have to keep the parsed APS of current slice
             // Get active ALF APSs from picture/slice header
-            const std::vector<int> sliceApsIdsLuma = pcSlice->getTileGroupApsIdLuma();
+            const std::vector<int> sliceApsIdsLuma = pcSlice->getAlfApsIdsLuma();
 
             m_pcALF->setApsIdStart( ALF_CTB_MAX_NUM_APS );
 
@@ -3134,10 +3453,10 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
                   }
                 }
                 // Chroma
-                activeAps |= aps->getAPSId() == pcSlice->getTileGroupApsIdChroma();
+                activeAps |= aps->getAPSId() == pcSlice->getAlfApsIdChroma();
                 // CC-ALF
-                activeApsCcAlf |= pcSlice->getTileGroupCcAlfCbEnabledFlag() && aps->getAPSId() == pcSlice->getTileGroupCcAlfCbApsId();
-                activeApsCcAlf |= pcSlice->getTileGroupCcAlfCrEnabledFlag() && aps->getAPSId() == pcSlice->getTileGroupCcAlfCrApsId();
+                activeApsCcAlf |= pcSlice->getCcAlfCbEnabledFlag() && aps->getAPSId() == pcSlice->getCcAlfCbApsId();
+                activeApsCcAlf |= pcSlice->getCcAlfCrEnabledFlag() && aps->getAPSId() == pcSlice->getCcAlfCrApsId();
                 if( !activeAps && !activeApsCcAlf )
                 {
                   apsMap->clearChangedFlag( psId );
@@ -3170,11 +3489,15 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
               pcSlice->getAlfAPSs()[apsId] = aps;
             }
             if( apsMap->getChangedFlag( psId ) )
+            {
               changedApsId = apsId;
+            }
           }
         }
         if( changedApsId >= 0 )
+        {
           m_pcALF->setApsIdStart( changedApsId );
+        }
       }
     }
 
@@ -3199,7 +3522,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       /////////////////////////////////////////////////////////////////////////////////////////////////// File writing
 
       // write various parameter sets
+#if GDR_ENABLED // Note : insert SPS/PPS at every GDR picture
+      bool writePS = m_bSeqFirst || (m_pcCfg->getReWriteParamSets() && (pcSlice->isIRAP())) || pcSlice->isInterGDR();
+#else
       bool writePS = m_bSeqFirst || (m_pcCfg->getReWriteParamSets() && (pcSlice->isIRAP()));
+#endif
       if (writePS)
       {
         m_pcEncLib->setParamSetChanged(pcSlice->getSPS()->getSPSId(), pcSlice->getPPS()->getPPSId());
@@ -3235,12 +3562,25 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap();
         APS* aps = apsMap->getPS((apsId << NUM_APS_TYPE_LEN) + LMCS_APS);
         bool writeAPS = aps && apsMap->getChangedFlag((apsId << NUM_APS_TYPE_LEN) + LMCS_APS);
+#if GDR_ENABLED // note : insert APS at every GDR picture
+        if (aps && apsId >= 0)
+        {
+          writeAPS |= pcSlice->isInterGDR();
+        }
+#endif
         if (writeAPS)
         {
           aps->chromaPresentFlag = pcSlice->getSPS()->getChromaFormatIdc() != CHROMA_400;
           actualTotalBits += xWriteAPS( accessUnit, aps, m_pcEncLib->getLayerId(), true );
           apsMap->clearChangedFlag((apsId << NUM_APS_TYPE_LEN) + LMCS_APS);
+#if GDR_ENABLED
+          if (!pcSlice->isInterGDR())
+          {
+            CHECK(aps != picHeader->getLmcsAPS(), "Wrong LMCS APS pointer in compressGOP");
+          }
+#else
           CHECK(aps != picHeader->getLmcsAPS(), "Wrong LMCS APS pointer in compressGOP");
+#endif
         }
       }
 
@@ -3251,16 +3591,29 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap();
         APS* aps = apsMap->getPS( ( apsId << NUM_APS_TYPE_LEN ) + SCALING_LIST_APS );
         bool writeAPS = aps && apsMap->getChangedFlag( ( apsId << NUM_APS_TYPE_LEN ) + SCALING_LIST_APS );
+#if GDR_ENABLED // note : insert APS at every GDR picture
+        if (aps && apsId >= 0)
+        {
+          writeAPS |= pcSlice->isInterGDR();
+        }
+#endif
         if( writeAPS )
         {
           aps->chromaPresentFlag = pcSlice->getSPS()->getChromaFormatIdc() != CHROMA_400;
           actualTotalBits += xWriteAPS( accessUnit, aps, m_pcEncLib->getLayerId(), true );
           apsMap->clearChangedFlag( ( apsId << NUM_APS_TYPE_LEN ) + SCALING_LIST_APS );
+#if GDR_ENABLED
+          if (!pcSlice->isInterGDR())
+          {
+            CHECK(aps != picHeader->getScalingListAPS(), "Wrong SCALING LIST APS pointer in compressGOP");
+          }
+#else
           CHECK( aps != picHeader->getScalingListAPS(), "Wrong SCALING LIST APS pointer in compressGOP" );
+#endif
         }
       }
 
-      if (pcSlice->getSPS()->getALFEnabledFlag() && (pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y) || pcSlice->getTileGroupCcAlfCbEnabledFlag() || pcSlice->getTileGroupCcAlfCrEnabledFlag()))
+      if (pcSlice->getSPS()->getALFEnabledFlag() && (pcSlice->getAlfEnabledFlag(COMPONENT_Y) || pcSlice->getCcAlfCbEnabledFlag() || pcSlice->getCcAlfCrEnabledFlag()))
       {
         for (int apsId = 0; apsId < ALF_CTB_MAX_NUM_APS; apsId++)
         {
@@ -3275,23 +3628,35 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
             *apsMap->allocatePS((apsId << NUM_APS_TYPE_LEN) + ALF_APS) = *aps; //allocate and cpy
             m_pcALF->setApsIdStart( apsId );
           }
-          else if (pcSlice->getTileGroupCcAlfCbEnabledFlag() && !aps && apsId == pcSlice->getTileGroupCcAlfCbApsId())
+          else if (pcSlice->getCcAlfCbEnabledFlag() && !aps && apsId == pcSlice->getCcAlfCbApsId())
           {
             writeAPS = true;
-            aps = apsMap->getPS((pcSlice->getTileGroupCcAlfCbApsId() << NUM_APS_TYPE_LEN) + ALF_APS);
+            aps = apsMap->getPS((pcSlice->getCcAlfCbApsId() << NUM_APS_TYPE_LEN) + ALF_APS);
           }
-          else if (pcSlice->getTileGroupCcAlfCrEnabledFlag() && !aps && apsId == pcSlice->getTileGroupCcAlfCrApsId())
+          else if (pcSlice->getCcAlfCrEnabledFlag() && !aps && apsId == pcSlice->getCcAlfCrApsId())
           {
             writeAPS = true;
-            aps = apsMap->getPS((pcSlice->getTileGroupCcAlfCrApsId() << NUM_APS_TYPE_LEN) + ALF_APS);
+            aps = apsMap->getPS((pcSlice->getCcAlfCrApsId() << NUM_APS_TYPE_LEN) + ALF_APS);
           }
-
+#if GDR_ENABLED // note : insert APS at every GDR picture
+          if (aps && apsId >= 0)
+          {
+            writeAPS |= (pcSlice->isInterGDR());
+          }
+#endif
           if (writeAPS )
           {
             aps->chromaPresentFlag = pcSlice->getSPS()->getChromaFormatIdc() != CHROMA_400;
             actualTotalBits += xWriteAPS( accessUnit, aps, m_pcEncLib->getLayerId(), true );
             apsMap->clearChangedFlag((apsId << NUM_APS_TYPE_LEN) + ALF_APS);
-            CHECK(aps != pcSlice->getAlfAPSs()[apsId] && apsId != pcSlice->getTileGroupCcAlfCbApsId() && apsId != pcSlice->getTileGroupCcAlfCrApsId(), "Wrong APS pointer in compressGOP");
+#if GDR_ENABLED
+            if (!pcSlice->isInterGDR())
+            {
+              CHECK(aps != pcSlice->getAlfAPSs()[apsId] && apsId != pcSlice->getCcAlfCbApsId() && apsId != pcSlice->getCcAlfCrApsId(), "Wrong APS pointer in compressGOP");
+            }
+#else
+            CHECK(aps != pcSlice->getAlfAPSs()[apsId] && apsId != pcSlice->getCcAlfCbApsId() && apsId != pcSlice->getCcAlfCrApsId(), "Wrong APS pointer in compressGOP");
+#endif
           }
         }
       }
@@ -3379,16 +3744,16 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
           // code ALF parameters in picture header or slice headers
           if( !m_pcCfg->getSliceLevelAlf() )
           {
-            picHeader->setAlfEnabledFlag(COMPONENT_Y,  pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y ) );
-            picHeader->setAlfEnabledFlag(COMPONENT_Cb, pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) );
-            picHeader->setAlfEnabledFlag(COMPONENT_Cr, pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cr) );
-            picHeader->setNumAlfAps(pcSlice->getTileGroupNumAps());
-            picHeader->setAlfAPSs(pcSlice->getTileGroupApsIdLuma());
-            picHeader->setAlfApsIdChroma(pcSlice->getTileGroupApsIdChroma());
-            picHeader->setCcAlfEnabledFlag(COMPONENT_Cb, pcSlice->getTileGroupCcAlfCbEnabledFlag());
-            picHeader->setCcAlfEnabledFlag(COMPONENT_Cr, pcSlice->getTileGroupCcAlfCrEnabledFlag());
-            picHeader->setCcAlfCbApsId(pcSlice->getTileGroupCcAlfCbApsId());
-            picHeader->setCcAlfCrApsId(pcSlice->getTileGroupCcAlfCrApsId());
+            picHeader->setAlfEnabledFlag(COMPONENT_Y,  pcSlice->getAlfEnabledFlag(COMPONENT_Y ) );
+            picHeader->setAlfEnabledFlag(COMPONENT_Cb, pcSlice->getAlfEnabledFlag(COMPONENT_Cb) );
+            picHeader->setAlfEnabledFlag(COMPONENT_Cr, pcSlice->getAlfEnabledFlag(COMPONENT_Cr) );
+            picHeader->setNumAlfApsIdsLuma(pcSlice->getNumAlfApsIdsLuma());
+            picHeader->setAlfApsIdsLuma(pcSlice->getAlfApsIdsLuma());
+            picHeader->setAlfApsIdChroma(pcSlice->getAlfApsIdChroma());
+            picHeader->setCcAlfEnabledFlag(COMPONENT_Cb, pcSlice->getCcAlfCbEnabledFlag());
+            picHeader->setCcAlfEnabledFlag(COMPONENT_Cr, pcSlice->getCcAlfCrEnabledFlag());
+            picHeader->setCcAlfCbApsId(pcSlice->getCcAlfCbApsId());
+            picHeader->setCcAlfCrApsId(pcSlice->getCcAlfCrApsId());
           }
 
           // code WP parameters in picture header or slice headers
@@ -3427,7 +3792,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         OutputNALUnit nalu( pcSlice->getNalUnitType(), m_pcEncLib->getLayerId(), pcSlice->getTLayer() );
         m_HLSWriter->setBitstream( &nalu.m_Bitstream );
 
-
         tmpBitsBeforeWriting = m_HLSWriter->getNumberOfWrittenBits();
         m_HLSWriter->codeSliceHeader( pcSlice );
         actualHeadBits += ( m_HLSWriter->getNumberOfWrittenBits() - tmpBitsBeforeWriting );
@@ -3444,6 +3808,21 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
           binCountsInNalUnits+=numBinsCoded;
           subPicStats[subpicIdx].numBinsWritten += numBinsCoded;
         }
+        if (pcSlice->getSPS()->getSpsRangeExtension().getTSRCRicePresentFlag() && (pcPic->cs->pps->getNumSlicesInPic() == 1))
+        {
+          if (pcSlice->getSliceType() == I_SLICE)
+          {
+            for (int idx = 0; idx < MAX_TSRC_RICE; idx++)
+            {
+              m_riceBit[idx][1] = pcSlice->getRiceBit(idx);
+            }
+          }
+          for (int idx = 0; idx < MAX_TSRC_RICE; idx++)
+          {
+            m_riceBit[idx][0] = pcSlice->getRiceBit(idx);
+          }
+          m_preQP[0] = pcSlice->getSliceQp();
+        }
         {
           // Construct the final bitstream by concatenating substreams.
           // The final bitstream is either nalu.m_Bitstream or pcBitstreamRedirect;
@@ -3498,6 +3877,12 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
           // Check picture level encoding constraints/requirements
           ProfileLevelTierFeatures profileLevelTierFeatures;
           profileLevelTierFeatures.extractPTLInformation(*(pcSlice->getSPS()));
+          const SEIMessages& subPictureLevelInfoSEIs = getSeisByType(leadingSeiMessages, SEI::SUBPICTURE_LEVEL_INFO);
+          if (!subPictureLevelInfoSEIs.empty())
+          {
+            const SEISubpicureLevelInfo& seiSubpic = static_cast<const SEISubpicureLevelInfo&>(*subPictureLevelInfoSEIs.front());
+            validateMinCrRequirements(profileLevelTierFeatures, subPicStats[subpicIdx].numBytesInVclNalUnits, pcSlice, m_pcCfg, seiSubpic, subpicIdx, m_pcEncLib->getLayerId());
+          }
           sumZeroWords += cabac_zero_word_padding(pcSlice, pcPic, subPicStats[subpicIdx].numBinsWritten, subPicStats[subpicIdx].numBytesInVclNalUnits, 0,
                                                   accessUnit.back()->m_nalUnitData, m_pcCfg->getCabacZeroWordPaddingEnabled(), profileLevelTierFeatures);
         }
@@ -3517,7 +3902,13 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       auto encTime = std::chrono::duration_cast<std::chrono::seconds>( elapsed ).count();
 
       std::string digestStr;
+#if GDR_ENABLED
+      // note : skip the decoded picture hash SEI for pictures inside a GDR interval when GdrNoHash is set
+      bool genHash = !(m_pcCfg->getGdrNoHash() && pcSlice->getPicHeader()->getInGdrInterval());
+      if (m_pcCfg->getDecodedPictureHashSEIType() != HASHTYPE_NONE && genHash)
+#else
       if (m_pcCfg->getDecodedPictureHashSEIType()!=HASHTYPE_NONE)
+#endif
       {
         SEIDecodedPictureHash *decodedPictureHashSei = new SEIDecodedPictureHash();
         PelUnitBuf recoBuf = pcPic->cs->getRecoBuf();
@@ -3530,6 +3921,9 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       std::string subPicDigest;
       if (numSubpics > 1 && m_pcCfg->getSubpicDecodedPictureHashType() != HASHTYPE_NONE )
       {
+        std::vector<uint16_t> subPicIdsInPic;
+        xGetSubpicIdsInPic(subPicIdsInPic, pcPic->cs->sps, pps);
+        uint16_t maxSubpicIdInPic = subPicIdsInPic.size() == 0 ? 0 : *std::max_element(subPicIdsInPic.begin(), subPicIdsInPic.end());
         for (int subPicIdx = 0; subPicIdx < numSubpics; subPicIdx++)
         {
           const SubPic& subpic = pps->getSubPic(subPicIdx);
@@ -3542,20 +3936,27 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
           const std::vector<uint16_t> subPicIds = { (uint16_t)subpic.getSubPicID() };
           std::vector<int> targetOLS;
           std::vector<int> targetLayers = {pcPic->layerId};
-          xCreateScalableNestingSEI(trailingSeiMessages, nestedSEI, targetOLS, targetLayers, subPicIds);
+          xCreateScalableNestingSEI(trailingSeiMessages, nestedSEI, targetOLS, targetLayers, subPicIds, maxSubpicIdInPic);
         }
       }
 
       m_pcCfg->setEncodedFlag(iGOPid, true);
 
       double PSNR_Y;
-      xCalculateAddPSNRs(isField, isTff, iGOPid, pcPic, accessUnit, rcListPic, encTime, snr_conversion, 
+      xCalculateAddPSNRs(isField, isTff, iGOPid, pcPic, accessUnit, rcListPic, encTime, snr_conversion,
         printFrameMSE, printMSSSIM, &PSNR_Y, isEncodeLtRef );
 
 
       xWriteTrailingSEIMessages(trailingSeiMessages, accessUnit, pcSlice->getTLayer());
 
+#if GDR_ENABLED
+      if (!(m_pcCfg->getGdrNoHash() && pcSlice->getPicHeader()->getInGdrInterval()))
+      {
+          printHash(m_pcCfg->getDecodedPictureHashSEIType(), digestStr);
+      }
+#else
       printHash(m_pcCfg->getDecodedPictureHashSEIType(), digestStr);
+#endif
 
       if ( m_pcCfg->getUseRateCtrl() )
       {
@@ -3595,34 +3996,13 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         const PPS* pps = pcSlice->getPPS();
 
         std::vector<uint16_t> subpicIDs;
-        if (sps->getSubPicInfoPresentFlag())
-        {
-          if(sps->getSubPicIdMappingExplicitlySignalledFlag())
-          {
-            if(sps->getSubPicIdMappingPresentFlag())
-            {
-              subpicIDs = sps->getSubPicIds();
-            }
-            else
-            {
-              subpicIDs = pps->getSubPicIds();
-            }
-          }
-          else
-          {
-            const int numSubPics = sps->getNumSubPics();
-            subpicIDs.resize(numSubPics);
-            for (int i = 0 ; i < numSubPics; i++)
-            {
-              subpicIDs[i] = (uint16_t) i;
-            }
-          }
-        }
+        xGetSubpicIdsInPic(subpicIDs, sps, pps);
+        uint16_t maxSubpicIdInPic = subpicIDs.size() == 0 ? 0 : *std::max_element(subpicIDs.begin(), subpicIDs.end());
         // Note (KJS): Using targetOLS = 0, 1 is as random as encapsulating the same SEIs in scalable nesting.
         //             This can just be seen as example regarding how to write scalable nesting, not what to write.
         std::vector<int> targetOLS = {0, 1};
         std::vector<int> targetLayers;
-        xCreateScalableNestingSEI(leadingSeiMessages, nestedSeiMessages, targetOLS, targetLayers, subpicIDs);
+        xCreateScalableNestingSEI(leadingSeiMessages, nestedSeiMessages, targetOLS, targetLayers, subpicIDs, maxSubpicIdInPic);
       }
 
       xWriteLeadingSEIMessages( leadingSeiMessages, duInfoSeiMessages, accessUnit, pcSlice->getTLayer(), pcSlice->getSPS(), duData );
@@ -3634,6 +4014,13 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       fflush( stdout );
     }
 
+#if JVET_W0046_RLSCP
+    m_cnt_right_bottom = pcSlice->getCntRightBottom();
+    if (m_pcCfg->getIntraPeriod() > 1 && pcSlice->isIntra())
+    {
+      m_cnt_right_bottom_i = m_cnt_right_bottom;
+    }
+#endif
 
     DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "final", 0 ) ) );
 
@@ -3664,15 +4051,19 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
   CHECK( m_iNumPicCoded > 1, "Unspecified error" );
 }
 
-void EncGOP::printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, 
-  const bool printSequenceMSE, const bool printMSSSIM, const bool printHexPsnr, const bool printRprPSNR, 
-  const BitDepths &bitDepths )
+void EncGOP::printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR,
+  const bool printSequenceMSE, const bool printMSSSIM, const bool printHexPsnr, const bool printRprPSNR,
+  const BitDepths &bitDepths
+#if JVET_W0134_UNIFORM_METRICS_LOG
+                             , int layerId
+#endif
+                             )
 {
 #if ENABLE_QPA
   const bool    useWPSNR = m_pcEncLib->getUseWPSNR();
 #endif
 #if WCG_WPSNR
-  const bool    useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getLmcs() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ);
+  const bool    useLumaWPSNR = m_pcEncLib->getPrintWPSNR();
 #endif
 
   if( m_pcCfg->getDecodeBitstream(0).empty() && m_pcCfg->getDecodeBitstream(1).empty() && !m_pcCfg->useFastForwardToPOC() )
@@ -3701,8 +4092,22 @@ void EncGOP::printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const boo
 #if JVET_O0756_CALCULATE_HDRMETRICS
   const bool calculateHdrMetrics = m_pcEncLib->getCalcluateHdrMetrics();
 #endif
+
+
+#if JVET_W0134_UNIFORM_METRICS_LOG
+  std::string header,metrics;
+  std::string id="a";
+  if (layerId==0) id+=' ';
+  else            id+=std::to_string(layerId);
+  m_gcAnalyzeAll.printOut(header,metrics, id, chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM, printHexPsnr, printRprPSNR, bitDepths, useWPSNR
+#if JVET_O0756_CALCULATE_HDRMETRICS
+                          , calculateHdrMetrics
+#endif
+                          );
+  if( g_verbosity >= INFO ) std::cout<<header<<'\n'<<metrics<<std::endl;
+#else
 #if ENABLE_QPA
-  m_gcAnalyzeAll.printOut( 'a', chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM, printHexPsnr, 
+  m_gcAnalyzeAll.printOut( 'a', chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM, printHexPsnr,
     printRprPSNR, bitDepths, useWPSNR
 #if JVET_O0756_CALCULATE_HDRMETRICS
                           , calculateHdrMetrics
@@ -3715,16 +4120,48 @@ void EncGOP::printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const boo
 #endif
                           );
 #endif
+#endif
+
+#if JVET_W0134_UNIFORM_METRICS_LOG
+  id="i";
+  if (layerId==0) id+=' ';
+  else            id+=std::to_string(layerId);
+  m_gcAnalyzeI.printOut(header,metrics, id, chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM, printHexPsnr, printRprPSNR, bitDepths );
+  if( g_verbosity >= DETAILS ) std::cout<< "\n\nI Slices--------------------------------------------------------\n"<<header<<'\n'<<metrics<<std::endl;
+
+  id="p";
+  if (layerId==0) id+=' ';
+  else            id+=std::to_string(layerId);
+  m_gcAnalyzeP.printOut(header,metrics, id, chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM, printHexPsnr, printRprPSNR, bitDepths );
+  if( g_verbosity >= DETAILS ) std::cout<<"\n\nP Slices--------------------------------------------------------\n"<<header<<'\n'<<metrics<<std::endl;
+
+  id="b";
+  if (layerId==0) id+=' ';
+  else            id+=std::to_string(layerId);
+  m_gcAnalyzeB.printOut(header,metrics, id, chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM, printHexPsnr, printRprPSNR, bitDepths );
+  if( g_verbosity >= DETAILS ) std::cout<<"\n\nB Slices--------------------------------------------------------\n"<<header<<'\n'<<metrics<<std::endl;
+
+#if WCG_WPSNR
+  if (useLumaWPSNR)
+  {
+    id="w";
+    if (layerId==0) id+=' ';
+    else            id+=std::to_string(layerId);
+    m_gcAnalyzeWPSNR.printOut(header,metrics, id, chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM, printHexPsnr, printRprPSNR, bitDepths, useLumaWPSNR );
+    if( g_verbosity >= DETAILS ) std::cout<<"\nWPSNR SUMMARY --------------------------------------------------------\n"<<header<<'\n'<<metrics<<std::endl;
+
+  }
+#endif
+#else
   msg( DETAILS, "\n\nI Slices--------------------------------------------------------\n" );
-  m_gcAnalyzeI.printOut( 'i', chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM, 
+  m_gcAnalyzeI.printOut( 'i', chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM,
     printHexPsnr, printRprPSNR, bitDepths );
-
   msg( DETAILS, "\n\nP Slices--------------------------------------------------------\n" );
   m_gcAnalyzeP.printOut( 'p', chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM,
     printHexPsnr, printRprPSNR, bitDepths );
 
   msg( DETAILS, "\n\nB Slices--------------------------------------------------------\n" );
-  m_gcAnalyzeB.printOut( 'b', chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM, 
+  m_gcAnalyzeB.printOut( 'b', chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM,
     printHexPsnr, printRprPSNR, bitDepths );
 
 #if WCG_WPSNR
@@ -3735,6 +4172,9 @@ void EncGOP::printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const boo
       printHexPsnr, printRprPSNR, bitDepths, useLumaWPSNR );
   }
 #endif
+#endif
+
+
   if (!m_pcCfg->getSummaryOutFilename().empty())
   {
     m_gcAnalyzeAll.printSummary(chFmt, printSequenceMSE, printHexPsnr, bitDepths, m_pcCfg->getSummaryOutFilename());
@@ -3759,7 +4199,13 @@ void EncGOP::printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const boo
     m_gcAnalyzeAll_in.setFrmRate( m_pcCfg->getFrameRate() / (double)m_pcCfg->getTemporalSubsampleRatio());
     m_gcAnalyzeAll_in.setBits(m_gcAnalyzeAll.getBits());
     // prior to the above statement, the interlace analyser does not contain the correct total number of bits.
-
+#if JVET_W0134_UNIFORM_METRICS_LOG
+    id="a";
+    if (layerId==0) id+=' ';
+    else            id+=std::to_string(layerId);
+    m_gcAnalyzeAll_in.printOut(header,metrics, id, chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM, printHexPsnr, printRprPSNR, bitDepths, useWPSNR );
+    if( g_verbosity >= DETAILS ) std::cout<< "\n\nSUMMARY INTERLACED ---------------------------------------------\n"<<header<<'\n'<<metrics<<std::endl;
+#else
     msg( INFO,"\n\nSUMMARY INTERLACED ---------------------------------------------\n" );
 #if ENABLE_QPA
     m_gcAnalyzeAll_in.printOut( 'a', chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM,
@@ -3767,6 +4213,7 @@ void EncGOP::printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const boo
 #else
     m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printMSSSIM,
       printHexPsnr, bitDepths);
+#endif
 #endif
     if (!m_pcCfg->getSummaryOutFilename().empty())
     {
@@ -3787,7 +4234,7 @@ void EncGOP::printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const boo
 uint64_t EncGOP::preLoopFilterPicAndCalcDist( Picture* pcPic )
 {
   CodingStructure& cs = *pcPic->cs;
-  m_pcLoopFilter->loopFilterPic( cs );
+  m_pcLoopFilter->deblockingFilterPic( cs );
 
   const CPelUnitBuf picOrg = pcPic->getRecoBuf();
   const CPelUnitBuf picRec = cs.getRecoBuf();
@@ -3809,9 +4256,7 @@ uint64_t EncGOP::preLoopFilterPicAndCalcDist( Picture* pcPic )
 // ====================================================================================================================
 // Protected member functions
 // ====================================================================================================================
-void EncGOP::xInitGOP( int iPOCLast, int iNumPicRcvd, bool isField
-  , bool isEncodeLtRef
-)
+void EncGOP::xInitGOP(int iPOCLast, int iNumPicRcvd, bool isField, bool isEncodeLtRef)
 {
   CHECK(!( iNumPicRcvd > 0 ), "Unspecified error");
   //  Exception for the first frames
@@ -3871,6 +4316,35 @@ void EncGOP::xGetBuffer( PicList&                  rcListPic,
   return;
 }
 
+void EncGOP::xGetSubpicIdsInPic(std::vector<uint16_t>& subpicIDs, const SPS* sps, const PPS* pps)
+{  // Fills subpicIDs (output) with the subpicture IDs taken from the SPS, the PPS, or a default 0..N-1 sequence.
+  subpicIDs.clear();  // always start empty; stays empty when the SPS reports no subpicture info
+
+  if (sps->getSubPicInfoPresentFlag())
+  {
+    if(sps->getSubPicIdMappingExplicitlySignalledFlag())
+    {
+      if(sps->getSubPicIdMappingPresentFlag())  // explicit mapping: choose which parameter set supplies the list
+      {
+        subpicIDs = sps->getSubPicIds();  // SPS reports the mapping as present: copy the SPS list
+      }
+      else
+      {
+        subpicIDs = pps->getSubPicIds();  // otherwise copy the PPS list (pps is only dereferenced on this path)
+      }
+    }
+    else
+    {  // no explicit mapping: each subpicture's ID is its own index
+      const int numSubPics = sps->getNumSubPics();
+      subpicIDs.resize(numSubPics);
+      for (int i = 0 ; i < numSubPics; i++)
+      {
+        subpicIDs[i] = (uint16_t) i;  // index narrowed to the 16-bit element type of the vector
+      }
+    }
+  }
+}
+
 #if ENABLE_QPA
 
 #ifndef BETA
@@ -3909,7 +4383,10 @@ static inline double calcWeightedSquaredError(const CPelBuf& org,        const C
       ssErr += uint64_t(iDiff * iDiff);
     }
   }
-  if (wAct <= xAct || hAct <= yAct) return (double)ssErr;
+  if (wAct <= xAct || hAct <= yAct)
+  {
+    return (double) ssErr;
+  }
 
   for (y = yAct; y < hAct; y++)   // activity
   {
@@ -4031,7 +4508,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1,
 double EncGOP::xFindDistortionPlaneWPSNR(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift, const CPelBuf& picLuma0,
   ComponentID compID, const ChromaFormat chfmt    )
 {
-  const bool    useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getLmcs() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ);
+  const bool    useLumaWPSNR = m_pcEncLib->getPrintWPSNR();
   if (!useLumaWPSNR)
   {
     return 0;
@@ -4081,12 +4558,12 @@ double EncGOP::xFindDistortionPlaneWPSNR(const CPelBuf& pic0, const CPelBuf& pic
 }
 #endif
 
-void EncGOP::xCalculateAddPSNRs( const bool isField, const bool isFieldTopFieldFirst, 
-  const int iGOPid, Picture* pcPic, const AccessUnit&accessUnit, PicList &rcListPic, 
-  const int64_t dEncTime, const InputColourSpaceConversion snr_conversion, 
+void EncGOP::xCalculateAddPSNRs( const bool isField, const bool isFieldTopFieldFirst,
+  const int iGOPid, Picture* pcPic, const AccessUnit&accessUnit, PicList &rcListPic,
+  const int64_t dEncTime, const InputColourSpaceConversion snr_conversion,
   const bool printFrameMSE, const bool printMSSSIM, double* PSNR_Y, bool isEncodeLtRef)
 {
-  xCalculateAddPSNR(pcPic, pcPic->getRecoBuf(), accessUnit, (double)dEncTime, snr_conversion, 
+  xCalculateAddPSNR(pcPic, pcPic->getRecoBuf(), accessUnit, (double)dEncTime, snr_conversion,
     printFrameMSE, printMSSSIM, PSNR_Y, isEncodeLtRef);
 
   //In case of field coding, compute the interlaced PSNR for both fields
@@ -4142,20 +4619,20 @@ void EncGOP::xCalculateAddPSNRs( const bool isField, const bool isFieldTopFieldF
 
       if ((pcPic->topField && isFieldTopFieldFirst) || (!pcPic->topField && !isFieldTopFieldFirst))
       {
-        xCalculateInterlacedAddPSNR(pcPic, correspondingFieldPic, pcPic->getRecoBuf(), 
-          correspondingFieldPic->getRecoBuf(), snr_conversion, printFrameMSE, printMSSSIM, 
+        xCalculateInterlacedAddPSNR(pcPic, correspondingFieldPic, pcPic->getRecoBuf(),
+          correspondingFieldPic->getRecoBuf(), snr_conversion, printFrameMSE, printMSSSIM,
           PSNR_Y, isEncodeLtRef);
       }
       else
       {
-        xCalculateInterlacedAddPSNR(correspondingFieldPic, pcPic, correspondingFieldPic->getRecoBuf(), 
+        xCalculateInterlacedAddPSNR(correspondingFieldPic, pcPic, correspondingFieldPic->getRecoBuf(),
           pcPic->getRecoBuf(), snr_conversion, printFrameMSE, printMSSSIM, PSNR_Y, isEncodeLtRef);
       }
     }
   }
 }
 
-void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUnit& accessUnit, 
+void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUnit& accessUnit,
   double dEncTime, const InputColourSpaceConversion conversion, const bool printFrameMSE, const bool printMSSSIM,
   double* PSNR_Y, bool isEncodeLtRef)
 {
@@ -4170,7 +4647,7 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
   double  dPSNR[MAX_NUM_COMPONENT];
   double msssim[MAX_NUM_COMPONENT] = {0.0};
 #if WCG_WPSNR
-  const bool    useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcCfg->getLmcs() && m_pcCfg->getReshapeSignalType() == RESHAPE_SIGNAL_PQ);
+  const bool    useLumaWPSNR = m_pcEncLib->getPrintWPSNR();
   double  dPSNRWeighted[MAX_NUM_COMPONENT];
   double  MSEyuvframeWeighted[MAX_NUM_COMPONENT];
 #endif
@@ -4237,8 +4714,8 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
     CHECK(!( p.width  == o.width), "Unspecified error");
     CHECK(!( p.height == o.height), "Unspecified error");
 
-    int padX = m_pcEncLib->getPad( 0 );
-    int padY = m_pcEncLib->getPad( 1 );
+    int padX = m_pcEncLib->getSourcePadding( 0 );
+    int padY = m_pcEncLib->getSourcePadding( 1 );
 
     // when RPR is enabled, picture padding is picture specific due to possible different picture resoluitons, however only full resolution padding is stored in EncLib
     // get per picture padding from the conformance window, in this case if conformance window is set not equal to the padding then PSNR results may be inaccurate
@@ -4284,8 +4761,8 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
     {
       const CPelBuf& upscaledOrg = (sps.getUseLmcs() || m_pcCfg->getGopBasedTemporalFilterEnabled()) ? pcPic->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT).get( compID ) : pcPic->M_BUFS( 0, PIC_ORIGINAL_INPUT).get( compID );
 
-      const uint32_t upscaledWidth = upscaledOrg.width - ( m_pcEncLib->getPad( 0 ) >> ::getComponentScaleX( compID, format ) );
-      const uint32_t upscaledHeight = upscaledOrg.height - ( m_pcEncLib->getPad( 1 ) >> ( !!bPicIsField + ::getComponentScaleY( compID, format ) ) );
+      const uint32_t upscaledWidth = upscaledOrg.width - ( m_pcEncLib->getSourcePadding( 0 ) >> ::getComponentScaleX( compID, format ) );
+      const uint32_t upscaledHeight = upscaledOrg.height - ( m_pcEncLib->getSourcePadding( 1 ) >> ( !!bPicIsField + ::getComponentScaleY( compID, format ) ) );
 
       // create new buffers with correct dimensions
       const CPelBuf upscaledRecPB( upscaledRec.get( compID ).bufAt( 0, 0 ), upscaledRec.get( compID ).stride, upscaledWidth, upscaledHeight );
@@ -4331,11 +4808,7 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
     if( ( *it )->m_nalUnitType != NAL_UNIT_PREFIX_SEI && ( *it )->m_nalUnitType != NAL_UNIT_SUFFIX_SEI )
     {
       numRBSPBytes += numRBSPBytes_nal;
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
       if (it == accessUnit.begin() || (*it)->m_nalUnitType == NAL_UNIT_OPI || (*it)->m_nalUnitType == NAL_UNIT_VPS || (*it)->m_nalUnitType == NAL_UNIT_DCI || (*it)->m_nalUnitType == NAL_UNIT_SPS || (*it)->m_nalUnitType == NAL_UNIT_PPS || (*it)->m_nalUnitType == NAL_UNIT_PREFIX_APS || (*it)->m_nalUnitType == NAL_UNIT_SUFFIX_APS)
-#else
-      if (it == accessUnit.begin() || (*it)->m_nalUnitType == NAL_UNIT_VPS || (*it)->m_nalUnitType == NAL_UNIT_DCI || (*it)->m_nalUnitType == NAL_UNIT_SPS || (*it)->m_nalUnitType == NAL_UNIT_PPS || (*it)->m_nalUnitType == NAL_UNIT_PREFIX_APS || (*it)->m_nalUnitType == NAL_UNIT_SUFFIX_APS)
-#endif
       {
         numRBSPBytes += 4;
       }
@@ -4414,7 +4887,14 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
   {
     c += 32;
   }
-  if (m_pcCfg->getDependentRAPIndicationSEIEnabled() && pcSlice->isDRAP()) c = 'D';
+  if (m_pcCfg->getDependentRAPIndicationSEIEnabled() && pcSlice->isDRAP())
+  {
+    c = 'D';
+  }
+  if (m_pcCfg->getEdrapIndicationSEIEnabled() && pcSlice->getEdrapRapId() > 0)
+  {
+    c = 'E';
+  }
 
   if( g_verbosity >= NOTICE )
   {
@@ -4451,7 +4931,7 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
     if (printMSSSIM)
     {
       msg( NOTICE, " [MS-SSIM Y %1.6lf    U %1.6lf    V %1.6lf]", msssim[COMPONENT_Y], msssim[COMPONENT_Cb], msssim[COMPONENT_Cr] );
-    }  
+    }
 
     if( printFrameMSE )
     {
@@ -4506,8 +4986,8 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
                  reinterpret_cast<uint8_t *>(&psnrL[i]) + sizeof(psnrL[i]),
                  reinterpret_cast<uint8_t *>(&xpsnrL[i]));
           }
-          msg(NOTICE, " [xPSNRL%d %16" PRIx64 "]", (int)m_pcCfg->getWhitePointDeltaE(i), xpsnrL[0]);
 
+          msg(NOTICE, " [xPSNRL%d %16" PRIx64 "]", (int) m_pcCfg->getWhitePointDeltaE(i), xpsnrL[0]);
         }
       }
     }
@@ -4555,7 +5035,11 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
     }
     if (m_pcEncLib->isResChangeInClvsEnabled())
     {
+#if JVET_W0134_UNIFORM_METRICS_LOG
+      msg( NOTICE, " [Y2 %6.4lf dB  U2 %6.4lf dB  V2 %6.4lf dB]", upscaledPSNR[COMPONENT_Y], upscaledPSNR[COMPONENT_Cb], upscaledPSNR[COMPONENT_Cr] );
+#else
       msg( NOTICE, "\nPSNR2: [Y %6.4lf dB    U %6.4lf dB    V %6.4lf dB]", upscaledPSNR[COMPONENT_Y], upscaledPSNR[COMPONENT_Cb], upscaledPSNR[COMPONENT_Cr] );
+#endif
     }
   }
   else if( g_verbosity >= INFO )
@@ -4573,22 +5057,22 @@ double EncGOP::xCalculateMSSSIM (const Pel* org, const int orgStride, const Pel*
 
   uint32_t maxScale;
 
-  // For low resolution videos determine number of scales 
+  // For low resolution videos determine number of scales
   if (width < 22 || height < 22)
   {
-    maxScale = 1; 
+    maxScale = 1;
   }
   else if (width < 44 || height < 44)
   {
-    maxScale = 2; 
+    maxScale = 2;
   }
   else if (width < 88 || height < 88)
   {
-    maxScale = 3; 
+    maxScale = 3;
   }
   else if (width < 176 || height < 176)
   {
-    maxScale = 4; 
+    maxScale = 4;
   }
   else
   {
@@ -4604,8 +5088,10 @@ double EncGOP::xCalculateMSSSIM (const Pel* org, const int orgStride, const Pel*
   {
     for(int x=0; x<WEIGHTING_SIZE; x++)
     {
-      weights[y][x]=exp(-((y-WEIGHTING_MID_TAP)*(y-WEIGHTING_MID_TAP)+(x-WEIGHTING_MID_TAP)*(x-WEIGHTING_MID_TAP))/(WEIGHTING_MID_TAP-0.5));
-      coeffSum +=weights[y][x];
+      weights[y][x] =
+        exp(-((y - WEIGHTING_MID_TAP) * (y - WEIGHTING_MID_TAP) + (x - WEIGHTING_MID_TAP) * (x - WEIGHTING_MID_TAP))
+            / (WEIGHTING_MID_TAP - 0.5));
+      coeffSum += weights[y][x];
     }
   }
 
@@ -4666,12 +5152,12 @@ double EncGOP::xCalculateMSSSIM (const Pel* org, const int orgStride, const Pel*
       }
     }
   }
-  
+
   // Calculate MS-SSIM:
   const uint32_t   maxValue  = (1<<bitDepth)-1;
   const double c1        = (0.01*maxValue)*(0.01*maxValue);
   const double c2        = (0.03*maxValue)*(0.03*maxValue);
-  
+
   double finalMSSSIM = 1.0;
 
   for(uint32_t scale=0; scale<maxScale; scale++)
@@ -4760,7 +5246,6 @@ void EncGOP::xCalculateHDRMetrics( Picture* pcPic, double deltaE[hdrtoolslib::NB
 
   *deltaE = m_pcDistortionDeltaE->getDeltaE();
   *psnrL  = m_pcDistortionDeltaE->getPsnrL();
-
 }
 
 void EncGOP::copyBuftoFrame( Picture* pcPic )
@@ -4770,12 +5255,12 @@ void EncGOP::copyBuftoFrame( Picture* pcPic )
   int cropOffsetRight  = m_pcCfg->getCropOffsetRight();
   int cropOffsetBottom = m_pcCfg->getCropOffsetBottom();
 
-  int height = pcPic->getOrigBuf(COMPONENT_Y).height - cropOffsetLeft + cropOffsetRight;
-  int width = pcPic->getOrigBuf(COMPONENT_Y).width - cropOffsetTop + cropOffsetBottom;
+  int height = pcPic->getTrueOrigBuf(COMPONENT_Y).height - cropOffsetLeft + cropOffsetRight;
+  int width  = pcPic->getTrueOrigBuf(COMPONENT_Y).width - cropOffsetTop + cropOffsetBottom;
 
   ChromaFormat chFmt =  pcPic->chromaFormat;
 
-  Pel* pOrg = pcPic->getOrigBuf(COMPONENT_Y).buf;
+  Pel *pOrg = pcPic->getTrueOrigBuf(COMPONENT_Y).buf;
   Pel* pRec = pcPic->getRecoBuf(COMPONENT_Y).buf;
 
   uint16_t* yOrg = m_ppcFrameOrg[0]->m_ui16Comp[hdrtoolslib::Y_COMP];
@@ -4785,7 +5270,8 @@ void EncGOP::copyBuftoFrame( Picture* pcPic )
   uint16_t* vOrg = m_ppcFrameOrg[0]->m_ui16Comp[hdrtoolslib::Cr_COMP];
   uint16_t* vRec = m_ppcFrameRec[0]->m_ui16Comp[hdrtoolslib::Cr_COMP];
 
-  if(chFmt == CHROMA_444){
+  if (chFmt == CHROMA_444)
+  {
     yOrg = m_ppcFrameOrg[1]->m_ui16Comp[hdrtoolslib::Y_COMP];
     yRec = m_ppcFrameRec[1]->m_ui16Comp[hdrtoolslib::Y_COMP];
     uOrg = m_ppcFrameOrg[1]->m_ui16Comp[hdrtoolslib::Cb_COMP];
@@ -4794,36 +5280,43 @@ void EncGOP::copyBuftoFrame( Picture* pcPic )
     vRec = m_ppcFrameRec[1]->m_ui16Comp[hdrtoolslib::Cr_COMP];
   }
 
-  for (int i = 0; i < height; i++) {
-    for (int j = 0; j < width; j++) {
-      yOrg[i*width + j] = static_cast<uint16_t>(pOrg[(i + cropOffsetTop) * pcPic->getOrigBuf(COMPONENT_Y).stride + j + cropOffsetLeft]);
+  for (int i = 0; i < height; i++)
+  {
+    for (int j = 0; j < width; j++)
+    {
+      yOrg[i * width + j] = static_cast<uint16_t>(pOrg[(i + cropOffsetTop) * pcPic->getTrueOrigBuf(COMPONENT_Y).stride + j + cropOffsetLeft]);
       yRec[i*width + j] = static_cast<uint16_t>(pRec[(i + cropOffsetTop) * pcPic->getRecoBuf(COMPONENT_Y).stride + j + cropOffsetLeft]);
     }
   }
 
-  if (chFmt != CHROMA_444) {
+  if (chFmt != CHROMA_444)
+  {
     height >>= 1;
     width  >>= 1;
     cropOffsetLeft >>= 1;
     cropOffsetTop >>= 1;
   }
 
-  pOrg = pcPic->getOrigBuf(COMPONENT_Cb).buf;
+  pOrg = pcPic->getTrueOrigBuf(COMPONENT_Cb).buf;
   pRec = pcPic->getRecoBuf(COMPONENT_Cb).buf;
 
-  for (int i = 0; i < height; i++) {
-    for (int j = 0; j < width; j++) {
-      uOrg[i*width + j] = static_cast<uint16_t>(pOrg[(i + cropOffsetTop) * pcPic->getOrigBuf(COMPONENT_Cb).stride + j + cropOffsetLeft]);
+  for (int i = 0; i < height; i++)
+  {
+    for (int j = 0; j < width; j++)
+    {
+      uOrg[i * width + j] = static_cast<uint16_t>(pOrg[(i + cropOffsetTop) * pcPic->getTrueOrigBuf(COMPONENT_Cb).stride + j + cropOffsetLeft]);
       uRec[i*width + j] = static_cast<uint16_t>(pRec[(i + cropOffsetTop) * pcPic->getRecoBuf(COMPONENT_Cb).stride + j + cropOffsetLeft]);
     }
   }
 
-  pOrg = pcPic->getOrigBuf(COMPONENT_Cr).buf;
+  pOrg = pcPic->getTrueOrigBuf(COMPONENT_Cr).buf;
   pRec = pcPic->getRecoBuf(COMPONENT_Cr).buf;
 
-  for (int i = 0; i < height; i++) {
-    for (int j = 0; j < width; j++) {
-      vOrg[i*width + j] = static_cast<uint16_t>(pOrg[(i + cropOffsetTop) * pcPic->getOrigBuf(COMPONENT_Cr).stride + j + cropOffsetLeft]);
+  for (int i = 0; i < height; i++)
+  {
+    for (int j = 0; j < width; j++)
+    {
+      vOrg[i * width + j] = static_cast<uint16_t>(pOrg[(i + cropOffsetTop) * pcPic->getTrueOrigBuf(COMPONENT_Cr).stride + j + cropOffsetLeft]);
       vRec[i*width + j] = static_cast<uint16_t>(pRec[(i + cropOffsetTop) * pcPic->getRecoBuf(COMPONENT_Cr).stride + j + cropOffsetLeft]);
     }
   }
@@ -4832,7 +5325,7 @@ void EncGOP::copyBuftoFrame( Picture* pcPic )
 
 void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* pcPicOrgSecondField,
                                           PelUnitBuf cPicRecFirstField, PelUnitBuf cPicRecSecondField,
-                                          const InputColourSpaceConversion conversion, const bool printFrameMSE, 
+                                          const InputColourSpaceConversion conversion, const bool printFrameMSE,
                                           const bool printMSSSIM, double* PSNR_Y, bool isEncodeLtRef)
 {
   const SPS &sps = *pcPicOrgFirstField->cs->sps;
@@ -4874,8 +5367,8 @@ void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture*
     CHECK(!(acPicRecFields[0].get(ch).height==acPicRecFields[0].get(ch).height), "Unspecified error");
 
     uint64_t uiSSDtemp=0;
-    const uint32_t width    = acPicRecFields[0].get(ch).width - (m_pcEncLib->getPad(0) >> ::getComponentScaleX(ch, format));
-    const uint32_t height   = acPicRecFields[0].get(ch).height - ((m_pcEncLib->getPad(1) >> 1) >> ::getComponentScaleY(ch, format));
+    const uint32_t width    = acPicRecFields[0].get(ch).width - (m_pcEncLib->getSourcePadding(0) >> ::getComponentScaleX(ch, format));
+    const uint32_t height   = acPicRecFields[0].get(ch).height - ((m_pcEncLib->getSourcePadding(1) >> 1) >> ::getComponentScaleY(ch, format));
     const uint32_t bitDepth = sps.getBitDepth(toChannelType(ch));
 
     double sumOverFieldsMSSSIM = 0;
@@ -4937,6 +5430,18 @@ void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture*
  */
 NalUnitType EncGOP::getNalUnitType(int pocCurr, int lastIDR, bool isField)
 {
+#if GDR_ENABLED
+  if (m_pcCfg->getGdrEnabled() && m_pcCfg->getDecodingRefreshType() == 3 && (pocCurr >= m_pcCfg->getGdrPocStart()))
+  {
+    int m = pocCurr - m_pcCfg->getGdrPocStart();
+    int n = m_pcCfg->getGdrPeriod();
+    if (m % n == 0)
+    {
+      return NAL_UNIT_CODED_SLICE_GDR;
+    }
+  }
+#endif
+
   if (pocCurr == 0)
   {
     return NAL_UNIT_CODED_SLICE_IDR_N_LP;
@@ -4987,7 +5492,14 @@ NalUnitType EncGOP::getNalUnitType(int pocCurr, int lastIDR, bool isField)
       return NAL_UNIT_CODED_SLICE_RADL;
     }
   }
+#if GDR_ENABLED
+  if (m_pcCfg->getGdrEnabled() && pocCurr >= m_pcCfg->getGdrPocStart() && ((pocCurr - m_pcCfg->getGdrPocStart()) % m_pcCfg->getGdrPeriod() == 0))
+    return NAL_UNIT_CODED_SLICE_GDR;
+  else
+    return NAL_UNIT_CODED_SLICE_TRAIL;
+#else
   return NAL_UNIT_CODED_SLICE_TRAIL;
+#endif
 }
 
 void EncGOP::xUpdateRasInit(Slice* slice)
@@ -5004,6 +5516,69 @@ void EncGOP::xUpdateRasInit(Slice* slice)
   }
 }
 
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+void EncGOP::xUpdateRPRtmvp( PicHeader* pcPicHeader, Slice* pcSlice )
+{  // Re-selects the TMVP collocated reference (list + index) on pcPicHeader/pcSlice, or disables TMVP if no reference qualifies.
+  if( pcPicHeader->getEnableTMVPFlag() )  // nothing to do when TMVP is already off in the picture header
+  {
+    int colRefIdxL0 = -1, colRefIdxL1 = -1;  // -1 means "no candidate found" in that list
+
+    for( int refIdx = 0; refIdx < pcSlice->getNumRefIdx( REF_PIC_LIST_0 ); refIdx++ )  // find first qualifying L0 entry
+    {
+      if( !( pcSlice->getRefPic( REF_PIC_LIST_0, refIdx )->slices[0]->getNalUnitType() != NAL_UNIT_CODED_SLICE_RASL &&
+            pcSlice->getRefPic( REF_PIC_LIST_0, refIdx )->poc <= m_pocCRA ) )  // i.e. first slice is RASL, or POC > m_pocCRA
+      {
+        colRefIdxL0 = refIdx;
+        break;  // keep the lowest qualifying index
+      }
+    }
+
+    for( int refIdx = 0; refIdx < pcSlice->getNumRefIdx( REF_PIC_LIST_1 ); refIdx++ )  // same search on L1
+    {
+      if( !( pcSlice->getRefPic( REF_PIC_LIST_1, refIdx )->slices[0]->getNalUnitType() != NAL_UNIT_CODED_SLICE_RASL &&
+            pcSlice->getRefPic( REF_PIC_LIST_1, refIdx )->poc <= m_pocCRA ) )  // i.e. first slice is RASL, or POC > m_pocCRA
+      {
+        colRefIdxL1 = refIdx;
+        break;  // keep the lowest qualifying index
+      }
+    }
+
+    if( colRefIdxL0 >= 0 && colRefIdxL1 >= 0 )  // candidates in both lists: choose between them by slice QP
+    {
+      const Picture *refPicL0 = pcSlice->getRefPic( REF_PIC_LIST_0, colRefIdxL0 );
+      const Picture *refPicL1 = pcSlice->getRefPic( REF_PIC_LIST_1, colRefIdxL1 );
+
+      CHECK( !refPicL0->slices.size(), "Wrong L0 reference picture" );  // NOTE(review): slices[0] was already dereferenced in the loops above
+      CHECK( !refPicL1->slices.size(), "Wrong L1 reference picture" );  // NOTE(review): same late check for the L1 candidate
+
+      const uint32_t colFromL0 = refPicL0->slices[0]->getSliceQp() > refPicL1->slices[0]->getSliceQp();  // 1 when the L0 candidate's first-slice QP is strictly higher
+      pcPicHeader->setPicColFromL0Flag( colFromL0 );
+      pcSlice->setColFromL0Flag(colFromL0);
+      pcSlice->setColRefIdx( colFromL0 ? colRefIdxL0 : colRefIdxL1 );  // slice and picture header are kept in agreement
+      pcPicHeader->setColRefIdx( colFromL0 ? colRefIdxL0 : colRefIdxL1 );
+    }
+    else if( colRefIdxL0 < 0 && colRefIdxL1 >= 0 )  // only L1 has a candidate
+    {
+      pcPicHeader->setPicColFromL0Flag( false );
+      pcSlice->setColFromL0Flag( false );
+      pcSlice->setColRefIdx( colRefIdxL1 );
+      pcPicHeader->setColRefIdx( colRefIdxL1 );
+    }
+    else if( colRefIdxL0 >= 0 && colRefIdxL1 < 0 )  // only L0 has a candidate
+    {
+      pcPicHeader->setPicColFromL0Flag( true );
+      pcSlice->setColFromL0Flag( true );
+      pcSlice->setColRefIdx( colRefIdxL0 );
+      pcPicHeader->setColRefIdx( colRefIdxL0 );
+    }
+    else  // neither list has a candidate
+    {
+      pcPicHeader->setEnableTMVPFlag( false );  // only the picture-header flag is cleared; the slice's collocated fields are left untouched
+    }
+  }
+}
+#endif
+
 double EncGOP::xCalculateRVM()
 {
   double dRVM = 0;
@@ -5311,7 +5886,7 @@ void EncGOP::applyDeblockingFilterMetric( Picture* pcPic, uint32_t uiNumSlices )
   int qp = pcSlice->getSliceQp();
   const int bitDepthLuma=pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA);
   int bitdepthScale = 1 << (bitDepthLuma-8);
-  int beta = LoopFilter::getBeta( qp ) * bitdepthScale;
+  int beta = DeblockingFilter::getBeta( qp ) * bitdepthScale;
   const int thr2 = (beta>>2);
   const int thr1 = 2*bitdepthScale;
   uint32_t a = 0;
@@ -5560,19 +6135,11 @@ void EncGOP::applyDeblockingFilterParameterSelection( Picture* pcPic, const uint
 }
 #endif
 
-#if JVET_R0193
 bool EncGOP::xCheckMaxTidILRefPics(int layerIdx, Picture* refPic, bool currentPicIsIRAP)
-#else
-bool EncGOP::xCheckMaxTidILRefPics(Picture* refPic, bool currentPicIsIRAP)
-#endif
 {
-#if JVET_R0193
   const VPS* vps = refPic->cs->vps;
   int refLayerIdx = vps == nullptr ? 0 : vps->getGeneralLayerIdx(refPic->layerId);
   const int maxTidILRefPicsPlus1 = vps->getMaxTidIlRefPicsPlus1(layerIdx, refLayerIdx);
-#else
-  const int maxTidILRefPicsPlus1 = m_pcCfg->getVPSParameters().m_maxTidILRefPicsPlus1;
-#endif
 
   // -1 means not set
   if (maxTidILRefPicsPlus1 < 0)
@@ -5613,6 +6180,8 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
   static_vector<int, MAX_NUM_REF_PICS> higherTLayerRefs;
 
   higherTLayerRefs.resize(0);
+  static_vector<int, MAX_NUM_REF_PICS> inactiveRefs;
+  inactiveRefs.resize(0);
   if (isIntraLayerPredAllowed)
   {
     for (int ii = 0; ii < numOfRefPic; ii++)
@@ -5633,12 +6202,14 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
           hasHigherTId = rpcPic->temporalId > pic->temporalId;
           if (!rpl0->isRefPicLongterm(ii) && rpcPic->referenced
               && rpcPic->getPOC() == slice->getPOC() + rpl0->getRefPicIdentifier(ii)
-              && !slice->isPocRestrictedByDRAP(rpcPic->getPOC(), rpcPic->precedingDRAP))
+              && !slice->isPocRestrictedByDRAP(rpcPic->getPOC(), rpcPic->precedingDRAP)
+              && !slice->isPocRestrictedByEdrap(rpcPic->getPOC()))
           {
             isAvailable = true;
             break;
           }
-          else if (rpl0->isRefPicLongterm(ii) && rpcPic->referenced && (rpcPic->getPOC() & (pocCycle - 1)) == rpl0->getRefPicIdentifier(ii) && !slice->isPocRestrictedByDRAP(rpcPic->getPOC(), rpcPic->precedingDRAP))
+          else if (rpl0->isRefPicLongterm(ii) && rpcPic->referenced && (rpcPic->getPOC() & (pocCycle - 1)) == rpl0->getRefPicIdentifier(ii) && !slice->isPocRestrictedByDRAP(rpcPic->getPOC(), rpcPic->precedingDRAP)
+              && !slice->isPocRestrictedByEdrap(rpcPic->getPOC()))
           {
             isAvailable = true;
             break;
@@ -5652,6 +6223,10 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
         {
           higherTLayerRefs.push_back(ii);
         }
+        else if (refPicIdxL0 >= rpl1->getNumberOfActivePictures() && layerIdx && vps && !vps->getAllIndependentLayersFlag() && isInterLayerPredAllowed)
+        {
+          inactiveRefs.push_back(ii);
+        }
         else
         {
           pLocalRPL0->setRefPicIdentifier(refPicIdxL0, rpl0->getRefPicIdentifier(ii), rpl0->isRefPicLongterm(ii), false,
@@ -5679,11 +6254,7 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
         rpcPic = *( iterPic++ );
         int refLayerIdx = vps->getGeneralLayerIdx( rpcPic->layerId );
         if (rpcPic->referenced && rpcPic->getPOC() == pic->getPOC() && vps->getDirectRefLayerFlag(layerIdx, refLayerIdx)
-#if JVET_R0193
             && xCheckMaxTidILRefPics(layerIdx, rpcPic, slice->isIRAP()))
-#else
-            && xCheckMaxTidILRefPics(rpcPic, slice->isIRAP()) )
-#endif
         {
           pLocalRPL0->setRefPicIdentifier( refPicIdxL0, 0, true, true, vps->getInterLayerRefIdc( layerIdx, refLayerIdx ) );
           refPicIdxL0++;
@@ -5693,6 +6264,16 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
       }
     }
   }
+  // now add inactive refs
+  for (int i = 0; i < inactiveRefs.size(); i++)
+  {
+    const int ii = inactiveRefs[i];
+    pLocalRPL0->setRefPicIdentifier(refPicIdxL0, rpl0->getRefPicIdentifier(ii), rpl0->isRefPicLongterm(ii), false,
+      NOT_VALID);
+    refPicIdxL0++;
+    numOfSTRPL0 = numOfSTRPL0 + ((rpl0->isRefPicLongterm(ii)) ? 0 : 1);
+    numOfLTRPL0 += (rpl0->isRefPicLongterm(ii) && !rpl0->isInterLayerRefPic(ii)) ? 1 : 0;
+  }
 
   if( slice->getEnableDRAPSEI() )
   {
@@ -5718,6 +6299,35 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
       }
     }
   }
+  if( slice->getEnableEdrapSEI() )
+  {
+    pLocalRPL0->setNumberOfShorttermPictures( numOfSTRPL0 );
+    pLocalRPL0->setNumberOfLongtermPictures( numOfLTRPL0 );
+    pLocalRPL0->setNumberOfInterLayerPictures( numOfILRPL0 );
+
+    for (int i = 0; i < slice->getEdrapNumRefRapPics(); i++)
+    {
+      int refPoc = slice->getEdrapRefRapId(i) == 0 ? slice->getAssociatedIRAPPOC() : slice->getEdrapRefRapId(i) * m_pcEncLib->getEdrapPeriod();
+      if( slice->isPOCInRefPicList( pLocalRPL0, refPoc ) )
+      {
+        continue;
+      }
+      if( slice->getUseLTforEdrap() && !slice->isPOCInRefPicList( rpl1, refPoc ) )
+      {
+        // Added as longterm picture
+        pLocalRPL0->setRefPicIdentifier( refPicIdxL0, refPoc, true, false, 0 );
+        refPicIdxL0++;
+        numOfLTRPL0++;
+      }
+      else
+      {
+        // Added as shortterm picture
+        pLocalRPL0->setRefPicIdentifier(refPicIdxL0, refPoc - slice->getPOC(), false, false, 0);
+        refPicIdxL0++;
+        numOfSTRPL0++;
+      }
+    }
+  }
 
   // now add higher TId refs
   for (int i = 0; i < higherTLayerRefs.size(); i++)
@@ -5740,6 +6350,7 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
   uint32_t refPicIdxL1 = 0;
 
   higherTLayerRefs.resize(0);
+  inactiveRefs.resize(0);
   if (isIntraLayerPredAllowed)
   {
     for (int ii = 0; ii < numOfRefPic; ii++)
@@ -5758,12 +6369,13 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
           hasHigherTId = rpcPic->temporalId > pic->temporalId;
           if (!rpl1->isRefPicLongterm(ii) && rpcPic->referenced
               && rpcPic->getPOC() == slice->getPOC() + rpl1->getRefPicIdentifier(ii)
-              && !slice->isPocRestrictedByDRAP(rpcPic->getPOC(), rpcPic->precedingDRAP))
+              && !slice->isPocRestrictedByDRAP(rpcPic->getPOC(), rpcPic->precedingDRAP)
+              && !slice->isPocRestrictedByEdrap(rpcPic->getPOC()))
           {
             isAvailable = true;
             break;
           }
-          else if (rpl1->isRefPicLongterm(ii) && rpcPic->referenced && (rpcPic->getPOC() & (pocCycle - 1)) == rpl1->getRefPicIdentifier(ii) && !slice->isPocRestrictedByDRAP(rpcPic->getPOC(), rpcPic->precedingDRAP))
+          else if (rpl1->isRefPicLongterm(ii) && rpcPic->referenced && (rpcPic->getPOC() & (pocCycle - 1)) == rpl1->getRefPicIdentifier(ii) && !slice->isPocRestrictedByDRAP(rpcPic->getPOC(), rpcPic->precedingDRAP) && !slice->isPocRestrictedByEdrap(rpcPic->getPOC()))
           {
             isAvailable = true;
             break;
@@ -5777,6 +6389,10 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
         {
           higherTLayerRefs.push_back(ii);
         }
+        else if (refPicIdxL1 >= rpl1->getNumberOfActivePictures() && layerIdx && vps && !vps->getAllIndependentLayersFlag() && isInterLayerPredAllowed)
+        {
+          inactiveRefs.push_back(ii);
+        }
         else
         {
           pLocalRPL1->setRefPicIdentifier(refPicIdxL1, rpl1->getRefPicIdentifier(ii), rpl1->isRefPicLongterm(ii), false,
@@ -5805,11 +6421,7 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
         rpcPic = *( iterPic++ );
         int refLayerIdx = vps->getGeneralLayerIdx( rpcPic->layerId );
         if (rpcPic->referenced && rpcPic->getPOC() == pic->getPOC() && vps->getDirectRefLayerFlag(layerIdx, refLayerIdx)
-#if JVET_R0193
             && xCheckMaxTidILRefPics( layerIdx, rpcPic, slice->isIRAP()))
-#else
-            && xCheckMaxTidILRefPics( rpcPic, slice->isIRAP() ) )
-#endif
         {
           pLocalRPL1->setRefPicIdentifier( refPicIdxL1, 0, true, true, vps->getInterLayerRefIdc( layerIdx, refLayerIdx ) );
           refPicIdxL1++;
@@ -5820,6 +6432,15 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
     }
   }
 
+  for (int i = 0; i < inactiveRefs.size(); i++)
+  {
+    const int ii = inactiveRefs[i];
+    pLocalRPL1->setRefPicIdentifier(refPicIdxL1, rpl1->getRefPicIdentifier(ii), rpl1->isRefPicLongterm(ii), false,
+                                    NOT_VALID);
+    refPicIdxL1++;
+    numOfSTRPL1 = numOfSTRPL1 + ((rpl1->isRefPicLongterm(ii)) ? 0 : 1);
+    numOfLTRPL1 += (rpl1->isRefPicLongterm(ii) && !rpl1->isInterLayerRefPic(ii)) ? 1 : 0;
+  }
   // now add higher TId refs
   for (int i = 0; i < higherTLayerRefs.size(); i++)
   {
@@ -5880,7 +6501,7 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
   *slice->getRPL0() = localRPL0;
 
   //Copy from L0 if we have less than active ref pic
-  numOfNeedToFill = pLocalRPL0->getNumberOfActivePictures() - ( numOfLTRPL1 + numOfSTRPL1 );
+  numOfNeedToFill = pLocalRPL0->getNumberOfActivePictures() - ( numOfLTRPL1 + numOfSTRPL1 ) - numOfILRPL0;
 
   for( int ii = 0; numOfNeedToFill > 0 && ii < ( pLocalRPL0->getNumberOfLongtermPictures() + pLocalRPL0->getNumberOfShorttermPictures() + pLocalRPL0->getNumberOfInterLayerPictures() ); ii++ )
   {
@@ -5908,7 +6529,7 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
         refPicIdxL1++;
         numOfSTRPL1 = numOfSTRPL1 + ( ( pLocalRPL0->isRefPicLongterm( ii ) ) ? 0 : 1 );
         numOfLTRPL1 += ( pLocalRPL0->isRefPicLongterm( ii ) && !pLocalRPL0->isInterLayerRefPic( ii ) ) ? 1 : 0;
-        numOfLTRPL1 += pLocalRPL0->isInterLayerRefPic( ii ) ? 1 : 0;
+        numOfILRPL1 += pLocalRPL0->isInterLayerRefPic( ii ) ? 1 : 0;   // inter-layer entries are tallied separately from long-term ones
         numOfNeedToFill--;
       }
     }
@@ -5922,5 +6543,15 @@ void EncGOP::xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicL
   pLocalRPL1->setLtrpInSliceHeaderFlag( 1 );
   slice->setRPL1idx( -1 );
   *slice->getRPL1() = localRPL1;
+  // Drop every EDRAP RefRapId whose picture is in neither final reference list. Walk backwards so a deletion cannot skip the next entry (presumably deleteEdrapRefRapIds shifts later entries down - confirm).
+  for (int i = slice->getEdrapNumRefRapPics() - 1; i >= 0; i--)
+  {
+    int refPoc = slice->getEdrapRefRapId(i) == 0 ? slice->getAssociatedIRAPPOC() : slice->getEdrapRefRapId(i) * m_pcEncLib->getEdrapPeriod();   // id 0 refers to the associated IRAP; otherwise POC = id * EDRAP period
+    if (!slice->isPOCInRefPicList( pLocalRPL0, refPoc ) && !slice->isPOCInRefPicList( pLocalRPL1, refPoc ))
+    {
+      slice->deleteEdrapRefRapIds(i);
+    }
+  }
+
 }
 //! \}
diff --git a/source/Lib/EncoderLib/EncGOP.h b/source/Lib/EncoderLib/EncGOP.h
index 5a89d4898a9a3e225e068491317fefa9c12c06de..bc7d2869aff9d9070f5b6bae19c0b106333d47d4 100644
--- a/source/Lib/EncoderLib/EncGOP.h
+++ b/source/Lib/EncoderLib/EncGOP.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -43,7 +43,7 @@
 #include <stdlib.h>
 
 #include "CommonLib/Picture.h"
-#include "CommonLib/LoopFilter.h"
+#include "CommonLib/DeblockingFilter.h"
 #include "CommonLib/NAL.h"
 #include "EncSampleAdaptiveOffset.h"
 #include "EncAdaptiveLoopFilter.h"
@@ -128,7 +128,16 @@ private:
   bool                    m_bFirst;
   int                     m_iLastRecoveryPicPOC;
   int                     m_latestDRAPPOC;
+  int                     m_latestEDRAPPOC;
+  bool                    m_latestEdrapLeadingPicDecodableFlag;
   int                     m_lastRasPoc;
+  unsigned                m_riceBit[8][2];
+  int                     m_preQP[2];
+  int                     m_preIPOC;
+#if JVET_W0046_RLSCP
+  int                     m_cnt_right_bottom;
+  int                     m_cnt_right_bottom_i;
+#endif
 
   //  Access channel
   EncLib*                 m_pcEncLib;
@@ -137,7 +146,7 @@ private:
   PicList*                m_pcListPic;
 
   HLSWriter*              m_HLSWriter;
-  LoopFilter*             m_pcLoopFilter;
+  DeblockingFilter*             m_pcLoopFilter;
 
   SEIWriter               m_seiWriter;
 
@@ -183,6 +192,9 @@ private:
   bool                    m_bInitAMaxBT;
 
   AUWriterIf*             m_AUWriterIf;
+#if GDR_ENABLED
+  int                     m_lastGdrIntervalPoc;  
+#endif
 
 #if JVET_O0756_CALCULATE_HDRMETRICS
 
@@ -234,8 +246,19 @@ public:
   void      setLastLTRefPoc(int iLastLTRefPoc) { m_lastLTRefPoc = iLastLTRefPoc; }
   int       getLastLTRefPoc() const { return m_lastLTRefPoc; }
 
+#if GDR_ENABLED
+  void      setLastGdrIntervalPoc(int p)  { m_lastGdrIntervalPoc = p; }
+  int       getLastGdrIntervalPoc() const { return m_lastGdrIntervalPoc; }
+#endif
+
+  int       getPreQP() const { return m_preQP[0]; }
+
   void  printOutSummary( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, 
-    const bool printMSSSIM, const bool printHexPsnr, const bool printRprPSNR, const BitDepths &bitDepths );
+    const bool printMSSSIM, const bool printHexPsnr, const bool printRprPSNR, const BitDepths &bitDepths
+#if JVET_W0134_UNIFORM_METRICS_LOG
+                       , int layerId
+#endif
+                       );
 #if W0038_DB_OPT
   uint64_t  preLoopFilterPicAndCalcDist( Picture* pcPic );
 #endif
@@ -267,6 +290,7 @@ protected:
   void  xPicInitLMCS       (Picture *pic, PicHeader *picHeader, Slice *slice);
   void  xGetBuffer        ( PicList& rcListPic, std::list<PelUnitBuf*>& rcListPicYuvRecOut,
                             int iNumPicRcvd, int iTimeOffset, Picture*& rpcPic, int pocCurr, bool isField );
+  void xGetSubpicIdsInPic(std::vector<uint16_t>& subpicIDs, const SPS* sps, const PPS* pps);
 
 #if JVET_O0756_CALCULATE_HDRMETRICS
   void xCalculateHDRMetrics ( Picture* pcPic, double deltaE[hdrtoolslib::NB_REF_WHITE], double psnrL[hdrtoolslib::NB_REF_WHITE]);
@@ -296,6 +320,10 @@ protected:
 
   void xUpdateRasInit(Slice* slice);
 
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  void xUpdateRPRtmvp    ( PicHeader* pcPicHeader, Slice* pcSlice );
+#endif
+
   void xWriteAccessUnitDelimiter (AccessUnit &accessUnit, Slice *slice);
 
   void xWriteFillerData (AccessUnit &accessUnit, Slice *slice, uint32_t &fdSize);
@@ -307,7 +335,7 @@ protected:
   void xUpdateDuData(AccessUnit &testAU, std::deque<DUData> &duData);
   void xUpdateTimingSEI(SEIPictureTiming *pictureTimingSEI, std::deque<DUData> &duData, const SPS *sps);
   void xUpdateDuInfoSEI(SEIMessages &duInfoSeiMessages, SEIPictureTiming *pictureTimingSEI, int maxSubLayers);
-  void xCreateScalableNestingSEI(SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, const std::vector<int> &targetOLSs, const std::vector<int> &targetLayers, const std::vector<uint16_t>& subpicIDs);
+  void xCreateScalableNestingSEI(SEIMessages& seiMessages, SEIMessages& nestedSeiMessages, const std::vector<int> &targetOLSs, const std::vector<int> &targetLayers, const std::vector<uint16_t>& subpicIDs, uint16_t maxSubpicIdInPic);
   void xWriteSEI (NalUnitType naluType, SEIMessages& seiMessages, AccessUnit &accessUnit, AccessUnit::iterator &auPos, int temporalId);
   void xWriteSEISeparately (NalUnitType naluType, SEIMessages& seiMessages, AccessUnit &accessUnit, AccessUnit::iterator &auPos, int temporalId);
   void xClearSEIs(SEIMessages& seiMessages, bool deleteMessages);
@@ -316,9 +344,7 @@ protected:
   void xWriteTrailingSEIMessages (SEIMessages& seiMessages, AccessUnit &accessUnit, int temporalId);
   void xWriteDuSEIMessages       (SEIMessages& duInfoSeiMessages, AccessUnit &accessUnit, int temporalId, std::deque<DUData> &duData);
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   int xWriteOPI (AccessUnit &accessUnit, const OPI *opi);
-#endif
   int xWriteVPS (AccessUnit &accessUnit, const VPS *vps);
   int xWriteDCI (AccessUnit &accessUnit, const DCI *dci);
   int xWriteSPS( AccessUnit &accessUnit, const SPS *sps, const int layerId = 0 );
@@ -332,11 +358,7 @@ protected:
   void applyDeblockingFilterParameterSelection( Picture* pcPic, const uint32_t numSlices, const int gopID );
 #endif
   void xCreateExplicitReferencePictureSetFromReference( Slice* slice, PicList& rcListPic, const ReferencePictureList *rpl0, const ReferencePictureList *rpl1 );
-#if JVET_R0193
   bool xCheckMaxTidILRefPics(int layerIdx, Picture* refPic, bool currentPicIsIRAP);
-#else
-  bool xCheckMaxTidILRefPics(Picture* refPic, bool currentPicIsIRAP);
-#endif
 };// END CLASS DEFINITION EncGOP
 
 //! \}
diff --git a/source/Lib/EncoderLib/EncHRD.cpp b/source/Lib/EncoderLib/EncHRD.cpp
index 1b4d2ce2694af4d4fd4f7b242096c74d1c88b82f..481cbf06021236c71a6f2513133d7d2048444804 100644
--- a/source/Lib/EncoderLib/EncHRD.cpp
+++ b/source/Lib/EncoderLib/EncHRD.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/EncHRD.h b/source/Lib/EncoderLib/EncHRD.h
index 1aa7c695f74ec6e08318ca22d23a44d044138fee..c7a0173f7f615abef21e2f29635b1562fb6fa9b1 100644
--- a/source/Lib/EncoderLib/EncHRD.h
+++ b/source/Lib/EncoderLib/EncHRD.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index bb5e51f65e59e0bfc00ba9105d6f17209808746f..b3e46ff2977c94071c29d879097668fda2ae8e65 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -43,9 +43,6 @@
 #include "CommonLib/Picture.h"
 #include "CommonLib/CommonDef.h"
 #include "CommonLib/ChromaFormat.h"
-#if ENABLE_SPLIT_PARALLELISM
-#include <omp.h>
-#endif
 #include "EncLibCommon.h"
 #include "CommonLib/ProfileLevelTier.h"
 
@@ -102,56 +99,25 @@ void EncLib::create( const int layerId )
   m_iPOCLast = m_compositeRefEnabled ? -2 : -1;
   // create processing unit classes
   m_cGOPEncoder.        create( );
-#if ENABLE_SPLIT_PARALLELISM
-#if ENABLE_SPLIT_PARALLELISM
-  m_numCuEncStacks  = m_numSplitThreads == 1 ? 1 : NUM_RESERVERD_SPLIT_JOBS;
-#else
-  m_numCuEncStacks  = 1;
-#endif
-
-  m_cCuEncoder      = new EncCu              [m_numCuEncStacks];
-  m_cInterSearch    = new InterSearch        [m_numCuEncStacks];
-  m_cIntraSearch    = new IntraSearch        [m_numCuEncStacks];
-  m_cTrQuant        = new TrQuant            [m_numCuEncStacks];
-  m_CABACEncoder    = new CABACEncoder       [m_numCuEncStacks];
-  m_cRdCost         = new RdCost             [m_numCuEncStacks];
-  m_CtxCache        = new CtxCache           [m_numCuEncStacks];
-
-  for( int jId = 0; jId < m_numCuEncStacks; jId++ )
-  {
-    m_cCuEncoder[jId].         create( this );
-  }
-#else
   m_cCuEncoder.         create( this );
-#endif
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   m_cInterSearch.cacheAssign( &m_cacheModel );
 #endif
 
-  m_cLoopFilter.create(floorLog2(m_maxCUWidth) - MIN_CU_LOG2);
+  m_deblockingFilter.create(floorLog2(m_maxCUWidth) - MIN_CU_LOG2);
 
-  if (!m_bLoopFilterDisable && m_encDbOpt)
+  if (!m_deblockingFilterDisable && m_encDbOpt)
   {
-    m_cLoopFilter.initEncPicYuvBuffer(m_chromaFormatIDC, Size(getSourceWidth(), getSourceHeight()), getMaxCUWidth());
+    m_deblockingFilter.initEncPicYuvBuffer(m_chromaFormatIDC, Size(getSourceWidth(), getSourceHeight()), getMaxCUWidth());
   }
 
-#if ENABLE_SPLIT_PARALLELISM
-  m_cReshaper = new EncReshape[m_numCuEncStacks];
-#endif
   if (m_lmcsEnabled)
   {
-#if ENABLE_SPLIT_PARALLELISM
-    for (int jId = 0; jId < m_numCuEncStacks; jId++)
-    {
-      m_cReshaper[jId].createEnc(getSourceWidth(), getSourceHeight(), m_maxCUWidth, m_maxCUHeight, m_bitDepth[COMPONENT_Y]);
-    }
-#else
     m_cReshaper.createEnc( getSourceWidth(), getSourceHeight(), m_maxCUWidth, m_maxCUHeight, m_bitDepth[COMPONENT_Y]);
-#endif
   }
   if ( m_RCEnableRateControl )
   {
-    m_cRateCtrl.init(m_framesToBeEncoded, m_RCTargetBitrate, (int)((double)m_iFrameRate / m_temporalSubsampleRatio + 0.5), m_iGOPSize, m_iSourceWidth, m_iSourceHeight,
+    m_cRateCtrl.init(m_framesToBeEncoded, m_RCTargetBitrate, (int)((double)m_iFrameRate / m_temporalSubsampleRatio + 0.5), m_iGOPSize, m_sourceWidth, m_sourceHeight,
       m_maxCUWidth, m_maxCUHeight, getBitDepth(CHANNEL_TYPE_LUMA), m_RCKeepHierarchicalBit, m_RCUseLCUSeparateModel, m_GOPList);
   }
 
@@ -162,55 +128,23 @@ void EncLib::destroy ()
   // destroy processing unit classes
   m_cGOPEncoder.        destroy();
   m_cSliceEncoder.      destroy();
-#if ENABLE_SPLIT_PARALLELISM
-  for( int jId = 0; jId < m_numCuEncStacks; jId++ )
-  {
-    m_cCuEncoder[jId].destroy();
-  }
-#else
   m_cCuEncoder.         destroy();
-#endif
   if( m_alf )
   {
     m_cEncALF.destroy();
   }
   m_cEncSAO.            destroyEncData();
   m_cEncSAO.            destroy();
-  m_cLoopFilter.        destroy();
+  m_deblockingFilter.   destroy();
   m_cRateCtrl.          destroy();
-#if ENABLE_SPLIT_PARALLELISM
-  for (int jId = 0; jId < m_numCuEncStacks; jId++)
-  {
-    m_cReshaper[jId].   destroy();
-  }
-#else
   m_cReshaper.          destroy();
-#endif
-#if ENABLE_SPLIT_PARALLELISM
-  for( int jId = 0; jId < m_numCuEncStacks; jId++ )
-  {
-    m_cInterSearch[jId].   destroy();
-    m_cIntraSearch[jId].   destroy();
-  }
-#else
   m_cInterSearch.       destroy();
   m_cIntraSearch.       destroy();
-#endif
-
-#if ENABLE_SPLIT_PARALLELISM
-  delete[] m_cCuEncoder;
-  delete[] m_cInterSearch;
-  delete[] m_cIntraSearch;
-  delete[] m_cTrQuant;
-  delete[] m_CABACEncoder;
-  delete[] m_cRdCost;
-  delete[] m_CtxCache;
-#endif
 
   return;
 }
 
-void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
+void EncLib::init(AUWriterIf *auWriterIf)
 {
   m_AUWriterIf = auWriterIf;
 
@@ -233,17 +167,8 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
   }
 
   xInitVPS( sps0 );
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   xInitOPI(m_opi);
-#endif
   xInitDCI(m_dci, sps0);
-#if ENABLE_SPLIT_PARALLELISM
-  if( omp_get_dynamic() )
-  {
-    omp_set_dynamic( false );
-  }
-  omp_set_nested( true );
-#endif
 
   if (getUseCompositeRef() || getDependentRAPIndicationSEIEnabled())
   {
@@ -256,18 +181,11 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
     m_cRateCtrl.initHrdParam(sps0.getGeneralHrdParameters(), sps0.getOlsHrdParameters(), m_iFrameRate, m_RCInitialCpbFullness);
   }
 #endif
-#if ENABLE_SPLIT_PARALLELISM
-  for( int jId = 0; jId < m_numCuEncStacks; jId++ )
-  {
-    m_cRdCost[jId].setCostMode ( m_costMode );
-  }
-#else
   m_cRdCost.setCostMode ( m_costMode );
-#endif
 
   // initialize PPS
-  pps0.setPicWidthInLumaSamples( m_iSourceWidth );
-  pps0.setPicHeightInLumaSamples( m_iSourceHeight );
+  pps0.setPicWidthInLumaSamples( m_sourceWidth );
+  pps0.setPicHeightInLumaSamples( m_sourceHeight );
   if (pps0.getPicWidthInLumaSamples() == sps0.getMaxPicWidthInLumaSamples() && pps0.getPicHeightInLumaSamples() == sps0.getMaxPicHeightInLumaSamples())
   {
     pps0.setConformanceWindow( sps0.getConformanceWindow() );
@@ -278,9 +196,13 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
     pps0.setConformanceWindow( m_conformanceWindow );
     pps0.setConformanceWindowFlag( m_conformanceWindow.getWindowEnabledFlag() );
   }
+  if (!pps0.getExplicitScalingWindowFlag())
+  {
+    pps0.setScalingWindow(pps0.getConformanceWindow());
+  }
   xInitPPS(pps0, sps0);
   // initialize APS
-  xInitRPL(sps0, isFieldCoding);
+  xInitRPL(sps0);
 
   if (m_resChangeInClvsEnabled)
   {
@@ -314,6 +236,7 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
     Window scalingWindow;
     scalingWindow.setWindow( 0, ( width - scaledWidth ) / SPS::getWinUnitX( sps0.getChromaFormatIdc() ), 0, ( height - scaledHeight ) / SPS::getWinUnitY( sps0.getChromaFormatIdc() ) );
     pps.setScalingWindow( scalingWindow );
+    pps.setExplicitScalingWindowFlag(scalingWindow.getWindowEnabledFlag());
 
     //register the width/height of the current pic into reference SPS
     if (!sps0.getPPSValidFlag(pps.getPPSId()))
@@ -342,7 +265,7 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
     m_noPicPartitionFlag = true;
 
     xInitPPS( pps, sps0 ); // will allocate memory for and initialize pps.pcv inside
-    
+
     if( pps.getWrapAroundEnabledFlag() )
     {
       int minCbSizeY = (1 << sps0.getLog2MinCodingBlockSize());
@@ -350,10 +273,10 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
       pps.setWrapAroundOffset                   (minCbSizeY * (pps.getPicWidthInLumaSamples() / minCbSizeY - pps.getPicWidthMinusWrapAroundOffset()));
 
     }
-    else 
+    else
     {
       pps.setPicWidthMinusWrapAroundOffset      (0);
-      pps.setWrapAroundOffset                   ( 0 );       
+      pps.setWrapAroundOffset                   ( 0 );
     }
   }
 
@@ -375,54 +298,6 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
   // initialize processing unit classes
   m_cGOPEncoder.  init( this );
   m_cSliceEncoder.init( this, sps0 );
-#if ENABLE_SPLIT_PARALLELISM
-  for( int jId = 0; jId < m_numCuEncStacks; jId++ )
-  {
-    // precache a few objects
-    for( int i = 0; i < 10; i++ )
-    {
-      auto x = m_CtxCache[jId].get();
-      m_CtxCache[jId].cache( x );
-    }
-
-    m_cCuEncoder[jId].init( this, sps0, jId );
-
-    // initialize transform & quantization class
-    m_cTrQuant[jId].init( jId == 0 ? nullptr : m_cTrQuant[0].getQuant(),
-                          1 << m_log2MaxTbSize,
-
-                          m_useRDOQ,
-                          m_useRDOQTS,
-#if T0196_SELECTIVE_RDOQ
-                          m_useSelectiveRDOQ,
-#endif
-                          true
-    );
-
-    // initialize encoder search class
-    CABACWriter* cabacEstimator = m_CABACEncoder[jId].getCABACEstimator( &sps0 );
-    m_cIntraSearch[jId].init( this,
-                              &m_cTrQuant[jId],
-                              &m_cRdCost[jId],
-                              cabacEstimator,
-                              getCtxCache( jId ), m_maxCUWidth, m_maxCUHeight, floorLog2(m_maxCUWidth) - m_log2MinCUSize
-                            , &m_cReshaper[jId]
-                            , sps0.getBitDepth(CHANNEL_TYPE_LUMA)
-    );
-    m_cInterSearch[jId].init( this,
-                              &m_cTrQuant[jId],
-                              m_iSearchRange,
-                              m_bipredSearchRange,
-                              m_motionEstimationSearchMethod,
-                              getUseCompositeRef(),
-                              m_maxCUWidth, m_maxCUHeight, floorLog2(m_maxCUWidth) - m_log2MinCUSize, &m_cRdCost[jId], cabacEstimator, getCtxCache( jId )
-                           , &m_cReshaper[jId]
-    );
-
-    // link temporary buffets from intra search with inter search to avoid unnecessary memory overhead
-    m_cInterSearch[jId].setTempBuffers( m_cIntraSearch[jId].getSplitCSBuf(), m_cIntraSearch[jId].getFullCSBuf(), m_cIntraSearch[jId].getSaveCSBuf() );
-  }
-#else  // ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
   m_cCuEncoder.   init( this, sps0 );
 
   // initialize transform & quantization class
@@ -458,7 +333,6 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
 
   // link temporary buffets from intra search with inter search to avoid unneccessary memory overhead
   m_cInterSearch.setTempBuffers( m_cIntraSearch.getSplitCSBuf(), m_cIntraSearch.getFullCSBuf(), m_cIntraSearch.getSaveCSBuf() );
-#endif // ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
 
   m_iMaxRefPicNum = 0;
 
@@ -482,7 +356,13 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
     Picture *picBg = new Picture;
     picBg->create( sps0.getChromaFormatIdc(), Size( pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples() ), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false, m_layerId, m_gopBasedTemporalFilterEnabled );
     picBg->getRecoBuf().fill(0);
+#if GDR_ENABLED
+    PicHeader *picHeader = new PicHeader();
+    xInitPicHeader(*picHeader, sps0, pps0);
+    picBg->finalInit( m_vps, sps0, pps0, picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS );
+#else
     picBg->finalInit( m_vps, sps0, pps0, &m_picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS );
+#endif
     picBg->allocateNewSlice();
     picBg->createSpliceIdx(pps0.pcv->sizeInCtus);
     m_cGOPEncoder.setPicBg(picBg);
@@ -509,26 +389,12 @@ void EncLib::xInitScalingLists( SPS &sps, APS &aps )
   {
     quant->setFlatScalingList(maxLog2TrDynamicRange, sps.getBitDepths());
     quant->setUseScalingList(false);
-#if ENABLE_SPLIT_PARALLELISM
-    for( int jId = 1; jId < m_numCuEncStacks; jId++ )
-    {
-      getTrQuant( jId )->getQuant()->setFlatScalingList( maxLog2TrDynamicRange, sps.getBitDepths() );
-      getTrQuant( jId )->getQuant()->setUseScalingList( false );
-    }
-#endif
   }
   else if(getUseScalingListId() == SCALING_LIST_DEFAULT)
   {
     aps.getScalingList().setDefaultScalingList ();
     quant->setScalingList( &( aps.getScalingList() ), maxLog2TrDynamicRange, sps.getBitDepths() );
     quant->setUseScalingList(true);
-#if ENABLE_SPLIT_PARALLELISM
-    for( int jId = 1; jId < m_numCuEncStacks; jId++ )
-    {
-      getTrQuant( jId )->getQuant()->setUseScalingList( true );
-    }
-    sps.setDisableScalingMatrixForLfnstBlks(getDisableScalingMatrixForLfnstBlks());
-#endif
   }
   else if(getUseScalingListId() == SCALING_LIST_FILE_READ)
   {
@@ -542,12 +408,6 @@ void EncLib::xInitScalingLists( SPS &sps, APS &aps )
     aps.getScalingList().setChromaScalingListPresentFlag((sps.getChromaFormatIdc()!=CHROMA_400));
     quant->setScalingList( &( aps.getScalingList() ), maxLog2TrDynamicRange, sps.getBitDepths() );
     quant->setUseScalingList(true);
-#if ENABLE_SPLIT_PARALLELISM
-    for( int jId = 1; jId < m_numCuEncStacks; jId++ )
-    {
-      getTrQuant( jId )->getQuant()->setUseScalingList( true );
-    }
-#endif
 
     sps.setDisableScalingMatrixForLfnstBlks(getDisableScalingMatrixForLfnstBlks());
   }
@@ -614,7 +474,13 @@ bool EncLib::encodePrep( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYu
     const SPS *sps = m_spsMap.getPS( pps->getSPSId() );
 
     picCurr->M_BUFS( 0, PIC_ORIGINAL ).copyFrom( m_cGOPEncoder.getPicBg()->getRecoBuf() );
+#if GDR_ENABLED
+    PicHeader *picHeader = new PicHeader();
+    xInitPicHeader(*picHeader, *sps, *pps);
+    picCurr->finalInit( m_vps, *sps, *pps, picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS );
+#else
     picCurr->finalInit( m_vps, *sps, *pps, &m_picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS );
+#endif
     picCurr->poc = m_iPOCLast - 1;
     m_iPOCLast -= 2;
     if( getUseAdaptiveQP() )
@@ -626,7 +492,7 @@ bool EncLib::encodePrep( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYu
       m_cRateCtrl.initRCGOP( m_iNumPicRcvd );
     }
 
-    m_cGOPEncoder.compressGOP( m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, false, false, 
+    m_cGOPEncoder.compressGOP( m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, false, false,
       snrCSC, m_printFrameMSE, m_printMSSSIM, true, 0 );
 
 #if JVET_O0756_CALCULATE_HDRMETRICS
@@ -732,8 +598,13 @@ bool EncLib::encodePrep( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYu
         pcPicCurr->M_BUFS( 0, PIC_FILTERED_ORIGINAL ).swap( *pcPicYuvFilteredOrg );
       }
     }
-
+#if GDR_ENABLED
+    PicHeader *picHeader = new PicHeader();
+    xInitPicHeader(*picHeader, *pSPS, *pPPS);
+    pcPicCurr->finalInit( m_vps, *pSPS, *pPPS, picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS );
+#else
     pcPicCurr->finalInit( m_vps, *pSPS, *pPPS, &m_picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS );
+#endif
 
     pcPicCurr->poc = m_iPOCLast;
 
@@ -878,7 +749,13 @@ bool EncLib::encodePrep( bool flush, PelStorage* pcPicYuvOrg, PelStorage* pcPicY
       const PPS *pPPS = ( ppsID < 0 ) ? m_ppsMap.getFirstPS() : m_ppsMap.getPS( ppsID );
       const SPS *pSPS = m_spsMap.getPS( pPPS->getSPSId() );
 
+#if GDR_ENABLED
+      PicHeader *picHeader = new PicHeader();
+      xInitPicHeader(*picHeader, *pSPS, *pPPS);
+      pcField->finalInit( m_vps, *pSPS, *pPPS, picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS );
+#else
       pcField->finalInit( m_vps, *pSPS, *pPPS, &m_picHeader, m_apss, m_lmcsAPS, m_scalinglistAPS );
+#endif
 
       pcField->poc = m_iPOCLast;
       pcField->reconstructed = false;
@@ -914,7 +791,7 @@ bool EncLib::encode( const InputColourSpaceConversion snrCSC, std::list<PelUnitB
     m_iPOCLast = m_iPOCLast < 2 ? fieldNum : m_iPOCLast;
 
     // compress GOP
-    m_cGOPEncoder.compressGOP( m_iPOCLast, m_iPOCLast < 2 ? m_iPOCLast + 1 : m_iNumPicRcvd, m_cListPic, 
+    m_cGOPEncoder.compressGOP( m_iPOCLast, m_iPOCLast < 2 ? m_iPOCLast + 1 : m_iNumPicRcvd, m_cListPic,
       rcListPicYuvRecOut, true, isTff, snrCSC, m_printFrameMSE, m_printMSSSIM, false, m_picIdInGOP );
 #if JVET_O0756_CALCULATE_HDRMETRICS
     m_metricTime = m_cGOPEncoder.getMetricTime();
@@ -1010,7 +887,7 @@ void EncLib::xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Pict
       if(m_gopBasedTemporalFilterEnabled)
       {
         rpcPic->M_BUFS(0, PIC_FILTERED_ORIGINAL_INPUT).create(sps.getChromaFormatIdc(), Area(Position(), Size(pps0.getPicWidthInLumaSamples(), pps0.getPicHeightInLumaSamples())));
-      } 
+      }
     }
     if ( getUseAdaptiveQP() )
     {
@@ -1043,9 +920,7 @@ void EncLib::xInitVPS( const SPS& sps )
   m_vps->m_olsHrdParams.resize(m_vps->getNumOlsTimingHrdParamsMinus1(), std::vector<OlsHrdParams>(m_vps->getMaxSubLayers()));
   ProfileLevelTierFeatures profileLevelTierFeatures;
   profileLevelTierFeatures.extractPTLInformation( sps );
-#if JVET_R0193
   m_vps->setMaxTidIlRefPicsPlus1(m_cfgVPSParameters.m_maxTidILRefPicsPlus1);
-#endif
   m_vps->deriveOutputLayerSets();
   m_vps->deriveTargetOutputLayerSet( m_vps->m_targetOlsIdx );
 
@@ -1069,7 +944,7 @@ void EncLib::xInitVPS( const SPS& sps )
   for( int olsIdx = 0, dpbIdx = 0; olsIdx < m_vps->m_numOutputLayersInOls.size(); olsIdx++ )
   {
     if ( m_vps->getNumLayersInOls(olsIdx) > 1 )
-    { 
+    {
       if( std::find( m_vps->m_layerIdInOls[olsIdx].begin(), m_vps->m_layerIdInOls[olsIdx].end(), m_layerId ) != m_vps->m_layerIdInOls[olsIdx].end() )
       {
         m_vps->setOlsDpbPicWidth( olsIdx, std::max<int>( sps.getMaxPicWidthInLumaSamples(), m_vps->getOlsDpbPicSize( olsIdx ).width ) );
@@ -1087,7 +962,7 @@ void EncLib::xInitVPS( const SPS& sps )
   for( int i = 0; i < m_vps->m_numOutputLayersInOls.size(); i++ )
   {
     if ( m_vps->getNumLayersInOls(i) > 1 )
-    { 
+    {
       int dpbIdx = m_vps->getOlsDpbParamsIdx( i );
 
       if( m_vps->getMaxSubLayers() == 1 )
@@ -1117,7 +992,7 @@ void EncLib::xInitVPS( const SPS& sps )
           decPicBuffering[tId] += m_layerDecPicBuffering[m_vps->getLayerIdInOls( i, lIdx ) * MAX_TLAYER + tId];
         }
       }
-    
+
       for( int j = ( m_vps->m_sublayerDpbParamsPresentFlag ? 0 : m_vps->m_dpbMaxTemporalId[dpbIdx] ); j <= m_vps->m_dpbMaxTemporalId[dpbIdx]; j++ )
       {
         m_vps->m_dpbParameters[dpbIdx].m_maxDecPicBuffering[j] = decPicBuffering[j] > 0 ? decPicBuffering[j] : profileLevelTierFeatures.getMaxDpbSize( m_vps->getOlsDpbPicSize( i ).width * m_vps->getOlsDpbPicSize( i ).height );
@@ -1145,19 +1020,9 @@ void EncLib::xInitVPS( const SPS& sps )
     m_vps->setHrdMaxTid(i, m_vps->getMaxSubLayers() - 1);
   }
 
-#if !JVET_R0193
-  if (m_cfgVPSParameters.m_maxTidILRefPicsPlus1 >= 0)
-  {
-    for (int i = 0; i < m_vps->getMaxLayers(); i++)
-    {
-      m_vps->setMaxTidIlRefPicsPlus1(i, m_cfgVPSParameters.m_maxTidILRefPicsPlus1);
-    }
-  }
-#endif
   m_vps->checkVPS();
 }
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
 void EncLib::xInitOPI(OPI& opi)
 {
   if (m_OPIEnabled && m_vps)
@@ -1165,7 +1030,7 @@ void EncLib::xInitOPI(OPI& opi)
     if (!opi.getOlsInfoPresentFlag())
     {
       opi.setOpiOlsIdx(m_vps->deriveTargetOLSIdx());
-      opi.setOlsInfoPresentFlag(true);    
+      opi.setOlsInfoPresentFlag(true);
     }
     if (!opi.getHtidInfoPresentFlag())
     {
@@ -1174,7 +1039,6 @@ void EncLib::xInitOPI(OPI& opi)
     }
   }
 }
-#endif
 
 void EncLib::xInitDCI(DCI& dci, const SPS& sps)
 {
@@ -1254,6 +1118,9 @@ void EncLib::xInitSPS( SPS& sps )
   cinfo->setNoCraConstraintFlag(m_noCraConstraintFlag);
   cinfo->setNoGdrConstraintFlag(m_noGdrConstraintFlag);
   cinfo->setNoApsConstraintFlag(m_noApsConstraintFlag);
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  cinfo->setLowerBitRateConstraintFlag(m_generalLowerBitRateConstraintFlag);
+#endif
 
   profileTierLevel->setLevelIdc                    (m_level);
   profileTierLevel->setTierFlag                    (m_levelTier);
@@ -1269,12 +1136,26 @@ void EncLib::xInitSPS( SPS& sps )
   /* XXX: may be a good idea to refactor the above into a function
    * that chooses the actual compatibility based upon options */
   sps.setVPSId( m_vps->getVPSId() );
-  sps.setMaxPicWidthInLumaSamples( m_iSourceWidth );
-  sps.setMaxPicHeightInLumaSamples( m_iSourceHeight );
+
+#if GDR_ENABLED
+  if (m_gdrEnabled)
+  {
+    sps.setGDREnabledFlag(true);
+  }
+  else
+  {
+    sps.setGDREnabledFlag(false);
+  }
+#else
+  sps.setGDREnabledFlag(false);
+#endif
+
+  sps.setMaxPicWidthInLumaSamples( m_sourceWidth );
+  sps.setMaxPicHeightInLumaSamples( m_sourceHeight );
   if (m_resChangeInClvsEnabled)
   {
-    int maxPicWidth = std::max(m_iSourceWidth, (int)((double)m_iSourceWidth / m_scalingRatioHor + 0.5));
-    int maxPicHeight = std::max(m_iSourceHeight, (int)((double)m_iSourceHeight / m_scalingRatioVer + 0.5));
+    int maxPicWidth = std::max(m_sourceWidth, (int)((double)m_sourceWidth / m_scalingRatioHor + 0.5));
+    int maxPicHeight = std::max(m_sourceHeight, (int)((double)m_sourceHeight / m_scalingRatioVer + 0.5));
     const int minCuSize = std::max(8, 1 << m_log2MinCUSize);
     if (maxPicWidth % minCuSize)
     {
@@ -1410,7 +1291,7 @@ void EncLib::xInitSPS( SPS& sps )
   }
   sps.setALFEnabledFlag( m_alf );
   sps.setCCALFEnabledFlag( m_ccalf );
-  sps.setFieldSeqFlag(false);
+  sps.setFieldSeqFlag(m_fieldSeqFlag);
   sps.setVuiParametersPresentFlag(getVuiParametersPresentFlag());
 
   if (sps.getVuiParametersPresentFlag())
@@ -1467,9 +1348,14 @@ void EncLib::xInitSPS( SPS& sps )
   sps.getSpsRangeExtension().setTransformSkipRotationEnabledFlag(m_transformSkipRotationEnabledFlag);
   sps.getSpsRangeExtension().setTransformSkipContextEnabledFlag(m_transformSkipContextEnabledFlag);
   sps.getSpsRangeExtension().setExtendedPrecisionProcessingFlag(m_extendedPrecisionProcessingFlag);
+  sps.getSpsRangeExtension().setTSRCRicePresentFlag(m_tsrcRicePresentFlag);
   sps.getSpsRangeExtension().setIntraSmoothingDisabledFlag( m_intraSmoothingDisabledFlag );
   sps.getSpsRangeExtension().setHighPrecisionOffsetsEnabledFlag(m_highPrecisionOffsetsEnabledFlag);
+  sps.getSpsRangeExtension().setRrcRiceExtensionEnableFlag(m_rrcRiceExtensionEnableFlag);
   sps.getSpsRangeExtension().setPersistentRiceAdaptationEnabledFlag(m_persistentRiceAdaptationEnabledFlag);
+#if JVET_W0046_RLSCP
+  sps.getSpsRangeExtension().setReverseLastSigCoeffEnabledFlag(m_reverseLastSigCoeffEnabledFlag);
+#endif
   sps.getSpsRangeExtension().setCabacBypassAlignmentEnabledFlag(m_cabacBypassAlignmentEnabledFlag);
 
   sps.setSubPicInfoPresentFlag(m_subPicInfoPresentFlag);
@@ -1479,7 +1365,7 @@ void EncLib::xInitSPS( SPS& sps )
     sps.setSubPicSameSizeFlag(m_subPicSameSizeFlag);
     if (m_subPicSameSizeFlag)
     {
-      uint32_t numSubpicCols = (m_iSourceWidth + m_CTUSize - 1) / m_CTUSize / m_subPicWidth[0];
+      uint32_t numSubpicCols = (m_sourceWidth + m_CTUSize - 1) / m_CTUSize / m_subPicWidth[0];
       for (unsigned int i = 0; i < m_numSubPics; i++)
       {
         sps.setSubPicCtuTopLeftX(i, (i % numSubpicCols) * m_subPicWidth[0]);
@@ -1513,8 +1399,8 @@ void EncLib::xInitSPS( SPS& sps )
     sps.setNumSubPics(1);
     sps.setSubPicCtuTopLeftX(0, 0);
     sps.setSubPicCtuTopLeftY(0, 0);
-    sps.setSubPicWidth(0, m_iSourceWidth);
-    sps.setSubPicHeight(0, m_iSourceHeight);
+    sps.setSubPicWidth(0, m_sourceWidth);
+    sps.setSubPicHeight(0, m_sourceHeight);
     sps.setSubPicTreatedAsPicFlag(0, 1);
     sps.setLoopFilterAcrossSubpicEnabledFlag(0, 0);
     sps.setSubPicIdLen(0);
@@ -1549,7 +1435,11 @@ void EncLib::xInitSPS( SPS& sps )
   sps.setInterLayerPresentFlag( m_layerId > 0 && m_vps->getMaxLayers() > 1 && !m_vps->getAllIndependentLayersFlag() && !m_vps->getIndependentLayerFlag( m_vps->getGeneralLayerIdx( m_layerId ) ) );
   CHECK( m_vps->getIndependentLayerFlag( m_vps->getGeneralLayerIdx( m_layerId ) ) && sps.getInterLayerPresentFlag(), " When vps_independent_layer_flag[GeneralLayerIdx[nuh_layer_id ]]  is equal to 1, the value of inter_layer_ref_pics_present_flag shall be equal to 0." );
 
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  sps.setResChangeInClvsEnabledFlag(m_resChangeInClvsEnabled || m_constrainedRaslEncoding);
+#else
   sps.setResChangeInClvsEnabledFlag(m_resChangeInClvsEnabled);
+#endif
   sps.setRprEnabledFlag(m_rprEnabledFlag);
 
   sps.setLog2ParallelMergeLevelMinus2( m_log2ParallelMergeLevelMinus2 );
@@ -1597,6 +1487,10 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
     bUseDQP = true;
   }
 #endif
+  if (getSmoothQPReductionEnable())
+  {
+    bUseDQP = true;
+  }
 #if ENABLE_QPA
   if (getUsePerceptQPA() && !bUseDQP)
   {
@@ -1624,11 +1518,15 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
     pps.setUseDQP(false);
   }
 
-  if ( m_cuChromaQpOffsetSubdiv >= 0 )
+  if ( m_cuChromaQpOffsetList.size() > 0 )
   {
+    /* insert table entries from cfg parameters (NB, 0 should not be touched) */
     pps.clearChromaQpOffsetList();
-    pps.setChromaQpOffsetListEntry(1, 6, 6, 6);
-    /* todo, insert table entries from command line (NB, 0 should not be touched) */
+    for (int i=0; i < m_cuChromaQpOffsetList.size(); i++)
+    {
+      pps.setChromaQpOffsetListEntry(i + 1, m_cuChromaQpOffsetList[i].u.comp.CbOffset,
+        m_cuChromaQpOffsetList[i].u.comp.CrOffset, m_cuChromaQpOffsetList[i].u.comp.JointCbCrOffset);
+    }
   }
   else
   {
@@ -1650,13 +1548,18 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
     pps.setPicInitQPMinus26( std::min( maxDQP, std::max( minDQP, baseQp ) ));
   }
 
-  if( sps.getJointCbCrEnabledFlag() == false || getChromaFormatIdc() == CHROMA_400 || m_chromaCbCrQpOffset == 0 )
+  if (sps.getJointCbCrEnabledFlag() == false || getChromaFormatIdc() == CHROMA_400)
   {
     pps.setJointCbCrQpOffsetPresentFlag(false);
   }
   else
   {
-    pps.setJointCbCrQpOffsetPresentFlag(true);
+    bool enable = (m_chromaCbCrQpOffset != 0);
+    for (int i=0; i < m_cuChromaQpOffsetList.size(); i++)
+    {
+      enable |= (m_cuChromaQpOffsetList[i].u.comp.JointCbCrOffset != 0);
+    }
+    pps.setJointCbCrQpOffsetPresentFlag(enable);
   }
 
 #if ER_CHROMA_QP_WCG_PPS
@@ -1725,10 +1628,10 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
     pps.setPicWidthMinusWrapAroundOffset      ((pps.getPicWidthInLumaSamples()/minCbSizeY) - (m_wrapAroundOffset / minCbSizeY));
     pps.setWrapAroundOffset                   (minCbSizeY *(pps.getPicWidthInLumaSamples() / minCbSizeY- pps.getPicWidthMinusWrapAroundOffset()));
   }
-  else 
+  else
   {
     pps.setPicWidthMinusWrapAroundOffset      ( 0 );
-    pps.setWrapAroundOffset                   ( 0 );       
+    pps.setWrapAroundOffset                   ( 0 );
   }
   CHECK( !sps.getWrapAroundEnabledFlag() && pps.getWrapAroundEnabledFlag(), "When sps_ref_wraparound_enabled_flag is equal to 0, the value of pps_ref_wraparound_enabled_flag shall be equal to 0.");
   CHECK( (((sps.getCTUSize() / minCbSizeY) + 1) > ((pps.getPicWidthInLumaSamples() / minCbSizeY) - 1)) && pps.getWrapAroundEnabledFlag(), "When the value of CtbSizeY / MinCbSizeY + 1 is greater than pps_pic_width_in_luma_samples / MinCbSizeY - 1, the value of pps_ref_wraparound_enabled_flag shall be equal to 0.");
@@ -1780,7 +1683,7 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
 
   pps.setUseWP( m_useWeightedPred );
   pps.setWPBiPred( m_useWeightedBiPred );
-  pps.setOutputFlagPresentFlag( false );
+  pps.setOutputFlagPresentFlag(false);
 
   if ( getDeblockingFilterMetric() )
   {
@@ -1789,18 +1692,18 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
   }
   else
   {
-    pps.setDeblockingFilterOverrideEnabledFlag( !getLoopFilterOffsetInPPS() );
-    pps.setPPSDeblockingFilterDisabledFlag( getLoopFilterDisable() );
+    pps.setDeblockingFilterOverrideEnabledFlag( !getDeblockingFilterOffsetInPPS() );
+    pps.setPPSDeblockingFilterDisabledFlag( getDeblockingFilterDisable() );
   }
 
   if (! pps.getPPSDeblockingFilterDisabledFlag())
   {
-    pps.setDeblockingFilterBetaOffsetDiv2( getLoopFilterBetaOffset() );
-    pps.setDeblockingFilterTcOffsetDiv2( getLoopFilterTcOffset() );
-    pps.setDeblockingFilterCbBetaOffsetDiv2( getLoopFilterCbBetaOffset() );
-    pps.setDeblockingFilterCbTcOffsetDiv2( getLoopFilterCbTcOffset() );
-    pps.setDeblockingFilterCrBetaOffsetDiv2( getLoopFilterCrBetaOffset() );
-    pps.setDeblockingFilterCrTcOffsetDiv2( getLoopFilterCrTcOffset() );
+    pps.setDeblockingFilterBetaOffsetDiv2  ( getDeblockingFilterBetaOffset() );
+    pps.setDeblockingFilterTcOffsetDiv2    ( getDeblockingFilterTcOffset() );
+    pps.setDeblockingFilterCbBetaOffsetDiv2( getDeblockingFilterCbBetaOffset() );
+    pps.setDeblockingFilterCbTcOffsetDiv2  ( getDeblockingFilterCbTcOffset() );
+    pps.setDeblockingFilterCrBetaOffsetDiv2( getDeblockingFilterCrBetaOffset() );
+    pps.setDeblockingFilterCrTcOffsetDiv2  ( getDeblockingFilterCrTcOffset() );
   }
   else
   {
@@ -1915,6 +1818,10 @@ void EncLib::xInitPicHeader(PicHeader &picHeader, const SPS &sps, const PPS &pps
     bUseDQP = true;
   }
 #endif
+  if (getSmoothQPReductionEnable())
+  {
+    bUseDQP = true;
+  }
 #if ENABLE_QPA
   if( getUsePerceptQPA() && !bUseDQP )
   {
@@ -1944,17 +1851,8 @@ void EncLib::xInitPicHeader(PicHeader &picHeader, const SPS &sps, const PPS &pps
     picHeader.setCuQpDeltaSubdivInter( 0 );
   }
 
-  if( m_cuChromaQpOffsetSubdiv >= 0 )
-  {
-    picHeader.setCuChromaQpOffsetSubdivIntra(m_cuChromaQpOffsetSubdiv);
-    picHeader.setCuChromaQpOffsetSubdivInter(m_cuChromaQpOffsetSubdiv);
-  }
-  else
-  {
-    picHeader.setCuChromaQpOffsetSubdivIntra(0);
-    picHeader.setCuChromaQpOffsetSubdivInter(0);
-  }
-
+  picHeader.setCuChromaQpOffsetSubdivIntra(m_cuChromaQpOffsetSubdiv);
+  picHeader.setCuChromaQpOffsetSubdivInter(m_cuChromaQpOffsetSubdiv);
 
   // virtual boundaries
   if( sps.getVirtualBoundariesEnabledFlag() )
@@ -1968,6 +1866,10 @@ void EncLib::xInitPicHeader(PicHeader &picHeader, const SPS &sps, const PPS &pps
     }
   }
 
+#if GDR_ENABLED  
+    picHeader.setGdrOrIrapPicFlag(false);    
+#endif
+
   // gradual decoder refresh flag
   picHeader.setGdrPicFlag(false);
 
@@ -1982,9 +1884,10 @@ void EncLib::xInitAPS(APS &aps)
   //Do nothing now
 }
 
-void EncLib::xInitRPL(SPS &sps, bool isFieldCoding)
+void EncLib::xInitRPL(SPS &sps)
 {
   ReferencePictureList*      rpl;
+  const bool                 isFieldCoding = sps.getFieldSeqFlag();
 
   int numRPLCandidates = getRPLCandidateSize(0);
   // To allocate one additional memory for RPL of POC1 (first bottom field) which is not specified in cfg file
@@ -2348,6 +2251,12 @@ int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const
     }
     else
     {
+      if (pSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_IDR_N_LP || pSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_CRA)
+      {
+        qp += getIntraQPOffset();
+      }
+      else
+      {
         const GOPEntry &gopEntry=getGOPEntry(gopIndex);
         // adjust QP according to the QP offset for the GOP entry.
         qp +=gopEntry.m_QPOffset;
@@ -2357,6 +2266,7 @@ int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const
         int qpOffset = (int)floor(Clip3<double>(0.0, 3.0, dqpOffset));
         qp += qpOffset ;
       }
+    }
 
 #if !QP_SWITCHING_FOR_PARALLEL
     // modify QP if a fractional QP was originally specified, cause dQPs to be 0 or 1.
diff --git a/source/Lib/EncoderLib/EncLib.h b/source/Lib/EncoderLib/EncLib.h
index 517c65cbcae9b5cf9b421ee6b6a4c5479d83d397..94e383d171ee5716a741f8920b0e732df6ed9ca2 100644
--- a/source/Lib/EncoderLib/EncLib.h
+++ b/source/Lib/EncoderLib/EncLib.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,7 @@
 
 // Include files
 #include "CommonLib/TrQuant.h"
-#include "CommonLib/LoopFilter.h"
+#include "CommonLib/DeblockingFilter.h"
 #include "CommonLib/NAL.h"
 
 #include "Utilities/VideoIOYuv.h"
@@ -79,65 +79,35 @@ private:
   int                       m_layerId;
 
   // encoder search
-#if ENABLE_SPLIT_PARALLELISM
-  InterSearch              *m_cInterSearch;                       ///< encoder search class
-  IntraSearch              *m_cIntraSearch;                       ///< encoder search class
-#else
   InterSearch               m_cInterSearch;                       ///< encoder search class
   IntraSearch               m_cIntraSearch;                       ///< encoder search class
-#endif
   // coding tool
-#if ENABLE_SPLIT_PARALLELISM
-  TrQuant                  *m_cTrQuant;                           ///< transform & quantization class
-#else
   TrQuant                   m_cTrQuant;                           ///< transform & quantization class
-#endif
-  LoopFilter                m_cLoopFilter;                        ///< deblocking filter class
+  DeblockingFilter          m_deblockingFilter;                   ///< deblocking filter class
   EncSampleAdaptiveOffset   m_cEncSAO;                            ///< sample adaptive offset class
   EncAdaptiveLoopFilter     m_cEncALF;
   HLSWriter                 m_HLSWriter;                          ///< CAVLC encoder
-#if ENABLE_SPLIT_PARALLELISM
-  CABACEncoder             *m_CABACEncoder;
-#else
   CABACEncoder              m_CABACEncoder;
-#endif
 
-#if ENABLE_SPLIT_PARALLELISM
-  EncReshape               *m_cReshaper;                        ///< reshaper class
-#else
   EncReshape                m_cReshaper;                        ///< reshaper class
-#endif
 
   // processing unit
   EncGOP                    m_cGOPEncoder;                        ///< GOP encoder
   EncSlice                  m_cSliceEncoder;                      ///< slice encoder
-#if ENABLE_SPLIT_PARALLELISM
-  EncCu                    *m_cCuEncoder;                         ///< CU encoder
-#else
   EncCu                     m_cCuEncoder;                         ///< CU encoder
-#endif
   // SPS
   ParameterSetMap<SPS>&     m_spsMap;                             ///< SPS. This is the base value. This is copied to PicSym
   ParameterSetMap<PPS>&     m_ppsMap;                             ///< PPS. This is the base value. This is copied to PicSym
   ParameterSetMap<APS>&     m_apsMap;                             ///< APS. This is the base value. This is copied to PicSym
   PicHeader                 m_picHeader;                          ///< picture header
   // RD cost computation
-#if ENABLE_SPLIT_PARALLELISM
-  RdCost                   *m_cRdCost;                            ///< RD cost computation class
-  CtxCache                 *m_CtxCache;                           ///< buffer for temporarily stored context models
-#else
   RdCost                    m_cRdCost;                            ///< RD cost computation class
   CtxCache                  m_CtxCache;                           ///< buffer for temporarily stored context models
-#endif
   // quality control
   RateCtrl                  m_cRateCtrl;                          ///< Rate control class
 
   AUWriterIf*               m_AUWriterIf;
 
-#if ENABLE_SPLIT_PARALLELISM
-  int                       m_numCuEncStacks;
-#endif
-
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   CacheModel                m_cacheModel;
 #endif
@@ -167,9 +137,7 @@ public:
 
 protected:
   void  xGetNewPicBuffer  ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Picture*& rpcPic, int ppsId ); ///< get picture buffer which will be processed. If ppsId<0, then the ppsMap will be queried for the first match.
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   void  xInitOPI(OPI& opi); ///< initialize Operating point Information (OPI) from encoder options
-#endif
   void  xInitDCI(DCI& dci, const SPS& sps); ///< initialize Decoding Capability Information (DCI) from encoder options
   void  xInitVPS( const SPS& sps ); ///< initialize VPS from encoder options
   void  xInitSPS( SPS& sps );       ///< initialize SPS from encoder options
@@ -180,7 +148,7 @@ protected:
   void  xInitPPSforLT(PPS& pps);
   void  xInitHrdParameters(SPS &sps);                 ///< initialize HRDParameters parameters
 
-  void  xInitRPL(SPS &sps, bool isFieldCoding);           ///< initialize SPS from encoder options
+  void xInitRPL(SPS &sps);   ///< initialize the reference picture lists (RPL) for the given SPS from encoder options
 
 public:
   EncLib( EncLibCommon* encLibCommon );
@@ -188,7 +156,7 @@ public:
 
   void      create          ( const int layerId );
   void      destroy         ();
-  void      init            ( bool isFieldCoding, AUWriterIf* auWriterIf );
+  void      init(AUWriterIf *auWriterIf);
   void      deletePicBuffer ();
 
   // -------------------------------------------------------------------------------------------------------------------
@@ -197,40 +165,22 @@ public:
 
   AUWriterIf*             getAUWriterIf         ()              { return   m_AUWriterIf;           }
   PicList*                getListPic            ()              { return  &m_cListPic;             }
-#if ENABLE_SPLIT_PARALLELISM
-  InterSearch*            getInterSearch        ( int jId = 0 ) { return  &m_cInterSearch[jId];    }
-  IntraSearch*            getIntraSearch        ( int jId = 0 ) { return  &m_cIntraSearch[jId];    }
-
-  TrQuant*                getTrQuant            ( int jId = 0 ) { return  &m_cTrQuant[jId];        }
-#else
   InterSearch*            getInterSearch        ()              { return  &m_cInterSearch;         }
   IntraSearch*            getIntraSearch        ()              { return  &m_cIntraSearch;         }
 
   TrQuant*                getTrQuant            ()              { return  &m_cTrQuant;             }
-#endif
-  LoopFilter*             getLoopFilter         ()              { return  &m_cLoopFilter;          }
+  DeblockingFilter*       getDeblockingFilter   ()              { return  &m_deblockingFilter;     }
   EncSampleAdaptiveOffset* getSAO               ()              { return  &m_cEncSAO;              }
   EncAdaptiveLoopFilter*  getALF                ()              { return  &m_cEncALF;              }
   EncGOP*                 getGOPEncoder         ()              { return  &m_cGOPEncoder;          }
   EncSlice*               getSliceEncoder       ()              { return  &m_cSliceEncoder;        }
   EncHRD*                 getHRD                ()              { return  &m_encHRD;               }
-#if ENABLE_SPLIT_PARALLELISM
-  EncCu*                  getCuEncoder          ( int jId = 0 ) { return  &m_cCuEncoder[jId];      }
-#else
   EncCu*                  getCuEncoder          ()              { return  &m_cCuEncoder;           }
-#endif
   HLSWriter*              getHLSWriter          ()              { return  &m_HLSWriter;            }
-#if ENABLE_SPLIT_PARALLELISM
-  CABACEncoder*           getCABACEncoder       ( int jId = 0 ) { return  &m_CABACEncoder[jId];    }
-
-  RdCost*                 getRdCost             ( int jId = 0 ) { return  &m_cRdCost[jId];         }
-  CtxCache*               getCtxCache           ( int jId = 0 ) { return  &m_CtxCache[jId];        }
-#else
   CABACEncoder*           getCABACEncoder       ()              { return  &m_CABACEncoder;         }
 
   RdCost*                 getRdCost             ()              { return  &m_cRdCost;              }
   CtxCache*               getCtxCache           ()              { return  &m_CtxCache;             }
-#endif
   RateCtrl*               getRateCtrl           ()              { return  &m_cRateCtrl;            }
 
 
@@ -244,16 +194,7 @@ public:
   const PPS* getPPS( int Id ) { return m_ppsMap.getPS( Id); }
   const APS*             getAPS(int Id) { return m_apsMap.getPS(Id); }
 
-#if ENABLE_SPLIT_PARALLELISM
-  void                   setNumCuEncStacks( int n )             { m_numCuEncStacks = n; }
-  int                    getNumCuEncStacks()              const { return m_numCuEncStacks; }
-#endif
-
-#if ENABLE_SPLIT_PARALLELISM
-  EncReshape*            getReshaper( int jId = 0 )             { return  &m_cReshaper[jId]; }
-#else
   EncReshape*            getReshaper()                          { return  &m_cReshaper; }
-#endif
 
   ParameterSetMap<APS>*  getApsMap() { return &m_apsMap; }
 
@@ -293,7 +234,11 @@ public:
 
 
   void printSummary(bool isField) { m_cGOPEncoder.printOutSummary(m_uiNumAllPicCoded, isField, m_printMSEBasedSequencePSNR, 
-    m_printSequenceMSE, m_printMSSSIM, m_printHexPsnr, m_resChangeInClvsEnabled, m_spsMap.getFirstPS()->getBitDepths()); }
+    m_printSequenceMSE, m_printMSSSIM, m_printHexPsnr, m_resChangeInClvsEnabled, m_spsMap.getFirstPS()->getBitDepths()
+#if JVET_W0134_UNIFORM_METRICS_LOG
+                                  , m_layerId
+#endif
+                                  ); }
 
   int getLayerId() const { return m_layerId; }
   VPS* getVPS()          { return m_vps;     }
diff --git a/source/Lib/EncoderLib/EncLibCommon.cpp b/source/Lib/EncoderLib/EncLibCommon.cpp
index 770eda97e0dbbbabbbcccd70f81d635d46e34789..1b41432fda3c21a86586fb70cfd4b56adc868279 100644
--- a/source/Lib/EncoderLib/EncLibCommon.cpp
+++ b/source/Lib/EncoderLib/EncLibCommon.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/EncLibCommon.h b/source/Lib/EncoderLib/EncLibCommon.h
index f53ae803d7db4b6fac4c39b189013dd18f3ccbf5..ff3687f4c7b088e186ebe3b87fe170e6988ade15 100644
--- a/source/Lib/EncoderLib/EncLibCommon.h
+++ b/source/Lib/EncoderLib/EncLibCommon.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp
index 85850014726ae9484f59d198da580a868100ac79..778a40393bfd0c660ed5177ff671a530ad5a2b41 100644
--- a/source/Lib/EncoderLib/EncModeCtrl.cpp
+++ b/source/Lib/EncoderLib/EncModeCtrl.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -64,15 +64,6 @@ void EncModeCtrl::init( EncCfg *pCfg, RateCtrl *pRateCtrl, RdCost* pRdCost )
 
 bool EncModeCtrl::tryModeMaster( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner )
 {
-#if ENABLE_SPLIT_PARALLELISM
-  if( m_ComprCUCtxList.back().isLevelSplitParallel )
-  {
-    if( !parallelJobSelector( encTestmode, cs, partitioner ) )
-    {
-      return false;
-    }
-  }
-#endif
   return tryMode( encTestmode, cs, partitioner );
 }
 
@@ -248,18 +239,97 @@ int EncModeCtrl::calculateLumaDQP( const CPelBuf& rcOrg )
 }
 #endif
 
-#if ENABLE_SPLIT_PARALLELISM
-void EncModeCtrl::copyState( const EncModeCtrl& other, const UnitArea& area )
+#if JVET_W0043 // with JVET_W0043 the model parameters are passed in; otherwise they are read from m_pcEncCfg
+int EncModeCtrl::calculateLumaDQPsmooth(const CPelBuf& rcOrg, int baseQP, double threshold, double scale, double offset, int limit)
+#else
+int EncModeCtrl::calculateLumaDQPsmooth(const CPelBuf& rcOrg, int baseQP)
+#endif
 {
-  m_slice          = other.m_slice;
-  m_fastDeltaQP    = other.m_fastDeltaQP;
-  m_lumaQPOffset   = other.m_lumaQPOffset;
-  m_runNextInParallel
-                   = other.m_runNextInParallel;
-  m_ComprCUCtxList = other.m_ComprCUCtxList;
-}
+  double avg = 0;   // sum (not mean) of the samples of the quadrant being processed
+  double diff = 0;  // absolute model error, accumulated over all processed quadrants
+#if JVET_W0043
+  double thr = (double)threshold*rcOrg.height*rcOrg.width; // threshold scaled by the block area
+#else
+  double thr = (double)m_pcEncCfg->getSmoothQPReductionThreshold()*rcOrg.height*rcOrg.width; // threshold scaled by the block area
+#endif
+  int qp = 0; // returned QP offset; remains 0 unless the model error ends up below thr
+  if (rcOrg.height >= 64 && rcOrg.width >= 64) // smaller blocks keep qp = 0
+  {
+    const int numBasis = 6; // basis terms: 1, x, y, x*y, x*x, y*y
+
+    double invb[numBasis][numBasis] = { {0.001*0.244140625000000,                         0,                         0,                        0,                        0,                        0},
+                                      {                      0,   0.001*0.013204564833946,   0.001*0.002080251479290, -0.001*0.000066039729501, -0.001*0.000165220364313,        0.000000000000000},
+                                      {                      0,   0.001*0.002080251479290,   0.001*0.013204564833946, -0.001*0.000066039729501,        0.000000000000000, -0.001*0.000165220364313},
+                                      {                      0,  -0.001*0.000066039729501,  -0.001*0.000066039729501,  0.001*0.000002096499349,        0.000000000000000,        0.000000000000000},
+                                      {                      0,  -0.001*0.000165220364313,         0.000000000000000,        0.000000000000000,  0.001*0.000002622545465,        0.000000000000000},
+                                      {                      0,         0.000000000000000,  -0.001*0.000165220364313,        0.000000000000000,        0.000000000000000,  0.001*0.000002622545465} };
+    double boffset[5] = { -31.5, -31.5, -992.25, -1333.5, -1333.5 }; // constants added to the x, y, x*y, x*x and y*y terms, in that order
+
+    int listQuadrantsX[4] = { 0, 64, 0, 64 }; // x origin of each 64x64 quadrant, indexed by posx + 2 * posy
+    int listQuadrantsY[4] = { 0, 0, 64, 64 }; // y origin of each 64x64 quadrant, same indexing
+
+    double b1sum;
+    double b2sum;
+    double b3sum;
+    double b4sum;
+    double b5sum;
+    double b6sum;
+    int numQuadrantsX = (rcOrg.width == 128) ? 2 : 1; // two quadrants only when the width is exactly 128
+    int numQuadrantsY = (rcOrg.height == 128) ? 2 : 1; // two quadrants only when the height is exactly 128
+    // loop over the 64x64 quadrants of the block
+    for (int posy = 0; posy < numQuadrantsY; posy++)
+    {
+      for (int posx = 0; posx < numQuadrantsX; posx++)
+      {
+        b2sum = 0.0;
+        b3sum = 0.0;
+        b4sum = 0.0;
+        b5sum = 0.0;
+        b6sum = 0.0;
+        avg = 0.0;
+        for (uint32_t y = 0; y < 64; y++)
+        {
+          for (uint32_t x = 0; x < 64; x++)
+          {
+            const Pel& v = rcOrg.at(x + listQuadrantsX[posx + 2 * posy], y + listQuadrantsY[posx + 2 * posy]);
+            b2sum += ((double)v)*((double)x + boffset[0]);
+            b3sum += ((double)v)*((double)y + boffset[1]);
+            b4sum += ((double)v)*((double)x*(double)y + boffset[2]);
+            b5sum += ((double)v)*((double)x*(double)x + boffset[3]);
+            b6sum += ((double)v)*((double)y*(double)y + boffset[4]);
+            avg += (double)v;
+          }
+        }
+        b1sum = avg; // constant basis term: plain sum of the samples
+        double r[numBasis]; // model coefficients, r = invb * (b1sum, ..., b6sum); NOTE(review): presumably a least-squares fit - confirm
+        for (uint32_t b = 0; b < numBasis; b++)
+        {
+          r[b] = invb[b][0] * b1sum + invb[b][1] * b2sum + invb[b][2] * b3sum + invb[b][3] * b4sum + invb[b][4] * b5sum + invb[b][5] * b6sum;
+        }
+        // compute SAD between the source samples and the model value (truncated to int) at each sample position
+        for (uint32_t y = 0; y < 64; y++)
+        {
+          for (uint32_t x = 0; x < 64; x++)
+          {
+            const Pel& v = rcOrg.at(x + listQuadrantsX[posx + 2 * posy], y + listQuadrantsY[posx + 2 * posy]);
 
+            diff += abs((int)v - (int)(r[0] + r[1] * ((double)x + boffset[0]) + r[2] * ((double)y + boffset[1]) + r[3] * ((double)x*(double)y + boffset[2]) + r[4] * ((double)x*(double)x + boffset[3]) + r[5] * ((double)y*(double)y + boffset[4])));
+          }
+        }
+      }
+    }
+    if (diff < thr) // model error small enough: derive the QP offset from baseQP
+    {
+#if JVET_W0043
+      qp = max(limit, min(0, (int)(scale*(double)baseQP + offset))); // capped at 0 from above, then raised to at least limit
+#else
+      qp = max(m_pcEncCfg->getSmoothQPReductionLimit(), min(0, (int)(m_pcEncCfg->getSmoothQPReductionModelScale()*(double)baseQP + m_pcEncCfg->getSmoothQPReductionModelOffset()))); // same clamping, parameters read from the encoder configuration
 #endif
+    }
+  }
+  return qp;
+}
+
 void CacheBlkInfoCtrl::create()
 {
   const unsigned numPos = MAX_CU_SIZE >> MIN_CU_LOG2;
@@ -267,6 +337,8 @@ void CacheBlkInfoCtrl::create()
   m_numWidths  = gp_sizeIdxInfo->numWidths();
   m_numHeights = gp_sizeIdxInfo->numHeights();
 
+  bool isLog2MttPartitioning = !!dynamic_cast<SizeIndexInfoLog2*>( gp_sizeIdxInfo );
+
   for( unsigned x = 0; x < numPos; x++ )
   {
     for( unsigned y = 0; y < numPos; y++ )
@@ -275,25 +347,39 @@ void CacheBlkInfoCtrl::create()
 
       for( int wIdx = 0; wIdx < gp_sizeIdxInfo->numWidths(); wIdx++ )
       {
-        if( gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( wIdx ) ) && x + ( gp_sizeIdxInfo->sizeFrom( wIdx ) >> MIN_CU_LOG2 ) <= ( MAX_CU_SIZE >> MIN_CU_LOG2 ) )
+        if( !( gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( wIdx ) ) && x + ( gp_sizeIdxInfo->sizeFrom( wIdx ) >> MIN_CU_LOG2 ) <= ( MAX_CU_SIZE >> MIN_CU_LOG2 ) ) )
         {
-          m_codedCUInfo[x][y][wIdx] = new CodedCUInfo*[gp_sizeIdxInfo->numHeights()];
-
-          for( int hIdx = 0; hIdx < gp_sizeIdxInfo->numHeights(); hIdx++ )
-          {
-            if( gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( hIdx ) ) && y + ( gp_sizeIdxInfo->sizeFrom( hIdx ) >> MIN_CU_LOG2 ) <= ( MAX_CU_SIZE >> MIN_CU_LOG2 ) )
-            {
-              m_codedCUInfo[x][y][wIdx][hIdx] = new CodedCUInfo;
-            }
-            else
-            {
-              m_codedCUInfo[x][y][wIdx][hIdx] = nullptr;
-            }
-          }
+          m_codedCUInfo[x][y][wIdx] = nullptr;
+          continue;
         }
-        else
+
+        const int wLog2 = floorLog2( gp_sizeIdxInfo->sizeFrom( wIdx ) );
+
+        if( isLog2MttPartitioning && ( ( x << MIN_CU_LOG2 ) & ( ( 1 << ( wLog2 - 1 ) ) - 1 ) ) != 0 )
         {
           m_codedCUInfo[x][y][wIdx] = nullptr;
+          continue;
+        }
+
+        m_codedCUInfo[x][y][wIdx] = new CodedCUInfo*[gp_sizeIdxInfo->numHeights()];
+
+        for( int hIdx = 0; hIdx < gp_sizeIdxInfo->numHeights(); hIdx++ )
+        {
+          if( !( gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( hIdx ) ) && y + ( gp_sizeIdxInfo->sizeFrom( hIdx ) >> MIN_CU_LOG2 ) <= ( MAX_CU_SIZE >> MIN_CU_LOG2 ) ) )
+          {
+            m_codedCUInfo[x][y][wIdx][hIdx] = nullptr;
+            continue;
+          }
+
+          const int hLog2 = floorLog2( gp_sizeIdxInfo->sizeFrom( hIdx ) );
+
+          if( isLog2MttPartitioning && ( ( ( y << MIN_CU_LOG2 ) & ( ( 1 << ( hLog2 - 1 ) ) - 1 ) ) != 0 ) )
+          {
+            m_codedCUInfo[x][y][wIdx][hIdx] = nullptr;
+            continue;
+          }
+
+          m_codedCUInfo[x][y][wIdx][hIdx] = new CodedCUInfo;
         }
       }
     }
@@ -354,72 +440,8 @@ void CacheBlkInfoCtrl::init( const Slice &slice )
   }
 
   m_slice_chblk = &slice;
-#if ENABLE_SPLIT_PARALLELISM
-
-  m_currTemporalId = 0;
-#endif
-}
-#if ENABLE_SPLIT_PARALLELISM
-
-void CacheBlkInfoCtrl::touch( const UnitArea& area )
-{
-  CodedCUInfo& cuInfo = getBlkInfo( area );
-  cuInfo.temporalId = m_currTemporalId;
 }
 
-void CacheBlkInfoCtrl::copyState( const CacheBlkInfoCtrl &other, const UnitArea& area )
-{
-  m_slice_chblk = other.m_slice_chblk;
-
-  m_currTemporalId = other.m_currTemporalId;
-
-  if( m_slice_chblk->isIntra() ) return;
-
-  const int cuSizeMask = m_slice_chblk->getSPS()->getMaxCUWidth() - 1;
-
-  const int minPosX = ( area.lx() & cuSizeMask ) >> MIN_CU_LOG2;
-  const int minPosY = ( area.ly() & cuSizeMask ) >> MIN_CU_LOG2;
-  const int maxPosX = ( area.Y().bottomRight().x & cuSizeMask ) >> MIN_CU_LOG2;
-  const int maxPosY = ( area.Y().bottomRight().y & cuSizeMask ) >> MIN_CU_LOG2;
-
-  for( unsigned x = minPosX; x <= maxPosX; x++ )
-  {
-    for( unsigned y = minPosY; y <= maxPosY; y++ )
-    {
-      for( int wIdx = 0; wIdx < gp_sizeIdxInfo->numWidths(); wIdx++ )
-      {
-        const int width = gp_sizeIdxInfo->sizeFrom( wIdx );
-
-        if( m_codedCUInfo[x][y][wIdx] && width <= area.lwidth() && x + ( width >> MIN_CU_LOG2 ) <= ( maxPosX + 1 ) )
-        {
-          for( int hIdx = 0; hIdx < gp_sizeIdxInfo->numHeights(); hIdx++ )
-          {
-            const int height = gp_sizeIdxInfo->sizeFrom( hIdx );
-
-            if( gp_sizeIdxInfo->isCuSize( height ) && height <= area.lheight() && y + ( height >> MIN_CU_LOG2 ) <= ( maxPosY + 1 ) )
-            {
-              if( other.m_codedCUInfo[x][y][wIdx][hIdx]->temporalId > m_codedCUInfo[x][y][wIdx][hIdx]->temporalId )
-              {
-                *m_codedCUInfo[x][y][wIdx][hIdx] = *other.m_codedCUInfo[x][y][wIdx][hIdx];
-                m_codedCUInfo[x][y][wIdx][hIdx]->temporalId = m_currTemporalId;
-              }
-            }
-            else if( y + ( height >> MIN_CU_LOG2 ) > maxPosY + 1 )
-            {
-              break;;
-            }
-          }
-        }
-        else if( x + ( width >> MIN_CU_LOG2 ) > maxPosX + 1 )
-        {
-          break;
-        }
-      }
-    }
-  }
-}
-#endif
-
 CodedCUInfo& CacheBlkInfoCtrl::getBlkInfo( const UnitArea& area )
 {
   unsigned idx1, idx2, idx3, idx4;
@@ -461,10 +483,6 @@ void CacheBlkInfoCtrl::setMv( const UnitArea& area, const RefPicList refPicList,
 
   m_codedCUInfo[idx1][idx2][idx3][idx4]->saveMv [refPicList][iRefIdx] = rMv;
   m_codedCUInfo[idx1][idx2][idx3][idx4]->validMv[refPicList][iRefIdx] = true;
-#if ENABLE_SPLIT_PARALLELISM
-
-  touch( area );
-#endif
 }
 
 bool CacheBlkInfoCtrl::getMv( const UnitArea& area, const RefPicList refPicList, const int iRefIdx, Mv& rMv ) const
@@ -563,13 +581,6 @@ bool SaveLoadEncInfoSbt::saveBestSbt( const UnitArea& area, const uint32_t curPu
   return true;
 }
 
-#if ENABLE_SPLIT_PARALLELISM
-void SaveLoadEncInfoSbt::copyState(const SaveLoadEncInfoSbt &other)
-{
-  m_sliceSbt = other.m_sliceSbt;
-}
-#endif
-
 void SaveLoadEncInfoSbt::resetSaveloadSbt( int maxSbtSize )
 {
   int numSizeIdx = gp_sizeIdxInfo->idxFrom( maxSbtSize ) - MIN_CU_LOG2 + 1;
@@ -653,6 +664,8 @@ void BestEncInfoCache::create( const ChromaFormat chFmt )
   m_numWidths  = gp_sizeIdxInfo->numWidths();
   m_numHeights = gp_sizeIdxInfo->numHeights();
 
+  bool isLog2MttPartitioning = !!dynamic_cast<SizeIndexInfoLog2*>( gp_sizeIdxInfo );
+
   for( unsigned x = 0; x < numPos; x++ )
   {
     for( unsigned y = 0; y < numPos; y++ )
@@ -661,45 +674,59 @@ void BestEncInfoCache::create( const ChromaFormat chFmt )
 
       for( int wIdx = 0; wIdx < gp_sizeIdxInfo->numWidths(); wIdx++ )
       {
-        if( gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( wIdx ) ) && x + ( gp_sizeIdxInfo->sizeFrom( wIdx ) >> MIN_CU_LOG2 ) <= ( MAX_CU_SIZE >> MIN_CU_LOG2 ) )
+        if( !( gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( wIdx ) ) && x + ( gp_sizeIdxInfo->sizeFrom( wIdx ) >> MIN_CU_LOG2 ) <= ( MAX_CU_SIZE >> MIN_CU_LOG2 ) ) )
         {
-          m_bestEncInfo[x][y][wIdx] = new BestEncodingInfo*[gp_sizeIdxInfo->numHeights()];
+          m_bestEncInfo[x][y][wIdx] = nullptr;
+          continue;
+        }
 
-          for( int hIdx = 0; hIdx < gp_sizeIdxInfo->numHeights(); hIdx++ )
+        const int wLog2 = floorLog2( gp_sizeIdxInfo->sizeFrom( wIdx ) );
+
+        if( isLog2MttPartitioning && ( ( x << MIN_CU_LOG2 ) & ( ( 1 << ( wLog2 - 1 ) ) - 1 ) ) != 0 )
+        {
+          m_bestEncInfo[x][y][wIdx] = nullptr;
+          continue;
+        }
+
+        m_bestEncInfo[x][y][wIdx] = new BestEncodingInfo*[gp_sizeIdxInfo->numHeights()];
+
+        for( int hIdx = 0; hIdx < gp_sizeIdxInfo->numHeights(); hIdx++ )
+        {
+          if( !( gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( hIdx ) ) && y + ( gp_sizeIdxInfo->sizeFrom( hIdx ) >> MIN_CU_LOG2 ) <= ( MAX_CU_SIZE >> MIN_CU_LOG2 ) ) )
           {
-            if( gp_sizeIdxInfo->isCuSize( gp_sizeIdxInfo->sizeFrom( hIdx ) ) && y + ( gp_sizeIdxInfo->sizeFrom( hIdx ) >> MIN_CU_LOG2 ) <= ( MAX_CU_SIZE >> MIN_CU_LOG2 ) )
-            {
-              m_bestEncInfo[x][y][wIdx][hIdx] = new BestEncodingInfo;
+            m_bestEncInfo[x][y][wIdx][hIdx] = nullptr;
+            continue;
+          }
 
-              int w = gp_sizeIdxInfo->sizeFrom( wIdx );
-              int h = gp_sizeIdxInfo->sizeFrom( hIdx );
+          const int hLog2 = floorLog2( gp_sizeIdxInfo->sizeFrom( hIdx ) );
 
-              const UnitArea area( chFmt, Area( 0, 0, w, h ) );
+          if( isLog2MttPartitioning && ( ( ( y << MIN_CU_LOG2 ) & ( ( 1 << ( hLog2 - 1 ) ) - 1 ) ) != 0 ) )
+          {
+            m_bestEncInfo[x][y][wIdx][hIdx] = nullptr;
+            continue;
+          }
 
-              new ( &m_bestEncInfo[x][y][wIdx][hIdx]->cu ) CodingUnit    ( area );
-              new ( &m_bestEncInfo[x][y][wIdx][hIdx]->pu ) PredictionUnit( area );
+          m_bestEncInfo[x][y][wIdx][hIdx] = new BestEncodingInfo;
+
+          int w = gp_sizeIdxInfo->sizeFrom( wIdx );
+          int h = gp_sizeIdxInfo->sizeFrom( hIdx );
+
+          const UnitArea area( chFmt, Area( 0, 0, w, h ) );
+
+          new ( &m_bestEncInfo[x][y][wIdx][hIdx]->cu ) CodingUnit    ( area );
+          new ( &m_bestEncInfo[x][y][wIdx][hIdx]->pu ) PredictionUnit( area );
 #if REUSE_CU_RESULTS_WITH_MULTIPLE_TUS
-              m_bestEncInfo[x][y][wIdx][hIdx]->numTus = 0;
-              for( int i = 0; i < MAX_NUM_TUS; i++ )
-              {
-                new ( &m_bestEncInfo[x][y][wIdx][hIdx]->tus[i] ) TransformUnit( area );
-              }
+          m_bestEncInfo[x][y][wIdx][hIdx]->numTus = 0;
+          for( int i = 0; i < MAX_NUM_TUS; i++ )
+          {
+            new ( &m_bestEncInfo[x][y][wIdx][hIdx]->tus[i] ) TransformUnit( area );
+          }
 #else
-              new ( &m_bestEncInfo[x][y][wIdx][hIdx]->tu ) TransformUnit( area );
+          new ( &m_bestEncInfo[x][y][wIdx][hIdx]->tu ) TransformUnit( area );
 #endif
 
-              m_bestEncInfo[x][y][wIdx][hIdx]->poc      = -1;
-              m_bestEncInfo[x][y][wIdx][hIdx]->testMode = EncTestMode();
-            }
-            else
-            {
-              m_bestEncInfo[x][y][wIdx][hIdx] = nullptr;
-            }
-          }
-        }
-        else
-        {
-          m_bestEncInfo[x][y][wIdx] = nullptr;
+          m_bestEncInfo[x][y][wIdx][hIdx]->poc      = -1;
+          m_bestEncInfo[x][y][wIdx][hIdx]->testMode = EncTestMode();
         }
       }
     }
@@ -852,10 +879,6 @@ void BestEncInfoCache::init( const Slice &slice )
       }
     }
   }
-#if ENABLE_SPLIT_PARALLELISM
-
-  m_currTemporalId = 0;
-#endif
 }
 
 bool BestEncInfoCache::setFromCs( const CodingStructure& cs, const Partitioner& partitioner )
@@ -923,7 +946,7 @@ bool BestEncInfoCache::isValid( const CodingStructure& cs, const Partitioner& pa
   {
     return false;
   }
-  if( encInfo.cu.qp != qp )
+  if( encInfo.cu.qp != qp || cs.slice->getUseChromaQpAdj())
     return false;
   if( cs.picture->poc != encInfo.poc || CS::getArea( cs, cs.area, partitioner.chType ) != CS::getArea( cs, encInfo.cu, partitioner.chType ) || !isTheSameNbHood( encInfo.cu, cs, partitioner
     , encInfo.pu, (cs.picture->Y().width), (cs.picture->Y().height)
@@ -993,75 +1016,6 @@ bool BestEncInfoCache::setCsFrom( CodingStructure& cs, EncTestMode& testMode, co
   return true;
 }
 
-#if ENABLE_SPLIT_PARALLELISM
-void BestEncInfoCache::copyState(const BestEncInfoCache &other, const UnitArea &area)
-{
-  m_slice_bencinf  = other.m_slice_bencinf;
-  m_currTemporalId = other.m_currTemporalId;
-
-  if( m_slice_bencinf->isIntra() ) return;
-
-  const int cuSizeMask = m_slice_bencinf->getSPS()->getMaxCUWidth() - 1;
-
-  const int minPosX = ( area.lx() & cuSizeMask ) >> MIN_CU_LOG2;
-  const int minPosY = ( area.ly() & cuSizeMask ) >> MIN_CU_LOG2;
-  const int maxPosX = ( area.Y().bottomRight().x & cuSizeMask ) >> MIN_CU_LOG2;
-  const int maxPosY = ( area.Y().bottomRight().y & cuSizeMask ) >> MIN_CU_LOG2;
-
-  for( unsigned x = minPosX; x <= maxPosX; x++ )
-  {
-    for( unsigned y = minPosY; y <= maxPosY; y++ )
-    {
-      for( int wIdx = 0; wIdx < gp_sizeIdxInfo->numWidths(); wIdx++ )
-      {
-        const int width = gp_sizeIdxInfo->sizeFrom( wIdx );
-
-        if( m_bestEncInfo[x][y][wIdx] && width <= area.lwidth() && x + ( width >> MIN_CU_LOG2 ) <= ( maxPosX + 1 ) )
-        {
-          for( int hIdx = 0; hIdx < gp_sizeIdxInfo->numHeights(); hIdx++ )
-          {
-            const int height = gp_sizeIdxInfo->sizeFrom( hIdx );
-
-            if( gp_sizeIdxInfo->isCuSize( height ) && height <= area.lheight() && y + ( height >> MIN_CU_LOG2 ) <= ( maxPosY + 1 ) )
-            {
-              if( other.m_bestEncInfo[x][y][wIdx][hIdx]->temporalId > m_bestEncInfo[x][y][wIdx][hIdx]->temporalId )
-              {
-                m_bestEncInfo[x][y][wIdx][hIdx]->cu       = other.m_bestEncInfo[x][y][wIdx][hIdx]->cu;
-                m_bestEncInfo[x][y][wIdx][hIdx]->pu       = other.m_bestEncInfo[x][y][wIdx][hIdx]->pu;
-                m_bestEncInfo[x][y][wIdx][hIdx]->numTus   = other.m_bestEncInfo[x][y][wIdx][hIdx]->numTus;
-                m_bestEncInfo[x][y][wIdx][hIdx]->poc      = other.m_bestEncInfo[x][y][wIdx][hIdx]->poc;
-                m_bestEncInfo[x][y][wIdx][hIdx]->testMode = other.m_bestEncInfo[x][y][wIdx][hIdx]->testMode;
-
-                for( int i = 0; i < m_bestEncInfo[x][y][wIdx][hIdx]->numTus; i++ )
-                  m_bestEncInfo[x][y][wIdx][hIdx]->tus[i] = other.m_bestEncInfo[x][y][wIdx][hIdx]->tus[i];
-              }
-            }
-            else if( y + ( height >> MIN_CU_LOG2 ) > maxPosY + 1 )
-            {
-              break;;
-            }
-          }
-        }
-        else if( x + ( width >> MIN_CU_LOG2 ) > maxPosX + 1 )
-        {
-          break;
-        }
-      }
-    }
-  }
-}
-
-void BestEncInfoCache::touch(const UnitArea &area)
-{
-  unsigned idx1, idx2, idx3, idx4;
-  getAreaIdx(area.Y(), *m_slice_bencinf->getPPS()->pcv, idx1, idx2, idx3, idx4);
-  BestEncodingInfo &encInfo = *m_bestEncInfo[idx1][idx2][idx3][idx4];
-
-  encInfo.temporalId = m_currTemporalId;
-}
-
-#endif
-
 #endif
 
 static bool interHadActive( const ComprCUCtx& ctx )
@@ -1075,6 +1029,9 @@ static bool interHadActive( const ComprCUCtx& ctx )
 
 void EncModeCtrlMTnoRQT::create( const EncCfg& cfg )
 {
+#if GDR_ENABLED
+  m_encCfg = cfg;
+#endif
   CacheBlkInfoCtrl::create();
 #if REUSE_CU_RESULTS
   BestEncInfoCache::create( cfg.getChromaFormatIdc() );
@@ -1102,9 +1059,6 @@ void EncModeCtrlMTnoRQT::initCTUEncoding( const Slice &slice )
   CHECK( !m_ComprCUCtxList.empty(), "Mode list is not empty at the beginning of a CTU" );
 
   m_slice             = &slice;
-#if ENABLE_SPLIT_PARALLELISM
-  m_runNextInParallel      = false;
-#endif
 
   if( m_pcEncCfg->getUseE0023FastEnc() )
   {
@@ -1136,19 +1090,6 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
 
   m_ComprCUCtxList.push_back( ComprCUCtx( cs, minDepth, maxDepth, NUM_EXTRA_FEATURES ) );
 
-#if ENABLE_SPLIT_PARALLELISM
-  if( m_runNextInParallel )
-  {
-    for( auto &level : m_ComprCUCtxList )
-    {
-      CHECK( level.isLevelSplitParallel, "Tring to parallelize a level within parallel execution!" );
-    }
-    CHECK( cs.picture->scheduler.getSplitJobId() == 0, "Trying to run a parallel level although jobId is 0!" );
-    m_runNextInParallel                          = false;
-    m_ComprCUCtxList.back().isLevelSplitParallel = true;
-  }
-
-#endif
   const CodingUnit* cuLeft  = cs.getCU( cs.area.blocks[partitioner.chType].pos().offset( -1, 0 ), partitioner.chType );
   const CodingUnit* cuAbove = cs.getCU( cs.area.blocks[partitioner.chType].pos().offset( 0, -1 ), partitioner.chType );
 
@@ -1207,6 +1148,40 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
       baseQP = Clip3 (-cs.sps->getQpBDOffset (CHANNEL_TYPE_LUMA), MAX_QP, baseQP - m_lumaQPOffset);
     }
 #endif
+    if (m_pcEncCfg->getSmoothQPReductionEnable())
+    {
+      int smoothQPoffset = 0;
+      if (partitioner.currQgEnable())
+      {
+        // enable smooth QP reduction on selected frames
+        bool checkSmoothQP = false;
+        if (m_pcEncCfg->getSmoothQPReductionPeriodicity() != 0)
+        {
+          checkSmoothQP = ((m_pcEncCfg->getSmoothQPReductionPeriodicity() == 0) && cs.slice->isIntra()) || (m_pcEncCfg->getSmoothQPReductionPeriodicity() == 1) || ((cs.slice->getPOC() % m_pcEncCfg->getSmoothQPReductionPeriodicity()) == 0);
+        }
+        else
+        {
+          checkSmoothQP = ((m_pcEncCfg->getSmoothQPReductionPeriodicity() == 0) && cs.slice->isIntra());
+        }
+        if (checkSmoothQP)
+        {
+#if JVET_W0043
+          bool isIntraSlice = cs.slice->isIntra();
+          if (isIntraSlice)
+          {
+            smoothQPoffset = calculateLumaDQPsmooth(cs.getOrgBuf(clipArea(cs.area.Y(), cs.picture->Y())), baseQP, m_pcEncCfg->getSmoothQPReductionThresholdIntra(), m_pcEncCfg->getSmoothQPReductionModelScaleIntra(), m_pcEncCfg->getSmoothQPReductionModelOffsetIntra(), m_pcEncCfg->getSmoothQPReductionLimitIntra());
+          }
+          else
+          {
+            smoothQPoffset = calculateLumaDQPsmooth(cs.getOrgBuf(clipArea(cs.area.Y(), cs.picture->Y())), baseQP, m_pcEncCfg->getSmoothQPReductionThresholdInter(), m_pcEncCfg->getSmoothQPReductionModelScaleInter(), m_pcEncCfg->getSmoothQPReductionModelOffsetInter(), m_pcEncCfg->getSmoothQPReductionLimitInter());
+          }
+#else
+          smoothQPoffset = calculateLumaDQPsmooth(cs.getOrgBuf(clipArea(cs.area.Y(), cs.picture->Y())), baseQP);
+#endif
+        }
+      }
+      baseQP = Clip3(-cs.sps->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, baseQP + smoothQPoffset);
+    }
   }
   int minQP = baseQP;
   int maxQP = baseQP;
@@ -1369,7 +1344,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
           m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_GEO, ETO_STANDARD, qp } );
         }
         m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_SKIP,  ETO_STANDARD, qp } );
-        if (cs.sps->getUseAffine() || cs.sps->getSbTMVPEnabledFlag())
+        if (cs.sps->getUseAffine() || (cs.sps->getSbTMVPEnabledFlag() && cs.slice->getPicHeader()->getEnableTMVPFlag()))
         {
           m_ComprCUCtxList.back().testModes.push_back( { ETM_AFFINE,    ETO_STANDARD, qp } );
         }
@@ -1383,7 +1358,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
           m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_GEO, ETO_STANDARD, qp } );
         }
         m_ComprCUCtxList.back().testModes.push_back( { ETM_MERGE_SKIP,  ETO_STANDARD, qp } );
-        if (cs.sps->getUseAffine() || cs.sps->getSbTMVPEnabledFlag())
+        if (cs.sps->getUseAffine() || (cs.sps->getSbTMVPEnabledFlag() && cs.slice->getPicHeader()->getEnableTMVPFlag()))
         {
           m_ComprCUCtxList.back().testModes.push_back( { ETM_AFFINE,    ETO_STANDARD, qp } );
         }
@@ -1421,7 +1396,14 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
   // Fast checks, partitioning depended
   if (cuECtx.isHashPerfectMatch && encTestmode.type != ETM_MERGE_SKIP && encTestmode.type != ETM_INTER_ME && encTestmode.type != ETM_AFFINE && encTestmode.type != ETM_MERGE_GEO)
   {
+#if GDR_ENABLED // disable hash perfect match when GDR is on
+    if (!m_encCfg.getGdrEnabled())
+    {
+      return false;
+    }
+#else
     return false;
+#endif
   }
 
   // if early skip detected, skip all modes checking but the splits
@@ -1570,7 +1552,7 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
   {
     if (partitioner.currArea().lumaSize().width > 64 || partitioner.currArea().lumaSize().height > 64
         || ((partitioner.currArea().lumaSize().width * partitioner.currArea().lumaSize().height <= 16) && (isLuma(partitioner.chType)) )
-        || ((partitioner.currArea().chromaSize().width * partitioner.currArea().chromaSize().height <= 16) && (!isLuma(partitioner.chType)) && partitioner.isSepTree(cs) ) 
+        || ((partitioner.currArea().chromaSize().width * partitioner.currArea().chromaSize().height <= 16) && (!isLuma(partitioner.chType)) && partitioner.isSepTree(cs) )
       || (partitioner.isLocalSepTree(cs)  && (!isLuma(partitioner.chType)) ) )
     {
       return false;
@@ -1746,9 +1728,6 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
     {
       case CU_QUAD_SPLIT:
         {
-#if ENABLE_SPLIT_PARALLELISM
-          if( !cuECtx.isLevelSplitParallel )
-#endif
           if( !cuECtx.get<bool>( QT_BEFORE_BT ) && bestCU )
           {
             unsigned maxBTD        = cs.pcv->getMaxBtDepth( slice, partitioner.chType );
@@ -1971,12 +1950,7 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
             relatedCU.relatedCuIsValid   = true;
           }
         }
-#if ENABLE_SPLIT_PARALLELISM
-#if REUSE_CU_RESULTS
-        BestEncInfoCache::touch(partitioner.currArea());
-#endif
-        CacheBlkInfoCtrl::touch(partitioner.currArea());
-#endif
+
         cuECtx.set( IS_BEST_NOSPLIT_SKIP, bestCU->skip );
       }
     }
@@ -2117,105 +2091,3 @@ bool EncModeCtrlMTnoRQT::useModeResult( const EncTestMode& encTestmode, CodingSt
   }
 }
 
-#if ENABLE_SPLIT_PARALLELISM
-void EncModeCtrlMTnoRQT::copyState( const EncModeCtrl& other, const UnitArea& area )
-{
-  const EncModeCtrlMTnoRQT* pOther = dynamic_cast<const EncModeCtrlMTnoRQT*>( &other );
-
-  CHECK( !pOther, "Trying to copy state from a different type of controller" );
-
-  this->EncModeCtrl        ::copyState( *pOther, area );
-  this->CacheBlkInfoCtrl   ::copyState( *pOther, area );
-#if REUSE_CU_RESULTS
-  this->BestEncInfoCache   ::copyState( *pOther, area );
-#endif
-  this->SaveLoadEncInfoSbt ::copyState( *pOther );
-
-  m_skipThreshold = pOther->m_skipThreshold;
-}
-
-int EncModeCtrlMTnoRQT::getNumParallelJobs( const CodingStructure &cs, Partitioner& partitioner ) const
-{
-  int numJobs = 0;
-
-  if(      partitioner.canSplit( CU_TRIH_SPLIT, cs ) )
-  {
-    numJobs = 6;
-  }
-  else if( partitioner.canSplit( CU_TRIV_SPLIT, cs ) )
-  {
-    numJobs = 5;
-  }
-  else if( partitioner.canSplit( CU_HORZ_SPLIT, cs ) )
-  {
-    numJobs = 4;
-  }
-  else if( partitioner.canSplit( CU_VERT_SPLIT, cs ) )
-  {
-    numJobs = 3;
-  }
-  else if( partitioner.canSplit( CU_QUAD_SPLIT, cs ) )
-  {
-    numJobs = 2;
-  }
-  else if( partitioner.canSplit( CU_DONT_SPLIT, cs ) )
-  {
-    numJobs = 1;
-  }
-
-  CHECK( numJobs >= NUM_RESERVERD_SPLIT_JOBS, "More jobs specified than allowed" );
-
-  return numJobs;
-}
-
-bool EncModeCtrlMTnoRQT::isParallelSplit( const CodingStructure &cs, Partitioner& partitioner ) const
-{
-  if( partitioner.getImplicitSplit( cs ) != CU_DONT_SPLIT || cs.picture->scheduler.getSplitJobId() != 0 ) return false;
-  if( cs.pps->getUseDQP() && partitioner.currQgEnable() ) return false;
-  const int numJobs = getNumParallelJobs( cs, partitioner );
-  const int numPxl  = partitioner.currArea().Y().area();
-  const int parlAt  = m_pcEncCfg->getNumSplitThreads() <= 3 ? 1024 : 256;
-  if(  cs.slice->isIntra() && numJobs > 2 && ( numPxl == parlAt || !partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) ) return true;
-  if( !cs.slice->isIntra() && numJobs > 1 && ( numPxl == parlAt || !partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) ) return true;
-  return false;
-}
-
-bool EncModeCtrlMTnoRQT::parallelJobSelector( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) const
-{
-  // Job descriptors
-  //  - 1: all non-split modes
-  //  - 2: QT-split
-  //  - 3: all vertical modes but TT_V
-  //  - 4: all horizontal modes but TT_H
-  //  - 5: TT_V
-  //  - 6: TT_H
-  switch( cs.picture->scheduler.getSplitJobId() )
-  {
-  case 1:
-    // be sure to execute post dont split
-    return !isModeSplit( encTestmode );
-    break;
-  case 2:
-    return encTestmode.type == ETM_SPLIT_QT;
-    break;
-  case 3:
-    return encTestmode.type == ETM_SPLIT_BT_V;
-    break;
-  case 4:
-    return encTestmode.type == ETM_SPLIT_BT_H;
-    break;
-  case 5:
-    return encTestmode.type == ETM_SPLIT_TT_V;
-    break;
-  case 6:
-    return encTestmode.type == ETM_SPLIT_TT_H;
-    break;
-  default:
-    THROW( "Unknown job-ID for parallelization of EncModeCtrlMTnoRQT: " << cs.picture->scheduler.getSplitJobId() );
-    break;
-  }
-}
-
-#endif
-
-
diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h
index 0eb7a332980179db841f56498c0982c03568c451..d48ac0312b64c143e21c56cc30b68f65f8b39546 100644
--- a/source/Lib/EncoderLib/EncModeCtrl.h
+++ b/source/Lib/EncoderLib/EncModeCtrl.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -194,10 +194,6 @@ struct ComprCUCtx
     , skipSecondMTSPass
                     ( false )
     , interHad      (std::numeric_limits<Distortion>::max())
-#if ENABLE_SPLIT_PARALLELISM
-    , isLevelSplitParallel
-                    ( false )
-#endif
     , bestCostWithoutSplitFlags( MAX_DOUBLE )
     , bestCostMtsFirstPassNoIsp( MAX_DOUBLE )
     , bestCostIsp   ( MAX_DOUBLE )
@@ -245,9 +241,6 @@ struct ComprCUCtx
   double                            bestMtsSize2Nx2N1stPass;
   bool                              skipSecondMTSPass;
   Distortion                        interHad;
-#if ENABLE_SPLIT_PARALLELISM
-  bool                              isLevelSplitParallel;
-#endif
   double                            bestCostWithoutSplitFlags;
   double                            bestCostMtsFirstPassNoIsp;
   double                            bestCostIsp;
@@ -286,9 +279,6 @@ protected:
 #endif
   bool                  m_fastDeltaQP;
   static_vector<ComprCUCtx, ( MAX_CU_DEPTH << 2 )> m_ComprCUCtxList;
-#if ENABLE_SPLIT_PARALLELISM
-  int                   m_runNextInParallel;
-#endif
   InterSearch*          m_pcInterSearch;
 
   bool                  m_doPlt;
@@ -311,13 +301,6 @@ public:
 
   virtual bool useModeResult        ( const EncTestMode& encTestmode, CodingStructure*& tempCS,  Partitioner& partitioner ) = 0;
   virtual bool checkSkipOtherLfnst  ( const EncTestMode& encTestmode, CodingStructure*& tempCS,  Partitioner& partitioner ) = 0;
-#if ENABLE_SPLIT_PARALLELISM
-  virtual void copyState            ( const EncModeCtrl& other, const UnitArea& area );
-  virtual int  getNumParallelJobs   ( const CodingStructure &cs, Partitioner& partitioner )                                 const { return 1;     }
-  virtual bool isParallelSplit      ( const CodingStructure &cs, Partitioner& partitioner )                                 const { return false; }
-  virtual bool parallelJobSelector  ( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) const { return true;  }
-          void setParallelSplit     ( bool val ) { m_runNextInParallel = val; }
-#endif
 
   void         init                 ( EncCfg *pCfg, RateCtrl *pRateCtrl, RdCost *pRdCost );
   bool         tryModeMaster        ( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner );
@@ -335,6 +318,11 @@ public:
 #if SHARP_LUMA_DELTA_QP
   void                  initLumaDeltaQpLUT();
   int                   calculateLumaDQP  ( const CPelBuf& rcOrg );
+#endif
+#if JVET_W0043
+  int                                 calculateLumaDQPsmooth(const CPelBuf& rcOrg, int baseQP, double threshold, double scale, double offset, int limit);
+#else
+  int                                 calculateLumaDQPsmooth(const CPelBuf& rcOrg, int baseQP);
 #endif
   void setFastDeltaQp                 ( bool b )                {        m_fastDeltaQP = b;                               }
   bool getFastDeltaQp                 ()                  const { return m_fastDeltaQP;                                   }
@@ -374,6 +362,393 @@ public:
   void   setPltEnc                    ( bool b )                { m_doPlt = b; }
   bool   getPltEnc()                                      const { return m_doPlt; }
 
+#if GDR_ENABLED
+// Removes every inter test mode (as classified by isModeInter) from the
+// test-mode list of the last entry in m_ComprCUCtxList. Split modes and all
+// other entries are kept; the order of the remaining entries is unchanged.
+void forceIntraMode()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (isModeInter(modes[pos].type))
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every inter test mode (isModeInter) and every split test mode
+// (isModeSplit) from the test-mode list of the last entry in
+// m_ComprCUCtxList. The order of the remaining entries is unchanged.
+void forceIntraNoSplit()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    const bool drop = isModeInter(modes[pos].type) || isModeSplit(modes[pos].type);
+    if (drop)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every ETM_INTRA entry from the test-mode list of the last entry in
+// m_ComprCUCtxList; all other entries are kept in their original order.
+void forceInterMode()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type == ETM_INTRA)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every ETM_HASH_INTER entry from the test-mode list of the last
+// entry in m_ComprCUCtxList; other entries keep their original order.
+void removeHashInter()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type == ETM_HASH_INTER)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every ETM_MERGE_SKIP entry from the test-mode list of the last
+// entry in m_ComprCUCtxList; other entries keep their original order.
+void removeMergeSkip()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type == ETM_MERGE_SKIP)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every ETM_INTER_ME entry from the test-mode list of the last
+// entry in m_ComprCUCtxList; other entries keep their original order.
+void removeInterME()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type == ETM_INTER_ME)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every ETM_AFFINE entry from the test-mode list of the last
+// entry in m_ComprCUCtxList; other entries keep their original order.
+void removeAffine()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type == ETM_AFFINE)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every ETM_MERGE_GEO entry from the test-mode list of the last
+// entry in m_ComprCUCtxList; other entries keep their original order.
+void removeMergeGeo()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type == ETM_MERGE_GEO)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every ETM_INTRA entry from the test-mode list of the last
+// entry in m_ComprCUCtxList; other entries keep their original order.
+void removeIntra()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type == ETM_INTRA)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Erases the first ETM_INTER_ME entry whose IMV option field,
+// (opts & ETO_IMV) >> ETO_IMV_SHIFT, is greater than 2. At most one entry is removed.
+void removeBadMode()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  const int count = (int)modes.size();
+  for (int pos = 0; pos < count; pos++)
+  {
+    const bool isInterMe = modes[pos].type == ETM_INTER_ME;
+    if (isInterMe && ((modes[pos].opts & ETO_IMV) >> ETO_IMV_SHIFT) > 2)
+    {
+      modes.erase(modes.begin() + pos);
+      return;   // only the first match is removed
+    }
+  }
+}
+
+// Reports whether the test-mode list of the last entry in m_ComprCUCtxList
+// contains at least one of the mode types listed in the switch below.
+// The list itself is not modified.
+bool anyPredModeLeft()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  const int count = (int)modes.size();
+  for (int pos = 0; pos < count; pos++)
+  {
+    switch (modes[pos].type)
+    {
+    case ETM_HASH_INTER: case ETM_MERGE_SKIP: case ETM_INTER_ME:
+    case ETM_AFFINE:     case ETM_MERGE_GEO:  case ETM_INTRA:
+    case ETM_PALETTE:    case ETM_IBC:        case ETM_IBC_MERGE:
+      return true;
+    default:
+      break;
+    }
+  }
+
+  return false;
+}
+
+// Reports whether the test-mode list of the last entry in m_ComprCUCtxList
+// contains an ETM_INTRA or an ETM_IBC entry. The scan stops at the first
+// match and the list itself is not modified.
+bool anyIntraIBCMode()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  bool found = false;
+
+  for (int pos = 0, count = (int)modes.size(); pos < count && !found; pos++)
+  {
+    found = modes[pos].type == ETM_INTRA || modes[pos].type == ETM_IBC;
+  }
+
+  return found;
+}
+
+// Removes every ETM_POST_DONT_SPLIT entry from the test-mode list of the last
+// entry in m_ComprCUCtxList; other entries keep their original order.
+void forceRemoveDontSplit()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type == ETM_POST_DONT_SPLIT)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Keeps only ETM_SPLIT_QT, ETM_SPLIT_BT_V and ETM_SPLIT_TT_V in the test-mode
+// list of the last entry in m_ComprCUCtxList; every other entry is erased.
+void forceVerSplitOnly()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)   // backwards: erase never shifts unvisited entries
+  {
+    const auto type = modes[pos].type;
+    const bool keep = type == ETM_SPLIT_QT || type == ETM_SPLIT_BT_V || type == ETM_SPLIT_TT_V;
+    if (!keep)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every ETM_SPLIT_TT_V entry from the test-mode list of the last
+// entry in m_ComprCUCtxList; other entries keep their original order.
+void forceRemoveTTV()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type == ETM_SPLIT_TT_V)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every ETM_SPLIT_BT_V entry from the test-mode list of the last
+// entry in m_ComprCUCtxList; other entries keep their original order.
+void forceRemoveBTV()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type == ETM_SPLIT_BT_V)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every ETM_SPLIT_QT entry from the test-mode list of the last
+// entry in m_ComprCUCtxList; other entries keep their original order.
+void forceRemoveQT()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type == ETM_SPLIT_QT)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every ETM_SPLIT_BT_H and ETM_SPLIT_TT_H entry from the test-mode
+// list of the last entry in m_ComprCUCtxList; other entries keep their order.
+void forceRemoveHT()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type == ETM_SPLIT_BT_H || modes[pos].type == ETM_SPLIT_TT_H)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes every ETM_SPLIT_QT, ETM_SPLIT_BT_H and ETM_SPLIT_TT_H entry from the
+// test-mode list of the last entry in m_ComprCUCtxList; others keep their order.
+void forceRemoveQTHT()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)   // backwards: erase never shifts unvisited entries
+  {
+    const auto type = modes[pos].type;
+    const bool drop = type == ETM_SPLIT_QT || type == ETM_SPLIT_BT_H || type == ETM_SPLIT_TT_H;
+    if (drop)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Removes the five split entries (QT, BT_H, BT_V, TT_H, TT_V) from the test-mode
+// list of the last entry in m_ComprCUCtxList; other entries keep their order.
+void forceRemoveAllSplit()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)   // backwards: erase never shifts unvisited entries
+  {
+    const auto type = modes[pos].type;
+    const bool drop = type == ETM_SPLIT_QT || type == ETM_SPLIT_BT_H || type == ETM_SPLIT_BT_V || type == ETM_SPLIT_TT_H || type == ETM_SPLIT_TT_V;
+    if (drop)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Keeps only ETM_SPLIT_QT entries in the test-mode list of the last entry in
+// m_ComprCUCtxList. Every other entry is erased, not just the other split
+// types, so non-split modes are removed as well.
+void forceQTonlyMode()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+
+  // Walk backwards so that erasing never shifts an entry still to be visited.
+  for (int pos = (int)modes.size() - 1; pos >= 0; pos--)
+  {
+    if (modes[pos].type != ETM_SPLIT_QT)
+    {
+      modes.erase(modes.begin() + pos);
+    }
+  }
+}
+
+// Maps a test-mode type to a short label for debug output (used by printMode()).
+// The switch matches on the raw numeric value of the enumerator; any value
+// outside 0..16 yields "INVALID".
+// Returns a pointer to a string literal rather than a heap copy, so callers
+// such as printMode() need not (and must not) free or modify the result.
+const char* printType(EncTestModeType type)
+{
+  switch (type)
+  {
+  case  0: return "Hash";
+  case  1: return "Mkip";
+  case  2: return "Inter";
+  case  3: return "Affi";
+  case  4: return "Tria";
+  case  5: return "Intra";
+  case  6: return "Palet";
+  case  7: return "QT";
+  case  8: return "BTH";
+  case  9: return "BTV";
+  case 10: return "TTH";
+  case 11: return "TTV";
+  case 12: return "|";
+  case 13: return "CACHE";
+  case 14: return "IMV";
+  case 15: return "IBC";
+  case 16: return "IBCM";
+  default: return "INVALID";
+  }
+}
+
+// Prints the test modes of the last entry in m_ComprCUCtxList to stdout on one
+// line, in list order, as "-:[ <label> <label> ...]" using printType() labels.
+void printMode()
+{
+  auto &modes = m_ComprCUCtxList.back().testModes;
+  printf("-:[");
+  for (int pos = 0; pos < (int)modes.size(); pos++)
+  {
+    printf(" %s", printType(modes[pos].type));
+  }
+  printf("]\n");
+}
+#endif
+
 protected:
   void xExtractFeatures ( const EncTestMode encTestmode, CodingStructure& cs );
   void xGetMinMaxQP     ( int& iMinQP, int& iMaxQP, const CodingStructure& cs, const Partitioner &pm, const int baseQP, const SPS& sps, const PPS& pps, const PartSplit splitMode );
@@ -395,13 +770,7 @@ struct SaveLoadStructSbt
 class SaveLoadEncInfoSbt
 {
 protected:
-#if ENABLE_SPLIT_PARALLELISM
-public:
-#endif
   void init( const Slice &slice );
-#if ENABLE_SPLIT_PARALLELISM
-protected:
-#endif
   void create();
   void destroy();
 
@@ -414,9 +783,6 @@ public:
   void     resetSaveloadSbt( int maxSbtSize );
   uint16_t findBestSbt( const UnitArea& area, const uint32_t curPuSse );
   bool     saveBestSbt( const UnitArea& area, const uint32_t curPuSse, const uint8_t curPuSbt, const uint8_t curPuTrs );
-#if ENABLE_SPLIT_PARALLELISM
-  void     copyState(const SaveLoadEncInfoSbt& other);
-#endif
 };
 
 static const int MAX_STORED_CU_INFO_REFS = 4;
@@ -439,12 +805,6 @@ struct CodedCUInfo
   double   bestNonDCT2Cost;
   bool     relatedCuIsValid;
   uint8_t  bestISPIntraMode;
-
-#if ENABLE_SPLIT_PARALLELISM
-
-  uint64_t
-       temporalId;
-#endif
 };
 
 class CacheBlkInfoCtrl
@@ -460,21 +820,7 @@ protected:
 
   void create   ();
   void destroy  ();
-#if ENABLE_SPLIT_PARALLELISM
-public:
-#endif
   void init     ( const Slice &slice );
-#if ENABLE_SPLIT_PARALLELISM
-private:
-  uint64_t
-       m_currTemporalId;
-public:
-  void tick     () { m_currTemporalId++; CHECK( m_currTemporalId <= 0, "Problem with integer overflow!" ); }
-  // mark the state of the blk as changed within the current temporal id
-  void copyState( const CacheBlkInfoCtrl &other, const UnitArea& area );
-protected:
-  void touch    ( const UnitArea& area );
-#endif
 
   CodedCUInfo& getBlkInfo( const UnitArea& area );
 
@@ -508,10 +854,6 @@ struct BestEncodingInfo
   EncTestMode    testMode;
 
   int            poc;
-
-#if ENABLE_SPLIT_PARALLELISM
-  int64_t        temporalId;
-#endif
 };
 
 class BestEncInfoCache
@@ -526,9 +868,6 @@ private:
   bool               *m_runType;
   CodingStructure     m_dummyCS;
   XUCache             m_dummyCache;
-#if ENABLE_SPLIT_PARALLELISM
-  int64_t m_currTemporalId;
-#endif
 
 protected:
 
@@ -537,19 +876,10 @@ protected:
 
   bool setFromCs( const CodingStructure& cs, const Partitioner& partitioner );
   bool isValid  ( const CodingStructure &cs, const Partitioner &partitioner, int qp );
-
-#if ENABLE_SPLIT_PARALLELISM
-  void touch    ( const UnitArea& area );
-#endif
 public:
 
   BestEncInfoCache() : m_slice_bencinf( nullptr ), m_dummyCS( m_dummyCache.cuCache, m_dummyCache.puCache, m_dummyCache.tuCache ) {}
   virtual ~BestEncInfoCache() {}
-
-#if ENABLE_SPLIT_PARALLELISM
-  void     copyState( const BestEncInfoCache &other, const UnitArea &area );
-  void     tick     () { m_currTemporalId++; CHECK( m_currTemporalId <= 0, "Problem with integer overflow!" ); }
-#endif
   void     init     ( const Slice &slice );
   bool     setCsFrom( CodingStructure& cs, EncTestMode& testMode, const Partitioner& partitioner ) const;
 };
@@ -590,6 +920,9 @@ class EncModeCtrlMTnoRQT : public EncModeCtrl, public CacheBlkInfoCtrl
   };
 
   unsigned m_skipThreshold;
+#if GDR_ENABLED
+  EncCfg m_encCfg;
+#endif
 
 public:
 
@@ -602,13 +935,6 @@ public:
   virtual bool tryMode            ( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner );
   virtual bool useModeResult      ( const EncTestMode& encTestmode, CodingStructure*& tempCS,  Partitioner& partitioner );
 
-#if ENABLE_SPLIT_PARALLELISM
-  virtual void copyState          ( const EncModeCtrl& other, const UnitArea& area );
-
-  virtual int  getNumParallelJobs ( const CodingStructure &cs, Partitioner& partitioner ) const;
-  virtual bool isParallelSplit    ( const CodingStructure &cs, Partitioner& partitioner ) const;
-  virtual bool parallelJobSelector( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) const;
-#endif
   virtual bool checkSkipOtherLfnst( const EncTestMode& encTestmode, CodingStructure*& tempCS, Partitioner& partitioner );
 };
 
diff --git a/source/Lib/EncoderLib/EncReshape.cpp b/source/Lib/EncoderLib/EncReshape.cpp
index 0d473154865e79d3b8a2c405420145233ab4e042..1120b4e201e024930fa39c5ec2a84bccd3029d8d 100644
--- a/source/Lib/EncoderLib/EncReshape.cpp
+++ b/source/Lib/EncoderLib/EncReshape.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -228,11 +228,7 @@ void EncReshape::calcSeqStats(Picture *pcPic, SeqInfo &stats)
         {
           for (bx = x1; bx <= x2; bx++)
           {
-#if JVET_T0091_LMCS_ENC_OVERFLOW_FIX
             tempSq = (int64_t)pWinY[bx] * (int64_t)pWinY[bx];
-#else
-            tempSq = pWinY[bx] * pWinY[bx];
-#endif
             leftSum += pWinY[bx];
             leftSumSq += tempSq;
             leftColSum[bx] += pWinY[bx];
@@ -259,11 +255,7 @@ void EncReshape::calcSeqStats(Picture *pcPic, SeqInfo &stats)
           for (bx = x1; bx <= x2; bx++)
           {
             topRowSum[y + winLens] += pWinY[bx];
-#if JVET_T0091_LMCS_ENC_OVERFLOW_FIX
             topRowSumSq[y + winLens] += (int64_t)pWinY[bx] * (int64_t)pWinY[bx];
-#else
-            topRowSumSq[y + winLens] += pWinY[bx] * pWinY[bx];
-#endif
           }
           topSum += topRowSum[y + winLens];
           topSumSq += topRowSumSq[y + winLens];
@@ -282,11 +274,7 @@ void EncReshape::calcSeqStats(Picture *pcPic, SeqInfo &stats)
           for (bx = x1; bx <= x2; bx++)
           {
             leftColSum[bx] += pWinY[bx];
-#if JVET_T0091_LMCS_ENC_OVERFLOW_FIX
             leftColSumSq[bx] += (int64_t)pWinY[bx] * (int64_t)pWinY[bx];
-#else
-            leftColSumSq[bx] += pWinY[bx] * pWinY[bx];
-#endif
           }
           pWinY += stride;
         }
@@ -307,11 +295,7 @@ void EncReshape::calcSeqStats(Picture *pcPic, SeqInfo &stats)
             for (by = y1; by <= y2; by++)
             {
               leftColSum[x + winLens] += pWinY[x + winLens];
-#if JVET_T0091_LMCS_ENC_OVERFLOW_FIX
               leftColSumSq[x + winLens] += (int64_t)pWinY[x + winLens] * (int64_t)pWinY[x + winLens];
-#else
-              leftColSumSq[x + winLens] += pWinY[x + winLens] * pWinY[x + winLens];
-#endif
               pWinY += stride;
             }
           }
@@ -324,22 +308,14 @@ void EncReshape::calcSeqStats(Picture *pcPic, SeqInfo &stats)
               pWinY = &picY.buf[0];
               pWinY += winLens * stride;
               leftColSum[x + winLens] += pWinY[x + winLens];
-#if JVET_T0091_LMCS_ENC_OVERFLOW_FIX
               leftColSumSq[x + winLens] += (int64_t)pWinY[x + winLens] * (int64_t)pWinY[x + winLens];
-#else
-              leftColSumSq[x + winLens] += pWinY[x + winLens] * pWinY[x + winLens];
-#endif
             }
             if (y > winLens)
             {
               pWinY = &picY.buf[0];
               pWinY -= (winLens + 1) * stride;
               leftColSum[x + winLens] -= pWinY[x + winLens];
-#if JVET_T0091_LMCS_ENC_OVERFLOW_FIX
               leftColSumSq[x + winLens] -= (int64_t)pWinY[x + winLens] * (int64_t)pWinY[x + winLens];
-#else
-              leftColSumSq[x + winLens] -= pWinY[x + winLens] * pWinY[x + winLens];
-#endif
             }
           }
           topColSum[x + winLens] = leftColSum[x + winLens];
@@ -423,11 +399,7 @@ void EncReshape::calcSeqStats(Picture *pcPic, SeqInfo &stats)
     for (int x = 0; x < width; x++)
     {
       avgY += picY.buf[x];
-#if JVET_T0091_LMCS_ENC_OVERFLOW_FIX
       varY += (double)picY.buf[x] * (double)picY.buf[x];
-#else
-      varY += picY.buf[x] * picY.buf[x];
-#endif
     }
     picY.buf += stride;
   }
@@ -449,13 +421,8 @@ void EncReshape::calcSeqStats(Picture *pcPic, SeqInfo &stats)
       {
         avgU += picU.buf[x];
         avgV += picV.buf[x];
-#if JVET_T0091_LMCS_ENC_OVERFLOW_FIX
         varU += (int64_t)picU.buf[x] * (int64_t)picU.buf[x];
         varV += (int64_t)picV.buf[x] * (int64_t)picV.buf[x];
-#else
-        varU += picU.buf[x] * picU.buf[x];
-        varV += picV.buf[x] * picV.buf[x];
-#endif
       }
       picU.buf += strideC;
       picV.buf += strideC;
@@ -477,6 +444,9 @@ void EncReshape::preAnalyzerLMCS(Picture *pcPic, const uint32_t signalType, cons
   m_sliceReshapeInfo.sliceReshaperModelPresentFlag = true;
   m_sliceReshapeInfo.sliceReshaperEnableFlag = true;
   int modIP = pcPic->getPOC() - pcPic->getPOC() / reshapeCW.rspFpsToIp * reshapeCW.rspFpsToIp;
+#if GDR_ENABLED
+  if (pcPic->cs->slice->isInterGDR()) modIP = 0;
+#endif
   if (sliceType == I_SLICE || (reshapeCW.updateCtrl == 2 && modIP == 0))
   {
     if (m_sliceReshapeInfo.sliceReshaperModelPresentFlag == true)
@@ -711,6 +681,96 @@ void EncReshape::preAnalyzerLMCS(Picture *pcPic, const uint32_t signalType, cons
       const int cTid = m_reshapeCW.rspTid;
       bool enableRsp = m_tcase == 5 ? false : (m_tcase < 5 ? (cTid < m_tcase + 1 ? false : true) : (cTid <= 10 - m_tcase ? true : false));
       m_sliceReshapeInfo.sliceReshaperEnableFlag = enableRsp;
+
+      if (m_sliceReshapeInfo.sliceReshaperEnableFlag)
+      {
+        m_binNum = PIC_CODE_CW_BINS;
+        PelBuf picY = pcPic->getOrigBuf(COMPONENT_Y);
+        const int width = picY.width;
+        const int height = picY.height;
+        const int stride = picY.stride;
+        uint32_t binCnt[PIC_CODE_CW_BINS];
+        std::fill_n(binCnt, m_binNum, 0);
+
+        initSeqStats(m_srcSeqStats);
+        for (uint32_t y = 0; y < height; y++)
+        {
+          for (uint32_t x = 0; x < width; x++)
+          {
+            const Pel pxlY = picY.buf[x];
+            int binLen = m_reshapeLUTSize / m_binNum;
+            uint32_t binIdx = (uint32_t)(pxlY / binLen);
+            binCnt[binIdx]++;
+          }
+          picY.buf += stride;
+        }
+
+        for (int b = 0; b < m_binNum; b++)
+        {
+          m_srcSeqStats.binHist[b] = (double)binCnt[b] / (double)(m_reshapeCW.rspPicSize);
+        }
+
+        double avgY = 0.0;
+        double varY = 0.0;
+        picY = pcPic->getOrigBuf(COMPONENT_Y);
+        for (int y = 0; y < height; y++)
+        {
+          for (int x = 0; x < width; x++)
+          {
+            avgY += picY.buf[x];
+            varY += (double)picY.buf[x] * (double)picY.buf[x];
+          }
+          picY.buf += stride;
+        }
+        avgY = avgY / (width * height);
+        varY = varY / (width * height) - avgY * avgY;
+
+        if (isChromaEnabled(pcPic->chromaFormat))
+        {
+          PelBuf picU = pcPic->getOrigBuf(COMPONENT_Cb);
+          PelBuf picV = pcPic->getOrigBuf(COMPONENT_Cr);
+          const int widthC = picU.width;
+          const int heightC = picU.height;
+          const int strideC = picU.stride;
+          double avgU = 0.0, avgV = 0.0;
+          double varU = 0.0, varV = 0.0;
+          for (int y = 0; y < heightC; y++)
+          {
+            for (int x = 0; x < widthC; x++)
+            {
+              avgU += picU.buf[x];
+              avgV += picV.buf[x];
+              varU += (int64_t)picU.buf[x] * (int64_t)picU.buf[x];
+              varV += (int64_t)picV.buf[x] * (int64_t)picV.buf[x];
+            }
+            picU.buf += strideC;
+            picV.buf += strideC;
+          }
+          avgU = avgU / (widthC * heightC);
+          avgV = avgV / (widthC * heightC);
+          varU = varU / (widthC * heightC) - avgU * avgU;
+          varV = varV / (widthC * heightC) - avgV * avgV;
+          if (varY > 0)
+          {
+            m_srcSeqStats.ratioStdU = sqrt(varU) / sqrt(varY);
+            m_srcSeqStats.ratioStdV = sqrt(varV) / sqrt(varY);
+          }
+        }
+
+        if (m_srcSeqStats.binHist[m_binNum - 1] > 0.0003)
+        {
+          m_sliceReshapeInfo.sliceReshaperEnableFlag = false;
+        }
+        if (m_srcSeqStats.binHist[0] > 0.03)
+        {
+          m_sliceReshapeInfo.sliceReshaperEnableFlag = false;
+        }
+
+        if ((m_srcSeqStats.ratioStdU + m_srcSeqStats.ratioStdV) > 1.5 && m_srcSeqStats.binHist[1] > 0.5)
+        {
+          m_sliceReshapeInfo.sliceReshaperEnableFlag = false;
+        }
+      }
     }
   }
 }
@@ -1244,6 +1304,8 @@ void EncReshape::initLUTfromdQPModel()
     }
     else
     {
+      CHECK((m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset) < (m_initCW >> 3) || (m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset) > ((m_initCW << 3) - 1),
+        "It is a requirement of bitstream conformance that, when lmcsCW[ i ] is not equal to 0, ( lmcsCW[ i ] + lmcsDeltaCrs ) shall be in the range of ( OrgCW >> 3 ) to ( ( OrgCW << 3 ) - 1 ), inclusive.");
       m_invScaleCoef[i] = (int32_t)(m_initCW * (1 << FP_PREC) / m_binCW[i]);
       m_chromaAdjHelpLUT[i] = (int32_t)(m_initCW * (1 << FP_PREC) / (m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset));
     }
@@ -1339,6 +1401,8 @@ void EncReshape::constructReshaperLMCS()
     }
     else
     {
+      CHECK((m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset) < (m_initCW >> 3) || (m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset) > ((m_initCW << 3) - 1),
+        "It is a requirement of bitstream conformance that, when lmcsCW[ i ] is not equal to 0, ( lmcsCW[ i ] + lmcsDeltaCrs ) shall be in the range of ( OrgCW >> 3 ) to ( ( OrgCW << 3 ) - 1 ), inclusive.");
       m_invScaleCoef[i] = (int32_t)(m_initCW * (1 << FP_PREC) / m_binCW[i]);
       m_chromaAdjHelpLUT[i] = (int32_t)(m_initCW * (1 << FP_PREC) / (m_binCW[i] + m_sliceReshapeInfo.chrResScalingOffset));
     }
@@ -1429,45 +1493,5 @@ void EncReshape::adjustLmcsPivot()
     }
   }
 }
-
-#if ENABLE_SPLIT_PARALLELISM
-void EncReshape::copyState(const EncReshape &other)
-{
-  m_srcReshaped     = other.m_srcReshaped;
-  m_picWidth        = other.m_picWidth;
-  m_picHeight       = other.m_picHeight;
-  m_maxCUWidth      = other.m_maxCUWidth;
-  m_maxCUHeight     = other.m_maxCUHeight;
-  m_widthInCtus     = other.m_widthInCtus;
-  m_heightInCtus    = other.m_heightInCtus;
-  m_numCtuInFrame   = other.m_numCtuInFrame;
-  m_exceedSTD       = other.m_exceedSTD;
-  m_binImportance   = other.m_binImportance;
-  m_tcase           = other.m_tcase;
-  m_rateAdpMode     = other.m_rateAdpMode;
-  m_useAdpCW        = other.m_useAdpCW;
-  m_initCWAnalyze   = other.m_initCWAnalyze;
-  m_reshapeCW       = other.m_reshapeCW;
-  memcpy( m_cwLumaWeight, other.m_cwLumaWeight, sizeof( m_cwLumaWeight ) );
-  m_chromaWeight    = other.m_chromaWeight;
-  m_chromaAdj       = other.m_chromaAdj;
-
-  m_sliceReshapeInfo = other.m_sliceReshapeInfo;
-  m_CTUFlag          = other.m_CTUFlag;
-  m_recReshaped      = other.m_recReshaped;
-  m_invLUT           = other.m_invLUT;
-  m_fwdLUT           = other.m_fwdLUT;
-  m_chromaAdjHelpLUT = other.m_chromaAdjHelpLUT;
-  m_binCW            = other.m_binCW;
-  m_initCW           = other.m_initCW;
-  m_reshape          = other.m_reshape;
-  m_reshapePivot     = other.m_reshapePivot;
-  m_inputPivot       = other.m_inputPivot;
-  m_fwdScaleCoef     = other.m_fwdScaleCoef;
-  m_invScaleCoef     = other.m_invScaleCoef;
-  m_lumaBD           = other.m_lumaBD;
-  m_reshapeLUTSize   = other.m_reshapeLUTSize;
-}
-#endif
 //
 //! \}
diff --git a/source/Lib/EncoderLib/EncReshape.h b/source/Lib/EncoderLib/EncReshape.h
index ba9b5195058d2c216e599f37ab1c4d6ed83f90f3..72efd26fed74e36af1735734afdda9609c2a89c5 100644
--- a/source/Lib/EncoderLib/EncReshape.h
+++ b/source/Lib/EncoderLib/EncReshape.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -115,10 +115,6 @@ public:
   Pel * getWeightTable() { return m_cwLumaWeight; }
   double getCWeight() { return m_chromaWeight; }
   void adjustLmcsPivot();
-
-#if ENABLE_SPLIT_PARALLELISM
-  void copyState(const EncReshape& other);
-#endif
 };// END CLASS DEFINITION EncReshape
 
 //! \}
diff --git a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
index 90583a5a4b8a0446a23d3896789dd455e8837cdd..1c0188652a1f2d4225561ba9f99345de23863cfb 100644
--- a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
+++ b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -211,9 +211,9 @@ void EncSampleAdaptiveOffset::SAOProcess( CodingStructure& cs, bool* sliceEnable
 #if ENABLE_QPA
                                           const double lambdaChromaWeight,
 #endif
-                                          const bool bTestSAODisableAtPictureLevel, const double saoEncodingRate, const double saoEncodingRateChroma, const bool isPreDBFSamplesUsed, bool isGreedyMergeEncoding )
+                                          const bool bTestSAODisableAtPictureLevel, const double saoEncodingRate, const double saoEncodingRateChroma, const bool isPreDBFSamplesUsed, bool isGreedyMergeEncoding, bool usingTrueOrg )
 {
-  PelUnitBuf org = cs.getOrgBuf();
+  PelUnitBuf org = usingTrueOrg ? cs.getTrueOrgBuf() : cs.getOrgBuf();
   PelUnitBuf res = cs.getRecoBuf();
   PelUnitBuf src = m_tempBuf;
   memcpy(m_lambda, lambdas, sizeof(m_lambda));
@@ -248,10 +248,9 @@ void EncSampleAdaptiveOffset::SAOProcess( CodingStructure& cs, bool* sliceEnable
 
 }
 
-
-void EncSampleAdaptiveOffset::getPreDBFStatistics(CodingStructure& cs)
+void EncSampleAdaptiveOffset::getPreDBFStatistics( CodingStructure& cs, bool usingTrueOrg )
 {
-  PelUnitBuf org = cs.getOrgBuf();
+  PelUnitBuf org = usingTrueOrg ? cs.getTrueOrgBuf() : cs.getOrgBuf();
   PelUnitBuf rec = cs.getRecoBuf();
   getStatistics(m_preDBFstatData, org, rec, cs, true);
 }
@@ -825,7 +824,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
 
   int     mergeCtuAddr = 1; //Ctu to be merged
   int     groupSize = 1;
-  double  Cost[2] = { 0, 0 };
+  double  cost[2]      = { 0, 0 };
   TempCtx ctxBeforeMerge(m_CtxCache);
   TempCtx ctxAfterMerge(m_CtxCache);
 
@@ -920,13 +919,13 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
       {
         if (ctuRsAddr == (mergeCtuAddr - 1))
         {
-          Cost[0] = minCost;  //previous
+          cost[0]   = minCost;   // previous
           groupSize = 1;
           getMergeList(cs, ctuRsAddr, reconParams, startingMergeList);
         }
         else if (ctuRsAddr == mergeCtuAddr)
         {
-          Cost[1] = minCost;
+          cost[1]  = minCost;
           minCost2 = MAX_DOUBLE;
           for (int tmp = groupSize; tmp >= 0; tmp--)
           {
@@ -980,13 +979,13 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
             ctxAfterMerge = SAOCtx(m_CABACEstimator->getCtx());
           }
 
-          totalCost += Cost[0];
-          totalCost += Cost[1];
+          totalCost += cost[0];
+          totalCost += cost[1];
 
-          if ((Cost[0] + Cost[1]) > minCost2) //merge current CTU
+          if ((cost[0] + cost[1]) > minCost2)   // merge current CTU
           {
             //original merge all
-            totalCost = totalCost - Cost[0] - Cost[1] + minCost2;
+            totalCost                          = totalCost - cost[0] - cost[1] + minCost2;
             codedParams[ctuRsAddr - groupSize] = groupParam;
             for (int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++)
             {
@@ -1007,7 +1006,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
             }
             else //next CTU can be merged with current group
             {
-              Cost[0] = minCost2;
+              cost[0] = minCost2;
               groupSize += 1;
             }
             m_CABACEstimator->getCtx() = SAOCtx(ctxAfterMerge);
@@ -1016,7 +1015,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
           {
             mergeCtuAddr += 1;
             // Current block will be the starting block for successive operations
-            Cost[0] = Cost[1];
+            cost[0] = cost[1];
             getMergeList(cs, ctuRsAddr, reconParams, startingMergeList);
             groupSize = 1;
             m_CABACEstimator->getCtx() = SAOCtx(ctxStart);
@@ -1026,7 +1025,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
             {
               mergeCtuAddr += 1;
             }
-          } //else, if(Cost[0] + Cost[1] > minCost2)
+          }   // else, if(cost[0] + cost[1] > minCost2)
         }//else if (ctuRsAddr == mergeCtuAddr)
       }
       else
@@ -1083,6 +1082,8 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
       sliceEnabled[componentIndex] = false;
     }
     m_CABACEstimator->getCtx() = SAOCtx(ctxPicStart);
+
+    resYuv.copyFrom(srcYuv);
   }
 
   EncSampleAdaptiveOffset::disabledRate( cs, reconParams, saoEncodingRate, saoEncodingRateChroma );
diff --git a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h
index 8a0530ec791d63d711cbb65ac10b94b13e93cc42..0627649a90ace364744ad6e4aa271965f523ec62 100644
--- a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h
+++ b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -93,10 +93,10 @@ public:
 #if ENABLE_QPA
                    const double lambdaChromaWeight,
 #endif
-                   const bool bTestSAODisableAtPictureLevel, const double saoEncodingRate, const double saoEncodingRateChroma, const bool isPreDBFSamplesUsed, bool isGreedyMergeEncoding );
+                   const bool bTestSAODisableAtPictureLevel, const double saoEncodingRate, const double saoEncodingRateChroma, const bool isPreDBFSamplesUsed, bool isGreedyMergeEncoding, bool usingTrueOrg );
 
   void disabledRate( CodingStructure& cs, SAOBlkParam* reconParams, const double saoEncodingRate, const double saoEncodingRateChroma );
-  void getPreDBFStatistics(CodingStructure& cs);
+  void getPreDBFStatistics( CodingStructure& cs, bool usingTrueOrg );
 private: //methods
 
   void deriveLoopFilterBoundaryAvailibility(CodingStructure& cs, const Position &pos, bool& isLeftAvail, bool& isAboveAvail, bool& isAboveLeftAvail) const;
diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp
index 5ac56442f3ca24529e1ad0fe3846725e12b64554..7aeb6ba3d5c52727016333beec22354105354668 100644
--- a/source/Lib/EncoderLib/EncSlice.cpp
+++ b/source/Lib/EncoderLib/EncSlice.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -175,7 +175,10 @@ static void filterAndCalculateAverageEnergies (const Pel* pSrc, const int  iSrcS
   hpEner = double(saAct) / double((iWidth - 2) * (iHeight - 2));
 
   // lower limit, compensate for highpass amplification
-  if (hpEner < double(1 << (uBitDepth - 4))) hpEner = double(1 << (uBitDepth - 4));
+  if (hpEner < double(1 << (uBitDepth - 4)))
+  {
+    hpEner = double(1 << (uBitDepth - 4));
+  }
 }
 
 #ifndef GLOBAL_AVERAGING
@@ -229,13 +232,19 @@ static int getGlaringColorQPOffset (Picture* const pcPic, const int ctuAddr, Sli
     }
     else avgCompValue = pcPic->getOrigBuf (pcPic->block (compID)).computeAvg();
 
-    if (chrValue > avgCompValue) chrValue = avgCompValue; // minimum of the DC offsets
+    if (chrValue > avgCompValue)
+    {
+      chrValue = avgCompValue;   // minimum of the DC offsets
+    }
   }
   CHECK (chrValue < 0, "DC offset cannot be negative!");
 
   chrValue = (int)avgLumaValue - chrValue;
 
-  if (chrValue > midLevel) return apprI3Log2 (double (chrValue * chrValue) / double (midLevel * midLevel));
+  if (chrValue > midLevel)
+  {
+    return apprI3Log2(double(chrValue * chrValue) / double(midLevel * midLevel));
+  }
 
   return 0;
 }
@@ -279,7 +288,10 @@ static int applyQPAdaptationChroma (Picture* const pcPic, Slice* const pcSlice,
         // change mean picture QP index based on picture's average luma value (Sharp)
         if (pcEncCfg->getLumaLevelToDeltaQPMapping().mode == LUMALVL_TO_DQP_NUM_MODES)
         {
-          if (meanLuma == MAX_UINT) meanLuma = pcPic->getOrigBuf().Y().computeAvg();
+          if (meanLuma == MAX_UINT)
+          {
+            meanLuma = pcPic->getOrigBuf().Y().computeAvg();
+          }
 
           averageAdaptedLumaQP = Clip3 (0, MAX_QP, averageAdaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth));
         }
@@ -316,7 +328,7 @@ static int applyQPAdaptationChroma (Picture* const pcPic, Slice* const pcSlice,
  \param isField       true for field coding
  */
 void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr, const int iGOPid, Slice*& rpcSlice, const bool isField,
-                            bool isEncodeLtRef, int layerId)
+                            bool isEncodeLtRef, int layerId, NalUnitType nalType)
 {
   double dQP;
   double dLambda;
@@ -347,7 +359,7 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
     rpcSlice->setSignDataHidingEnabledFlag( m_pcCfg->getSignDataHidingEnabledFlag() );
     rpcSlice->setTSResidualCodingDisabledFlag( false );
 
-    CHECK( (m_pcCfg->getDepQuantEnabledFlag() || m_pcCfg->getSignDataHidingEnabledFlag() ) 
+    CHECK( (m_pcCfg->getDepQuantEnabledFlag() || m_pcCfg->getSignDataHidingEnabledFlag() )
            && rpcSlice->getTSResidualCodingDisabledFlag() , "TSRC cannot be bypassed if either DQ or SDH are enabled at slice level.");
   }
   else
@@ -422,10 +434,22 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
       if(m_pcCfg->getDecodingRefreshType() == 3)
       {
         eSliceType = (pocLast == 0 || pocCurr % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) && (!useIlRef) ? I_SLICE : eSliceType;
+#if GDR_ENABLED
+        if (m_pcCfg->getGdrEnabled() && (pocCurr >= m_pcCfg->getGdrPocStart()) && ((pocCurr - m_pcCfg->getGdrPocStart()) % m_pcCfg->getGdrPeriod() == 0))
+        {
+          eSliceType = B_SLICE;
+        }
+#endif
       }
       else
       {
         eSliceType = (pocLast == 0 || (pocCurr - (isField ? 1 : 0)) % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) && (!useIlRef) ? I_SLICE : eSliceType;
+#if GDR_ENABLED
+        if (m_pcCfg->getGdrEnabled() && (pocCurr >= m_pcCfg->getGdrPocStart()) && ((pocCurr - m_pcCfg->getGdrPocStart()) % m_pcCfg->getGdrPeriod() == 0))
+        {
+          eSliceType = B_SLICE;
+        }
+#endif
       }
     }
   }
@@ -448,14 +472,13 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
   // ------------------------------------------------------------------------------------------------------------------
 
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
+  rpcSlice->setNalUnitType(nalType);
   dQP = m_pcCfg->getQPForPicture(iGOPid, rpcSlice);
 #else
   dQP = m_pcCfg->getBaseQP();
   if(eSliceType!=I_SLICE)
   {
-    {
-      dQP += m_pcCfg->getGOPEntry(iGOPid).m_QPOffset;
-    }
+    dQP += m_pcCfg->getGOPEntry(iGOPid).m_QPOffset;
   }
 
   // modify QP
@@ -589,10 +612,22 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
         if(m_pcCfg->getDecodingRefreshType() == 3)
         {
           eSliceType = (pocLast == 0 || pocCurr % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) && (!useIlRef) ? I_SLICE : eSliceType;
+#if GDR_ENABLED
+          if (m_pcCfg->getGdrEnabled() && (pocCurr >= m_pcCfg->getGdrPocStart()) && ((pocCurr - m_pcCfg->getGdrPocStart()) % m_pcCfg->getGdrPeriod() == 0))
+          {
+            eSliceType = B_SLICE;
+          }
+#endif
         }
         else
         {
           eSliceType = (pocLast == 0 || (pocCurr - (isField ? 1 : 0)) % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) && (!useIlRef) ? I_SLICE : eSliceType;
+#if GDR_ENABLED
+          if (m_pcCfg->getGdrEnabled() && (pocCurr >= m_pcCfg->getGdrPocStart()) && ((pocCurr - m_pcCfg->getGdrPocStart()) % m_pcCfg->getGdrPeriod() == 0))
+          {
+            eSliceType = B_SLICE;
+          }
+#endif
         }
       }
     }
@@ -613,12 +648,16 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
   rpcSlice->setSliceQp           ( iQP );
   rpcSlice->setSliceQpDelta      ( 0 );
   pcPic->setLossyQPValue(iQP);
+  if ((!rpcSlice->getTSResidualCodingDisabledFlag()) && ( rpcSlice->getSPS()->getSpsRangeExtension().getTSRCRicePresentFlag() ))
+  {
+    rpcSlice->set_tsrc_index(Clip3(MIN_TSRC_RICE, MAX_TSRC_RICE, (int) ((19 - iQP) / 6)) - 1);
+  }
 #if !W0038_CQP_ADJ
   rpcSlice->setSliceChromaQpDelta( COMPONENT_Cb, 0 );
   rpcSlice->setSliceChromaQpDelta( COMPONENT_Cr, 0 );
   rpcSlice->setSliceChromaQpDelta( JOINT_CbCr,   0 );
 #endif
-  rpcSlice->setUseChromaQpAdj( rpcSlice->getPPS()->getCuChromaQpOffsetListEnabledFlag() );
+  rpcSlice->setUseChromaQpAdj( rpcSlice->getPPS()->getCuChromaQpOffsetListEnabledFlag() && m_pcCfg->getCuChromaQpOffsetEnabled() );
   rpcSlice->setNumRefIdx(REF_PIC_LIST_0, m_pcCfg->getRPLEntry(0, iGOPid).m_numRefPicsActive);
   rpcSlice->setNumRefIdx(REF_PIC_LIST_1, m_pcCfg->getRPLEntry(1, iGOPid).m_numRefPicsActive);
 
@@ -641,31 +680,31 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
     {
       if ( rpcSlice->getDeblockingFilterOverrideFlag() && eSliceType!=I_SLICE)
       {
-        rpcSlice->setDeblockingFilterBetaOffsetDiv2( m_pcCfg->getGOPEntry(iGOPid).m_betaOffsetDiv2 + m_pcCfg->getLoopFilterBetaOffset()  );
-        rpcSlice->setDeblockingFilterTcOffsetDiv2( m_pcCfg->getGOPEntry(iGOPid).m_tcOffsetDiv2 + m_pcCfg->getLoopFilterTcOffset() );
+        rpcSlice->setDeblockingFilterBetaOffsetDiv2( m_pcCfg->getGOPEntry(iGOPid).m_betaOffsetDiv2 + m_pcCfg->getDeblockingFilterBetaOffset()  );
+        rpcSlice->setDeblockingFilterTcOffsetDiv2  ( m_pcCfg->getGOPEntry(iGOPid).m_tcOffsetDiv2   + m_pcCfg->getDeblockingFilterTcOffset() );
         if( rpcSlice->getPPS()->getPPSChromaToolFlag() )
         {
-          rpcSlice->setDeblockingFilterCbBetaOffsetDiv2( m_pcCfg->getGOPEntry(iGOPid).m_CbBetaOffsetDiv2 + m_pcCfg->getLoopFilterCbBetaOffset() );
-          rpcSlice->setDeblockingFilterCbTcOffsetDiv2( m_pcCfg->getGOPEntry(iGOPid).m_CbTcOffsetDiv2 + m_pcCfg->getLoopFilterCbTcOffset() );
-          rpcSlice->setDeblockingFilterCrBetaOffsetDiv2( m_pcCfg->getGOPEntry(iGOPid).m_CrBetaOffsetDiv2 + m_pcCfg->getLoopFilterCrBetaOffset() );
-          rpcSlice->setDeblockingFilterCrTcOffsetDiv2( m_pcCfg->getGOPEntry(iGOPid).m_CrTcOffsetDiv2 + m_pcCfg->getLoopFilterCrTcOffset() );
+          rpcSlice->setDeblockingFilterCbBetaOffsetDiv2( m_pcCfg->getGOPEntry(iGOPid).m_CbBetaOffsetDiv2 + m_pcCfg->getDeblockingFilterCbBetaOffset() );
+          rpcSlice->setDeblockingFilterCbTcOffsetDiv2  ( m_pcCfg->getGOPEntry(iGOPid).m_CbTcOffsetDiv2   + m_pcCfg->getDeblockingFilterCbTcOffset() );
+          rpcSlice->setDeblockingFilterCrBetaOffsetDiv2( m_pcCfg->getGOPEntry(iGOPid).m_CrBetaOffsetDiv2 + m_pcCfg->getDeblockingFilterCrBetaOffset() );
+          rpcSlice->setDeblockingFilterCrTcOffsetDiv2  ( m_pcCfg->getGOPEntry(iGOPid).m_CrTcOffsetDiv2   + m_pcCfg->getDeblockingFilterCrTcOffset() );
         }
         else
         {
           rpcSlice->setDeblockingFilterCbBetaOffsetDiv2( rpcSlice->getDeblockingFilterBetaOffsetDiv2() );
-          rpcSlice->setDeblockingFilterCbTcOffsetDiv2( rpcSlice->getDeblockingFilterTcOffsetDiv2() );
+          rpcSlice->setDeblockingFilterCbTcOffsetDiv2  ( rpcSlice->getDeblockingFilterTcOffsetDiv2() );
           rpcSlice->setDeblockingFilterCrBetaOffsetDiv2( rpcSlice->getDeblockingFilterBetaOffsetDiv2() );
-          rpcSlice->setDeblockingFilterCrTcOffsetDiv2( rpcSlice->getDeblockingFilterTcOffsetDiv2() );
+          rpcSlice->setDeblockingFilterCrTcOffsetDiv2  ( rpcSlice->getDeblockingFilterTcOffsetDiv2() );
         }
       }
       else
       {
-        rpcSlice->setDeblockingFilterBetaOffsetDiv2( m_pcCfg->getLoopFilterBetaOffset() );
-        rpcSlice->setDeblockingFilterTcOffsetDiv2( m_pcCfg->getLoopFilterTcOffset() );
-        rpcSlice->setDeblockingFilterCbBetaOffsetDiv2( m_pcCfg->getLoopFilterCbBetaOffset() );
-        rpcSlice->setDeblockingFilterCbTcOffsetDiv2( m_pcCfg->getLoopFilterCbTcOffset() );
-        rpcSlice->setDeblockingFilterCrBetaOffsetDiv2( m_pcCfg->getLoopFilterCrBetaOffset() );
-        rpcSlice->setDeblockingFilterCrTcOffsetDiv2( m_pcCfg->getLoopFilterCrTcOffset() );
+        rpcSlice->setDeblockingFilterBetaOffsetDiv2  ( m_pcCfg->getDeblockingFilterBetaOffset() );
+        rpcSlice->setDeblockingFilterTcOffsetDiv2    ( m_pcCfg->getDeblockingFilterTcOffset() );
+        rpcSlice->setDeblockingFilterCbBetaOffsetDiv2( m_pcCfg->getDeblockingFilterCbBetaOffset() );
+        rpcSlice->setDeblockingFilterCbTcOffsetDiv2  ( m_pcCfg->getDeblockingFilterCbTcOffset() );
+        rpcSlice->setDeblockingFilterCrBetaOffsetDiv2( m_pcCfg->getDeblockingFilterCrBetaOffset() );
+        rpcSlice->setDeblockingFilterCrTcOffsetDiv2  ( m_pcCfg->getDeblockingFilterCrTcOffset() );
       }
     }
   }
@@ -695,6 +734,131 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
     m_pcCuEncoder->getIbcHashMap().destroy();
     m_pcCuEncoder->getIbcHashMap().init( pcPic->cs->pps->getPicWidthInLumaSamples(), pcPic->cs->pps->getPicHeightInLumaSamples() );
   }
+#if GDR_ENABLED
+  if (m_pcCfg->getGdrEnabled())
+  {
+    int gdrPocStart = m_pcCuEncoder->getEncCfg()->getGdrPocStart();
+    int gdrPeriod = m_pcCuEncoder->getEncCfg()->getGdrPeriod();
+    int gdrInterval = m_pcCuEncoder->getEncCfg()->getGdrInterval();
+
+    int picWidth = rpcSlice->getPPS()->getPicWidthInLumaSamples();
+
+    int curPoc = rpcSlice->getPOC();
+    int gdrPoc = (curPoc - gdrPocStart) % gdrPeriod;
+
+    pcPic->cs->picHeader->setGdrPicFlag(false);
+    pcPic->cs->picHeader->setRecoveryPocCnt(0);
+    pcPic->cs->picHeader->setInGdrInterval(false);
+
+    pcPic->cs->picHeader->setVirtualBoundariesPresentFlag(false);
+
+    int  offset = (curPoc < gdrPocStart) ? 0 : (((curPoc - gdrPocStart) / gdrPeriod) * gdrPeriod);
+    int  actualGdrStart = gdrPocStart + offset;
+    int  actualGdrInterval = min(gdrInterval, (int)(pcPic->getPicWidthInLumaSamples() / 8));
+    int  recoveryPocCnt = actualGdrInterval - 1;
+    int  recoveryPicPoc = actualGdrStart + recoveryPocCnt;
+
+    bool isInGdrInterval = (curPoc >= actualGdrStart) && (curPoc < recoveryPicPoc);
+    bool isOutGdrInterval = !isInGdrInterval;
+    bool isGdrPic = (actualGdrStart == curPoc);
+
+#if GDR_ENC_TRACE
+    printf("\n");
+    printf("-poc:%d gdrPocStart:%d actualGdrStart:%d actualGdrInterval:%d actualGdrEndPoc:%d\n", rpcSlice->getPOC(), gdrPocStart, actualGdrStart, actualGdrInterval, recoveryPicPoc - 1);
+#endif
+
+    // for non-GDR-period pictures
+    if ((curPoc < gdrPocStart) || isOutGdrInterval)
+    {
+      pcPic->cs->picHeader->setInGdrInterval(false);
+      pcPic->cs->picHeader->setVirtualBoundariesPresentFlag(false);
+
+      pcPic->cs->picHeader->setNumHorVirtualBoundaries(0);
+      pcPic->cs->picHeader->setNumVerVirtualBoundaries(0);
+
+#if GDR_ENC_TRACE
+      printf("-poc:%d no virtual boundary\n", rpcSlice->getPOC());
+#endif
+    }
+    // for GDR interval pictures
+    else
+    {
+      if (curPoc == recoveryPicPoc)
+      {
+        pcPic->cs->picHeader->setInGdrInterval(false);
+      }
+      else
+      {
+        pcPic->cs->picHeader->setInGdrInterval(true);
+      }
+
+      pcPic->cs->picHeader->setVirtualBoundariesPresentFlag(true);
+
+      if (isGdrPic)
+      {
+        pcPic->cs->picHeader->setGdrOrIrapPicFlag(true);
+        pcPic->cs->picHeader->setGdrPicFlag(true);
+
+        pcPic->cs->picHeader->setRecoveryPocCnt(recoveryPocCnt);
+        m_pcGOPEncoder->setLastGdrIntervalPoc(recoveryPicPoc - 1);
+      }
+
+      pcPic->cs->picHeader->setNumHorVirtualBoundaries(0);
+      pcPic->cs->picHeader->setNumVerVirtualBoundaries(1);
+
+      int begGdrX;
+      int endGdrX;
+      int m1, m2, n1;
+
+      double dd = (picWidth / (double)gdrInterval);
+      int mm = (int)((picWidth / (double)gdrInterval) + 0.49999);
+      m1 = ((mm + 7) >> 3) << 3;
+      m2 = ((mm + 0) >> 3) << 3;
+
+      if (dd > mm && m1 == m2)
+      {
+        m1 = m1 + 8;
+      }
+
+      n1 = (picWidth - m2 * gdrInterval) / 8;
+
+      if (gdrPoc < n1)
+      {
+        begGdrX = m1 * gdrPoc;
+        endGdrX = begGdrX + m1;
+      }
+      else
+      {
+        begGdrX = m1 * n1 + m2 * (gdrPoc - n1);
+        endGdrX = begGdrX + m2;
+        if (picWidth <= begGdrX)
+        {
+          begGdrX = picWidth;
+          endGdrX = picWidth;
+        }
+      }
+
+      pcPic->cs->picHeader->setVirtualBoundariesPosX(endGdrX, 0);
+
+#if GDR_ENC_TRACE
+      printf("\n");
+      printf("-poc:%d beg:%d end:%d\n", rpcSlice->getPOC(), begGdrX, endGdrX);
+#endif
+    }
+  }
+#endif
+
+  if (rpcSlice->getSPS()->getSpsRangeExtension().getRrcRiceExtensionEnableFlag())
+  {
+    int bitDepth = rpcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA);
+    int baseLevel = (bitDepth > 12) ? (rpcSlice->isIntra() ? 5 : 2 * 5) : (rpcSlice->isIntra() ? 2 * 5 : 3 * 5);
+    rpcSlice->setRiceBaseLevel(baseLevel);
+  }
+  else
+  {
+    rpcSlice->setRiceBaseLevel(4);
+  }
+
 }
 
 double EncSlice::initializeLambda(const Slice* slice, const int GOPid, const int refQP, const double dQP)
@@ -726,7 +890,10 @@ double EncSlice::initializeLambda(const Slice* slice, const int GOPid, const int
       }
       else
 #endif
-      dQPFactor = 0.57 * (1.0 - Clip3(0.0, 0.5, 0.05 * double (slice->getPic()->fieldPic ? numberBFrames >> 1 : numberBFrames)));
+      {
+        dQPFactor =
+          0.57 * (1.0 - Clip3(0.0, 0.5, 0.05 * double(slice->getPic()->fieldPic ? numberBFrames >> 1 : numberBFrames)));
+      }
     }
   }
 #if X0038_LAMBDA_FROM_QP_CAPABILITY
@@ -797,7 +964,11 @@ void EncSlice::resetQP( Picture* pic, int sliceQP, double lambda )
 #endif
   setUpLambda(slice, lambda, sliceQP);
 #if WCG_EXT
+#if !JVET_W0043
+  if (!(m_pcCfg->getLumaLevelToDeltaQPMapping().isEnabled() || m_pcCfg->getSmoothQPReductionEnable()))
+#else
   if (!m_pcCfg->getLumaLevelToDeltaQPMapping().isEnabled())
+#endif
   {
     m_pcRdCost->saveUnadjustedLambda();
   }
@@ -855,7 +1026,10 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
 #if SHARP_LUMA_DELTA_QP
           && !useSharpLumaDQP
 #endif
-          ) iQPFixed = MAX_QP;
+      )
+      {
+        iQPFixed = MAX_QP;
+      }
     }
 #if SHARP_LUMA_DELTA_QP
 
@@ -878,9 +1052,11 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
     }
 #endif
 
-    if (iQPIndex >= MAX_QP) iQPFixed = MAX_QP;
-    else
-    if (iQPFixed != iQPIndex)
+    if (iQPIndex >= MAX_QP)
+    {
+      iQPFixed = MAX_QP;
+    }
+    else if (iQPFixed != iQPIndex)
     {
       const double* oldLambdas = pcSlice->getLambdas();
       const double  corrFactor = pow (2.0, double(iQPFixed - iQPIndex) / 3.0);
@@ -921,7 +1097,10 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
 #if SHARP_LUMA_DELTA_QP
               && !useSharpLumaDQP
 #endif
-              ) iQPAdapt = MAX_QP;
+          )
+          {
+            iQPAdapt = MAX_QP;
+          }
           CHECK (meanLuma != (uint32_t)pcPic->m_iOffsetCtu[ctuRsAddr], "luma DC offsets don't match");
         }
 #if SHARP_LUMA_DELTA_QP
@@ -961,7 +1140,10 @@ static bool applyQPAdaptation (Picture* const pcPic,       Slice* const pcSlice,
           uAbsDCless = uint32_t((uint64_t(uAbsDCless) * 64*64 + (blockSize >> 1)) / blockSize);
         }
 
-        if (uAbsDCless < 64*64) uAbsDCless = 64*64;  // limit to 1
+        if (uAbsDCless < 64 * 64)
+        {
+          uAbsDCless = 64 * 64;   // limit to 1
+        }
 
         // reduce QP index if CTU would be fully quantized to zero
         if (uAbsDCless < uRefScale)
@@ -1023,7 +1205,10 @@ static int applyQPAdaptationSubCtu (CodingStructure &cs, const UnitArea ctuArea,
   const int       bitDepth = cs.slice->getSPS()->getBitDepth (CHANNEL_TYPE_LUMA); // overall image bit-depth
   const int   adaptedCtuQP = pcPic ? pcPic->m_iOffsetCtu[ctuAddr] : cs.slice->getSliceQpBase();
 
-  if (!pcPic || cs.slice->getCuQpDeltaSubdiv() == 0) return adaptedCtuQP;
+  if (!pcPic || cs.slice->getCuQpDeltaSubdiv() == 0)
+  {
+    return adaptedCtuQP;
+  }
 
   for (unsigned addr = 0; addr < cs.picture->m_subCtuQP.size(); addr++)
   {
@@ -1075,7 +1260,10 @@ static int applyQPAdaptationSubCtu (CodingStructure &cs, const UnitArea ctuArea,
 #endif
       }
     }
-    if (sumAct <= 0.0) return adaptedCtuQP;
+    if (sumAct <= 0.0)
+    {
+      return adaptedCtuQP;
+    }
 
     sumAct = double(numAct) / sumAct; // 1.0 / (average CTU activity)
 
@@ -1133,7 +1321,7 @@ void EncSlice::setSearchRange( Slice* pcSlice )
   }
 }
 
-void EncSlice::setLosslessSlice(Picture* pcPic, bool islossless) 
+void EncSlice::setLosslessSlice(Picture* pcPic, bool islossless)
 {
   Slice* slice = pcPic->slices[getSliceSegmentIdx()];
   slice->setLossless(islossless);
@@ -1145,13 +1333,12 @@ void EncSlice::setLosslessSlice(Picture* pcPic, bool islossless)
       int losslessQp = LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP - ((slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8) * 6);
       slice->setSliceQp(losslessQp); // update the slice/base QPs
 
-     slice->setTSResidualCodingDisabledFlag(m_pcCfg->getTSRCdisableLL() ? true : false);
-
+      slice->setTSResidualCodingDisabledFlag(m_pcCfg->getTSRCdisableLL() ? true : false);
     }
     else
     {
-        slice->setSliceQp(pcPic->getLossyQPValue());
-        slice->setTSResidualCodingDisabledFlag(false);
+      slice->setSliceQp(pcPic->getLossyQPValue());
+      slice->setTSResidualCodingDisabledFlag(false);
     }
   }
 }
@@ -1177,8 +1364,6 @@ void EncSlice::precompressSlice( Picture* pcPic )
 
   Slice* pcSlice        = pcPic->slices[getSliceSegmentIdx()];
 
-
-
   double     dPicRdCostBest = MAX_DOUBLE;
   uint32_t       uiQpIdxBest = 0;
 
@@ -1239,7 +1424,6 @@ void EncSlice::calCostSliceI(Picture* pcPic) // TODO: this only analyses the fir
   const int      shift             = sps.getBitDepth(CHANNEL_TYPE_LUMA)-8;
   const int      offset            = (shift>0)?(1<<(shift-1)):0;
 
-
   for( uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++ )
   {
     uint32_t ctuRsAddr = pcSlice->getCtuAddrInSlice( ctuIdx );
@@ -1252,7 +1436,6 @@ void EncSlice::calCostSliceI(Picture* pcPic) // TODO: this only analyses the fir
 
     (m_pcRateCtrl->getRCPic()->getLCU(ctuRsAddr)).m_costIntra=(iSumHad+offset)>>shift;
     iSumHadSlice += (m_pcRateCtrl->getRCPic()->getLCU(ctuRsAddr)).m_costIntra;
-
   }
   m_pcRateCtrl->getRCPic()->setTotalIntraCost(iSumHadSlice);
 }
@@ -1290,6 +1473,17 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
 
   Slice* const pcSlice    = pcPic->slices[getSliceSegmentIdx()];
 
+  if (pcSlice->getSPS()->getSpsRangeExtension().getRrcRiceExtensionEnableFlag())
+  {
+    int bitDepth = pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA);
+    int baseLevel = (bitDepth > 12) ? (pcSlice->isIntra() ? 5 : 2 * 5 ) : (pcSlice->isIntra() ? 2 * 5 : 3 * 5);
+    pcSlice->setRiceBaseLevel(baseLevel);
+  }
+  else
+  {
+    pcSlice->setRiceBaseLevel(4);
+  }
+
   // initialize cost values - these are used by precompressSlice (they should be parameters).
   m_uiPicTotalBits  = 0;
   m_uiPicDist       = 0;
@@ -1298,14 +1492,6 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
 
   m_CABACEstimator->initCtxModels( *pcSlice );
 
-#if ENABLE_SPLIT_PARALLELISM
-  for( int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++ )
-  {
-    CABACWriter* cw = m_pcLib->getCABACEncoder( jId )->getCABACEstimator( pcSlice->getSPS() );
-    cw->initCtxModels( *pcSlice );
-  }
-
-#endif
   m_pcCuEncoder->getModeCtrl()->setFastDeltaQp(bFastDeltaQP);
 
 
@@ -1330,9 +1516,7 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
     xCheckWPEnable( pcSlice );
   }
 
-
-
-    pcPic->m_prevQP[0] = pcPic->m_prevQP[1] = pcSlice->getSliceQp();
+  pcPic->m_prevQP[0] = pcPic->m_prevQP[1] = pcSlice->getSliceQp();
 
   CHECK( pcPic->m_prevQP[0] == std::numeric_limits<int>::max(), "Invalid previous QP" );
 
@@ -1353,14 +1537,7 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
                            (m_pcCfg->getBaseQP() >= 38) || (m_pcCfg->getSourceWidth() <= 512 && m_pcCfg->getSourceHeight() <= 320), m_adaptedLumaQP))
     {
       m_CABACEstimator->initCtxModels (*pcSlice);
-#if ENABLE_SPLIT_PARALLELISM
-      for (int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++)
-      {
-        CABACWriter* cw = m_pcLib->getCABACEncoder (jId)->getCABACEstimator (pcSlice->getSPS());
-        cw->initCtxModels (*pcSlice);
-      }
-#endif
-        pcPic->m_prevQP[0] = pcPic->m_prevQP[1] = pcSlice->getSliceQp();
+      pcPic->m_prevQP[0] = pcPic->m_prevQP[1] = pcSlice->getSliceQp();
       if (pcSlice->getFirstCtuRsAddrInSlice() == 0)
       {
         cs.currQP[0] = cs.currQP[1] = pcSlice->getSliceQp(); // cf code above
@@ -1389,7 +1566,10 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
   m_pcInterSearch->resetUniMvList();
   ::memset(g_isReusedUniMVsFilled, 0, sizeof(g_isReusedUniMVsFilled));
   encodeCtus( pcPic, bCompressEntireSlice, bFastDeltaQP, m_pcLib );
-  if (checkPLTRatio) m_pcLib->checkPltStats( pcPic );
+  if (checkPLTRatio)
+  {
+    m_pcLib->checkPltStats(pcPic);
+  }
 }
 
 void EncSlice::checkDisFracMmvd( Picture* pcPic, uint32_t startCtuTsAddr, uint32_t boundingCtuTsAddr )
@@ -1424,13 +1604,13 @@ void EncSlice::checkDisFracMmvd( Picture* pcPic, uint32_t startCtuTsAddr, uint32
   {
     pcPic->cs->picHeader->setDisFracMMVD( true );
   }
-  if (!pcPic->cs->picHeader->getDisFracMMVD()) {
+  if (!pcPic->cs->picHeader->getDisFracMMVD())
+  {
     bool useIntegerMVD = (pcPic->lwidth()*pcPic->lheight() > 1920 * 1080);
     pcPic->cs->picHeader->setDisFracMMVD( useIntegerMVD );
   }
 }
 
-
 void EncSlice::setJointCbCrModes( CodingStructure& cs, const Position topLeftLuma, const Size sizeLuma )
 {
   bool              sgnFlag = true;
@@ -1479,12 +1659,9 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
   const int iQPIndex              = pcSlice->getSliceQpBase();
 #endif
 
-#if ENABLE_SPLIT_PARALLELISM
-  const int       dataId          = 0;
-#endif
-  CABACWriter*    pCABACWriter    = pEncLib->getCABACEncoder( PARL_PARAM0( dataId ) )->getCABACEstimator( pcSlice->getSPS() );
-  TrQuant*        pTrQuant        = pEncLib->getTrQuant( PARL_PARAM0( dataId ) );
-  RdCost*         pRdCost         = pEncLib->getRdCost( PARL_PARAM0( dataId ) );
+  CABACWriter*    pCABACWriter    = pEncLib->getCABACEncoder()->getCABACEstimator( pcSlice->getSPS() );
+  TrQuant*        pTrQuant        = pEncLib->getTrQuant();
+  RdCost*         pRdCost         = pEncLib->getRdCost();
   EncCfg*         pCfg            = pEncLib;
   RateCtrl*       pRateCtrl       = pEncLib->getRateCtrl();
   pRdCost->setLosslessRDCost(pcSlice->isLossless());
@@ -1506,7 +1683,7 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
   prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
   currQP[0] = currQP[1] = pcSlice->getSliceQp();
 
-    prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
+  prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
   if ( pcSlice->getSPS()->getFpelMmvdEnabledFlag() ||
       (pcSlice->getSPS()->getIBCFlag() && m_pcCuEncoder->getEncCfg()->getIBCHashSearch()))
   {
@@ -1516,6 +1693,29 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
       int hashBlkHitPerc = m_pcCuEncoder->getIbcHashMap().calHashBlkMatchPerc(cs.area.Y());
       cs.slice->setDisableSATDForRD(hashBlkHitPerc > 59);
     }
+    if ((pcSlice->getSPS()->getSpsRangeExtension().getTSRCRicePresentFlag()) && (m_pcGOPEncoder->getPreQP() != pcSlice->getSliceQp()) && (pcPic->cs->pps->getNumSlicesInPic() == 1) && (pcSlice->get_tsrc_index() > 0) && (pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) <= 12))
+    {
+      uint32_t totalCtu  = 0;
+      uint32_t hashRatio = 0;
+      for (uint32_t ctuIdx = 0; ctuIdx < pcSlice->getNumCtuInSlice(); ctuIdx++)
+      {
+        const uint32_t ctuRsAddr     = pcSlice->getCtuAddrInSlice(ctuIdx);
+        const uint32_t ctuXPosInCtus = ctuRsAddr % widthInCtus;
+        const uint32_t ctuYPosInCtus = ctuRsAddr / widthInCtus;
+        const Position pos(ctuXPosInCtus * pcv.maxCUWidth, ctuYPosInCtus * pcv.maxCUHeight);
+        const UnitArea ctuArea(cs.area.chromaFormat, Area(pos.x, pos.y, pcv.maxCUWidth, pcv.maxCUHeight));
+
+        hashRatio += m_pcCuEncoder->getIbcHashMap().calHashBlkMatchPerc(cs.area.Y());
+        totalCtu++;
+      }
+      if (totalCtu > 0)
+      {
+        if ((hashRatio < 4200) || (hashRatio < (41 * totalCtu)))
+        {
+          pcSlice->set_tsrc_index(0);
+        }
+      }
+    }
   }
 
   // for every CTU in the slice
@@ -1676,22 +1876,16 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
     if (pcSlice->getSPS()->getUseLmcs())
     {
       m_pcCuEncoder->setDecCuReshaperInEncCU(m_pcLib->getReshaper(), pcSlice->getSPS()->getChromaFormatIdc());
-
-#if ENABLE_SPLIT_PARALLELISM
-      for (int jId = 1; jId < m_pcLib->getNumCuEncStacks(); jId++)
-      {
-        m_pcLib->getCuEncoder(jId)->setDecCuReshaperInEncCU(m_pcLib->getReshaper(jId), pcSlice->getSPS()->getChromaFormatIdc());
-      }
-#endif
     }
     if( !cs.slice->isIntra() && pCfg->getMCTSEncConstraint() )
     {
       pcPic->mctsInfo.init( &cs, ctuRsAddr );
     }
 
-  if (pCfg->getSwitchPOC() != pcPic->poc || ctuRsAddr >= pCfg->getDebugCTU())
-    m_pcCuEncoder->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP );
-
+    if (pCfg->getSwitchPOC() != pcPic->poc || ctuRsAddr >= pCfg->getDebugCTU())
+    {
+      m_pcCuEncoder->compressCtu(cs, ctuArea, ctuRsAddr, prevQP, currQP);
+    }
 #if K0149_BLOCK_STATISTICS
     getAndStoreBlockStatistics(cs, ctuArea);
 #endif
@@ -1700,13 +1894,7 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
     pCABACWriter->coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr, true, true );
     const int numberOfWrittenBits = int( pCABACWriter->getEstFracBits() >> SCALE_BITS );
 
-#if ENABLE_SPLIT_PARALLELISM
-#pragma omp critical
-#endif
     pcSlice->setSliceBits( ( uint32_t ) ( pcSlice->getSliceBits() + numberOfWrittenBits ) );
-#if ENABLE_SPLIT_PARALLELISM
-#pragma omp critical
-#endif
 
     // Store probabilities of first CTU in line into buffer - used only if wavefront-parallel-processing is enabled.
     if( cs.pps->ctuIsTileColBd( ctuXPosInCtus ) && pEncLib->getEntropyCodingSyncEnabledFlag() )
@@ -1782,7 +1970,6 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
     // for last Ctu in the slice
     if (pcSlice->getPPS()->getNumSubPics() >= 2 && curSubPic.getTreatedAsPicFlag() && ctuIdx == (pcSlice->getNumCtuInSlice() - 1))
     {
-
       int subPicX = (int)curSubPic.getSubPicLeft();
       int subPicY = (int)curSubPic.getSubPicTop();
       int subPicWidth = (int)curSubPic.getSubPicWidthInLumaSample();
@@ -1926,7 +2113,6 @@ void EncSlice::encodeSlice   ( Picture* pcPic, OutputBitstream* pcSubstreams, ui
     m_encCABACTableIdx = pcSlice->getSliceType();
   }
   numBinsCoded += m_CABACWriter->getNumBins();
-
 }
 
 
diff --git a/source/Lib/EncoderLib/EncSlice.h b/source/Lib/EncoderLib/EncSlice.h
index 9feaffd7561a3818d0b0b31427f6eb7d240d25e5..17ec52a6bbe203b90fb59d68b613a6939f823133 100644
--- a/source/Lib/EncoderLib/EncSlice.h
+++ b/source/Lib/EncoderLib/EncSlice.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -118,7 +118,7 @@ public:
 
   /// preparation of slice encoding (reference marking, QP and lambda)
   void    initEncSlice        ( Picture*  pcPic, const int pocLast, const int pocCurr,
-                                const int iGOPid, Slice*& rpcSlice, const bool isField, bool isEncodeLtRef, int layerId );
+                                const int iGOPid, Slice*& rpcSlice, const bool isField, bool isEncodeLtRef, int layerId, NalUnitType nalType );
 
   void    resetQP             ( Picture* pic, int sliceQP, double lambda );
 
diff --git a/source/Lib/EncoderLib/EncTemporalFilter.cpp b/source/Lib/EncoderLib/EncTemporalFilter.cpp
index c9c74336ff102cc7074193b3fbf0262267fea1ab..867effa41fa62b2f32141cc9cd0fc34ef7788f4b 100644
--- a/source/Lib/EncoderLib/EncTemporalFilter.cpp
+++ b/source/Lib/EncoderLib/EncTemporalFilter.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -43,10 +43,10 @@
 // Constructor / destructor / initialization / destroy
 // ====================================================================================================================
 
-const int EncTemporalFilter::m_range = 2;
-const double EncTemporalFilter::m_chromaFactor = 0.55;
-const double EncTemporalFilter::m_sigmaMultiplier = 9.0;
-const double EncTemporalFilter::m_sigmaZeroPoint = 10.0;
+const int EncTemporalFilter::m_range = 4;
+const double EncTemporalFilter::m_chromaFactor    =  0.55;
+const double EncTemporalFilter::m_sigmaMultiplier =  9.0;
+const double EncTemporalFilter::m_sigmaZeroPoint  = 10.0;
 const int EncTemporalFilter::m_motionVectorFactor = 16;
 const int EncTemporalFilter::m_padding = 128;
 const int EncTemporalFilter::m_interpolationFilter[16][8] =
@@ -69,12 +69,12 @@ const int EncTemporalFilter::m_interpolationFilter[16][8] =
   {   0,   0,  -2,   4,  64,  -3,   1,   0 }    //15-->-->
 };
 
-const double EncTemporalFilter::m_refStrengths[3][2] =
+const double EncTemporalFilter::m_refStrengths[3][4] =
 { // abs(POC offset)
-  //  1,    2
-  {0.85, 0.60},  // m_range * 2
-  {1.20, 1.00},  // m_range
-  {0.30, 0.30}   // otherwise
+  //  1,    2     3     4
+  {0.85, 0.57, 0.41, 0.33},  // m_range * 2
+  {1.13, 0.97, 0.81, 0.57},  // m_range
+  {0.30, 0.30, 0.30, 0.30}   // otherwise
 };
 
 EncTemporalFilter::EncTemporalFilter() :
@@ -105,23 +105,23 @@ void EncTemporalFilter::init(const int frameSkip,
   m_FrameSkip = frameSkip;
   for (int i = 0; i < MAX_NUM_CHANNEL_TYPE; i++)
   {
-    m_inputBitDepth[i] = inputBitDepth[i];
+    m_inputBitDepth[i]       = inputBitDepth[i];
     m_MSBExtendedBitDepth[i] = msbExtendedBitDepth[i];
-    m_internalBitDepth[i] = internalBitDepth[i];
+    m_internalBitDepth[i]    = internalBitDepth[i];
   }
 
-  m_sourceWidth = width;
+  m_sourceWidth  = width;
   m_sourceHeight = height;
   for (int i = 0; i < 2; i++)
   {
     m_pad[i] = pad[i];
   }
   m_clipInputVideoToRec709Range = rec709;
-  m_inputFileName = filename;
+  m_inputFileName   = filename;
   m_chromaFormatIDC = inputChromaFormatIDC;
   m_inputColourSpaceConvert = colorSpaceConv;
   m_area = Area(0, 0, width, height);
-  m_QP = qp;
+  m_QP   = qp;
   m_temporalFilterStrengths = temporalFilterStrengths;
   m_gopBasedTemporalFilterFutureReference = gopBasedTemporalFilterFutureReference;
 }
@@ -153,11 +153,10 @@ bool EncTemporalFilter::filter(PelStorage *orgPic, int receivedPoc)
     yuvFrames.open(m_inputFileName, false, m_inputBitDepth, m_MSBExtendedBitDepth, m_internalBitDepth);
     yuvFrames.skipFrames(std::max(offset + receivedPoc - m_range, 0), m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIDC);
 
-
     std::deque<TemporalFilterSourcePicInfo> srcFrameInfo;
 
     int firstFrame = receivedPoc + offset - m_range;
-    int lastFrame = receivedPoc + offset + m_range;
+    int lastFrame  = receivedPoc + offset + m_range;
     if (!m_gopBasedTemporalFilterFutureReference)
     {
       lastFrame = receivedPoc + offset - 1;
@@ -192,7 +191,7 @@ bool EncTemporalFilter::filter(PelStorage *orgPic, int receivedPoc)
         continue;
       }
       srcFrameInfo.push_back(TemporalFilterSourcePicInfo());
-      TemporalFilterSourcePicInfo &srcPic=srcFrameInfo.back();
+      TemporalFilterSourcePicInfo &srcPic = srcFrameInfo.back();
 
       PelStorage dummyPicBufferTO; // Only used temporary in yuvFrames.read
       srcPic.picBuffer.create(m_chromaFormatIDC, m_area, 0, m_padding);
@@ -240,20 +239,20 @@ bool EncTemporalFilter::filter(PelStorage *orgPic, int receivedPoc)
 
 void EncTemporalFilter::subsampleLuma(const PelStorage &input, PelStorage &output, const int factor) const
 {
-  const int newWidth = input.Y().width / factor;
+  const int newWidth  = input.Y().width  / factor;
   const int newHeight = input.Y().height / factor;
   output.create(m_chromaFormatIDC, Area(0, 0, newWidth, newHeight), 0, m_padding);
 
-  const Pel* srcRow = input.Y().buf;
+  const Pel* srcRow   = input.Y().buf;
   const int srcStride = input.Y().stride;
-  Pel *dstRow = output.Y().buf;
+  Pel *dstRow         = output.Y().buf;
   const int dstStride = output.Y().stride;
 
-  for (int y = 0; y < newHeight; y++, srcRow+=factor*srcStride, dstRow+=dstStride)
+  for (int y = 0; y < newHeight; y++, srcRow += factor * srcStride, dstRow += dstStride)
   {
     const Pel *inRow      = srcRow;
-    const Pel *inRowBelow = srcRow+srcStride;
-    Pel *target     = dstRow;
+    const Pel *inRowBelow = srcRow + srcStride;
+    Pel *target           = dstRow;
 
     for (int x = 0; x < newWidth; x++)
     {
@@ -275,19 +274,19 @@ int EncTemporalFilter::motionErrorLuma(const PelStorage &orig,
   const int besterror = 8 * 8 * 1024 * 1024) const
 {
   const Pel* origOrigin = orig.Y().buf;
-  const int origStride  = orig.Y().stride;
-  const Pel *buffOrigin = buffer.Y().buf;
-  const int buffStride  = buffer.Y().stride;
+  const int  origStride = orig.Y().stride;
+  const Pel* buffOrigin = buffer.Y().buf;
+  const int  buffStride = buffer.Y().stride;
 
-  int error = 0;// dx * 10 + dy * 10;
+  int error = 0;
   if (((dx | dy) & 0xF) == 0)
   {
     dx /= m_motionVectorFactor;
     dy /= m_motionVectorFactor;
     for (int y1 = 0; y1 < bs; y1++)
     {
-      const Pel* origRowStart = origOrigin + (y+y1)*origStride + x;
-      const Pel* bufferRowStart = buffOrigin + (y+y1+dy)*buffStride + (x+dx);
+      const Pel* origRowStart   = origOrigin + (y + y1) * origStride + x;
+      const Pel* bufferRowStart = buffOrigin + (y + y1 + dy) * buffStride + (x + dx);
       for (int x1 = 0; x1 < bs; x1 += 2)
       {
         int diff = origRowStart[x1] - bufferRowStart[x1];
@@ -311,7 +310,7 @@ int EncTemporalFilter::motionErrorLuma(const PelStorage &orig,
     for (int y1 = 1; y1 < bs + 7; y1++)
     {
       const int yOffset = y + y1 + (dy >> 4) - 3;
-      const Pel *sourceRow = buffOrigin + (yOffset)*buffStride + 0;
+      const Pel *sourceRow = buffOrigin + yOffset * buffStride + 0;
       for (int x1 = 0; x1 < bs; x1++)
       {
         sum = 0;
@@ -329,10 +328,10 @@ int EncTemporalFilter::motionErrorLuma(const PelStorage &orig,
       }
     }
 
-    const Pel maxSampleValue = (1<<m_internalBitDepth[CHANNEL_TYPE_LUMA])-1;
+    const Pel maxSampleValue = (1 << m_internalBitDepth[CHANNEL_TYPE_LUMA]) - 1;
     for (int y1 = 0; y1 < bs; y1++)
     {
-      const Pel *origRow = origOrigin + (y+y1)*origStride + 0;
+      const Pel *origRow = origOrigin + (y + y1) * origStride;
       for (int x1 = 0; x1 < bs; x1++)
       {
         sum = 0;
@@ -360,15 +359,15 @@ int EncTemporalFilter::motionErrorLuma(const PelStorage &orig,
 void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const PelStorage &orig, const PelStorage &buffer, const int blockSize,
   const Array2D<MotionVector> *previous, const int factor, const bool doubleRes) const
 {
-  int range = 5;
+  int range = doubleRes ? 0 : 5;
   const int stepSize = blockSize;
 
   const int origWidth  = orig.Y().width;
   const int origHeight = orig.Y().height;
 
-  for (int blockY = 0; blockY + blockSize < origHeight; blockY += stepSize)
+  for (int blockY = 0; blockY + blockSize <= origHeight; blockY += stepSize)
   {
-    for (int blockX = 0; blockX + blockSize < origWidth; blockX += stepSize)
+    for (int blockX = 0; blockX + blockSize <= origWidth; blockX += stepSize)
     {
       MotionVector best;
 
@@ -378,10 +377,10 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
       }
       else
       {
-        for (int py = -2; py <= 2; py++)
+        for (int py = -1; py <= 1; py++)
         {
           int testy = blockY / (2 * blockSize) + py;
-          for (int px = -2; px <= 2; px++)
+          for (int px = -1; px <= 1; px++)
           {
             int testx = blockX / (2 * blockSize) + px;
             if ((testx >= 0) && (testx < origWidth / (2 * blockSize)) && (testy >= 0) && (testy < origHeight / (2 * blockSize)))
@@ -395,6 +394,11 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
             }
           }
         }
+        int error = motionErrorLuma(orig, buffer, blockX, blockY, 0, 0, blockSize, best.error);   // also evaluate the zero motion vector as a candidate
+        if (error < best.error)   // keep it only when it is strictly better than the best candidate so far
+        {
+          best.set(0, 0, error);
+        }
       }
       MotionVector prevBest = best;
       for (int y2 = prevBest.y / m_motionVectorFactor - range; y2 <= prevBest.y / m_motionVectorFactor + range; y2++)
@@ -409,7 +413,7 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
         }
       }
       if (doubleRes)
-      { // merge into one loop, probably with precision array (here [12, 3] or maybe [4, 1]) with setable number of iterations
+      {
         prevBest = best;
         int doubleRange = 3 * 4;
         for (int y2 = prevBest.y - doubleRange; y2 <= prevBest.y + doubleRange; y2 += 4)
@@ -421,7 +425,6 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
             {
               best.set(x2, y2, error);
             }
-
           }
         }
 
@@ -436,11 +439,51 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
             {
               best.set(x2, y2, error);
             }
-
           }
         }
+      }
+
+      if (blockY > 0)   // not the first block row: re-test the vector already stored for the block directly above
+      {
+        MotionVector aboveMV = mvs.get(blockX / stepSize, (blockY - stepSize) / stepSize);
+        int error = motionErrorLuma(orig, buffer, blockX, blockY, aboveMV.x, aboveMV.y, blockSize, best.error);
+        if (error < best.error)
+        {
+          best.set(aboveMV.x, aboveMV.y, error);
+        }
+      }
+      if (blockX > 0)   // not the first block column: re-test the vector already stored for the block to the left
+      {
+        MotionVector leftMV = mvs.get((blockX - stepSize) / stepSize, blockY / stepSize);
+        int error = motionErrorLuma(orig, buffer, blockX, blockY, leftMV.x, leftMV.y, blockSize, best.error);
+        if (error < best.error)
+        {
+          best.set(leftMV.x, leftMV.y, error);
+        }
+      }
+
+      // calculate the average sample value of the original luma block
+      double avg = 0.0;
+      for (int x1 = 0; x1 < blockSize; x1++)
+      {
+        for (int y1 = 0; y1 < blockSize; y1++)
+        {
+          avg = avg + orig.Y().at(blockX + x1, blockY + y1);
+        }
+      }
+      avg = avg / (blockSize * blockSize);
 
+      // accumulate the sum of squared deviations from the average (not divided by the sample count)
+      double variance = 0;
+      for (int x1 = 0; x1 < blockSize; x1++)
+      {
+        for (int y1 = 0; y1 < blockSize; y1++)
+        {
+          int pix = orig.Y().at(blockX + x1, blockY + y1);
+          variance = variance + (pix - avg) * (pix - avg);
+        }
       }
+      best.error = (int)(20 * ((best.error + 5.0) / (variance + 5.0)) + (best.error / (blockSize * blockSize)) / 50);
       mvs.get(blockX / stepSize, blockY / stepSize) = best;
     }
   }
@@ -448,7 +491,7 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
 
 void EncTemporalFilter::motionEstimation(Array2D<MotionVector> &mv, const PelStorage &orgPic, const PelStorage &buffer, const PelStorage &origSubsampled2, const PelStorage &origSubsampled4) const
 {
-  const int width = m_sourceWidth;
+  const int width  = m_sourceWidth;
   const int height = m_sourceHeight;
   Array2D<MotionVector> mv_0(width / 16, height / 16);
   Array2D<MotionVector> mv_1(width / 16, height / 16);
@@ -469,25 +512,25 @@ void EncTemporalFilter::motionEstimation(Array2D<MotionVector> &mv, const PelSto
 
 void EncTemporalFilter::applyMotion(const Array2D<MotionVector> &mvs, const PelStorage &input, PelStorage &output) const
 {
-  static const int lumaBlockSize=8;
+  static const int lumaBlockSize = 8;
 
-  for(int c=0; c< getNumberValidComponents(m_chromaFormatIDC); c++)
+  for(int c = 0; c < getNumberValidComponents(m_chromaFormatIDC); c++)
   {
-    const ComponentID compID=(ComponentID)c;
-    const int csx=getComponentScaleX(compID, m_chromaFormatIDC);
-    const int csy=getComponentScaleY(compID, m_chromaFormatIDC);
-    const int blockSizeX = lumaBlockSize>>csx;
-    const int blockSizeY = lumaBlockSize>>csy;
+    const ComponentID compID = (ComponentID)c;
+    const int csx = getComponentScaleX(compID, m_chromaFormatIDC);
+    const int csy = getComponentScaleY(compID, m_chromaFormatIDC);
+    const int blockSizeX = lumaBlockSize >> csx;
+    const int blockSizeY = lumaBlockSize >> csy;
     const int height = input.bufs[c].height;
     const int width  = input.bufs[c].width;
 
-    const Pel maxValue = (1<<m_internalBitDepth[toChannelType(compID)])-1;
+    const Pel maxValue = (1 << m_internalBitDepth[toChannelType(compID)]) - 1;
 
     const Pel *srcImage = input.bufs[c].buf;
-    const int srcStride  = input.bufs[c].stride;
+    const int srcStride = input.bufs[c].stride;
 
     Pel *dstImage = output.bufs[c].buf;
-    int dstStride  = output.bufs[c].stride;
+    int dstStride = output.bufs[c].stride;
 
     for (int y = 0, blockNumY = 0; y + blockSizeY <= height; y += blockSizeY, blockNumY++)
     {
@@ -496,23 +539,23 @@ void EncTemporalFilter::applyMotion(const Array2D<MotionVector> &mvs, const PelS
         const MotionVector &mv = mvs.get(blockNumX,blockNumY);
         const int dx = mv.x >> csx ;
         const int dy = mv.y >> csy ;
-        const int xInt = mv.x >> (4+csx) ;
-        const int yInt = mv.y >> (4+csy) ;
+        const int xInt = mv.x >> (4 + csx) ;
+        const int yInt = mv.y >> (4 + csy) ;
 
         const int *xFilter = m_interpolationFilter[dx & 0xf];
         const int *yFilter = m_interpolationFilter[dy & 0xf]; // will add 6 bit.
-        const int numFilterTaps=7;
-        const int centreTapOffset=3;
+        const int numFilterTaps   = 7;
+        const int centerTapOffset = 3;
 
         int tempArray[lumaBlockSize + numFilterTaps][lumaBlockSize];
 
         for (int by = 1; by < blockSizeY + numFilterTaps; by++)
         {
-          const int yOffset = y + by + yInt - centreTapOffset;
-          const Pel *sourceRow = srcImage+yOffset*srcStride;
+          const int yOffset = y + by + yInt - centerTapOffset;
+          const Pel *sourceRow = srcImage + yOffset * srcStride;
           for (int bx = 0; bx < blockSizeX; bx++)
           {
-            int base = x + bx + xInt - centreTapOffset;
+            int base = x + bx + xInt - centerTapOffset;
             const Pel *rowStart = sourceRow + base;
 
             int sum = 0;
@@ -527,10 +570,10 @@ void EncTemporalFilter::applyMotion(const Array2D<MotionVector> &mvs, const PelS
           }
         }
 
-        Pel *dstRow = dstImage+y*dstStride;
-        for (int by = 0; by < blockSizeY; by++, dstRow+=dstStride)
+        Pel *dstRow = dstImage + y * dstStride;
+        for (int by = 0; by < blockSizeY; by++, dstRow += dstStride)
         {
-          Pel *dstPel=dstRow+x;
+          Pel *dstPel = dstRow + x;
           for (int bx = 0; bx < blockSizeX; bx++, dstPel++)
           {
             int sum = 0;
@@ -553,7 +596,7 @@ void EncTemporalFilter::applyMotion(const Array2D<MotionVector> &mvs, const PelS
 }
 
 void EncTemporalFilter::bilateralFilter(const PelStorage &orgPic,
-  const std::deque<TemporalFilterSourcePicInfo> &srcFrameInfo,
+  std::deque<TemporalFilterSourcePicInfo> &srcFrameInfo,
   PelStorage &newOrgPic,
   double overallStrength) const
 {
@@ -566,7 +609,7 @@ void EncTemporalFilter::bilateralFilter(const PelStorage &orgPic,
   }
 
   int refStrengthRow = 2;
-  if (numRefs == m_range*2)
+  if (numRefs == m_range * 2)
   {
     refStrengthRow = 0;
   }
@@ -578,44 +621,90 @@ void EncTemporalFilter::bilateralFilter(const PelStorage &orgPic,
   const double lumaSigmaSq = (m_QP - m_sigmaZeroPoint) * (m_QP - m_sigmaZeroPoint) * m_sigmaMultiplier;
   const double chromaSigmaSq = 30 * 30;
 
-  for(int c=0; c< getNumberValidComponents(m_chromaFormatIDC); c++)
+  for(int c = 0; c < getNumberValidComponents(m_chromaFormatIDC); c++)
   {
-    const ComponentID compID=(ComponentID)c;
+    const ComponentID compID = (ComponentID)c;
     const int height = orgPic.bufs[c].height;
     const int width  = orgPic.bufs[c].width;
-    const Pel *srcPelRow = orgPic.bufs[c].buf;
-    const int srcStride = orgPic.bufs[c].stride;
-    Pel *dstPelRow = newOrgPic.bufs[c].buf;
-    const int dstStride = newOrgPic.bufs[c].stride;
-    const double sigmaSq = isChroma(compID)? chromaSigmaSq : lumaSigmaSq;
+    const Pel* srcPelRow = orgPic.bufs[c].buf;
+    const int  srcStride = orgPic.bufs[c].stride;
+          Pel* dstPelRow = newOrgPic.bufs[c].buf;
+    const int  dstStride = newOrgPic.bufs[c].stride;
+    const double sigmaSq = isChroma(compID) ? chromaSigmaSq : lumaSigmaSq;
     const double weightScaling = overallStrength * (isChroma(compID) ? m_chromaFactor : 0.4);
-    const Pel maxSampleValue = (1<<m_internalBitDepth[toChannelType(compID)])-1;
-    const double bitDepthDiffWeighting=1024.0 / (maxSampleValue+1);
-
-    for (int y = 0; y < height; y++, srcPelRow+=srcStride, dstPelRow+=dstStride)
+    const Pel maxSampleValue   = (1 << m_internalBitDepth[toChannelType(compID)]) - 1;
+    const double bitDepthDiffWeighting = 1024.0 / (maxSampleValue + 1);
+    const int lumaBlockSize = 8;
+    const int csx = getComponentScaleX(compID, m_chromaFormatIDC);
+    const int csy = getComponentScaleY(compID, m_chromaFormatIDC);
+    const int blockSizeX = lumaBlockSize >> csx;
+    const int blockSizeY = lumaBlockSize >> csy;
+
+    for (int y = 0; y < height; y++, srcPelRow += srcStride, dstPelRow += dstStride)
     {
-      const Pel *srcPel=srcPelRow;
-      Pel *dstPel=dstPelRow;
+      const Pel *srcPel = srcPelRow;
+      Pel *dstPel = dstPelRow;
       for (int x = 0; x < width; x++, srcPel++, dstPel++)
       {
         const int orgVal = (int) *srcPel;
         double temporalWeightSum = 1.0;
         double newVal = (double) orgVal;
+        if ((y % blockSizeY == 0) && (x % blockSizeX == 0))   // top-left sample of a block: compute the per-reference noise value once per block
+        {
+          for (int i = 0; i < numRefs; i++)
+          {
+            double variance = 0, diffsum = 0;
+            for (int y1 = 0; y1 < blockSizeY - 1; y1++)   // stops one row short so the sample below stays inside the block
+            {
+              for (int x1 = 0; x1 < blockSizeX - 1; x1++)   // stops one column short so the sample to the right stays inside the block
+              {
+                int pix  = *(srcPel + y1 * srcStride + x1);         // original sample at (x + x1, y + y1); row offset must match the one used for ref
+                int pixR = *(srcPel + y1 * srcStride + x1 + 1);     // original sample one column to the right
+                int pixD = *(srcPel + (y1 + 1) * srcStride + x1);   // original sample one row below
+                int ref  = *(correctedPics[i].bufs[c].buf + ((y + y1) * correctedPics[i].bufs[c].stride + x + x1));
+                int refR = *(correctedPics[i].bufs[c].buf + ((y + y1) * correctedPics[i].bufs[c].stride + x + x1 + 1));
+                int refD = *(correctedPics[i].bufs[c].buf + ((y + y1 + 1) * correctedPics[i].bufs[c].stride + x + x1));
+
+                int diff  = pix  - ref;
+                int diffR = pixR - refR;
+                int diffD = pixD - refD;
+
+                variance += diff * diff;                       // energy of the original-minus-reference difference
+                diffsum  += (diffR - diff) * (diffR - diff);   // horizontal change of that difference
+                diffsum  += (diffD - diff) * (diffD - diff);   // vertical change of that difference
+              }
+            }
+            srcFrameInfo[i].mvs.get(x / blockSizeX, y / blockSizeY).noise = (int) round((300 * variance + 50) / (10 * diffsum + 50));   // stored per block and per reference; read back as "noise" when weighting
+          }
+        }
+        double minError = 9999999;
+        for (int i = 0; i < numRefs; i++)
+        {
+          minError = std::min(minError, (double) srcFrameInfo[i].mvs.get(x / blockSizeX, y / blockSizeY).error);
+        }
         for (int i = 0; i < numRefs; i++)
         {
-          const Pel *pCorrectedPelPtr=correctedPics[i].bufs[c].buf+(y*correctedPics[i].bufs[c].stride+x);
+          const int error = srcFrameInfo[i].mvs.get(x / blockSizeX, y / blockSizeY).error;
+          const int noise = srcFrameInfo[i].mvs.get(x / blockSizeX, y / blockSizeY).noise;
+          const Pel *pCorrectedPelPtr = correctedPics[i].bufs[c].buf + (y * correctedPics[i].bufs[c].stride + x);
           const int refVal = (int) *pCorrectedPelPtr;
           double diff = (double)(refVal - orgVal);
           diff *= bitDepthDiffWeighting;
           double diffSq = diff * diff;
-          const int index = std::min(1, std::abs(srcFrameInfo[i].origOffset) - 1);
-          const double weight = weightScaling * m_refStrengths[refStrengthRow][index] * exp(-diffSq / (2 * sigmaSq));
+          const int index = std::min(3, std::abs(srcFrameInfo[i].origOffset) - 1);
+          double ww = 1, sw = 1;
+          ww *= (noise < 25) ? 1.0 : 0.6;
+          sw *= (noise < 25) ? 1.0 : 0.8;
+          ww *= (error < 50) ? 1.2 : ((error > 100) ? 0.6 : 1.0);
+          sw *= (error < 50) ? 1.0 : 0.8;
+          ww *= ((minError + 1) / (error + 1));
+          double weight = weightScaling * m_refStrengths[refStrengthRow][index] * ww * exp(-diffSq / (2 * sw * sigmaSq));
           newVal += weight * refVal;
           temporalWeightSum += weight;
         }
         newVal /= temporalWeightSum;
         Pel sampleVal = (Pel)round(newVal);
-        sampleVal=(sampleVal<0?0 : (sampleVal>maxSampleValue ? maxSampleValue : sampleVal));
+        sampleVal = (sampleVal < 0 ? 0 : (sampleVal > maxSampleValue ? maxSampleValue : sampleVal));
         *dstPel = sampleVal;
       }
     }
diff --git a/source/Lib/EncoderLib/EncTemporalFilter.h b/source/Lib/EncoderLib/EncTemporalFilter.h
index b46b265dc783b548290155278be0aa3708ab5f41..42f2f88c8663d2038a9fe87a1c3a05f7d8ed6b1f 100644
--- a/source/Lib/EncoderLib/EncTemporalFilter.h
+++ b/source/Lib/EncoderLib/EncTemporalFilter.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -51,7 +51,8 @@ struct MotionVector
 {
   int x, y;
   int error;
-  MotionVector() : x(0), y(0), error(INT_LEAST32_MAX) {}
+  int noise;
+  MotionVector() : x(0), y(0), error(INT_LEAST32_MAX), noise(0) {}
   void set(int vectorX, int vectorY, int errorValue) { x = vectorX; y = vectorY; error = errorValue; }
 };
 
@@ -67,21 +68,21 @@ public:
 
   void allocate(int width, int height, const T& value=T())
   {
-    m_width=width;
-    m_height=height;
-    v.resize(std::size_t(m_width*m_height), value);
+    m_width  = width;
+    m_height = height;
+    v.resize(std::size_t(m_width * m_height), value);
   }
 
   T& get(int x, int y)
   {
-    assert(x<m_width && y<m_height);
-    return v[y*m_width+x];
+    assert(x >= 0 && y >= 0 && x < m_width && y < m_height);   // also reject negative indices, which would index before the start of v
+    return v[y * m_width + x];                                 // row-major: element (x, y) lives at y * m_width + x
   }
   }
 
   const T& get(int x, int y) const
   {
-    assert(x<m_width && y<m_height);
-    return v[y*m_width+x];
+    assert(x >= 0 && y >= 0 && x < m_width && y < m_height);   // also reject negative indices, which would index before the start of v
+    return v[y * m_width + x];                                 // row-major: element (x, y) lives at y * m_width + x
   }
   }
 };
 
@@ -129,7 +130,7 @@ private:
   static const int m_motionVectorFactor;
   static const int m_padding;
   static const int m_interpolationFilter[16][8];
-  static const double m_refStrengths[3][2];
+  static const double m_refStrengths[3][4];
 
   // Private member variables
   int m_FrameSkip;
@@ -155,7 +156,7 @@ private:
     const Array2D<MotionVector> *previous=0, const int factor = 1, const bool doubleRes = false) const;
   void motionEstimation(Array2D<MotionVector> &mvs, const PelStorage &orgPic, const PelStorage &buffer, const PelStorage &origSubsampled2, const PelStorage &origSubsampled4) const;
 
-  void bilateralFilter(const PelStorage &orgPic, const std::deque<TemporalFilterSourcePicInfo> &srcFrameInfo, PelStorage &newOrgPic, double overallStrength) const;
+  void bilateralFilter(const PelStorage &orgPic, std::deque<TemporalFilterSourcePicInfo> &srcFrameInfo, PelStorage &newOrgPic, double overallStrength) const;
   void applyMotion(const Array2D<MotionVector> &mvs, const PelStorage &input, PelStorage &output) const;
 }; // END CLASS DEFINITION EncTemporalFilter
 
diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp
index ff9ba841d315778eeaba31500454145b1d63d480..039078e9ee68a5eef40b6d17e5fcf85c3f92f8cc 100644
--- a/source/Lib/EncoderLib/InterSearch.cpp
+++ b/source/Lib/EncoderLib/InterSearch.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -110,6 +110,9 @@ InterSearch::InterSearch()
 
   setWpScalingDistParam( -1, REF_PIC_LIST_X, nullptr );
   m_affMVList = nullptr;
+#if GDR_ENABLED
+  m_affMVListSolid = nullptr;
+#endif
   m_affMVListSize = 0;
   m_affMVListIdx = 0;
   m_uniMvList = nullptr;
@@ -158,6 +161,14 @@ void InterSearch::destroy()
     delete[] m_affMVList;
     m_affMVList = nullptr;
   }
+#if GDR_ENABLED
+  if (m_affMVListSolid)
+  {
+    delete[] m_affMVListSolid;
+    m_affMVListSolid = nullptr;
+  }
+#endif
+
   m_affMVListIdx = 0;
   m_affMVListSize = 0;
   if (m_uniMvList)
@@ -177,13 +188,6 @@ void InterSearch::setTempBuffers( CodingStructure ****pSplitCS, CodingStructure
   m_pSaveCS  = pSaveCS;
 }
 
-#if ENABLE_SPLIT_PARALLELISM
-void InterSearch::copyState( const InterSearch& other )
-{
-  memcpy( m_aaiAdaptSR, other.m_aaiAdaptSR, sizeof( m_aaiAdaptSR ) );
-}
-#endif
-
 InterSearch::~InterSearch()
 {
   if (m_isInitialized)
@@ -263,7 +267,15 @@ void InterSearch::init( EncCfg*        pcEncCfg,
   m_pTempPel = new Pel[maxCUWidth*maxCUHeight];
   m_affMVListMaxSize = (pcEncCfg->getIntraPeriod() == (uint32_t)-1) ? AFFINE_ME_LIST_SIZE_LD : AFFINE_ME_LIST_SIZE;
   if (!m_affMVList)
+  {
     m_affMVList = new AffineMVInfo[m_affMVListMaxSize];
+#if GDR_ENABLED
+    if (!m_affMVListSolid)
+    {
+      m_affMVListSolid = new AffineMVInfoSolid[m_affMVListMaxSize];
+    }
+#endif
+  }
   m_affMVListIdx = 0;
   m_affMVListSize = 0;
   m_uniMvListMaxSize = 15;
@@ -284,8 +296,15 @@ void InterSearch::resetSavedAffineMotion()
     {
       m_affineMotion.acMvAffine4Para[i][j] = Mv( 0, 0 );
       m_affineMotion.acMvAffine6Para[i][j] = Mv( 0, 0 );
+#if GDR_ENABLED
+      m_affineMotion.acMvAffine4ParaSolid[i][j] = true;
+      m_affineMotion.acMvAffine6ParaSolid[i][j] = true;
+#endif
     }
     m_affineMotion.acMvAffine6Para[i][2] = Mv( 0, 0 );
+#if GDR_ENABLED
+    m_affineMotion.acMvAffine6ParaSolid[i][2] = true;
+#endif
 
     m_affineMotion.affine4ParaRefIdx[i] = -1;
     m_affineMotion.affine6ParaRefIdx[i] = -1;
@@ -298,7 +317,11 @@ void InterSearch::resetSavedAffineMotion()
   m_affineMotion.affine6ParaAvail = false;
 }
 
+#if GDR_ENABLED
+void InterSearch::storeAffineMotion(Mv acAffineMv[2][3], bool acAffineMvSolid[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int bcwIdx)
+#else
 void InterSearch::storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int bcwIdx )
+#endif
 {
   if ( ( bcwIdx == BCW_DEFAULT || !m_affineMotion.affine6ParaAvail ) && affineType == AFFINEMODEL_6PARAM )
   {
@@ -307,6 +330,9 @@ void InterSearch::storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2
       for ( int j = 0; j < 3; j++ )
       {
         m_affineMotion.acMvAffine6Para[i][j] = acAffineMv[i][j];
+#if GDR_ENABLED
+        m_affineMotion.acMvAffine6ParaSolid[i][j] = acAffineMvSolid[i][j];
+#endif
       }
       m_affineMotion.affine6ParaRefIdx[i] = affineRefIdx[i];
     }
@@ -320,6 +346,9 @@ void InterSearch::storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2
       for ( int j = 0; j < 2; j++ )
       {
         m_affineMotion.acMvAffine4Para[i][j] = acAffineMv[i][j];
+#if GDR_ENABLED
+        m_affineMotion.acMvAffine4ParaSolid[i][j] = acAffineMvSolid[i][j];
+#endif
       }
       m_affineMotion.affine4ParaRefIdx[i] = affineRefIdx[i];
     }
@@ -703,21 +732,46 @@ inline void InterSearch::xTZ8PointDiamondSearch( IntTZSearchStruct& rcStruct,
     } // iDist <= 8
   } // iDist == 1
 }
+#if GDR_ENABLED
+Distortion InterSearch::xPatternRefinement(
+  const PredictionUnit& pu,
+  RefPicList eRefPicList,
+  int iRefIdx,
+  const CPelBuf* pcPatternKey,
+  Mv baseRefMv,
+  int iFrac, Mv& rcMvFrac,
+  bool bAllowUseOfHadamard,
+  bool& rbCleanCandExist)
+#else
 
 Distortion InterSearch::xPatternRefinement( const CPelBuf* pcPatternKey,
                                             Mv baseRefMv,
                                             int iFrac, Mv& rcMvFrac,
                                             bool bAllowUseOfHadamard )
+#endif
 {
   Distortion  uiDist;
   Distortion  uiDistBest  = std::numeric_limits<Distortion>::max();
   uint32_t        uiDirecBest = 0;
 
+#if GDR_ENABLED
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool uiDistOk = false;
+  bool uiDistBestOk = false;
+  bool allOk = true;
+#endif
   Pel*  piRefPos;
   int iRefStride = pcPatternKey->width + 1;
   m_pcRdCost->setDistParam( m_cDistParam, *pcPatternKey, m_filteredBlock[0][0][0], iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && bAllowUseOfHadamard );
 
   const Mv* pcMvRefine = (iFrac == 2 ? s_acMvRefineH : s_acMvRefineQ);
+#if GDR_ENABLED
+  if (isEncodeGdrClean)
+  {
+    rbCleanCandExist = false;
+  }
+#endif
   for (uint32_t i = 0; i < 9; i++)
   {
     if (m_skipFracME && i > 0)
@@ -747,12 +801,51 @@ Distortion InterSearch::xPatternRefinement( const CPelBuf* pcPatternKey,
     uiDist = m_cDistParam.distFunc( m_cDistParam );
     uiDist += m_pcRdCost->getCostOfVectorWithPredictor( cMvTest.getHor(), cMvTest.getVer(), 0 );
 
+#if GDR_ENABLED
+    allOk = (uiDist < uiDistBest);
+
+    if (isEncodeGdrClean)
+    {
+      Mv motion = cMvTest;
+      MvPrecision curPrec = (iFrac == 2 ? MV_PRECISION_HALF : MV_PRECISION_QUARTER);
+      motion.changePrecision(curPrec, MV_PRECISION_INTERNAL);
+      uiDistOk = cs.isClean(pu.Y().bottomRight(), motion, eRefPicList, iRefIdx);
+
+      if (uiDistOk)
+      {
+        allOk = (uiDistBestOk) ? (uiDist < uiDistBest) : true;
+      }
+      else
+      {
+        allOk = false;
+      }
+    }
+#endif
+
+#if GDR_ENABLED
+    if (allOk)
+#else
     if ( uiDist < uiDistBest )
+#endif
     {
       uiDistBest  = uiDist;
       uiDirecBest = i;
       m_cDistParam.maximumDistortionForEarlyExit = uiDist;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        uiDistBestOk = uiDistOk;
+        rbCleanCandExist = true;
+      }
+#endif
+    }
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      if (!rbCleanCandExist)
+        uiDistBest = 65535;
     }
+#endif
   }
 
   rcMvFrac = pcMvRefine[uiDirecBest];
@@ -785,7 +878,9 @@ void InterSearch::xIBCSearchMVCandUpdate(Distortion  sad, int x, int y, Distorti
     for (int t = CHROMA_REFINEMENT_CANDIDATES - 1; t >= 0; t--)
     {
       if (sad < sadBestCand[t])
+      {
         j = t;
+      }
     }
 
     for (int k = CHROMA_REFINEMENT_CANDIDATES - 1; k > j; k--)
@@ -835,13 +930,33 @@ int InterSearch::xIBCSearchMVChromaRefine(PredictionUnit& pu,
     }
 
     if ((!cMVCand[cand].getHor()) && (!cMVCand[cand].getVer()))
+    {
       continue;
+    }
 
     if (((int)(cuPelY + cMVCand[cand].getVer() + roiHeight) >= picHeight) || ((cuPelY + cMVCand[cand].getVer()) < 0))
+    {
       continue;
+    }
 
     if (((int)(cuPelX + cMVCand[cand].getHor() + roiWidth) >= picWidth) || ((cuPelX + cMVCand[cand].getHor()) < 0))
+    {
       continue;
+    }
+
+#if GDR_ENABLED
+    CodingStructure &cs = *pu.cs;
+    const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+
+    if (isEncodeGdrClean)
+    {
+      Position curBR(cuPelX + roiWidth + cMVCand[cand].getHor() - 1, cuPelY + roiHeight + cMVCand[cand].getVer() - 1);    // bottom-right sample of the block displaced by this candidate vector. NOTE(review): the original author was unsure this position is correct; confirm
+      if (!cs.isClean(curBR, CHANNEL_TYPE_LUMA))   // skip candidates whose displaced bottom-right sample is not reported clean
+      {
+        continue;
+      }
+    }
+#endif
 
     tempSad = sadBestCand[cand];
 
@@ -954,6 +1069,10 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
   Distortion  sadBestCand[CHROMA_REFINEMENT_CANDIDATES];
   Mv      cMVCand[CHROMA_REFINEMENT_CANDIDATES];
 
+#if GDR_ENABLED
+  CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
 
   for (int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
   {
@@ -992,7 +1111,13 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
         && !((xPred < srLeft) || (xPred > srRight)))
       {
         bool validCand = searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth);
-
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          Position BvBR(cuPelX + roiWidth + xPred - 1, cuPelY + roiHeight + yPred - 1);
+          validCand = validCand && cs.isClean(BvBR, CHANNEL_TYPE_LUMA);
+        }
+#endif
         if (validCand)
         {
           sad = m_pcRdCost->getBvCostMultiplePreds(xPred, yPred, pu.cs->sps->getAMVREnabledFlag());
@@ -1016,6 +1141,16 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
       {
         continue;
       }
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        Position BvBR(cuPelX + roiWidth - 1, cuPelY + roiHeight + y - 1);
+        if (!cs.isClean(BvBR, CHANNEL_TYPE_LUMA))
+        {
+          continue;
+        }
+      }
+#endif
 
       sad = m_pcRdCost->getBvCostMultiplePreds(0, y, pu.cs->sps->getAMVREnabledFlag());
       m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y;
@@ -1041,6 +1176,16 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
       {
         continue;
       }
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        Position BvBR(cuPelX + roiWidth + x - 1, cuPelY + roiHeight - 1);
+        if (!cs.isClean(BvBR, CHANNEL_TYPE_LUMA))
+        {
+          continue;
+        }
+      }
+#endif
 
       sad = m_pcRdCost->getBvCostMultiplePreds(x, 0, pu.cs->sps->getAMVREnabledFlag());
       m_cDistParam.cur.buf = piRefSrch + x;
@@ -1081,18 +1226,33 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
       for (int y = std::max(srchRngVerTop, -cuPelY); y <= srchRngVerBottom; y += 2)
       {
         if ((y == 0) || ((int)(cuPelY + y + roiHeight) >= picHeight))
+        {
           continue;
+        }
 
         for (int x = std::max(srchRngHorLeft, -cuPelX); x <= srchRngHorRight; x++)
         {
           if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth))
+          {
             continue;
+          }
 
           if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth))
           {
             continue;
           }
 
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            Position BvBR(cuPelX + roiWidth + x - 1, cuPelY + roiHeight + y - 1);
+            if (!cs.isClean(BvBR, CHANNEL_TYPE_LUMA))
+            {
+              continue;
+            }
+          }
+#endif
+
           sad = m_pcRdCost->getBvCostMultiplePreds(x, y, pu.cs->sps->getAMVREnabledFlag());
           m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y + x;
           sad += m_cDistParam.distFunc(m_cDistParam);
@@ -1121,18 +1281,33 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
       for (int y = (std::max(srchRngVerTop, -cuPelY) + 1); y <= srchRngVerBottom; y += 2)
       {
         if ((y == 0) || ((int)(cuPelY + y + roiHeight) >= picHeight))
+        {
           continue;
+        }
 
         for (int x = std::max(srchRngHorLeft, -cuPelX); x <= srchRngHorRight; x += 2)
         {
           if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth))
+          {
             continue;
+          }
 
           if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth))
           {
             continue;
           }
 
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            Position BvBR(cuPelX + roiWidth + x - 1, cuPelY + roiHeight + y - 1);
+            if (!cs.isClean(BvBR, CHANNEL_TYPE_LUMA))
+            {
+              continue;
+            }
+          }
+#endif
+
           sad = m_pcRdCost->getBvCostMultiplePreds(x, y, pu.cs->sps->getAMVREnabledFlag());
           m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y + x;
           sad += m_cDistParam.distFunc(m_cDistParam);
@@ -1175,20 +1350,32 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
       for (int y = (std::max(srchRngVerTop, -cuPelY) + 1); y <= srchRngVerBottom; y += 2)
       {
         if ((y == 0) || ((int)(cuPelY + y + roiHeight) >= picHeight))
+        {
           continue;
-
-
+        }
 
         for (int x = (std::max(srchRngHorLeft, -cuPelX) + 1); x <= srchRngHorRight; x += 2)
         {
 
           if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth))
+          {
             continue;
+          }
 
           if (!searchBv(pu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth))
           {
             continue;
           }
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            Position BvBR(cuPelX + roiWidth + x - 1, cuPelY + roiHeight + y - 1);
+            if (!cs.isClean(BvBR, CHANNEL_TYPE_LUMA))
+            {
+              continue;
+            }
+          }
+#endif
 
           sad = m_pcRdCost->getBvCostMultiplePreds(x, y, pu.cs->sps->getAMVREnabledFlag());
           m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y + x;
@@ -1264,6 +1451,11 @@ void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf,
   CPelBuf* pcPatternKey = &tmpPattern;
   PelBuf tmpOrgLuma;
 
+#if GDR_ENABLED
+  CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
+
   if ((pu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag()))
   {
     const CompArea &area = pu.blocks[COMPONENT_Y];
@@ -1306,7 +1498,17 @@ void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf,
 
       int xBv = bv.hor;
       int yBv = bv.ver;
+#if GDR_ENABLED
+      bool validCand = true;
+      if (isEncodeGdrClean)
+      {
+        Position BvBR(cuPelX + iRoiWidth + xBv - 1, cuPelY + iRoiHeight + yBv - 1);
+        validCand = validCand && cs.isClean(BvBR, CHANNEL_TYPE_LUMA);
+      }
+      if (validCand && searchBv(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xBv, yBv, lcuWidth))
+#else
       if (searchBv(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xBv, yBv, lcuWidth))
+#endif
       {
         buffered = true;
         Distortion sad = m_pcRdCost->getBvCostMultiplePreds(xBv, yBv, pu.cs->sps->getAMVREnabledFlag());
@@ -1341,7 +1543,17 @@ void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf,
         int xPred = cMvPredEncOnly[cand].getHor();
         int yPred = cMvPredEncOnly[cand].getVer();
 
+#if GDR_ENABLED
+        bool validCand = true;
+        if (isEncodeGdrClean)
+        {
+          Position BvBR(cuPelX + iRoiWidth + xPred - 1, cuPelY + iRoiHeight + yPred - 1);
+          validCand = cs.isClean(BvBR, CHANNEL_TYPE_LUMA);
+        }
+        if (validCand && searchBv(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xPred, yPred, lcuWidth))
+#else
         if (searchBv(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xPred, yPred, lcuWidth))
+#endif
         {
           Distortion sad = m_pcRdCost->getBvCostMultiplePreds(xPred, yPred, pu.cs->sps->getAMVREnabledFlag());
           m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yPred + xPred;
@@ -1410,16 +1622,8 @@ void InterSearch::xSetIntraSearchRange(PredictionUnit& pu, int iRoiWidth, int iR
   rcMvSrchRngRB <<= 2;
   bool temp = m_clipMvInSubPic;
   m_clipMvInSubPic = true;
-  xClipMv(rcMvSrchRngLT, pu.cu->lumaPos(),
-         pu.cu->lumaSize(),
-         sps
-      , *pu.cs->pps
-  );
-  xClipMv(rcMvSrchRngRB, pu.cu->lumaPos(),
-         pu.cu->lumaSize(),
-         sps
-      , *pu.cs->pps
-  );
+  xClipMv(rcMvSrchRngLT, pu.cu->lumaPos(), pu.cu->lumaSize(), sps, *pu.cs->pps);
+  xClipMv(rcMvSrchRngRB, pu.cu->lumaPos(), pu.cu->lumaSize(), sps, *pu.cs->pps);
   m_clipMvInSubPic = temp;
   rcMvSrchRngLT >>= 2;
   rcMvSrchRngRB >>= 2;
@@ -1438,6 +1642,20 @@ bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const
     m_maxCompIDToPred = MAX_NUM_COMPONENT;
 
     CHECK(pu.cu != &cu, "PU is contained in another CU");
+#if GDR_ENABLED
+    CodingStructure &cs = *pu.cs; // local alias for the coding structure that owns this PU
+    const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
+
+#if GDR_ENABLED
+    if (isEncodeGdrClean) // encoder + SPS GDR flag, and (in a GDR interval with a clean top-right luma sample, or no vertical virtual boundaries)
+    {
+      pu.mvSolid[0] = false; // clear the solid/valid flags of both entries before the IBC search starts
+      pu.mvSolid[1] = false;
+      pu.mvValid[0] = false;
+      pu.mvValid[1] = false;
+    }
+#endif
     //////////////////////////////////////////////////////////
     /// ibc search
     pu.cu->imv = 2;
@@ -1498,10 +1716,13 @@ bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const
         bvpIdxBest = bvpIdxTemp;
 
         if (cu.cs->sps->getAMVREnabledFlag() && cMv != cMvPred[bvpIdxTemp])
+        {
           pu.cu->imv = 1; // set as full-pel
+        }
         else
+        {
           pu.cu->imv = 0; // set as fractional-pel
-
+        }
       }
 
       unsigned int bitsBVPQP = MAX_UINT;
@@ -1526,9 +1747,10 @@ bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const
         bvpIdxBest = bvpIdxTemp;
 
         if (cu.cs->sps->getAMVREnabledFlag())
+        {
           pu.cu->imv = 2; // set as quad-pel
+        }
       }
-
     }
 
     pu.bv = cMv; // bv is always at integer accuracy
@@ -1538,16 +1760,26 @@ bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const
     pu.mvpIdx[REF_PIC_LIST_0] = bvpIdxBest;
 
     if(pu.cu->imv == 2 && cMv != amvpInfo4Pel.mvCand[bvpIdxBest])
+    {
       pu.mvd[REF_PIC_LIST_0] = cMv - amvpInfo4Pel.mvCand[bvpIdxBest];
+    }
     else
+    {
       pu.mvd[REF_PIC_LIST_0] = cMv - amvpInfo.mvCand[bvpIdxBest];
+    }
 
     if (pu.mvd[REF_PIC_LIST_0] == Mv(0, 0))
+    {
       pu.cu->imv = 0;
+    }
     if (pu.cu->imv == 2)
+    {
       assert((cMv.getHor() % 16 == 0) && (cMv.getVer() % 16 == 0));
+    }
     if (cu.cs->sps->getAMVREnabledFlag())
+    {
       assert(pu.cu->imv>0 || pu.mvd[REF_PIC_LIST_0] == Mv());
+    }
 
     pu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF;
 
@@ -1561,6 +1793,10 @@ void InterSearch::xxIBCHashSearch(PredictionUnit& pu, Mv* mvPred, int numMvPred,
   mv.setZero();
   m_pcRdCost->setCostScale(0);
 
+#if GDR_ENABLED
+  CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
   std::vector<Position> candPos;
   if (ibcHashMap.ibcHashMatch(pu.Y(), candPos, *pu.cs, m_pcEncCfg->getIBCHashSearchMaxCand(), m_pcEncCfg->getIBCHashSearchRange4SmallBlk()))
   {
@@ -1587,6 +1823,13 @@ void InterSearch::xxIBCHashSearch(PredictionUnit& pu, Mv* mvPred, int numMvPred,
         {
           continue;
         }
+#if GDR_ENABLED
+        Position BvBR(cuPelX + roiWidth + candMv.getHor() - 1, cuPelY + roiHeight + candMv.getVer() - 1);
+        if (isEncodeGdrClean && !cs.isClean(BvBR, CHANNEL_TYPE_LUMA))
+        {
+          continue;
+        }
+#endif
 
         for (int n = 0; n < numMvPred; n++)
         {
@@ -1790,6 +2033,11 @@ bool InterSearch::xRectHashInterEstimation(PredictionUnit& pu, RefPicList& bestR
   unsigned int* hashValue1s = new unsigned int[baseNum];
   unsigned int* hashValue2s = new unsigned int[baseNum];
 
+#if GDR_ENABLED
+  CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
+
   for (int k = 0; k < baseNum; k++)
   {
     if (isHorizontal)
@@ -1871,6 +2119,26 @@ bool InterSearch::xRectHashInterEstimation(PredictionUnit& pu, RefPicList& bestR
         AMVPInfo currAMVPInfoPel;
         AMVPInfo currAMVPInfo4Pel;
         AMVPInfo currAMVPInfoQPel;
+
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          currAMVPInfoPel.allCandSolidInAbove = true;
+          currAMVPInfo4Pel.allCandSolidInAbove = true;
+          currAMVPInfoQPel.allCandSolidInAbove = true;
+
+          for (int i = 0; i < AMVP_MAX_NUM_CANDS_MEM; i++)
+          {
+            currAMVPInfoPel.mvSolid[i] = true;
+            currAMVPInfoPel.mvValid[i] = true;
+            currAMVPInfo4Pel.mvSolid[i] = true;
+            currAMVPInfo4Pel.mvValid[i] = true;
+            currAMVPInfoQPel.mvSolid[i] = true;
+            currAMVPInfoQPel.mvValid[i] = true;
+          }
+        }
+#endif
+
         pu.cu->imv = 2;
         PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfo4Pel);
         pu.cu->imv = 1;
@@ -1902,6 +2170,25 @@ bool InterSearch::xRectHashInterEstimation(PredictionUnit& pu, RefPicList& bestR
           m_hashMVStoreds[eRefPicList][refIdx][countMV++] = cMv;
           cMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_QUARTER);
 
+#if GDR_ENABLED
+          bool allOk = true;      // accept flag, recomputed for every MVP bit-cost comparison below
+          bool anyCandOk = false; // becomes true once a predictor is accepted while isEncodeGdrClean is set
+          bool Valid = true;      // stays true unless the clean-area check below rejects this hash candidate
+          if (isEncodeGdrClean)
+          {
+            Mv cMv16 = cMv; // work on a copy so cMv keeps the quarter precision set just above
+            cMv16.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
+            const Position bottomRight = pu.Y().bottomRight();
+            Valid = cs.isClean(bottomRight, cMv16, eRefPicList, refIdx); // clean-area query for the PU's bottom-right luma position displaced by the MV
+          }
+#endif
+
+#if GDR_ENABLED
+          if (!Valid)
+          {
+            continue; // skip this hash candidate entirely; no MVP costs are evaluated for it
+          }
+#endif
           for (int mvpIdxTemp = 0; mvpIdxTemp < 2; mvpIdxTemp++)
           {
             Mv cMvPredPel = currAMVPInfoQPel.mvCand[mvpIdxTemp];
@@ -1909,7 +2196,21 @@ bool InterSearch::xRectHashInterEstimation(PredictionUnit& pu, RefPicList& bestR
 
             unsigned int tempMVPbits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 0);
 
+#if GDR_ENABLED
+            allOk = (tempMVPbits < curMVPbits);
+            if (isEncodeGdrClean)
+            {
+              bool isSolid = currAMVPInfoQPel.mvSolid[mvpIdxTemp];
+              allOk = allOk && isSolid;
+              if (allOk) anyCandOk = true;
+            }
+#endif
+
+#if GDR_ENABLED
+            if (allOk)
+#else
             if (tempMVPbits < curMVPbits)
+#endif
             {
               curMVPbits = tempMVPbits;
               curMVPIdx = mvpIdxTemp;
@@ -1922,7 +2223,24 @@ bool InterSearch::xRectHashInterEstimation(PredictionUnit& pu, RefPicList& bestR
               Mv mvPred1Pel = currAMVPInfoPel.mvCand[mvpIdxTemp];
               m_pcRdCost->setPredictor(mvPred1Pel);
               bitsMVP1Pel = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 2);
+#if GDR_ENABLED
+              allOk = (bitsMVP1Pel < curMVPbits);
+              if (isEncodeGdrClean)
+              {
+                bool isSolid = currAMVPInfoPel.mvSolid[mvpIdxTemp];
+                allOk = allOk && isSolid;
+                if (allOk)
+                {
+                  anyCandOk = true;
+                }
+              }
+#endif
+
+#if GDR_ENABLED
+              if (allOk)
+#else
               if (bitsMVP1Pel < curMVPbits)
+#endif
               {
                 curMVPbits = bitsMVP1Pel;
                 curMVPIdx = mvpIdxTemp;
@@ -1935,7 +2253,24 @@ bool InterSearch::xRectHashInterEstimation(PredictionUnit& pu, RefPicList& bestR
                 Mv mvPred4Pel = currAMVPInfo4Pel.mvCand[mvpIdxTemp];
                 m_pcRdCost->setPredictor(mvPred4Pel);
                 bitsMVP4Pel = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 4);
+#if GDR_ENABLED
+                allOk = (bitsMVP4Pel < curMVPbits);
+                if (isEncodeGdrClean)
+                {
+                  bool isSolid = currAMVPInfo4Pel.mvSolid[mvpIdxTemp];
+                  allOk = allOk && isSolid;
+                  if (allOk)
+                  {
+                    anyCandOk = true;
+                  }
+                }
+#endif
+
+#if GDR_ENABLED
+                if (allOk)
+#else
                 if (bitsMVP4Pel < curMVPbits)
+#endif
                 {
                   curMVPbits = bitsMVP4Pel;
                   curMVPIdx = mvpIdxTemp;
@@ -1944,6 +2279,14 @@ bool InterSearch::xRectHashInterEstimation(PredictionUnit& pu, RefPicList& bestR
               }
             }
           }
+
+#if GDR_ENABLED
+          if (isEncodeGdrClean && !anyCandOk)
+          {
+            continue;
+          }
+#endif
+
           curMVPbits += bitsOnRefIdx;
 
           m_cDistParam.cur.buf = refBufStart + (*it).y*refStride + (*it).x;
@@ -2014,6 +2357,10 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi
     return false;
   }
 
+#if GDR_ENABLED
+  CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
   BlockHash currBlockHash;
   currBlockHash.x = xPos;
   currBlockHash.y = yPos;
@@ -2068,11 +2415,45 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi
         }
         AMVPInfo currAMVPInfoPel;
         AMVPInfo currAMVPInfo4Pel;
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          currAMVPInfo4Pel.allCandSolidInAbove = true;
+          for (int i = 0; i < AMVP_MAX_NUM_CANDS_MEM; i++)
+          {
+            currAMVPInfo4Pel.mvSolid[i] = true;
+            currAMVPInfo4Pel.mvValid[i] = true;
+          }
+        }
+#endif
         pu.cu->imv = 2;
         PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfo4Pel);
+
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          currAMVPInfoPel.allCandSolidInAbove = true;
+          for (int i = 0; i < AMVP_MAX_NUM_CANDS_MEM; i++)
+          {
+            currAMVPInfoPel.mvSolid[i] = true;
+            currAMVPInfoPel.mvValid[i] = true;
+          }
+        }
+#endif
         pu.cu->imv = 1;
         PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfoPel);
         AMVPInfo currAMVPInfoQPel;
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          currAMVPInfoQPel.allCandSolidInAbove = true;
+          for (int i = 0; i < AMVP_MAX_NUM_CANDS_MEM; i++)
+          {
+            currAMVPInfoQPel.mvSolid[i] = true;
+            currAMVPInfoQPel.mvValid[i] = true;
+          }
+        }
+#endif
         pu.cu->imv = 0;
         PU::fillMvpCand(pu, eRefPicList, refIdx, currAMVPInfoQPel);
         CHECK(currAMVPInfoPel.numCand <= 1, "Wrong")
@@ -2102,6 +2483,27 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi
           m_hashMVStoreds[eRefPicList][refIdx][countMV++] = cMv;
           cMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_QUARTER);
 
+#if GDR_ENABLED
+          bool Valid = true;
+          bool allOk = true;
+          bool anyCandOk = false;
+
+          if (isEncodeGdrClean)
+          {
+            Mv cMv16 = cMv;
+            cMv16.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
+            const Position bottomRight = pu.Y().bottomRight();
+            Valid = cs.isClean(bottomRight, cMv16, eRefPicList, refIdx);
+          }
+#endif
+
+#if GDR_ENABLED
+          if (!Valid)
+          {
+            continue;
+          }
+#endif
+
           for (int mvpIdxTemp = 0; mvpIdxTemp < 2; mvpIdxTemp++)
           {
             Mv cMvPredPel = currAMVPInfoQPel.mvCand[mvpIdxTemp];
@@ -2109,7 +2511,21 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi
 
             unsigned int tempMVPbits = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 0);
 
+#if GDR_ENABLED
+            allOk = (tempMVPbits < curMVPbits);
+            if (isEncodeGdrClean)
+            {
+              bool isSolid = currAMVPInfoQPel.mvSolid[mvpIdxTemp];
+              allOk = allOk && isSolid;
+              if (allOk) anyCandOk = true;
+            }
+#endif
+
+#if GDR_ENABLED
+            if (allOk)
+#else
             if (tempMVPbits < curMVPbits)
+#endif
             {
               curMVPbits = tempMVPbits;
               curMVPIdx = mvpIdxTemp;
@@ -2122,7 +2538,21 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi
               Mv mvPred1Pel = currAMVPInfoPel.mvCand[mvpIdxTemp];
               m_pcRdCost->setPredictor(mvPred1Pel);
               bitsMVP1Pel = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 2);
+#if GDR_ENABLED
+              allOk = (bitsMVP1Pel < curMVPbits);
+              if (isEncodeGdrClean)
+              {
+                bool isSolid = currAMVPInfoPel.mvSolid[mvpIdxTemp];
+                allOk = allOk && isSolid;
+                if (allOk) anyCandOk = true;
+              }
+#endif
+
+#if GDR_ENABLED
+              if (allOk)
+#else
               if (bitsMVP1Pel < curMVPbits)
+#endif
               {
                 curMVPbits = bitsMVP1Pel;
                 curMVPIdx = mvpIdxTemp;
@@ -2135,7 +2565,22 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi
                 Mv mvPred4Pel = currAMVPInfo4Pel.mvCand[mvpIdxTemp];
                 m_pcRdCost->setPredictor(mvPred4Pel);
                 bitsMVP4Pel = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.getHor(), cMv.getVer(), 4);
+
+#if GDR_ENABLED
+                allOk = (bitsMVP4Pel < curMVPbits);
+                if (isEncodeGdrClean)
+                {
+                  bool isSolid = currAMVPInfo4Pel.mvSolid[mvpIdxTemp];
+                  allOk = allOk && isSolid;
+                  if (allOk) anyCandOk = true;
+                }
+#endif
+
+#if GDR_ENABLED
+                if (allOk)
+#else
                 if (bitsMVP4Pel < curMVPbits)
+#endif
                 {
                   curMVPbits = bitsMVP4Pel;
                   curMVPIdx = mvpIdxTemp;
@@ -2145,6 +2590,13 @@ bool InterSearch::xHashInterEstimation(PredictionUnit& pu, RefPicList& bestRefPi
             }
           }
 
+#if GDR_ENABLED
+          if (isEncodeGdrClean && !anyCandOk)
+          {
+            continue;
+          }
+#endif
+
           curMVPbits += bitsOnRefIdx;
 
           m_cDistParam.cur.buf = refBufStart + (*it).y*refStride + (*it).x;
@@ -2266,6 +2718,37 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
   int          aaiMvpIdx[2][33];
   int          aaiMvpNum[2][33];
 
+#if GDR_ENABLED
+  bool         cMvSolid[2];
+  bool         cMvValid[2];
+  bool         cMvBiSolid[2];
+  bool         cMvBiValid[2];
+
+  bool         cMvPredSolid[2][33];
+  bool         cMvPredBiSolid[2][33];
+
+  bool         cMvTempSolid[2][33];
+  bool         cMvTempValid[2][33];
+
+  bool         cMvHevcTempSolid[2][33];
+  bool         cMvHevcTempValid[2][33];
+
+  bool         allOk;
+  bool         bestBiPDistOk;
+  bool         biPDistTempOk;
+  bool         uiCostTempOk;
+  bool         uiCostTempL0Ok[MAX_NUM_REF];
+
+  bool         uiHevcCostOk;
+  bool         uiAffineCostOk;
+  bool         uiAffine6CostOk;
+  bool         uiCostOk[2];
+  bool         uiCostBiOk;
+  bool         costValidList1Ok;
+
+  bool         bCleanCandExist;
+#endif
+
   AMVPInfo     aacAMVPInfo[2][33];
 
   int          iRefIdx[2]={0,0}; //If un-initialized, may cause SEGV in bi-directional prediction iterative stage.
@@ -2309,6 +2792,60 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
     checkNonAffine = m_affineMotion.hevcCost[1] < m_affineMotion.hevcCost[0] * 1.06f;
   }
 
+#if GDR_ENABLED
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  const bool init_value = true;
+#endif
+
+  amvp[0].numCand = 0;
+  amvp[1].numCand = 0;
+  memset(aacAMVPInfo, 0, sizeof(aacAMVPInfo));
+
+#if GDR_ENABLED
+  if (isEncodeGdrClean) // the GDR bookkeeping below is only initialised on this path; init_value is the constant true declared above
+  {
+    biPDistTempOk = init_value;
+    bestBiPDistOk = init_value;
+    uiCostTempOk = init_value;
+
+    uiHevcCostOk = init_value;
+    uiAffineCostOk = init_value;
+    uiAffine6CostOk = init_value;
+    memset(uiCostOk, init_value, sizeof(uiCostOk));
+    uiCostBiOk = init_value;
+    uiCostTempOk = init_value; // NOTE(review): repeats the assignment made a few lines earlier
+    costValidList1Ok = init_value;
+
+    memset(cMvSolid, init_value, sizeof(cMvSolid));
+    memset(cMvValid, init_value, sizeof(cMvValid));
+    memset(cMvBiSolid, !init_value, sizeof(cMvBiSolid)); // bi-pred flags start cleared (false), unlike the uni-pred flags above
+    memset(cMvBiValid, !init_value, sizeof(cMvBiValid));
+
+    memset(cMvPredSolid, init_value, sizeof(cMvPredSolid));
+    memset(cMvPredBiSolid, init_value, sizeof(cMvPredBiSolid));
+
+    memset(cMvTempSolid, init_value, sizeof(cMvTempSolid));
+    memset(cMvTempValid, init_value, sizeof(cMvTempValid));
+    memset(cMvHevcTempSolid, init_value, sizeof(cMvHevcTempSolid));
+    memset(cMvHevcTempValid, init_value, sizeof(cMvHevcTempValid));
+
+
+    memset(pu.mvSolid, init_value, sizeof(pu.mvSolid)); // the PU's own flag arrays are reset as well, not only the locals
+    memset(pu.mvValid, init_value, sizeof(pu.mvValid));
+
+    memset(pu.mvAffiSolid, init_value, sizeof(pu.mvAffiSolid));
+    memset(pu.mvAffiValid, init_value, sizeof(pu.mvAffiValid));
+
+    memset(pu.mvpSolid, init_value, sizeof(pu.mvpSolid));
+    memset(pu.mvpType, init_value, sizeof(pu.mvpType)); // NOTE(review): fills every byte of mvpType with 1; confirm that is a meaningful value for its element type
+
+    pu.mvpPos[0] = Position(0, 0);
+    pu.mvpPos[1] = Position(0, 0);
+
+    bCleanCandExist = false;
+  }
+#endif
+
   {
     if (pu.cu->cs->bestParent != nullptr && pu.cu->cs->bestParent->getCU(CHANNEL_TYPE_LUMA) != nullptr && pu.cu->cs->bestParent->getCU(CHANNEL_TYPE_LUMA)->affine == false)
     {
@@ -2334,6 +2871,15 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
     Distortion   uiCostBi  =   std::numeric_limits<Distortion>::max();
     Distortion   uiCostTemp;
 
+#if GDR_ENABLED
+    memset(uiCostTempL0Ok, init_value, sizeof(uiCostTempL0Ok));
+
+    bool mvValidList1Solid = init_value;
+    bool mvValidList1Valid = init_value;
+    uiHevcCostOk = false;
+    uiAffineCostOk = false;
+#endif
+
     uint32_t         uiBits[3];
     uint32_t         uiBitsTemp;
     Distortion   bestBiPDist = std::numeric_limits<Distortion>::max();
@@ -2378,12 +2924,48 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
           aaiMvpIdx[iRefList][iRefIdxTemp] = pu.mvpIdx[eRefPicList];
           aaiMvpNum[iRefList][iRefIdxTemp] = pu.mvpNum[eRefPicList];
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            biPDistTempOk = true;
+            biPDistTempOk = amvp[eRefPicList].mvSolid[aaiMvpIdx[iRefList][iRefIdxTemp]];
+            cMvPredSolid[iRefList][iRefIdxTemp] = biPDistTempOk;
+            cMvTempSolid[iRefList][iRefIdxTemp] = biPDistTempOk;
+            cMvTempValid[iRefList][iRefIdxTemp] = cs.isClean(pu.Y().bottomRight(), cMvTemp[iRefList][iRefIdxTemp], (RefPicList)iRefList, iRefIdxTemp);
+          }
+#endif
 
+#if GDR_ENABLED
+          allOk = (cs.picHeader->getMvdL1ZeroFlag() && iRefList == 1 && biPDistTemp < bestBiPDist);
+
+          if (isEncodeGdrClean)
+          {
+            if (biPDistTempOk)
+            {
+              allOk = (bestBiPDistOk) ? (cs.picHeader->getMvdL1ZeroFlag() && iRefList == 1 && biPDistTemp < bestBiPDist) : true;
+            }
+            else
+            {
+              allOk = false;
+            }
+          }
+#endif
+
+#if GDR_ENABLED
+          if (allOk)
+#else
           if(cs.picHeader->getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist)
+#endif
           {
             bestBiPDist = biPDistTemp;
             bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
             bestBiPRefIdxL1 = iRefIdxTemp;
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              bestBiPDistOk = biPDistTempOk;
+            }
+#endif
           }
 
           uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
@@ -2393,8 +2975,21 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
             if ( cs.slice->getList1IdxToList0Idx( iRefIdxTemp ) >= 0 )
             {
               cMvTemp[1][iRefIdxTemp] = cMvTemp[0][cs.slice->getList1IdxToList0Idx( iRefIdxTemp )];
+#if GDR_ENABLED
+              if (isEncodeGdrClean)
+              {
+                cMvTempSolid[1][iRefIdxTemp] = cMvTempSolid[0][cs.slice->getList1IdxToList0Idx(iRefIdxTemp)]; // the MV above was copied from list 0, so its solid flag must come from list 0 too
+                cMvTempValid[1][iRefIdxTemp] = cs.isClean(pu.Y().bottomRight(), cMvTemp[1][iRefIdxTemp], (RefPicList)1, iRefIdxTemp); // a list-1 query needs the list-1 index, not the mapped list-0 index
+              }
+#endif
               uiCostTemp = uiCostTempL0[cs.slice->getList1IdxToList0Idx( iRefIdxTemp )];
               /*first subtract the bit-rate part of the cost of the other list*/
+#if GDR_ENABLED
+              if (isEncodeGdrClean)
+              {
+                uiCostTempOk = uiCostTempL0Ok[cs.slice->getList1IdxToList0Idx(iRefIdxTemp)];
+              }
+#endif
               uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[cs.slice->getList1IdxToList0Idx( iRefIdxTemp )] );
               /*correct the bit-rate part of the current ref*/
               m_pcRdCost->setPredictor  ( cMvPred[iRefList][iRefIdxTemp] );
@@ -2404,28 +2999,131 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
             }
             else
             {
+#if GDR_ENABLED
+              bCleanCandExist = false;
+              xMotionEstimation(pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], cMvTempSolid[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList], bCleanCandExist);
+#else
               xMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList] );
+#endif
+
+#if GDR_ENABLED
+              if (isEncodeGdrClean)
+              {
+                int mvp_idx = aaiMvpIdx[iRefList][iRefIdxTemp];
+                cMvPredSolid[iRefList][iRefIdxTemp] = amvp[eRefPicList].mvSolid[mvp_idx];
+                cMvTempSolid[iRefList][iRefIdxTemp] = amvp[eRefPicList].mvSolid[mvp_idx];
+                cMvTempValid[iRefList][iRefIdxTemp] = cs.isClean(pu.Y().bottomRight(), cMvTemp[iRefList][iRefIdxTemp], (RefPicList)iRefList, iRefIdxTemp);
+
+                if (cMvTempValid[iRefList][iRefIdxTemp])
+                {
+                  cMvTempValid[iRefList][iRefIdxTemp] = cMvTempSolid[iRefList][iRefIdxTemp];
+                }
+
+                uiCostTempOk = bCleanCandExist;
+                uiCostTempOk = uiCostTempOk && cMvPredSolid[iRefList][iRefIdxTemp];
+                uiCostTempOk = uiCostTempOk && cMvTempSolid[iRefList][iRefIdxTemp];
+                uiCostTempOk = uiCostTempOk && cMvTempValid[iRefList][iRefIdxTemp];
+              }
+#endif
             }
           }
           else
           {
+#if GDR_ENABLED
+            bCleanCandExist = false;
+            xMotionEstimation(pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], cMvTempSolid[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList], bCleanCandExist);
+#else
             xMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList] );
+#endif
+
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              int mvp_idx = aaiMvpIdx[iRefList][iRefIdxTemp];
+              cMvPredSolid[iRefList][iRefIdxTemp] = amvp[eRefPicList].mvSolid[mvp_idx];
+              cMvTempSolid[iRefList][iRefIdxTemp] = amvp[eRefPicList].mvSolid[mvp_idx];
+              cMvTempValid[iRefList][iRefIdxTemp] = cs.isClean(pu.Y().bottomRight(), cMvTemp[iRefList][iRefIdxTemp], (RefPicList)iRefList, iRefIdxTemp);
+              if (cMvTempValid[iRefList][iRefIdxTemp])
+              {
+                cMvTempValid[iRefList][iRefIdxTemp] = cMvTempSolid[iRefList][iRefIdxTemp];
+              }
+
+              uiCostTempOk = bCleanCandExist;
+              uiCostTempOk = uiCostTempOk && cMvPredSolid[iRefList][iRefIdxTemp];
+              uiCostTempOk = uiCostTempOk && cMvTempSolid[iRefList][iRefIdxTemp];
+              uiCostTempOk = uiCostTempOk && cMvTempValid[iRefList][iRefIdxTemp];
+            }
+#endif
           }
           if( cu.cs->sps->getUseBcw() && cu.BcwIdx == BCW_DEFAULT && cu.cs->slice->isInterB() )
           {
             const bool checkIdentical = true;
             m_uniMotions.setReadMode(checkIdentical, (uint32_t)iRefList, (uint32_t)iRefIdxTemp);
+#if GDR_ENABLED
+            m_uniMotions.copyFrom(cMvTemp[iRefList][iRefIdxTemp], cMvTempSolid[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint32_t)iRefList, (uint32_t)iRefIdxTemp);
+#else
             m_uniMotions.copyFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint32_t)iRefList, (uint32_t)iRefIdxTemp);
+#endif
           }
           xCopyAMVPInfo( &amvp[eRefPicList], &aacAMVPInfo[iRefList][iRefIdxTemp]); // must always be done ( also when AMVP_MODE = AM_NONE )
-          xCheckBestMVP( eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], amvp[eRefPicList], uiBitsTemp, uiCostTemp, pu.cu->imv );
+#if GDR_ENABLED
+          xCheckBestMVP(pu, eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], amvp[eRefPicList], uiBitsTemp, uiCostTemp, pu.cu->imv);
 
+          if (isEncodeGdrClean)
+          {
+            int mvp_idx = aaiMvpIdx[iRefList][iRefIdxTemp];
+
+            cMvPredSolid[iRefList][iRefIdxTemp] = amvp[eRefPicList].mvSolid[mvp_idx];
+            cMvTempSolid[iRefList][iRefIdxTemp] = amvp[eRefPicList].mvSolid[mvp_idx];
+            cMvTempValid[iRefList][iRefIdxTemp] = cs.isClean(pu.Y().bottomRight(), cMvTemp[iRefList][iRefIdxTemp], (RefPicList)iRefList, iRefIdxTemp);
+            if (cMvTempValid[iRefList][iRefIdxTemp])
+            {
+              cMvTempValid[iRefList][iRefIdxTemp] = cMvTempSolid[iRefList][iRefIdxTemp];
+            }
+
+            uiCostTempOk = true;
+            uiCostTempOk = uiCostTempOk && cMvPredSolid[iRefList][iRefIdxTemp];
+            uiCostTempOk = uiCostTempOk && cMvTempSolid[iRefList][iRefIdxTemp];
+            uiCostTempOk = uiCostTempOk && cMvTempValid[iRefList][iRefIdxTemp];
+          }
+#else
+          xCheckBestMVP( eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], amvp[eRefPicList], uiBitsTemp, uiCostTemp, pu.cu->imv );
+#endif
           if ( iRefList == 0 )
           {
             uiCostTempL0[iRefIdxTemp] = uiCostTemp;
             uiBitsTempL0[iRefIdxTemp] = uiBitsTemp;
           }
+#if GDR_ENABLED
+          if (isEncodeGdrClean && iRefList == 0) // match the uiCostTempL0/uiBitsTempL0 update above: list-1 passes must not overwrite list-0 flags
+          {
+            uiCostTempL0Ok[iRefIdxTemp] = uiCostTempOk;
+          }
+#endif
+
+#if GDR_ENABLED
+          allOk = (uiCostTemp < uiCost[iRefList]);
+          if (isEncodeGdrClean)
+          {
+            if (uiCostTempOk)
+            {
+              allOk = (uiCostOk[iRefList]) ? (uiCostTemp < uiCost[iRefList]) : true;
+            }
+            else
+            {
+              allOk = false;
+            }
+
+            allOk = allOk && bCleanCandExist;
+          }
+#endif
+
+
+#if GDR_ENABLED
+          if (allOk)
+#else
           if ( uiCostTemp < uiCost[iRefList] )
+#endif
           {
             uiCost[iRefList] = uiCostTemp;
             uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction
@@ -2433,21 +3131,65 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
             // set motion
             cMv    [iRefList] = cMvTemp[iRefList][iRefIdxTemp];
             iRefIdx[iRefList] = iRefIdxTemp;
+
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              uiCostOk[iRefList] = uiCostTempOk;
+              cMvSolid[iRefList] = cMvTempSolid[iRefList][iRefIdxTemp];
+              cMvValid[iRefList] = cs.isClean(pu.Y().bottomRight(), cMv[iRefList], (RefPicList)iRefList, iRefIdx[iRefList]);
+            }
+#endif
           }
 
+
+#if GDR_ENABLED
+          allOk = (iRefList == 1 && uiCostTemp < costValidList1 && cs.slice->getList1IdxToList0Idx(iRefIdxTemp) < 0);
+          if (isEncodeGdrClean)
+          {
+            if (uiCostTempOk)
+            {
+              allOk = (costValidList1Ok) ? (iRefList == 1 && uiCostTemp < costValidList1 && cs.slice->getList1IdxToList0Idx(iRefIdxTemp) < 0) : true;
+            }
+            else
+            {
+              allOk = false;
+            }
+          }
+#endif
+
+#if GDR_ENABLED
+          if (allOk)
+#else
           if ( iRefList == 1 && uiCostTemp < costValidList1 && cs.slice->getList1IdxToList0Idx( iRefIdxTemp ) < 0 )
+#endif
           {
             costValidList1 = uiCostTemp;
             bitsValidList1 = uiBitsTemp;
 
             // set motion
             mvValidList1     = cMvTemp[iRefList][iRefIdxTemp];
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              costValidList1Ok = uiCostTempOk;
+              mvValidList1Solid = cMvTempSolid[iRefList][iRefIdxTemp];
+              mvValidList1Valid = cs.isClean(pu.Y().bottomRight(), mvValidList1, (RefPicList)iRefList, iRefIdxTemp);
+            }
+#endif
             refIdxValidList1 = iRefIdxTemp;
           }
         }
       }
 
       ::memcpy(cMvHevcTemp, cMvTemp, sizeof(cMvTemp));
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        ::memcpy(cMvHevcTempSolid, cMvTempSolid, sizeof(cMvTempSolid));
+        ::memcpy(cMvHevcTempValid, cMvTempValid, sizeof(cMvTempValid));
+      }
+#endif
       if (cu.imv == 0 && (!cu.slice->getSPS()->getUseBcw() || bcwIdx == BCW_DEFAULT))
       {
         insertUniMvCands(pu.Y(), cMvTemp);
@@ -2469,8 +3211,23 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
         iRefIdxBi[0] = iRefIdx[0];
         iRefIdxBi[1] = iRefIdx[1];
 
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          cMvBiSolid[0] = cMvSolid[0];
+          cMvBiSolid[1] = cMvSolid[1];
+          cMvBiValid[0] = cMvValid[0];
+          cMvBiValid[1] = cMvValid[1];
+        }
+#endif
         ::memcpy( cMvPredBi,   cMvPred,   sizeof( cMvPred   ) );
         ::memcpy( aaiMvpIdxBi, aaiMvpIdx, sizeof( aaiMvpIdx ) );
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          ::memcpy(cMvPredBiSolid, cMvPredSolid, sizeof(cMvPredSolid));
+        }
+#endif
 
         uint32_t uiMotBits[2];
 
@@ -2481,10 +3238,25 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           cMvPredBi  [1][bestBiPRefIdxL1] = amvp[REF_PIC_LIST_1].mvCand[bestBiPMvpL1];
 
           cMvBi    [1] = cMvPredBi[1][bestBiPRefIdxL1];
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            cMvPredBiSolid[1][bestBiPRefIdxL1] = amvp[REF_PIC_LIST_1].mvSolid[bestBiPMvpL1];
+            cMvBiSolid[1] = cMvPredBiSolid[1][bestBiPRefIdxL1];
+            cMvBiValid[1] = cs.isClean(pu.Y().bottomRight(), cMvBi[1], REF_PIC_LIST_1, bestBiPRefIdxL1);
+          }
+#endif
           iRefIdxBi[1] = bestBiPRefIdxL1;
           pu.mv    [REF_PIC_LIST_1] = cMvBi[1];
           pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
           pu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1;
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            pu.mvSolid[REF_PIC_LIST_1] = cMvBiSolid[1];
+            pu.mvValid[REF_PIC_LIST_1] = cs.isClean(pu.Y().bottomRight(), pu.mv[REF_PIC_LIST_1], REF_PIC_LIST_1, pu.refIdx[REF_PIC_LIST_1]);
+          }
+#endif
 
           if( m_pcEncCfg->getMCTSEncConstraint() )
           {
@@ -2519,6 +3291,13 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
 
           cMvTemp[1][bestBiPRefIdxL1] = cMvBi[1];
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            cMvTempSolid[1][bestBiPRefIdxL1] = cMvBiSolid[1];
+            cMvTempValid[1][bestBiPRefIdxL1] = cs.isClean(pu.Y().bottomRight(), cMvBi[1], REF_PIC_LIST_1, bestBiPRefIdxL1);
+          }
+#endif
         }
         else
         {
@@ -2529,126 +3308,313 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
         if( doBiPred )
         {
-        // 4-times iteration (default)
-        int iNumIter = 4;
-
-        // fast encoder setting: only one iteration
-        if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 || cs.picHeader->getMvdL1ZeroFlag() )
-        {
-          iNumIter = 1;
-        }
+          // 4-times iteration (default)
+          int iNumIter = 4;
 
-        enforceBcwPred = (bcwIdx != BCW_DEFAULT);
-        for ( int iIter = 0; iIter < iNumIter; iIter++ )
-        {
-          int         iRefList    = iIter % 2;
+          // fast encoder setting: only one iteration
+          if (m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1
+              || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE2 || cs.picHeader->getMvdL1ZeroFlag())
+          {
+            iNumIter = 1;
+          }
 
-          if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 )
+          enforceBcwPred = (bcwIdx != BCW_DEFAULT);
+          for (int iIter = 0; iIter < iNumIter; iIter++)
           {
-            if( uiCost[0] <= uiCost[1] )
+            int iRefList = iIter % 2;
+
+            if (m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1
+                || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE2)
             {
-              iRefList = 1;
+#if GDR_ENABLED
+              allOk = (uiCost[0] <= uiCost[1]);
+              if (isEncodeGdrClean)
+              {
+                if (uiCostOk[0])
+                {
+                  allOk = (uiCostOk[1]) ? (uiCost[0] <= uiCost[1]) : true;
+                }
+                else
+                {
+                  allOk = false;
+                }
+              }
+#endif
+
+#if GDR_ENABLED
+              if (allOk)
+#else
+              if (uiCost[0] <= uiCost[1])
+#endif
+              {
+                iRefList = 1;
+              }
+              else
+              {
+                iRefList = 0;
+              }
+              if (bcwIdx != BCW_DEFAULT)
+              {
+                iRefList =
+                  (abs(getBcwWeight(bcwIdx, REF_PIC_LIST_0)) > abs(getBcwWeight(bcwIdx, REF_PIC_LIST_1)) ? 1 : 0);
+              }
             }
-            else
+            else if (iIter == 0)
             {
               iRefList = 0;
             }
-            if( bcwIdx != BCW_DEFAULT )
+            if (iIter == 0 && !cs.picHeader->getMvdL1ZeroFlag())
             {
-              iRefList = ( abs( getBcwWeight(bcwIdx, REF_PIC_LIST_0 ) ) > abs( getBcwWeight(bcwIdx, REF_PIC_LIST_1 ) ) ? 1 : 0 );
+              pu.mv[1 - iRefList]     = cMv[1 - iRefList];
+              pu.refIdx[1 - iRefList] = iRefIdx[1 - iRefList];
+#if GDR_ENABLED
+              if (isEncodeGdrClean)
+              {
+                pu.mvSolid[1 - iRefList] = cMvSolid[1 - iRefList];
+                pu.mvValid[1 - iRefList] = cs.isClean(pu.Y().bottomRight(), pu.mv[1 - iRefList], (RefPicList)(1 - iRefList), pu.refIdx[1 - iRefList]);
+              }
+#endif
+              PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getBuf(UnitAreaRelative(cu, pu));
+              motionCompensation(pu, predBufTmp, RefPicList(1 - iRefList));
             }
-          }
-          else if ( iIter == 0 )
-          {
-            iRefList = 0;
-          }
-          if ( iIter == 0 && !cs.picHeader->getMvdL1ZeroFlag())
-          {
-            pu.mv    [1 - iRefList] = cMv    [1 - iRefList];
-            pu.refIdx[1 - iRefList] = iRefIdx[1 - iRefList];
 
-            PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getBuf( UnitAreaRelative(cu, pu) );
-            motionCompensation( pu, predBufTmp, RefPicList(1 - iRefList) );
-          }
-
-          RefPicList  eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
+            RefPicList eRefPicList = (iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
 
-          if(cs.picHeader->getMvdL1ZeroFlag())
-          {
-            iRefList = 0;
-            eRefPicList = REF_PIC_LIST_0;
-          }
-
-          bool bChanged = false;
-
-          iRefStart = 0;
-          iRefEnd   = cs.slice->getNumRefIdx(eRefPicList)-1;
-          for (int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++)
-          {
-            if( m_pcEncCfg->getUseBcwFast() && (bcwIdx != BCW_DEFAULT)
-              && (pu.cu->slice->getRefPic(eRefPicList, iRefIdxTemp)->getPOC() == pu.cu->slice->getRefPic(RefPicList(1 - iRefList), pu.refIdx[1 - iRefList])->getPOC())
-              && (!pu.cu->imv && pu.cu->slice->getTLayer()>1))
+            if (cs.picHeader->getMvdL1ZeroFlag())
             {
-              continue;
+              iRefList    = 0;
+              eRefPicList = REF_PIC_LIST_0;
             }
-            uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList];
-            uiBitsTemp += ((cs.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0);
-            if ( cs.slice->getNumRefIdx(eRefPicList) > 1 )
+
+            bool bChanged = false;
+
+            iRefStart = 0;
+            iRefEnd   = cs.slice->getNumRefIdx(eRefPicList) - 1;
+            for (int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++)
             {
-              uiBitsTemp += iRefIdxTemp+1;
-              if ( iRefIdxTemp == cs.slice->getNumRefIdx(eRefPicList)-1 )
+              if (m_pcEncCfg->getUseBcwFast() && (bcwIdx != BCW_DEFAULT)
+                  && (pu.cu->slice->getRefPic(eRefPicList, iRefIdxTemp)->getPOC()
+                      == pu.cu->slice->getRefPic(RefPicList(1 - iRefList), pu.refIdx[1 - iRefList])->getPOC())
+                  && (!pu.cu->imv && pu.cu->slice->getTLayer() > 1))
               {
-                uiBitsTemp--;
+                continue;
               }
-            }
-            uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
-            if ( cs.slice->getBiDirPred() )
-            {
-              uiBitsTemp += 1; // add one bit for symmetrical MVD mode
-            }
-            // call ME
-            xCopyAMVPInfo(&aacAMVPInfo[iRefList][iRefIdxTemp], &amvp[eRefPicList] );
-            xMotionEstimation ( pu, origBuf, eRefPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList], true );
-            xCheckBestMVP( eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], amvp[eRefPicList], uiBitsTemp, uiCostTemp, pu.cu->imv);
-            if ( uiCostTemp < uiCostBi )
-            {
-              bChanged = true;
+              uiBitsTemp = uiMbBits[2] + uiMotBits[1 - iRefList];
+              uiBitsTemp += ((cs.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0);
+              if (cs.slice->getNumRefIdx(eRefPicList) > 1)
+              {
+                uiBitsTemp += iRefIdxTemp + 1;
+                if (iRefIdxTemp == cs.slice->getNumRefIdx(eRefPicList) - 1)
+                {
+                  uiBitsTemp--;
+                }
+              }
+              uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
+              if (cs.slice->getBiDirPred())
+              {
+                uiBitsTemp += 1;   // add one bit for symmetrical MVD mode
+              }
+              // call ME
+              xCopyAMVPInfo(&aacAMVPInfo[iRefList][iRefIdxTemp], &amvp[eRefPicList]);
+#if GDR_ENABLED
+              bCleanCandExist = false;
+              xMotionEstimation(pu, origBuf, eRefPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], cMvTempSolid[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[eRefPicList], bCleanCandExist, true);
+#else
+              xMotionEstimation(pu, origBuf, eRefPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp,
+                                cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp,
+                                uiCostTemp, amvp[eRefPicList], true);
+#endif
+#if GDR_ENABLED
+              if (isEncodeGdrClean)
+              {
+                int mvp_idx = aaiMvpIdxBi[iRefList][iRefIdxTemp];
+                cMvPredBiSolid[iRefList][iRefIdxTemp] = amvp[eRefPicList].mvSolid[mvp_idx];
+                cMvTempSolid[iRefList][iRefIdxTemp] = amvp[eRefPicList].mvSolid[mvp_idx];
+                cMvTempValid[iRefList][iRefIdxTemp] = cs.isClean(pu.Y().bottomRight(), cMvTemp[iRefList][iRefIdxTemp], (RefPicList)iRefList, iRefIdxTemp);
+                if (cMvTempValid[iRefList][iRefIdxTemp])
+                {
+                  cMvTempValid[iRefList][iRefIdxTemp] = cMvTempSolid[iRefList][iRefIdxTemp];
+                }
 
-              cMvBi[iRefList]     = cMvTemp[iRefList][iRefIdxTemp];
-              iRefIdxBi[iRefList] = iRefIdxTemp;
+                uiCostTempOk = bCleanCandExist;
+                uiCostTempOk = uiCostTempOk && cMvPredBiSolid[iRefList][iRefIdxTemp];
+                uiCostTempOk = uiCostTempOk && cMvTempSolid[iRefList][iRefIdxTemp];
+                uiCostTempOk = uiCostTempOk && cMvTempValid[iRefList][iRefIdxTemp];
+              }
+#endif
 
-              uiCostBi            = uiCostTemp;
-              uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList];
-              uiMotBits[iRefList] -= ((cs.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0);
-              uiBits[2]           = uiBitsTemp;
+#if GDR_ENABLED
+              // note : uiCostTemp is the new Best MVP cost,
+              //        solid info will be at amvp[eRefPicList].mvSolid[aaiMvpIdx[iRefList][iRefIdxTemp]];
+              xCheckBestMVP(pu, eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], amvp[eRefPicList], uiBitsTemp, uiCostTemp, pu.cu->imv);
 
-              if(iNumIter!=1)
+              if (isEncodeGdrClean)
               {
-                //  Set motion
-                pu.mv    [eRefPicList] = cMvBi    [iRefList];
-                pu.refIdx[eRefPicList] = iRefIdxBi[iRefList];
+                int mvp_idx = aaiMvpIdxBi[iRefList][iRefIdxTemp];
+
+                cMvPredBiSolid[iRefList][iRefIdxTemp] = amvp[eRefPicList].mvSolid[mvp_idx];
+                cMvTempSolid[iRefList][iRefIdxTemp] = amvp[eRefPicList].mvSolid[mvp_idx];
+                cMvTempValid[iRefList][iRefIdxTemp] = cs.isClean(pu.Y().bottomRight(), cMvTemp[iRefList][iRefIdxTemp], (RefPicList)iRefList, iRefIdxTemp);
+                if (cMvTempValid[iRefList][iRefIdxTemp])
+                {
+                  cMvTempValid[iRefList][iRefIdxTemp] = cMvTempSolid[iRefList][iRefIdxTemp];
+                }
 
-                PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getBuf( UnitAreaRelative(cu, pu) );
-                motionCompensation( pu, predBufTmp, eRefPicList );
+                uiCostTempOk = true;
+                uiCostTempOk = uiCostTempOk && cMvPredBiSolid[iRefList][iRefIdxTemp];
+                uiCostTempOk = uiCostTempOk && cMvTempSolid[iRefList][iRefIdxTemp];
+                uiCostTempOk = uiCostTempOk && cMvTempValid[iRefList][iRefIdxTemp];
               }
-            }
-          } // for loop-iRefIdxTemp
+#else
 
-          if ( !bChanged )
-          {
-            if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred)
+              xCheckBestMVP(eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp],
+                            aaiMvpIdxBi[iRefList][iRefIdxTemp], amvp[eRefPicList], uiBitsTemp, uiCostTemp, pu.cu->imv);
+#endif
+#if GDR_ENABLED
+              allOk = (uiCostTemp < uiCostBi);
+              if (isEncodeGdrClean)
+              {
+                if (uiCostTempOk)
+                {
+                  allOk = (uiCostBiOk) ? (uiCostTemp < uiCostBi) : true;
+                }
+                else
+                {
+                  allOk = false;
+                }
+              }
+#endif
+#if GDR_ENABLED
+              if (allOk)
+#else
+              if (uiCostTemp < uiCostBi)
+#endif
+              {
+                bChanged = true;
+
+                cMvBi[iRefList]     = cMvTemp[iRefList][iRefIdxTemp];
+#if GDR_ENABLED
+                if (isEncodeGdrClean)
+                {
+                  cMvBiSolid[iRefList] = cMvTempSolid[iRefList][iRefIdxTemp];
+                  cMvBiValid[iRefList] = cs.isClean(pu.Y().bottomRight(), cMvTemp[iRefList][iRefIdxTemp], (RefPicList)iRefList, iRefIdxTemp);
+                }
+#endif
+                iRefIdxBi[iRefList] = iRefIdxTemp;
+
+                uiCostBi            = uiCostTemp;
+#if GDR_ENABLED
+                if (isEncodeGdrClean)
+                {
+                  uiCostBiOk = uiCostTempOk;
+                }
+#endif
+                uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1 - iRefList];
+                uiMotBits[iRefList] -= ((cs.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0);
+                uiBits[2] = uiBitsTemp;
+
+                if (iNumIter != 1)
+                {
+                  //  Set motion
+                  pu.mv[eRefPicList]     = cMvBi[iRefList];
+                  pu.refIdx[eRefPicList] = iRefIdxBi[iRefList];
+#if GDR_ENABLED
+                  if (isEncodeGdrClean)
+                  {
+                    pu.mvSolid[eRefPicList] = cMvBiSolid[iRefList];
+                    pu.mvValid[eRefPicList] = cs.isClean(pu.Y().bottomRight(), pu.mv[eRefPicList], (RefPicList)eRefPicList, pu.refIdx[eRefPicList]);
+                  }
+#endif
+                  PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getBuf(UnitAreaRelative(cu, pu));
+                  motionCompensation(pu, predBufTmp, eRefPicList);
+                }
+              }
+            }   // for loop-iRefIdxTemp
+
+            if (!bChanged)
             {
-              xCopyAMVPInfo(&aacAMVPInfo[0][iRefIdxBi[0]], &amvp[REF_PIC_LIST_0]);
-              xCheckBestMVP( REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], amvp[REF_PIC_LIST_0], uiBits[2], uiCostBi, pu.cu->imv);
-              if(!cs.picHeader->getMvdL1ZeroFlag())
+#if GDR_ENABLED
+              allOk = ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred);
+
+              if (isEncodeGdrClean)
+              {
+                if (uiCostBiOk)
+                {
+                  allOk = (uiCostOk[0] && uiCostOk[1]) ? ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred) : true;
+                }
+                else
+                {
+                  allOk = false;
+                }
+              }
+#endif
+#if GDR_ENABLED
+              if (allOk)
+#else
+              if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred)
+#endif
               {
-                xCopyAMVPInfo(&aacAMVPInfo[1][iRefIdxBi[1]], &amvp[REF_PIC_LIST_1]);
-                xCheckBestMVP( REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], amvp[REF_PIC_LIST_1], uiBits[2], uiCostBi, pu.cu->imv);
+                xCopyAMVPInfo(&aacAMVPInfo[0][iRefIdxBi[0]], &amvp[REF_PIC_LIST_0]);
+#if GDR_ENABLED
+                // note : uiCostBi is the new Best MVP cost,
+                //          solid info will be at amvp[eRefPicList].mvSolid[aaiMvpIdx[iRefList][iRefIdxTemp]];
+                xCheckBestMVP(pu, REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], amvp[REF_PIC_LIST_0], uiBits[2], uiCostBi, pu.cu->imv);
+
+                if (isEncodeGdrClean)
+                {
+                  int mvp_idx = aaiMvpIdxBi[0][iRefIdxBi[0]];
+
+                  cMvPredBiSolid[0][iRefIdxBi[0]] = amvp[0].mvSolid[mvp_idx];
+                  cMvBiSolid[0] = amvp[0].mvSolid[mvp_idx];
+                  cMvBiValid[0] = cs.isClean(pu.Y().bottomRight(), cMvBi[0], (RefPicList)0, iRefIdxBi[0]);
+                  if (cMvBiValid[0])
+                  {
+                    cMvBiValid[0] = cMvBiSolid[0];
+                  }
+
+                  uiCostBiOk = true;
+                  uiCostBiOk = uiCostBiOk && cMvPredBiSolid[0][iRefIdxBi[0]];
+                  uiCostBiOk = uiCostBiOk && cMvBiSolid[0];
+                  uiCostBiOk = uiCostBiOk && cMvBiValid[0];
+                }
+#else
+                xCheckBestMVP(REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]],
+                              amvp[REF_PIC_LIST_0], uiBits[2], uiCostBi, pu.cu->imv);
+#endif
+                if (!cs.picHeader->getMvdL1ZeroFlag())
+                {
+                  xCopyAMVPInfo(&aacAMVPInfo[1][iRefIdxBi[1]], &amvp[REF_PIC_LIST_1]);
+#if GDR_ENABLED
+                  // note : uiCostBi is the new Best MVP cost,
+                  //          solid info will be at amvp[eRefPicList].mvSolid[aaiMvpIdx[iRefList][iRefIdxTemp]];
+                  xCheckBestMVP(pu, REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], amvp[REF_PIC_LIST_1], uiBits[2], uiCostBi, pu.cu->imv);
+
+                  if (isEncodeGdrClean)   // GDR: refresh list-1 solid/valid flags after the xCheckBestMVP call above
+                  {
+                    int mvp_idx = aaiMvpIdxBi[1][iRefIdxBi[1]];   // MVP index currently selected for list 1
+
+                    cMvPredBiSolid[1][iRefIdxBi[1]] = amvp[REF_PIC_LIST_1].mvSolid[mvp_idx];   // fix: store the solid flag, not the MVP index (mirrors the list-0 branch)
+                    cMvBiSolid[1] = amvp[REF_PIC_LIST_1].mvSolid[mvp_idx];
+                    cMvBiValid[1] = cs.isClean(pu.Y().bottomRight(), cMvBi[1], (RefPicList)1, iRefIdxBi[1]);
+                    if (cMvBiValid[1])   // a clean MV only counts as valid when its predictor is solid too
+                    {
+                      cMvBiValid[1] = cMvBiSolid[1];
+                    }
+
+                    uiCostBiOk = true;   // NOTE(review): this discards the list-0 result assigned earlier in this branch - confirm intended
+                    uiCostBiOk = uiCostBiOk && cMvPredBiSolid[1][iRefIdxBi[1]];
+                    uiCostBiOk = uiCostBiOk && cMvBiSolid[1];
+                    uiCostBiOk = uiCostBiOk && cMvBiValid[1];
+                  }
+#else
+                  xCheckBestMVP(REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]],
+                                amvp[REF_PIC_LIST_1], uiBits[2], uiCostBi, pu.cu->imv);
+#endif
+                }
               }
+              break;
             }
-            break;
-          }
-        } // for loop-iter
+          }   // for loop-iter
         }
         cu.refIdxBi[0] = iRefIdxBi[0];
         cu.refIdxBi[1] = iRefIdxBi[1];
@@ -2667,12 +3633,31 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           CHECK (refIdxCur==-1 || refIdxTar==-1, "Uninitialized reference index not allowed");
 
           if ( aacAMVPInfo[curRefList][refIdxCur].mvCand[0] == aacAMVPInfo[curRefList][refIdxCur].mvCand[1] )
+          {
             aacAMVPInfo[curRefList][refIdxCur].numCand = 1;
+          }
           if ( aacAMVPInfo[tarRefList][refIdxTar].mvCand[0] == aacAMVPInfo[tarRefList][refIdxTar].mvCand[1] )
+          {
             aacAMVPInfo[tarRefList][refIdxTar].numCand = 1;
+          }
 
           MvField cCurMvField, cTarMvField;
           Distortion costStart = std::numeric_limits<Distortion>::max();
+
+#if GDR_ENABLED
+          bool cMvPredSymSolid[2] = { init_value, init_value };
+          bool cMvPredSymValid[2] = { init_value, init_value };
+
+          bool cCurMvFieldSolid = init_value;
+          bool cTarMvFieldSolid = init_value;
+          bool cCurMvFieldValid = init_value;
+          bool cTarMvFieldValid = init_value;
+
+          bool costStartOk = false;
+          bool symCostOk = init_value;
+          bool costOk = init_value;
+          bool bestCostOk = init_value;
+#endif
           for ( int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand; i++ )
           {
             for ( int j = 0; j < aacAMVPInfo[tarRefList][refIdxTar].numCand; j++ )
@@ -2680,11 +3665,46 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
               cCurMvField.setMvField( aacAMVPInfo[curRefList][refIdxCur].mvCand[i], refIdxCur );
               cTarMvField.setMvField( aacAMVPInfo[tarRefList][refIdxTar].mvCand[j], refIdxTar );
               Distortion cost = xGetSymmetricCost( pu, origBuf, eCurRefList, cCurMvField, cTarMvField, bcwIdx );
+
+#if GDR_ENABLED
+              if (isEncodeGdrClean)   // GDR: the cost of this (i, j) candidate pair is trusted only if both predictors are solid
+              {
+                cCurMvFieldSolid = aacAMVPInfo[curRefList][refIdxCur].mvSolid[i];   // current-list candidate i
+                cTarMvFieldSolid = aacAMVPInfo[tarRefList][refIdxTar].mvSolid[j];   // fix: target-list candidate is j (was i), matching mvCand[j] above
+                costOk = cCurMvFieldSolid && cTarMvFieldSolid;
+              }
+#endif
+#if GDR_ENABLED
+              allOk = (cost < costStart);
+              if (isEncodeGdrClean)
+              {
+                if (costOk)
+                {
+                  allOk = (costStartOk) ? (cost < costStart) : true;
+                }
+                else
+                {
+                  allOk = false;
+                }
+              }
+#endif
+#if GDR_ENABLED
+              if (allOk)
+#else
               if ( cost < costStart )
+#endif
               {
                 costStart = cost;
                 cMvPredSym[curRefList] = aacAMVPInfo[curRefList][refIdxCur].mvCand[i];
                 cMvPredSym[tarRefList] = aacAMVPInfo[tarRefList][refIdxTar].mvCand[j];
+#if GDR_ENABLED
+                if (isEncodeGdrClean)
+                {
+                  costStartOk = costOk;
+                  cMvPredSymSolid[curRefList] = aacAMVPInfo[curRefList][refIdxCur].mvSolid[i];
+                  cMvPredSymSolid[tarRefList] = aacAMVPInfo[tarRefList][refIdxTar].mvSolid[j];
+                }
+#endif
                 mvpIdxSym[curRefList] = i;
                 mvpIdxSym[tarRefList] = j;
               }
@@ -2693,6 +3713,13 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           cCurMvField.mv = cMvPredSym[curRefList];
           cTarMvField.mv = cMvPredSym[tarRefList];
 
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            cCurMvFieldSolid = cMvPredSymSolid[curRefList];
+            cTarMvFieldSolid = cMvPredSymSolid[tarRefList];
+          }
+#endif
           m_pcRdCost->setCostScale(0);
           Mv pred = cMvPredSym[curRefList];
           pred.changeTransPrecInternal2Amvr(pu.cu->imv);
@@ -2737,7 +3764,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           for (int i = 0; i < m_uniMvListSize; i++)
           {
             if ( symmvdCands.size() >= 5 )
+            {
               break;
+            }
             BlkUniMvInfo* curMvInfo = m_uniMvList + ((m_uniMvListIdx - 1 - i + m_uniMvListMaxSize) % (m_uniMvListMaxSize));
             smmvdCandsGen(curMvInfo->uniMvs[curRefList][refIdxCur], true);
           }
@@ -2755,11 +3784,63 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
             }
 
             Distortion bestCost = costStart;
+#if GDR_ENABLED
+            symmvdCheckBestMvp(pu, origBuf, mvStart, (RefPicList)curRefList, aacAMVPInfo, bcwIdx, cMvPredSym, cMvPredSymSolid, mvpIdxSym, costStart);
+#else
             symmvdCheckBestMvp(pu, origBuf, mvStart, (RefPicList)curRefList, aacAMVPInfo, bcwIdx, cMvPredSym, mvpIdxSym, costStart);
+#endif
+
+#if GDR_ENABLED
+            if (isEncodeGdrClean)   // GDR: re-derive solid/valid flags for the start point after symmvdCheckBestMvp
+            {
+              int mvpIdxCur = mvpIdxSym[curRefList];   // fix: index by list, as mvpIdxSym is written at [curRefList]/[tarRefList]
+              int mvpIdxTar = mvpIdxSym[tarRefList];   // (the original read literal [0]/[1], which is only right when curRefList == 0)
+
+              cMvPredSymSolid[curRefList] = aacAMVPInfo[curRefList][refIdxCur].mvSolid[mvpIdxCur];
+              cMvPredSymSolid[tarRefList] = aacAMVPInfo[tarRefList][refIdxTar].mvSolid[mvpIdxTar];
+              cMvPredSymValid[curRefList] = cs.isClean(pu.Y().bottomRight(), mvStart, (RefPicList)curRefList, pu.cu->slice->getSymRefIdx(curRefList));
+              cMvPredSymValid[tarRefList] = cs.isClean(pu.Y().bottomRight(), mvStart, (RefPicList)tarRefList, pu.cu->slice->getSymRefIdx(tarRefList));   // NOTE(review): checks mvStart, not the mirrored target MV - confirm
+
+              costStartOk = true;   // costStart is trusted only when both predictors are solid and both checks pass
+              costStartOk = costStartOk && cMvPredSymSolid[curRefList];
+              costStartOk = costStartOk && cMvPredSymSolid[tarRefList];
+              costStartOk = costStartOk && cMvPredSymValid[curRefList];
+              costStartOk = costStartOk && cMvPredSymValid[tarRefList];
+            }
+#endif
+
+#if GDR_ENABLED
+            bool allOk = (costStart < bestCost);
+            if (isEncodeGdrClean)
+            {
+              if (costStartOk)
+              {
+                allOk = (bestCostOk) ? (costStart < bestCost) : true;
+              }
+              else
+              {
+                allOk = false;
+              }
+            }
+#endif
+
+#if GDR_ENABLED
+            if (allOk)
+#else
             if (costStart < bestCost)
+#endif
             {
               cCurMvField.setMvField(mvStart, refIdxCur);
               cTarMvField.setMvField(mvStart.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar);
+#if GDR_ENABLED
+              if (isEncodeGdrClean)
+              {
+                cCurMvFieldSolid = cMvPredSymSolid[curRefList];
+                cTarMvFieldSolid = cMvPredSymSolid[tarRefList];
+                cCurMvFieldValid = cMvPredSymValid[curRefList];
+                cTarMvFieldValid = cMvPredSymValid[tarRefList];
+              }
+#endif
             }
           }
           Mv startPtMv = cCurMvField.mv;
@@ -2768,13 +3849,47 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           symCost = costStart - mvpCost;
 
           // ME
+#if GDR_ENABLED
+          xSymmetricMotionEstimation(pu, origBuf, cMvPredSym[curRefList], cMvPredSym[tarRefList], eCurRefList, cCurMvField, cTarMvField, symCost, bcwIdx, costStartOk);
+#else
           xSymmetricMotionEstimation( pu, origBuf, cMvPredSym[curRefList], cMvPredSym[tarRefList], eCurRefList, cCurMvField, cTarMvField, symCost, bcwIdx );
-
+#endif
           symCost += mvpCost;
 
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            cCurMvFieldValid = cs.isClean(pu.Y().bottomRight(), cCurMvField.mv, (RefPicList)(eCurRefList), cCurMvField.refIdx);
+            cTarMvFieldValid = cs.isClean(pu.Y().bottomRight(), cTarMvField.mv, (RefPicList)(1 - eCurRefList), cTarMvField.refIdx);
+            symCostOk = (cMvPredSymSolid[curRefList] && cMvPredSymSolid[tarRefList]) && (cCurMvFieldValid && cTarMvFieldValid);
+          }
+#endif
           if (startPtMv != cCurMvField.mv)
           { // if ME change MV, run a final check for best MVP.
+#if GDR_ENABLED
+            symmvdCheckBestMvp(pu, origBuf, cCurMvField.mv, (RefPicList)curRefList, aacAMVPInfo, bcwIdx, cMvPredSym, cMvPredSymSolid, mvpIdxSym, symCost);
+#else
             symmvdCheckBestMvp(pu, origBuf, cCurMvField.mv, (RefPicList)curRefList, aacAMVPInfo, bcwIdx, cMvPredSym, mvpIdxSym, symCost, true);
+#endif
+
+#if GDR_ENABLED
+            if (isEncodeGdrClean)   // GDR: re-derive solid/valid flags after the final symmvdCheckBestMvp
+            {
+              int mvpIdxCur = mvpIdxSym[curRefList];   // fix: index by list, as mvpIdxSym is written at [curRefList]/[tarRefList]
+              int mvpIdxTar = mvpIdxSym[tarRefList];   // (the original read literal [0]/[1], which is only right when curRefList == 0)
+
+              cMvPredSymSolid[curRefList] = aacAMVPInfo[curRefList][refIdxCur].mvSolid[mvpIdxCur];
+              cMvPredSymSolid[tarRefList] = aacAMVPInfo[tarRefList][refIdxTar].mvSolid[mvpIdxTar];
+              cMvPredSymValid[curRefList] = cs.isClean(pu.Y().bottomRight(), cCurMvField.mv, (RefPicList)curRefList, pu.cu->slice->getSymRefIdx(curRefList));
+              cMvPredSymValid[tarRefList] = cs.isClean(pu.Y().bottomRight(), cCurMvField.mv, (RefPicList)tarRefList, pu.cu->slice->getSymRefIdx(tarRefList));   // NOTE(review): checks cCurMvField.mv, not cTarMvField.mv - confirm
+
+              symCostOk = true;   // symCost is trusted only when both predictors are solid and both checks pass
+              symCostOk = symCostOk && cMvPredSymSolid[curRefList];
+              symCostOk = symCostOk && cMvPredSymSolid[tarRefList];
+              symCostOk = symCostOk && cMvPredSymValid[curRefList];
+              symCostOk = symCostOk && cMvPredSymValid[tarRefList];
+            }
+#endif
           }
 
           bits = uiMbBits[2];
@@ -2786,13 +3901,37 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           if( m_pcEncCfg->getMCTSEncConstraint() )
           {
             if( !( MCTSHelper::checkMvForMCTSConstraint( pu, cCurMvField.mv ) && MCTSHelper::checkMvForMCTSConstraint( pu, cTarMvField.mv ) ) )
+            {
               symCost = std::numeric_limits<Distortion>::max();
+            }
           }
           // save results
+#if GDR_ENABLED
+          bool allOk = (symCost < uiCostBi);
+          if (isEncodeGdrClean)
+          {
+            if (symCostOk)
+            {
+              allOk = (uiCostBiOk) ? (symCost < uiCostBi) : true;
+            }
+            else
+            {
+              allOk = false;
+            }
+          }
+#endif
+
+#if GDR_ENABLED
+          if (allOk)
+#else
           if ( symCost < uiCostBi )
+#endif
           {
             uiCostBi = symCost;
             symMode = 1 + curRefList;
+#if GDR_ENABLED
+            uiCostBiOk = symCostOk;
+#endif
 
             cMvBi[curRefList] = cCurMvField.mv;
             iRefIdxBi[curRefList] = cCurMvField.refIdx;
@@ -2803,6 +3942,16 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
             iRefIdxBi[tarRefList] = cTarMvField.refIdx;
             aaiMvpIdxBi[tarRefList][cTarMvField.refIdx] = mvpIdxSym[tarRefList];
             cMvPredBi[tarRefList][iRefIdxBi[tarRefList]] = cMvPredSym[tarRefList];
+
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              cMvBiValid[curRefList] = cCurMvFieldValid;
+              cMvBiValid[tarRefList] = cTarMvFieldValid;
+              cMvPredBiSolid[curRefList][iRefIdxBi[curRefList]] = cMvPredSymSolid[curRefList];
+              cMvPredBiSolid[tarRefList][iRefIdxBi[tarRefList]] = cMvPredSymSolid[tarRefList];
+            }
+#endif
           }
         }
       } // if (B_SLICE)
@@ -2810,43 +3959,107 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
 
       //  Clear Motion Field
-    pu.mv    [REF_PIC_LIST_0] = Mv();
-    pu.mv    [REF_PIC_LIST_1] = Mv();
-    pu.mvd   [REF_PIC_LIST_0] = cMvZero;
-    pu.mvd   [REF_PIC_LIST_1] = cMvZero;
-    pu.refIdx[REF_PIC_LIST_0] = NOT_VALID;
-    pu.refIdx[REF_PIC_LIST_1] = NOT_VALID;
-    pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
-    pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
-    pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
-    pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
+      pu.mv[REF_PIC_LIST_0]     = Mv();
+      pu.mv[REF_PIC_LIST_1]     = Mv();
+      pu.mvd[REF_PIC_LIST_0]    = cMvZero;
+      pu.mvd[REF_PIC_LIST_1]    = cMvZero;
+      pu.refIdx[REF_PIC_LIST_0] = NOT_VALID;
+      pu.refIdx[REF_PIC_LIST_1] = NOT_VALID;
+      pu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
+      pu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
+      pu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
+      pu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
+
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      pu.mvSolid[REF_PIC_LIST_0] = true;
+      pu.mvSolid[REF_PIC_LIST_1] = true;
+      pu.mvValid[REF_PIC_LIST_0] = true;
+      pu.mvValid[REF_PIC_LIST_1] = true;
+    }
+#endif
+      // Set Motion Field
 
+      cMv[1]     = mvValidList1;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        cMvSolid[1] = mvValidList1Solid;
+        cMvValid[1] = mvValidList1Valid;
+      }
+#endif
+      iRefIdx[1] = refIdxValidList1;
+      uiBits[1]  = bitsValidList1;
+      uiCost[1]  = costValidList1;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        uiCostOk[1] = costValidList1Ok;
+      }
+#endif
+      if (cu.cs->pps->getWPBiPred() == true && tryBipred && (bcwIdx != BCW_DEFAULT))
+      {
+        CHECK(iRefIdxBi[0] < 0, "Invalid picture reference index");
+        CHECK(iRefIdxBi[1] < 0, "Invalid picture reference index");
+        wp0 = cu.cs->slice->getWpScaling(REF_PIC_LIST_0, iRefIdxBi[0]);
+        wp1 = cu.cs->slice->getWpScaling(REF_PIC_LIST_1, iRefIdxBi[1]);
+        if (WPScalingParam::isWeighted(wp0) || WPScalingParam::isWeighted(wp1))
+        {
+          uiCostBi       = MAX_UINT;
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            uiCostBiOk = false;
+          }
+#endif
+          enforceBcwPred = false;
+        }
+      }
+      if (enforceBcwPred)
+      {
+        uiCost[0] = uiCost[1] = MAX_UINT;
+#if GDR_ENABLED
+        uiCostOk[0] = uiCostOk[1] = false;
+#endif
+      }
 
-    // Set Motion Field
+      uiLastModeTemp = uiLastMode;
+#if GDR_ENABLED
+      allOk = ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred);
 
-    cMv    [1] = mvValidList1;
-    iRefIdx[1] = refIdxValidList1;
-    uiBits [1] = bitsValidList1;
-    uiCost [1] = costValidList1;
-    if (cu.cs->pps->getWPBiPred() == true && tryBipred && (bcwIdx != BCW_DEFAULT))
-    {
-      CHECK(iRefIdxBi[0]<0, "Invalid picture reference index");
-      CHECK(iRefIdxBi[1]<0, "Invalid picture reference index");
-      wp0 = cu.cs->slice->getWpScaling(REF_PIC_LIST_0, iRefIdxBi[0]);
-      wp1 = cu.cs->slice->getWpScaling(REF_PIC_LIST_1, iRefIdxBi[1]);
-      if (WPScalingParam::isWeighted(wp0) || WPScalingParam::isWeighted(wp1))
+      if (isEncodeGdrClean)
       {
-        uiCostBi = MAX_UINT;
-        enforceBcwPred = false;
+        if (uiCostBiOk)
+        {
+          allOk = (uiCostOk[0] && uiCostOk[1]) ? ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred) : true;
+        }
+        else
+        {
+          allOk = false;
+        }
       }
-    }
-    if( enforceBcwPred )
-    {
-      uiCost[0] = uiCost[1] = MAX_UINT;
-    }
 
-      uiLastModeTemp = uiLastMode;
+      bool L0ok = (uiCost[0] <= uiCost[1]);
+
+      if (isEncodeGdrClean)
+      {
+        if (uiCostOk[0])
+        {
+          L0ok = (uiCostOk[1]) ? (uiCost[0] <= uiCost[1]) : true;
+        }
+        else
+        {
+          L0ok = false;
+        }
+      }
+#endif
+
+#if GDR_ENABLED
+      if (allOk)
+#else
       if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1])
+#endif
       {
         uiLastMode = 2;
         pu.mv    [REF_PIC_LIST_0] = cMvBi[0];
@@ -2862,8 +4075,23 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
         pu.interDir = 3;
 
         pu.cu->smvdMode = symMode;
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          int mvp_idx0 = pu.mvpIdx[REF_PIC_LIST_0];
+          int mvp_idx1 = pu.mvpIdx[REF_PIC_LIST_1];
+          pu.mvSolid[REF_PIC_LIST_0] = cMvBiSolid[REF_PIC_LIST_0] && cMvPredBiSolid[REF_PIC_LIST_0][mvp_idx0];
+          pu.mvSolid[REF_PIC_LIST_1] = cMvBiSolid[REF_PIC_LIST_1] && cMvPredBiSolid[REF_PIC_LIST_1][mvp_idx1];
+          pu.mvValid[REF_PIC_LIST_0] = cs.isClean(pu.Y().bottomRight(), pu.mv[REF_PIC_LIST_0], (RefPicList)REF_PIC_LIST_0, pu.refIdx[REF_PIC_LIST_0]);
+          pu.mvValid[REF_PIC_LIST_1] = cs.isClean(pu.Y().bottomRight(), pu.mv[REF_PIC_LIST_1], (RefPicList)REF_PIC_LIST_1, pu.refIdx[REF_PIC_LIST_1]);
+        }
+#endif
       }
+#if GDR_ENABLED
+      else if (L0ok)
+#else
       else if ( uiCost[0] <= uiCost[1] )
+#endif
       {
         uiLastMode = 0;
         pu.mv    [REF_PIC_LIST_0] = cMv[0];
@@ -2872,6 +4100,13 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
         pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]];
         pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]];
         pu.interDir = 1;
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          pu.mvSolid[REF_PIC_LIST_0] = cMvSolid[REF_PIC_LIST_0] && cMvPredSolid[0][iRefIdx[0]];
+          pu.mvValid[REF_PIC_LIST_0] = cs.isClean(pu.Y().bottomRight(), pu.mv[REF_PIC_LIST_0], (RefPicList)REF_PIC_LIST_0, pu.refIdx[REF_PIC_LIST_0]);
+        }
+#endif
       }
       else
       {
@@ -2882,6 +4117,13 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
         pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]];
         pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]];
         pu.interDir = 2;
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          pu.mvSolid[REF_PIC_LIST_1] = cMvSolid[REF_PIC_LIST_1] && cMvPredSolid[1][iRefIdx[1]];
+          pu.mvValid[REF_PIC_LIST_1] = cs.isClean(pu.Y().bottomRight(), pu.mv[REF_PIC_LIST_1], (RefPicList)REF_PIC_LIST_1, pu.refIdx[REF_PIC_LIST_1]);
+        }
+#endif
       }
 
       if( bcwIdx != BCW_DEFAULT )
@@ -2889,14 +4131,29 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
         cu.BcwIdx = BCW_DEFAULT; // Reset to default for the Non-NormalMC modes.
       }
 
-    uiHevcCost = ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) ? uiCostBi : ( ( uiCost[0] <= uiCost[1] ) ? uiCost[0] : uiCost[1] );
+      uiHevcCost = (uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1])
+                     ? uiCostBi
+                     : ((uiCost[0] <= uiCost[1]) ? uiCost[0] : uiCost[1]);
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        uiHevcCostOk = (uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) ? uiCostBiOk : ((uiCost[0] <= uiCost[1]) ? uiCostOk[0] : uiCostOk[1]);
+      }
+#endif
     }
+
     if (cu.Y().width > 8 && cu.Y().height > 8 && cu.slice->getSPS()->getUseAffine()
       && checkAffine
       && (bcwIdx == BCW_DEFAULT || m_affineModeSelected || !m_pcEncCfg->getUseBcwFast())
       )
     {
       m_hevcCost = uiHevcCost;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        m_hevcCostOk = uiHevcCostOk;
+      }
+#endif
       // save normal hevc result
       uint32_t uiMRGIndex = pu.mergeIdx;
       bool bMergeFlag = pu.mergeFlag;
@@ -2916,28 +4173,101 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
       cHevcMvField[0].setMvField( pu.mv[REF_PIC_LIST_0], pu.refIdx[REF_PIC_LIST_0] );
       cHevcMvField[1].setMvField( pu.mv[REF_PIC_LIST_1], pu.refIdx[REF_PIC_LIST_1] );
 
+#if GDR_ENABLED
+      bool cHevcMvFieldSolid[2] = { true, true };
+      bool cHevcMvFieldValid[2] = { true, true };
+
+      if (isEncodeGdrClean)
+      {
+        cHevcMvFieldSolid[0] = pu.mvSolid[0];
+        cHevcMvFieldSolid[1] = pu.mvSolid[1];
+        cHevcMvFieldValid[0] = pu.mvValid[0];
+        cHevcMvFieldValid[1] = pu.mvValid[1];
+      }
+#endif
+
       // do affine ME & Merge
       cu.affineType = AFFINEMODEL_4PARAM;
       Mv acMvAffine4Para[2][33][3];
+#if GDR_ENABLED
+      bool acMvAffine4ParaSolid[2][33][3];
+
+      for (int i = 0; i < 2; i++)
+        for (int j = 0; j < 33; j++)
+          for (int k = 0; k < 3; k++)
+            acMvAffine4ParaSolid[i][j][k] = true;
+#endif
       int refIdx4Para[2] = { -1, -1 };
 
+#if GDR_ENABLED
+      xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, cMvHevcTempSolid, acMvAffine4Para, acMvAffine4ParaSolid, refIdx4Para, bcwIdx, enforceBcwPred,
+        ((cu.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0));
+#else
       xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, bcwIdx, enforceBcwPred,
         ((cu.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0));
+#endif
 
       if ( pu.cu->imv == 0 )
       {
+#if GDR_ENABLED
+        storeAffineMotion(pu.mvAffi, pu.mvAffiSolid, pu.refIdx, AFFINEMODEL_4PARAM, bcwIdx);
+#else
         storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_4PARAM, bcwIdx );
+#endif
       }
 
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        uiAffineCostOk = true;
+
+        if (pu.interDir & 0x01)
+        {
+          uiAffineCostOk = uiAffineCostOk && pu.mvAffiSolid[0][0] && pu.mvAffiSolid[0][1];
+          uiAffineCostOk = uiAffineCostOk && pu.mvAffiValid[0][0] && pu.mvAffiValid[0][1];
+        }
+
+        if (pu.interDir & 0x02)
+        {
+          uiAffineCostOk = uiAffineCostOk && pu.mvAffiSolid[1][0] && pu.mvAffiSolid[1][1];
+          uiAffineCostOk = uiAffineCostOk && pu.mvAffiValid[1][0] && pu.mvAffiValid[1][1];
+        }
+      }
+#endif
+
       if ( cu.slice->getSPS()->getUseAffineType() )
       {
+#if GDR_ENABLED
+        allOk = (uiAffineCost < uiHevcCost * 1.05);
+        if (isEncodeGdrClean)
+        {
+          if (uiAffineCostOk)
+          {
+            allOk = (uiHevcCostOk) ? (uiAffineCost < uiHevcCost * 1.05) : true;
+          }
+          else
+          {
+            allOk = false;
+          }
+        }
+#endif
+
+#if GDR_ENABLED
+        if (allOk)
+#else
         if ( uiAffineCost < uiHevcCost * 1.05 ) ///< condition for 6 parameter affine ME
+#endif
         {
           // save 4 parameter results
           Mv bestMv[2][3], bestMvd[2][3];
           int bestMvpIdx[2], bestMvpNum[2], bestRefIdx[2];
           uint8_t bestInterDir;
 
+#if GDR_ENABLED
+          bool bestMvSolid[2][3];
+          bool bestMvValid[2][3];
+#endif
+
           bestInterDir = pu.interDir;
           bestRefIdx[0] = pu.refIdx[0];
           bestRefIdx[1] = pu.refIdx[1];
@@ -2954,6 +4284,19 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
             bestMvd[refList][0] = pu.mvdAffi[refList][0];
             bestMvd[refList][1] = pu.mvdAffi[refList][1];
             bestMvd[refList][2] = pu.mvdAffi[refList][2];
+
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              bestMvSolid[refList][0] = pu.mvAffiSolid[refList][0];
+              bestMvSolid[refList][1] = pu.mvAffiSolid[refList][1];
+              bestMvSolid[refList][2] = pu.mvAffiSolid[refList][2];
+
+              bestMvValid[refList][0] = pu.mvAffiValid[refList][0];
+              bestMvValid[refList][1] = pu.mvAffiValid[refList][1];
+              bestMvValid[refList][2] = pu.mvAffiValid[refList][2];
+            }
+#endif
           }
 
           refIdx4Para[0] = bestRefIdx[0];
@@ -2961,16 +4304,63 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
           Distortion uiAffine6Cost = std::numeric_limits<Distortion>::max();
           cu.affineType = AFFINEMODEL_6PARAM;
+#if GDR_ENABLED
+          xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, cMvHevcTempSolid, acMvAffine4Para, acMvAffine4ParaSolid, refIdx4Para, bcwIdx, enforceBcwPred,
+            ((cu.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0));
+#else
           xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, bcwIdx, enforceBcwPred,
             ((cu.slice->getSPS()->getUseBcw() == true) ? getWeightIdxBits(bcwIdx) : 0));
+#endif
+
+          if ( pu.cu->imv == 0 )
+          {
+#if GDR_ENABLED
+            storeAffineMotion(pu.mvAffi, pu.mvAffiSolid, pu.refIdx, AFFINEMODEL_6PARAM, bcwIdx);
+#else
+            storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_6PARAM, bcwIdx );
+#endif
+          }
+
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            uiAffine6CostOk = true;
+
+            if (pu.interDir & 0x01)
+            {
+              uiAffine6CostOk = uiAffine6CostOk && pu.mvAffiSolid[0][0] && pu.mvAffiSolid[0][1] && pu.mvAffiSolid[0][2];
+              uiAffine6CostOk = uiAffine6CostOk && pu.mvAffiValid[0][0] && pu.mvAffiValid[0][1] && pu.mvAffiValid[0][2];
+            }
 
-          if ( pu.cu->imv == 0 )
+            if (pu.interDir & 0x02)
+            {
+              uiAffine6CostOk = uiAffine6CostOk && pu.mvAffiSolid[1][0] && pu.mvAffiSolid[1][1] && pu.mvAffiSolid[1][2];
+              uiAffine6CostOk = uiAffine6CostOk && pu.mvAffiValid[1][0] && pu.mvAffiValid[1][1] && pu.mvAffiValid[1][2];
+            }
+          }
+#endif
+
+#if GDR_ENABLED
+          allOk = (uiAffineCost <= uiAffine6Cost); // allOk == true keeps the saved 4-parameter result
+          if (isEncodeGdrClean)
           {
-            storeAffineMotion( pu.mvAffi, pu.refIdx, AFFINEMODEL_6PARAM, bcwIdx );
+            if (uiAffineCostOk)
+            {
+              allOk = (uiAffine6CostOk) ? (uiAffineCost <= uiAffine6Cost) : true; // compare 4- vs 6-parameter cost; keep 4-parameter when the 6-parameter result is not OK
+            }
+            else
+            {
+              allOk = false; // 4-parameter result is not OK: take the 6-parameter result
+            }
           }
+#endif
 
           // reset to 4 parameter affine inter mode
+#if GDR_ENABLED
+          if (allOk) // allOk already contains the cost comparison; re-testing it here would undo the OK-flag override
+#else
           if ( uiAffineCost <= uiAffine6Cost )
+#endif
           {
             cu.affineType = AFFINEMODEL_4PARAM;
             pu.interDir = bestInterDir;
@@ -2989,10 +4379,37 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
             PU::setAllAffineMv( pu, bestMv[0][0], bestMv[0][1], bestMv[0][2], REF_PIC_LIST_0);
             PU::setAllAffineMv( pu, bestMv[1][0], bestMv[1][1], bestMv[1][2], REF_PIC_LIST_1);
+
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              pu.mvAffiSolid[REF_PIC_LIST_0][0] = bestMvSolid[REF_PIC_LIST_0][0];
+              pu.mvAffiSolid[REF_PIC_LIST_0][1] = bestMvSolid[REF_PIC_LIST_0][1];
+              pu.mvAffiSolid[REF_PIC_LIST_0][2] = bestMvSolid[REF_PIC_LIST_0][2];
+
+              pu.mvAffiValid[REF_PIC_LIST_0][0] = bestMvValid[REF_PIC_LIST_0][0];
+              pu.mvAffiValid[REF_PIC_LIST_0][1] = bestMvValid[REF_PIC_LIST_0][1];
+              pu.mvAffiValid[REF_PIC_LIST_0][2] = bestMvValid[REF_PIC_LIST_0][2];
+
+              pu.mvAffiSolid[REF_PIC_LIST_1][0] = bestMvSolid[REF_PIC_LIST_1][0];
+              pu.mvAffiSolid[REF_PIC_LIST_1][1] = bestMvSolid[REF_PIC_LIST_1][1];
+              pu.mvAffiSolid[REF_PIC_LIST_1][2] = bestMvSolid[REF_PIC_LIST_1][2];
+
+              pu.mvAffiValid[REF_PIC_LIST_1][0] = bestMvValid[REF_PIC_LIST_1][0];
+              pu.mvAffiValid[REF_PIC_LIST_1][1] = bestMvValid[REF_PIC_LIST_1][1];
+              pu.mvAffiValid[REF_PIC_LIST_1][2] = bestMvValid[REF_PIC_LIST_1][2];
+            }
+#endif
           }
           else
           {
             uiAffineCost = uiAffine6Cost;
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              uiAffineCostOk = uiAffine6CostOk;
+            }
+#endif
           }
         }
 
@@ -3006,7 +4423,26 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           uiAffineCost = std::numeric_limits<Distortion>::max();
         }
       }
+#if GDR_ENABLED
+      allOk = (uiHevcCost <= uiAffineCost);
+      if (isEncodeGdrClean)
+      {
+        if (uiHevcCostOk)
+        {
+          allOk = (uiAffineCostOk) ? (uiHevcCost <= uiAffineCost) : true;
+        }
+        else
+        {
+          allOk = false;
+        }
+      }
+#endif
+
+#if GDR_ENABLED
+      if (allOk)
+#else
       if ( uiHevcCost <= uiAffineCost )
+#endif
       {
         // set hevc me result
         cu.affine = false;
@@ -3025,6 +4461,15 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
         pu.mvpNum[REF_PIC_LIST_1] = uiMvpNum[1];
         pu.mvd[REF_PIC_LIST_0] = cMvd[0];
         pu.mvd[REF_PIC_LIST_1] = cMvd[1];
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          pu.mvSolid[REF_PIC_LIST_0] = cHevcMvFieldSolid[0];
+          pu.mvSolid[REF_PIC_LIST_1] = cHevcMvFieldSolid[1];
+          pu.mvValid[REF_PIC_LIST_0] = cHevcMvFieldValid[0];
+          pu.mvValid[REF_PIC_LIST_1] = cHevcMvFieldValid[1];
+        }
+#endif
       }
       else
       {
@@ -3043,9 +4488,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
     }
     m_maxCompIDToPred = MAX_NUM_COMPONENT;
 
-    {
-      PU::spanMotionInfo( pu, mergeCtx );
-    }
+    PU::spanMotionInfo(pu, mergeCtx);
 
     m_skipPROF = false;
     m_encOnly = false;
@@ -3093,6 +4536,25 @@ void InterSearch::xEstimateMvPredAMVP( PredictionUnit& pu, PelUnitBuf& origBuf,
   int        i;
 
   AMVPInfo*  pcAMVPInfo = &rAMVPInfo;
+#if GDR_ENABLED
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
+
+#if GDR_ENABLED
+  if (isEncodeGdrClean)
+  {
+    pcAMVPInfo->allCandSolidInAbove = true;
+    for (int i = 0; i < AMVP_MAX_NUM_CANDS_MEM; i++)
+    {
+      pcAMVPInfo->mvSolid[i] = true;
+      pcAMVPInfo->mvValid[i] = true;
+    }
+  }
+
+  bool uiBestCostOk = false;
+  bool uiTmpCostOk = false;
+#endif
 
   // Fill the MV Candidates
   if (!bFilled)
@@ -3103,6 +4565,12 @@ void InterSearch::xEstimateMvPredAMVP( PredictionUnit& pu, PelUnitBuf& origBuf,
   // initialize Mvp index & Mvp
   iBestIdx = 0;
   cBestMv  = pcAMVPInfo->mvCand[0];
+#if GDR_ENABLED
+  if (isEncodeGdrClean)
+  {
+    uiBestCostOk = pcAMVPInfo->mvSolid[0];
+  }
+#endif
 
   PelUnitBuf predBuf = m_tmpStorageLCU.getBuf( UnitAreaRelative(*pu.cu, pu) );
 
@@ -3110,12 +4578,46 @@ void InterSearch::xEstimateMvPredAMVP( PredictionUnit& pu, PelUnitBuf& origBuf,
   for( i = 0 ; i < pcAMVPInfo->numCand; i++)
   {
     Distortion uiTmpCost = xGetTemplateCost( pu, origBuf, predBuf, pcAMVPInfo->mvCand[i], i, AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdx );
+
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      uiTmpCostOk = pcAMVPInfo->mvSolid[i];
+    }
+#endif
+
+#if GDR_ENABLED
+    bool allOk = (uiBestCost > uiTmpCost); // default decision: candidate i replaces the best when it is cheaper
+
+    if (isEncodeGdrClean)
+    {
+      if (uiTmpCostOk) // candidate flag is tested first, matching the other allOk decisions in this file
+      {
+        allOk = (uiBestCostOk) ? (uiBestCost > uiTmpCost) : true; // solid candidate always replaces a non-solid best
+      }
+      else
+      {
+        allOk = false; // non-solid candidate never replaces the current best
+      }
+    }
+#endif
+
+#if GDR_ENABLED
+    if (allOk)
+#else
     if( uiBestCost > uiTmpCost )
+#endif
     {
       uiBestCost     = uiTmpCost;
       cBestMv        = pcAMVPInfo->mvCand[i];
       iBestIdx       = i;
       (*puiDistBiP)  = uiTmpCost;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        uiBestCostOk = uiTmpCostOk;
+      }
+#endif
     }
   }
 
@@ -3124,6 +4626,13 @@ void InterSearch::xEstimateMvPredAMVP( PredictionUnit& pu, PelUnitBuf& origBuf,
   pu.mvpIdx[eRefPicList] = iBestIdx;
   pu.mvpNum[eRefPicList] = pcAMVPInfo->numCand;
 
+#if GDR_ENABLED
+  if (isEncodeGdrClean)
+  {
+    pu.mvpSolid[eRefPicList] = pcAMVPInfo->mvSolid[iBestIdx];
+  }
+#endif
+
   return;
 }
 
@@ -3168,11 +4677,28 @@ void InterSearch::xCopyAMVPInfo (AMVPInfo* pSrc, AMVPInfo* pDst)
   for (int i = 0; i < pSrc->numCand; i++)
   {
     pDst->mvCand[i] = pSrc->mvCand[i];
+#if GDR_ENABLED
+    pDst->mvPos[i] = pSrc->mvPos[i];
+    pDst->mvSolid[i] = pSrc->mvSolid[i];
+    pDst->mvValid[i] = pSrc->mvValid[i];
+    pDst->mvType[i] = pSrc->mvType[i];
+#endif
   }
 }
 
+#if GDR_ENABLED
+void InterSearch::xCheckBestMVP(PredictionUnit &pu, RefPicList eRefPicList, Mv cMv, Mv& rcMvPred, int& riMVPIdx, AMVPInfo& amvpInfo, uint32_t& ruiBits, Distortion& ruiCost, const uint8_t imv)
+#else
 void InterSearch::xCheckBestMVP ( RefPicList eRefPicList, Mv cMv, Mv& rcMvPred, int& riMVPIdx, AMVPInfo& amvpInfo, uint32_t& ruiBits, Distortion& ruiCost, const uint8_t imv )
+#endif
 {
+#if GDR_ENABLED
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool iBestMvBitsOk = false;
+  bool iMvBitsOk = false;
+#endif
+
   if ( imv > 0 && imv < 3 )
   {
     return;
@@ -3199,6 +4725,12 @@ void InterSearch::xCheckBestMVP ( RefPicList eRefPicList, Mv cMv, Mv& rcMvPred,
   int iOrgMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.getHor(), mv.getVer(), 0);
   iOrgMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];
   int iBestMvBits = iOrgMvBits;
+#if GDR_ENABLED
+  if (isEncodeGdrClean)
+  {
+    iBestMvBitsOk = pcAMVPInfo->mvSolid[riMVPIdx];
+  }
+#endif
 
   for (int iMVPIdx = 0; iMVPIdx < pcAMVPInfo->numCand; iMVPIdx++)
   {
@@ -3213,10 +4745,36 @@ void InterSearch::xCheckBestMVP ( RefPicList eRefPicList, Mv cMv, Mv& rcMvPred,
     int iMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.getHor(), mv.getVer(), 0);
     iMvBits += m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];
 
+#if GDR_ENABLED
+    bool allOk = (iMvBits < iBestMvBits); // default decision: plain bit-count comparison
+    if (isEncodeGdrClean)
+    {
+      iMvBitsOk = pcAMVPInfo->mvSolid[iMVPIdx]; // candidate flag is the mvSolid entry of this AMVP index
+      if (iMvBitsOk)
+      {
+        allOk = (iBestMvBitsOk) ? (iMvBits < iBestMvBits) : true; // flagged candidate always replaces an unflagged best
+      }
+      else
+      {
+        allOk = false; // unflagged candidate never replaces the current best
+      }
+    }
+#endif
+
+#if GDR_ENABLED
+    if (allOk)
+#else
     if (iMvBits < iBestMvBits)
+#endif
     {
       iBestMvBits = iMvBits;
       iBestMVPIdx = iMVPIdx;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        iBestMvBitsOk = iMvBitsOk;
+      }
+#endif
     }
   }
 
@@ -3250,11 +4808,7 @@ Distortion InterSearch::xGetTemplateCost( const PredictionUnit& pu,
   // prediction pattern
   const bool bi = pu.cu->slice->testWeightPred() && pu.cu->slice->getSliceType()==P_SLICE;
 
-
-  xPredInterBlk( COMPONENT_Y, pu, picRef, cMvCand, predBuf, bi, pu.cu->slice->clpRng( COMPONENT_Y )
-                , false
-                , false
-                );
+  xPredInterBlk(COMPONENT_Y, pu, picRef, cMvCand, predBuf, bi, pu.cu->slice->clpRng(COMPONENT_Y), false, false);
 
   if ( bi )
   {
@@ -3269,7 +4823,11 @@ Distortion InterSearch::xGetTemplateCost( const PredictionUnit& pu,
   return uiCost;
 }
 
+#if GDR_ENABLED
+Distortion InterSearch::xGetAffineTemplateCost(PredictionUnit& pu, PelUnitBuf& origBuf, PelUnitBuf& predBuf, Mv acMvCand[3], int iMVPIdx, int iMVPNum, RefPicList eRefPicList, int iRefIdx, bool& rbOk)
+#else
 Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf& origBuf, PelUnitBuf& predBuf, Mv acMvCand[3], int iMVPIdx, int iMVPNum, RefPicList eRefPicList, int iRefIdx )
+#endif
 {
   Distortion uiCost = std::numeric_limits<Distortion>::max();
 
@@ -3280,7 +4838,12 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf&
   Mv mv[3];
   memcpy(mv, acMvCand, sizeof(mv));
   m_iRefListIdx = eRefPicList;
+
+#if GDR_ENABLED
+  rbOk = xPredAffineBlk(COMPONENT_Y, pu, picRef, mv, predBuf, bi, pu.cu->slice->clpRng(COMPONENT_Y));
+#else
   xPredAffineBlk(COMPONENT_Y, pu, picRef, mv, predBuf, bi, pu.cu->slice->clpRng(COMPONENT_Y));
+#endif
   if( bi )
   {
     xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, iRefIdx, m_maxCompIDToPred );
@@ -3296,9 +4859,17 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf&
   return uiCost;
 }
 
+#if GDR_ENABLED
+void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eRefPicList, Mv& rcMvPred, int iRefIdxPred, Mv& rcMv, bool &rcMvSolid, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, bool& rbCleanCandExist, bool bBi)
+#else
 void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eRefPicList, Mv& rcMvPred, int iRefIdxPred, Mv& rcMv, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, bool bBi)
+#endif
 {
+#if GDR_ENABLED
+  if (pu.cu->cs->sps->getUseBcw() && pu.cu->BcwIdx != BCW_DEFAULT && !bBi && xReadBufferedUniMv(pu, eRefPicList, iRefIdxPred, rcMvPred, rcMv, rcMvSolid, ruiBits, ruiCost))
+#else
   if( pu.cu->cs->sps->getUseBcw() && pu.cu->BcwIdx != BCW_DEFAULT && !bBi && xReadBufferedUniMv(pu, eRefPicList, iRefIdxPred, rcMvPred, rcMv, ruiBits, ruiCost) )
+#endif
   {
     return;
   }
@@ -3408,7 +4979,9 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
         }
       }
       if (j < i)
+      {
         continue;
+      }
 
       cTmpMv = curMvInfo->uniMvs[eRefPicList][iRefIdxPred];
       clipMv( cTmpMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
@@ -3427,7 +5000,11 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
 
     if( !bQTBTMV )
     {
+#if GDR_ENABLED
+      xSetSearchRange(pu, bestInitMv, iSrchRng, cStruct.searchRange, cStruct, eRefPicList, iRefIdxPred);
+#else
       xSetSearchRange(pu, bestInitMv, iSrchRng, cStruct.searchRange, cStruct);
+#endif
     }
     xPatternSearch( cStruct, rcMv, ruiCost);
   }
@@ -3473,7 +5050,11 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
         MCTSHelper::clipMvToArea( rcMv, pu.Y(), curTileAreaSubPelRestricted, *pu.cs->sps, 0 );
       }
     }
+#if GDR_ENABLED
+    xPatternSearchFracDIF(pu, eRefPicList, iRefIdxPred, cStruct, rcMv, cMvHalf, cMvQter, ruiCost, rbCleanCandExist);
+#else
     xPatternSearchFracDIF( pu, eRefPicList, iRefIdxPred, cStruct, rcMv, cMvHalf, cMvQter, ruiCost );
+#endif
     m_pcRdCost->setCostScale( 0 );
     rcMv <<= 2;
     rcMv  += ( cMvHalf <<= 1 );
@@ -3486,7 +5067,11 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
   else // integer refinement for integer-pel and 4-pel resolution
   {
     rcMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
+#if GDR_ENABLED
+    xPatternSearchIntRefine(pu, cStruct, rcMv, rcMvPred, riMVPIdx, ruiBits, ruiCost, amvpInfo, fWeight, eRefPicList, iRefIdxPred, rbCleanCandExist);
+#else
     xPatternSearchIntRefine( pu, cStruct, rcMv, rcMvPred, riMVPIdx, ruiBits, ruiCost, amvpInfo, fWeight);
+#endif
   }
   DTRACE(g_trace_ctx, D_ME, "   MECost<L%d,%d>: %6d (%d)  MV:%d,%d\n", (int)eRefPicList, (int)bBi, ruiCost, ruiBits, rcMv.getHor() << 2, rcMv.getVer() << 2);
 }
@@ -3498,14 +5083,61 @@ void InterSearch::xSetSearchRange ( const PredictionUnit& pu,
                                     const int iSrchRng,
                                     SearchRange& sr
                                   , IntTZSearchStruct& cStruct
+#if GDR_ENABLED
+                                  , RefPicList eRefPicList
+                                  , int iRefIdx
+#endif
 )
 {
   const int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
   Mv cFPMvPred = cMvPred;
   clipMv( cFPMvPred, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
-  
+
   Mv mvTL(cFPMvPred.getHor() - (iSrchRng << iMvShift), cFPMvPred.getVer() - (iSrchRng << iMvShift));
   Mv mvBR(cFPMvPred.getHor() + (iSrchRng << iMvShift), cFPMvPred.getVer() + (iSrchRng << iMvShift));
+#if GDR_ENABLED
+  if (m_pcEncCfg->getGdrEnabled()) // range restriction below only applies when the encoder config enables GDR
+  {
+    bool isRefGdrPicture = pu.cs->slice->getRefPic(eRefPicList, iRefIdx)->cs->picHeader->getInGdrInterval(); // flag read from the reference picture's picture header
+    if (isRefGdrPicture)
+    {
+      mvTL = { cFPMvPred.getHor(), cFPMvPred.getVer() }; // provisional value; overwritten at the end of this branch
+      mvBR = { cFPMvPred.getHor(), cFPMvPred.getVer() }; // provisional value; overwritten at the end of this branch
+
+      const int lumaPixelAway = 4;   // extra pels added to the luma reach when the predictor is fractional (presumably filter reach - TODO confirm)
+      const int chromaPixelAway = 5; // same for chroma (presumably filter reach - TODO confirm)
+
+      const Position LastPos = pu.Y().bottomRight(); // bottom-right luma sample of the PU
+
+      const int iMvShift = MV_FRACTIONAL_BITS_INTERNAL; // NOTE(review): shadows the function-scope iMvShift, which has the same value
+      const int iMvLumaFrac = (1 << iMvShift);
+      const int iMvChromaFrac = (iMvLumaFrac << 1);
+      const int iFracOne = (1 << iMvShift); // one full pel in MV units; same value as iMvLumaFrac
+
+      const bool isIntLumaMv = (cFPMvPred.getHor() % iMvLumaFrac) == 0;     // only the horizontal component is tested
+      const bool isIntChromaMv = (cFPMvPred.getHor() % iMvChromaFrac) == 0; // only the horizontal component is tested
+
+      const int scaled_endx = pu.cs->slice->getRefPic(eRefPicList, iRefIdx)->cs->picHeader->getVirtualBoundariesPosX(0) << iMvShift; // x of virtual boundary index 0 in the reference picture, scaled to MV units
+
+      const Position OrigFracPos = Position(LastPos.x << iMvShift, LastPos.y << iMvShift); // PU bottom-right in MV units
+      const int last_luma_pos = ((OrigFracPos.x / iMvLumaFrac)   * iMvLumaFrac) + cFPMvPred.getHor() + (isIntLumaMv ? 0 : (lumaPixelAway << iMvShift));
+      const int last_chroma_pos = ((OrigFracPos.x / iMvChromaFrac) * iMvChromaFrac) + cFPMvPred.getHor() + (isIntChromaMv ? 0 : (chromaPixelAway << iMvShift));
+
+      const int last_pel_pos = std::max(last_luma_pos, last_chroma_pos); // rightmost x reached by either component
+
+      const int distance = Clip3(-(iSrchRng << iMvShift), (iSrchRng << iMvShift), scaled_endx - (last_pel_pos + iFracOne)); // room left before the boundary, clipped to +/- search range
+
+
+      int srLeft = cFPMvPred.getHor() - (iSrchRng << iMvShift);
+      int srRight = cFPMvPred.getHor() + distance; // only the right edge differs from the default range computed above
+      int srTop = cFPMvPred.getVer() - (iSrchRng << iMvShift);
+      int srBottom = cFPMvPred.getVer() + (iSrchRng << iMvShift);
+
+      mvTL = { srLeft, srTop };
+      mvBR = { srRight, srBottom };
+    }
+  }
+#endif
 
   if (m_pcEncCfg->getMCTSEncConstraint())
   {
@@ -3514,16 +5146,8 @@ void InterSearch::xSetSearchRange ( const PredictionUnit& pu,
   }
   else
   {
-    xClipMv( mvTL, pu.cu->lumaPos(),
-            pu.cu->lumaSize(),
-            *pu.cs->sps
-          , *pu.cs->pps
-    );
-    xClipMv( mvBR, pu.cu->lumaPos(),
-            pu.cu->lumaSize(),
-            *pu.cs->sps
-          , *pu.cs->pps
-    );
+    xClipMv(mvTL, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps);
+    xClipMv(mvBR, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps);
   }
 
   mvTL.divideByPowerOf2( iMvShift );
@@ -3542,13 +5166,21 @@ void InterSearch::xSetSearchRange ( const PredictionUnit& pu,
     Position posRBinCTU(posRB.x & pcv->maxCUWidthMask, posRB.y & pcv->maxCUHeightMask);
     Position posLTinCTU = Position(posTL.x & pcv->maxCUWidthMask, posTL.y & pcv->maxCUHeightMask).offset(-4, -4);
     if (sr.left < -posLTinCTU.x)
+    {
       sr.left = -posLTinCTU.x;
+    }
     if (sr.top < -posLTinCTU.y)
+    {
       sr.top = -posLTinCTU.y;
+    }
     if (sr.right >((int)pcv->maxCUWidth - 4 - posRBinCTU.x))
+    {
       sr.right = (int)pcv->maxCUWidth - 4 - posRBinCTU.x;
+    }
     if (sr.bottom >((int)pcv->maxCUHeight - 4 - posRBinCTU.y))
+    {
       sr.bottom = (int)pcv->maxCUHeight - 4 - posRBinCTU.y;
+    }
     if (posLTinCTU.x == -4 || posLTinCTU.y == -4)
     {
       sr.left = sr.right = sr.bottom = sr.top = 0;
@@ -3670,6 +5302,10 @@ void InterSearch::xTZSearch( const PredictionUnit& pu,
   const uint32_t uiStarRefinementRounds                  = 2;  // star refinement stop X rounds after best match (must be >=1)
   const bool bNewZeroNeighbourhoodTest               = bExtendedSettings;
 
+#if GDR_ENABLED
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
   int iSearchRange = m_iSearchRange;
   if( m_pcEncCfg->getMCTSEncConstraint() )
   {
@@ -3745,7 +5381,9 @@ void InterSearch::xTZSearch( const PredictionUnit& pu,
       }
     }
     if (j < i)
+    {
       continue;
+    }
 
     Mv cTmpMv = curMvInfo->uniMvs[eRefPicList][iRefIdxPred];
     clipMv( cTmpMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
@@ -3754,6 +5392,37 @@ void InterSearch::xTZSearch( const PredictionUnit& pu,
 
     Distortion uiSad = m_cDistParam.distFunc(m_cDistParam);
     uiSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift);
+#if GDR_ENABLED
+    bool allOk = (uiSad < cStruct.uiBestSad);
+
+    if (isEncodeGdrClean)
+    {
+      Mv motion = cTmpMv;
+      motion.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
+      bool cTmpMvOk = cs.isClean(pu.Y().bottomRight(), motion, eRefPicList, iRefIdxPred);
+
+      Mv bestMv = { cStruct.iBestX, cStruct.iBestY };
+      bestMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
+      bool bestMvOk = cs.isClean(pu.Y().bottomRight(), bestMv, eRefPicList, iRefIdxPred);
+
+      if (cTmpMvOk)
+      {
+        allOk = (bestMvOk) ? (uiSad < cStruct.uiBestSad) : true;
+      }
+      else
+      {
+        allOk = false;
+      }
+    }
+
+    if (allOk)
+    {
+      cStruct.uiBestSad = uiSad;
+      cStruct.iBestX = cTmpMv.hor;
+      cStruct.iBestY = cTmpMv.ver;
+      m_cDistParam.maximumDistortionForEarlyExit = uiSad;
+    }
+#else
     if (uiSad < cStruct.uiBestSad)
     {
       cStruct.uiBestSad = uiSad;
@@ -3761,13 +5430,18 @@ void InterSearch::xTZSearch( const PredictionUnit& pu,
       cStruct.iBestY = cTmpMv.ver;
       m_cDistParam.maximumDistortionForEarlyExit = uiSad;
     }
+#endif
   }
 
   {
     // set search range
     Mv currBestMv(cStruct.iBestX, cStruct.iBestY );
     currBestMv <<= MV_FRACTIONAL_BITS_INTERNAL;
+#if GDR_ENABLED
+    xSetSearchRange(pu, currBestMv, m_iSearchRange >> (bFastSettings ? 1 : 0), sr, cStruct, eRefPicList, iRefIdxPred);
+#else
     xSetSearchRange(pu, currBestMv, m_iSearchRange >> (bFastSettings ? 1 : 0), sr, cStruct);
+#endif
   }
   if (m_pcEncCfg->getUseHashME() && (m_currRefPicList == 0 || pu.cu->slice->getList1IdxToList0Idx(m_currRefPicIndex) < 0))
   {
@@ -4033,7 +5707,6 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu,
     integerMv2Nx2NPred.divideByPowerOf2(2);
 
     xTZSearchHelp( cStruct, integerMv2Nx2NPred.getHor(), integerMv2Nx2NPred.getVer(), 0, 0);
-
   }
 
   for (int i = 0; i < m_uniMvListSize; i++)
@@ -4050,7 +5723,9 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu,
       }
     }
     if (j < i)
+    {
       continue;
+    }
 
     Mv cTmpMv = curMvInfo->uniMvs[eRefPicList][iRefIdxPred];
     clipMv( cTmpMv, pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
@@ -4072,7 +5747,11 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu,
     // set search range
     Mv currBestMv(cStruct.iBestX, cStruct.iBestY );
     currBestMv <<= 2;
+#if GDR_ENABLED
+    xSetSearchRange(pu, currBestMv, m_iSearchRange, sr, cStruct, eRefPicList, iRefIdxPred);
+#else
     xSetSearchRange( pu, currBestMv, m_iSearchRange, sr, cStruct );
+#endif
   }
   if (m_pcEncCfg->getUseHashME() && (m_currRefPicList == 0 || pu.cu->slice->getList1IdxToList0Idx(m_currRefPicIndex) < 0))
   {
@@ -4169,8 +5848,16 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu,
   ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift );
 }
 
+#if GDR_ENABLED
+void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&  cStruct, Mv& rcMv, Mv& rcMvPred, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, double fWeight, RefPicList eRefPicList, int iRefIdxPred, bool& rbCleanCandExist)
+#else
 void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&  cStruct, Mv& rcMv, Mv& rcMvPred, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, double fWeight)
+#endif
 {
+#if GDR_ENABLED
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
 
   CHECK( pu.cu->imv == 0 || pu.cu->imv == IMV_HPEL , "xPatternSearchIntRefine(): Sub-pel MV used.");
   CHECK( amvpInfo.mvCand[riMVPIdx] != rcMvPred, "xPatternSearchIntRefine(): MvPred issue.");
@@ -4202,6 +5889,11 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&
   cBaseMvd[1].roundTransPrecInternal2Amvr(pu.cu->imv);
 
   // test best integer position and all 8 neighboring positions
+#if GDR_ENABLED
+  bool allOk = true;
+  bool uiDistOk = false;
+  bool uiBestDistOk = false;
+#endif
   for (int pos = 0; pos < 9; pos ++)
   {
     Mv cTestMv[2];
@@ -4249,12 +5941,42 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&
       iMvBits += m_pcRdCost->getBitsOfVectorWithPredictor( mv.getHor(), mv.getVer(), 0 );
       uiDist += m_pcRdCost->getCost(iMvBits);
 
+#if GDR_ENABLED
+      allOk = (uiDist < uiBestDist);
+      if (isEncodeGdrClean)
+      {
+        bool isSolid = amvpInfo.mvSolid[iMVPIdx];
+        bool isValid = cs.isClean(pu.Y().bottomRight(), cTestMv[iMVPIdx], eRefPicList, iRefIdxPred);
+
+        uiDistOk = isSolid && isValid;
+        if (uiDistOk)
+        {
+          allOk = (uiBestDistOk) ? (uiDist < uiBestDist) : true;
+        }
+        else
+        {
+          allOk = false;
+        }
+      }
+#endif
+
+#if GDR_ENABLED
+      if (allOk)
+#else
       if (uiDist < uiBestDist)
+#endif
       {
         uiBestDist = uiDist;
         cBestMv = cTestMv[iMVPIdx];
         iBestMVPIdx = iMVPIdx;
         iBestBits = iMvBits;
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          uiBestDistOk = uiDistOk;
+          rbCleanCandExist = true;
+        }
+#endif
       }
     }
   }
@@ -4290,6 +6012,9 @@ void InterSearch::xPatternSearchFracDIF(
   Mv&                   rcMvHalf,
   Mv&                   rcMvQter,
   Distortion&           ruiCost
+#if GDR_ENABLED
+  , bool&                rbCleanCandExist
+#endif
 )
 {
 
@@ -4303,7 +6028,11 @@ void InterSearch::xPatternSearchFracDIF(
     m_pcRdCost->setCostScale(0);
     xExtDIFUpSamplingH(&cPatternRoi, cStruct.useAltHpelIf);
     rcMvQter = rcMvInt;   rcMvQter <<= 2;    // for mv-cost
+#if GDR_ENABLED
+    ruiCost = xPatternRefinement(pu, eRefPicList, iRefIdx, cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !pu.cs->slice->getDisableSATDForRD(), rbCleanCandExist);
+#else
     ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !pu.cs->slice->getDisableSATDForRD());
+#endif
     return;
   }
 
@@ -4322,19 +6051,29 @@ void InterSearch::xPatternSearchFracDIF(
 
   rcMvHalf = rcMvInt;   rcMvHalf <<= 1;    // for mv-cost
   Mv baseRefMv(0, 0);
+#if GDR_ENABLED
+  ruiCost = xPatternRefinement(pu, eRefPicList, iRefIdx, cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, (!pu.cs->slice->getDisableSATDForRD()), rbCleanCandExist);
+#else
   ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, (!pu.cs->slice->getDisableSATDForRD()));
+#endif
 
   //  quarter-pel refinement
   if (cStruct.imvShift == IMV_OFF)
   {
-  m_pcRdCost->setCostScale( 0 );
-  xExtDIFUpSamplingQ ( &cPatternRoi, rcMvHalf );
-  baseRefMv = rcMvHalf;
-  baseRefMv <<= 1;
-
-  rcMvQter = rcMvInt;    rcMvQter <<= 1;    // for mv-cost
-  rcMvQter += rcMvHalf;  rcMvQter <<= 1;
-  ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, (!pu.cs->slice->getDisableSATDForRD()));
+    m_pcRdCost->setCostScale(0);
+    xExtDIFUpSamplingQ(&cPatternRoi, rcMvHalf);
+    baseRefMv = rcMvHalf;
+    baseRefMv <<= 1;
+
+    rcMvQter = rcMvInt;
+    rcMvQter <<= 1;   // for mv-cost
+    rcMvQter += rcMvHalf;
+    rcMvQter <<= 1;
+#if GDR_ENABLED
+    ruiCost = xPatternRefinement(pu, eRefPicList, iRefIdx, cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, (!pu.cs->slice->getDisableSATDForRD()), rbCleanCandExist);
+#else
+    ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, (!pu.cs->slice->getDisableSATDForRD()));
+#endif
   }
 }
 
@@ -4390,9 +6129,20 @@ Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origB
   return(cost);
 }
 
+#if GDR_ENABLED
+Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred
+  , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int SearchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds, int bcwIdx, bool& rOk)
+#else
 Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred
   , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int SearchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds, int bcwIdx )
+#endif
 {
+#if GDR_ENABLED
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool uiCostOk;
+  bool uiMinCostOk = rOk;
+#endif
   const Mv mvSearchOffsetCross[4] = { Mv( 0 , 1 ) , Mv( 1 , 0 ) , Mv( 0 , -1 ) , Mv( -1 ,  0 ) };
   const Mv mvSearchOffsetSquare[8] = { Mv( -1 , 1 ) , Mv( 0 , 1 ) , Mv( 1 ,  1 ) , Mv( 1 ,  0 ) , Mv( 1 , -1 ) , Mv( 0 , -1 ) , Mv( -1 , -1 ) , Mv( -1 , 0 ) };
   const Mv mvSearchOffsetDiamond[8] = { Mv( 0 , 2 ) , Mv( 1 , 1 ) , Mv( 2 ,  0 ) , Mv( 1 , -1 ) , Mv( 0 , -2 ) , Mv( -1 , -1 ) , Mv( -2 ,  0 ) , Mv( -1 , 1 ) };
@@ -4468,6 +6218,10 @@ Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf&
       uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( mv.getHor(), mv.getVer(), 0 );
       Distortion uiCost = m_pcRdCost->getCost( uiMvBits );
 
+#if GDR_ENABLED
+      uiCostOk = cs.isClean(pu.Y().bottomRight(), mvCand.mv, eRefPicList, mvCand.refIdx);
+#endif
+
       // get MVD pair and set target MV
       mvPair.refIdx = rTarMvField.refIdx;
       mvPair.mv.set( rcMvTarPred.hor - (mvCand.mv.hor - rcMvCurPred.hor), rcMvTarPred.ver - (mvCand.mv.ver - rcMvCurPred.ver) );
@@ -4477,12 +6231,33 @@ Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf&
           continue; // Skip this this pos
       }
       uiCost += xGetSymmetricCost( pu, origBuf, eRefPicList, mvCand, mvPair, bcwIdx );
+
+#if GDR_ENABLED
+      bool allOk = (uiCost < uiMinCost);
+      if (isEncodeGdrClean)
+      {
+        bool curValid = cs.isClean(pu.Y().bottomRight(), mvCand.mv, (RefPicList)(eRefPicList), mvCand.refIdx);
+        bool tarValid = cs.isClean(pu.Y().bottomRight(), mvPair.mv, (RefPicList)(1 - eRefPicList), mvPair.refIdx);
+        allOk = curValid && tarValid;
+      }
+#endif
+
+#if GDR_ENABLED
+      if (allOk)
+#else
       if ( uiCost < uiMinCost )
+#endif
       {
         uiMinCost = uiCost;
         rCurMvField = mvCand;
         rTarMvField = mvPair;
         nBestDirect = nDirect;
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          uiMinCostOk = uiCostOk;
+        }
+#endif
       }
     }
 
@@ -4499,11 +6274,19 @@ Distortion InterSearch::xSymmeticRefineMvSearch( PredictionUnit &pu, PelUnitBuf&
     nDirectEnd = nBestDirect + nStep;
   }
 
+#if GDR_ENABLED
+  rOk = uiMinCostOk;
+#endif
+
   return(uiMinCost);
 }
 
 
+#if GDR_ENABLED
+bool InterSearch::xSymmetricMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int bcwIdx, bool& ruiCostOk)
+#else
 void InterSearch::xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int bcwIdx )
+#endif
 {
   // Refine Search
   int nSearchStepShift = MV_FRACTIONAL_BITS_DIFF;
@@ -4513,8 +6296,17 @@ void InterSearch::xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& or
   nSearchStepShift += pu.cu->imv == IMV_HPEL ? 1 : (pu.cu->imv << 1);
   nDiamondRound >>= pu.cu->imv;
 
+#if GDR_ENABLED
+  ruiCost = xSymmeticRefineMvSearch(pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 2, nSearchStepShift, nDiamondRound, bcwIdx, ruiCostOk);
+  ruiCost = xSymmeticRefineMvSearch(pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 0, nSearchStepShift, nCrossRound, bcwIdx, ruiCostOk);
+#else
   ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 2, nSearchStepShift, nDiamondRound, bcwIdx );
   ruiCost = xSymmeticRefineMvSearch( pu, origBuf, rcMvCurPred, rcMvTarPred, eRefPicList, rCurMvField, rTarMvField, ruiCost, 0, nSearchStepShift, nCrossRound, bcwIdx );
+#endif
+
+#if GDR_ENABLED
+  return ruiCostOk;
+#endif
 }
 
 void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
@@ -4523,7 +6315,13 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
                                           uint32_t&                 lastMode,
                                           Distortion&           affineCost,
                                           Mv                    hevcMv[2][33]
+#if GDR_ENABLED
+                                        , bool                  hevcMvSolid[2][33]
+#endif
                                         , Mv                    mvAffine4Para[2][33][3]
+#if GDR_ENABLED
+                                        , bool                  mvAffine4ParaSolid[2][33][3]
+#endif
                                         , int                   refIdx4Para[2]
                                         , uint8_t               bcwIdx
                                         , bool                  enforceBcwPred
@@ -4551,6 +6349,27 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   int       aaiMvpIdx[2][33];
   int       aaiMvpNum[2][33];
 
+#if GDR_ENABLED
+  bool      aacMvSolid[2][3];
+  bool      aacMvValid[2][3];
+
+  bool      cMvTempSolid[2][33][3];
+  bool      cMvTempValid[2][33][3];
+
+  bool      cMvBiSolid[2][3];
+  bool      cMvBiValid[2][3];
+
+  bool      cMvPredSolid[2][33][3];
+  bool      cMvPredBiSolid[2][33][3];
+
+  bool      mvValidList1Solid[3];
+  bool      mvValidList1Valid[3];
+
+  bool      mvHevcSolid[3];
+
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
   AffineAMVPInfo aacAffineAMVPInfo[2][33];
   AffineAMVPInfo affiAMVPInfoTemp[2];
 
@@ -4561,13 +6380,73 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
 
   int           iRefStart, iRefEnd;
 
-  int           bestBiPRefIdxL1 = 0;
-  int           bestBiPMvpL1 = 0;
-  Distortion biPDistTemp = std::numeric_limits<Distortion>::max();
+  int           bestBiPRefIdxL1 = 0;
+  int           bestBiPMvpL1 = 0;
+  Distortion biPDistTemp = std::numeric_limits<Distortion>::max();
+
+#if GDR_ENABLED
+  bool init_value = true;
+
+  bool          allOk = true;
+  bool          biPDistTempOk = init_value;
+  bool          bestBiPDistOk = init_value;
+
+
+  // if (isEncodeGdrClean)
+  {
+    iRefIdxBi[0] = -1;
+    iRefIdxBi[1] = -1;
+    memset(mvHevcSolid, init_value, sizeof(mvHevcSolid));
+
+    // Note: aacMvSolid/aacMvValid must start out false; initializing them to true causes problems with the solid flags
+    memset(aacMvSolid, false, sizeof(aacMvSolid));
+    memset(aacMvValid, false, sizeof(aacMvValid));
+
+    memset(cMvBiSolid, init_value, sizeof(cMvBiSolid));
+    memset(cMvBiValid, init_value, sizeof(cMvBiValid));
+
+    memset(cMvTempSolid, init_value, sizeof(cMvTempSolid));
+    memset(cMvTempValid, init_value, sizeof(cMvTempValid));
+
+    memset(mvValidList1Solid, init_value, sizeof(mvValidList1Solid));
+    memset(mvValidList1Valid, init_value, sizeof(mvValidList1Valid));
+
+    // AffineAMVPInfo aacAffineAMVPInfo[2][33];
+    ::memset(aacAffineAMVPInfo, 0, sizeof(aacAffineAMVPInfo));
+    ::memset(affiAMVPInfoTemp, 0, sizeof(affiAMVPInfoTemp));
+
+    for (int i = 0; i < 2; i++)
+    {
+      for (int j = 0; j < 33; j++)
+      {
+        for (int k = 0; k < AMVP_MAX_NUM_CANDS_MEM; k++)
+        {
+          aacAffineAMVPInfo[i][j].mvSolidLT[k] = init_value;
+          aacAffineAMVPInfo[i][j].mvSolidRT[k] = init_value;
+          aacAffineAMVPInfo[i][j].mvSolidLB[k] = init_value;
+        }
+      }
+
+      for (int k = 0; k < AMVP_MAX_NUM_CANDS_MEM; k++)
+      {
+        affiAMVPInfoTemp[i].mvSolidLT[k] = init_value;
+        affiAMVPInfoTemp[i].mvSolidRT[k] = init_value;
+        affiAMVPInfoTemp[i].mvSolidLB[k] = init_value;
+      }
+    }
+  }
+
+  bool bAnyClean = false;
+#endif
 
   Distortion    uiCost[2] = { std::numeric_limits<Distortion>::max(), std::numeric_limits<Distortion>::max() };
   Distortion    uiCostBi  = std::numeric_limits<Distortion>::max();
   Distortion    uiCostTemp;
+#if GDR_ENABLED
+  bool uiCostOk[2] = { init_value, init_value };
+  bool uiCostBiOk = init_value;
+  bool uiCostTempOk = init_value;
+#endif
 
   uint32_t          uiBits[3] = { 0 };
   uint32_t          uiBitsTemp;
@@ -4578,12 +6457,24 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   {
     uiCostTempL0[iNumRef] = std::numeric_limits<Distortion>::max();
   }
+
+#if GDR_ENABLED
+  bool uiCostTempL0Ok[MAX_NUM_REF];
+  for (int iNumRef = 0; iNumRef < MAX_NUM_REF; iNumRef++)
+  {
+    uiCostTempL0Ok[iNumRef] = true;
+  }
+#endif
+
   uint32_t uiBitsTempL0[MAX_NUM_REF];
 
   Mv            mvValidList1[4];
   int           refIdxValidList1 = 0;
   uint32_t          bitsValidList1 = MAX_UINT;
   Distortion costValidList1 = std::numeric_limits<Distortion>::max();
+#if GDR_ENABLED
+  bool costValidList1Ok = true;
+#endif
   Mv            mvHevc[3];
   const bool affineAmvrEnabled = pu.cu->slice->getSPS()->getAffineAmvrEnabledFlag();
   int tryBipred = 0;
@@ -4624,7 +6515,22 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         biPDistTemp += m_pcRdCost->getCost( xCalcAffineMVBits( pu, cMvPred[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp] ) );
       }
       aaiMvpIdx[iRefList][iRefIdxTemp] = pu.mvpIdx[eRefPicList];
-      aaiMvpNum[iRefList][iRefIdxTemp] = pu.mvpNum[eRefPicList];;
+      aaiMvpNum[iRefList][iRefIdxTemp] = pu.mvpNum[eRefPicList];
+
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        int mvpIdx = aaiMvpIdx[iRefList][iRefIdxTemp];
+        cMvPredSolid[iRefList][iRefIdxTemp][0] = affiAMVPInfoTemp[eRefPicList].mvSolidLT[mvpIdx];
+        cMvPredSolid[iRefList][iRefIdxTemp][1] = affiAMVPInfoTemp[eRefPicList].mvSolidRT[mvpIdx];
+        cMvPredSolid[iRefList][iRefIdxTemp][2] = affiAMVPInfoTemp[eRefPicList].mvSolidLB[mvpIdx];
+
+        biPDistTempOk = true;
+        biPDistTempOk = biPDistTempOk && affiAMVPInfoTemp[eRefPicList].mvSolidLT[mvpIdx] && affiAMVPInfoTemp[eRefPicList].mvSolidRT[mvpIdx];
+        biPDistTempOk = biPDistTempOk && ((mvNum > 2) ? affiAMVPInfoTemp[eRefPicList].mvSolidLB[mvpIdx] : true);
+      }
+#endif
+
       if ( pu.cu->affineType == AFFINEMODEL_6PARAM && refIdx4Para[iRefList] != iRefIdxTemp )
       {
         xCopyAffineAMVPInfo( affiAMVPInfoTemp[eRefPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp] );
@@ -4636,11 +6542,26 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       {
         mvHevc[i] = hevcMv[iRefList][iRefIdxTemp];
         mvHevc[i].roundAffinePrecInternal2Amvr(pu.cu->imv);
+
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          mvHevcSolid[i] = hevcMvSolid[iRefList][iRefIdxTemp];
+        }
+#endif
       }
       PelUnitBuf predBuf = m_tmpStorageLCU.getBuf( UnitAreaRelative(*pu.cu, pu) );
+#if GDR_ENABLED
+      bool uiCandCostOk = true;
+      Distortion uiCandCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mvHevc, aaiMvpIdx[iRefList][iRefIdxTemp],
+        AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp, uiCandCostOk);
 
+      uiCandCostOk = uiCandCostOk && mvHevcSolid[0] && mvHevcSolid[1] && ((mvNum > 2) ? mvHevcSolid[2] : true);
+
+#else
       Distortion uiCandCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mvHevc, aaiMvpIdx[iRefList][iRefIdxTemp],
                                                      AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp);
+#endif
 
       if ( affineAmvrEnabled )
       {
@@ -4655,19 +6576,58 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       if ( savedParaAvail )
       {
         Mv mvFour[3];
+#if GDR_ENABLED
+        bool mvFourSolid[3] = { true, true, true };
+#endif
         for ( int i = 0; i < mvNum; i++ )
         {
           mvFour[i] = affine4Para ? m_affineMotion.acMvAffine4Para[iRefList][i] : m_affineMotion.acMvAffine6Para[iRefList][i];
           mvFour[i].roundAffinePrecInternal2Amvr(pu.cu->imv);
+#if GDR_ENABLED
+          mvFourSolid[i] = affine4Para ? m_affineMotion.acMvAffine4ParaSolid[iRefList][i] : m_affineMotion.acMvAffine6ParaSolid[iRefList][i];
+#endif
         }
 
+#if GDR_ENABLED
+        bool candCostInheritOk = true;
+        Distortion candCostInherit = xGetAffineTemplateCost(pu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp, candCostInheritOk);
+
+        candCostInheritOk = candCostInheritOk && mvFourSolid[0] && mvFourSolid[1] && ((mvNum > 2) ? mvFourSolid[2] : true);
+#else
         Distortion candCostInherit = xGetAffineTemplateCost( pu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp );
+#endif
         candCostInherit += m_pcRdCost->getCost( xCalcAffineMVBits( pu, mvFour, cMvPred[iRefList][iRefIdxTemp] ) );
 
+#if GDR_ENABLED
+        allOk = (candCostInherit < uiCandCost);
+        if (isEncodeGdrClean)
+        {
+          if (candCostInheritOk)
+          {
+            allOk = (uiCandCostOk) ? (candCostInherit < uiCandCost) : true;
+          }
+          else
+          {
+            allOk = false;
+          }
+        }
+#endif
+
+#if GDR_ENABLED
+        if (allOk)
+#else
         if ( candCostInherit < uiCandCost )
+#endif
         {
           uiCandCost = candCostInherit;
           memcpy( mvHevc, mvFour, 3 * sizeof( Mv ) );
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            uiCandCostOk = candCostInheritOk;
+            memcpy(mvHevcSolid, mvFourSolid, 3 * sizeof(bool));
+          }
+#endif
         }
       }
 
@@ -4679,6 +6639,10 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         for (int i = 0; i < m_affMVListSize; i++)
         {
           AffineMVInfo *mvInfo = m_affMVList + ((m_affMVListIdx - i - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize));
+#if GDR_ENABLED
+          AffineMVInfoSolid *mvInfoSolid = m_affMVListSolid + ((m_affMVListIdx - i - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize));
+#endif
+
           //check;
           int j = 0;
           for (; j < i; j++)
@@ -4694,9 +6658,17 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
             }
           }
           if (j < i)
+          {
             continue;
+          }
 
           Mv mvTmp[3], *nbMv = mvInfo->affMVs[iRefList][iRefIdxTemp];
+#if GDR_ENABLED
+          bool mvTmpSolid[3];
+          bool *nbMvSolid = mvInfoSolid->affMVsSolid[iRefList][iRefIdxTemp];
+          mvTmpSolid[0] = nbMvSolid[0];
+          mvTmpSolid[1] = nbMvSolid[1];
+#endif
           int vx, vy;
           int dMvHorX, dMvHorY, dMvVerX, dMvVerY;
           int mvScaleHor = nbMv[0].getHor() << shift;
@@ -4721,15 +6693,48 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
           clipMv( mvTmp[1], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
           mvTmp[0].roundAffinePrecInternal2Amvr(pu.cu->imv);
           mvTmp[1].roundAffinePrecInternal2Amvr(pu.cu->imv);
+
+#if GDR_ENABLED
+          bool tmpCostOk = true;
+          Distortion tmpCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mvTmp, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp, tmpCostOk);
+          tmpCostOk = tmpCostOk && mvTmpSolid[0] && mvTmpSolid[1];
+#else
           Distortion tmpCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mvTmp, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp);
+#endif
           if ( affineAmvrEnabled )
           {
             tmpCost += m_pcRdCost->getCost( xCalcAffineMVBits( pu, mvTmp, cMvPred[iRefList][iRefIdxTemp] ) );
           }
+#if GDR_ENABLED
+          allOk = (tmpCost < uiCandCost);
+          if (isEncodeGdrClean)
+          {
+            if (tmpCostOk)
+            {
+              allOk = (uiCandCostOk) ? (tmpCost < uiCandCost) : true;
+            }
+            else
+            {
+              allOk = false;
+            }
+          }
+#endif
+
+#if GDR_ENABLED
+          if (allOk)
+#else
           if (tmpCost < uiCandCost)
+#endif
           {
             uiCandCost = tmpCost;
             std::memcpy(mvHevc, mvTmp, 3 * sizeof(Mv));
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              uiCandCostOk = tmpCostOk;
+              std::memset(mvHevcSolid, true, 3 * sizeof(bool));
+            }
+#endif
           }
         }
       }
@@ -4738,6 +6743,12 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         Mv mvFour[3];
         mvFour[0] = mvAffine4Para[iRefList][iRefIdxTemp][0];
         mvFour[1] = mvAffine4Para[iRefList][iRefIdxTemp][1];
+#if GDR_ENABLED
+        bool mvFourSolid[3];
+        mvFourSolid[0] = mvAffine4ParaSolid[iRefList][iRefIdxTemp][0];
+        mvFourSolid[1] = mvAffine4ParaSolid[iRefList][iRefIdxTemp][1];
+#endif
+
         mvAffine4Para[iRefList][iRefIdxTemp][0].roundAffinePrecInternal2Amvr(pu.cu->imv);
         mvAffine4Para[iRefList][iRefIdxTemp][1].roundAffinePrecInternal2Amvr(pu.cu->imv);
 
@@ -4753,36 +6764,134 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         mvFour[0].roundAffinePrecInternal2Amvr(pu.cu->imv);
         mvFour[1].roundAffinePrecInternal2Amvr(pu.cu->imv);
         mvFour[2].roundAffinePrecInternal2Amvr(pu.cu->imv);
+
+#if GDR_ENABLED
+        bool uiCandCostInheritOk = true;
+        Distortion uiCandCostInherit = xGetAffineTemplateCost(pu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp, uiCandCostInheritOk);
+        uiCandCostInheritOk = uiCandCostInheritOk && mvFourSolid[0] && mvFourSolid[1];
+#else
         Distortion uiCandCostInherit = xGetAffineTemplateCost( pu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdxTemp );
+#endif
+
         if ( affineAmvrEnabled )
         {
           uiCandCostInherit += m_pcRdCost->getCost( xCalcAffineMVBits( pu, mvFour, cMvPred[iRefList][iRefIdxTemp] ) );
         }
+#if GDR_ENABLED
+        allOk = (uiCandCostInherit < uiCandCost);
+
+        if (isEncodeGdrClean)
+        {
+          if (uiCandCostInheritOk)
+          {
+            allOk = (uiCandCostOk) ? (uiCandCostInherit < uiCandCost) : true;
+          }
+          else
+          {
+            allOk = false;
+          }
+        }
+#endif
+
+#if GDR_ENABLED
+        if (allOk)
+#else
         if ( uiCandCostInherit < uiCandCost )
+#endif
         {
           uiCandCost = uiCandCostInherit;
           for ( int i = 0; i < 3; i++ )
           {
             mvHevc[i] = mvFour[i];
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              uiCandCostOk = uiCandCostInheritOk;
+              mvHevcSolid[i] = true;
+            }
+#endif
           }
         }
       }
 
+
+#if GDR_ENABLED
+      allOk = (uiCandCost < biPDistTemp);
+
+      if (isEncodeGdrClean)
+      {
+        if (uiCandCostOk)
+        {
+          allOk = (biPDistTempOk) ? (uiCandCost < biPDistTemp) : true;
+        }
+        else
+        {
+          allOk = false;
+        }
+      }
+#endif
+
+#if GDR_ENABLED
+      if (allOk)
+#else
       if ( uiCandCost < biPDistTemp )
+#endif
       {
         ::memcpy( cMvTemp[iRefList][iRefIdxTemp], mvHevc, sizeof(Mv)*3 );
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          cMvTempSolid[iRefList][iRefIdxTemp][0] = mvHevcSolid[0];
+          cMvTempSolid[iRefList][iRefIdxTemp][1] = mvHevcSolid[1];
+          cMvTempSolid[iRefList][iRefIdxTemp][2] = mvHevcSolid[2];
+        }
+#endif
       }
       else
       {
         ::memcpy( cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], sizeof(Mv)*3 );
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          cMvTempSolid[iRefList][iRefIdxTemp][0] = cMvPredSolid[iRefList][iRefIdxTemp][0];
+          cMvTempSolid[iRefList][iRefIdxTemp][1] = cMvPredSolid[iRefList][iRefIdxTemp][1];
+          cMvTempSolid[iRefList][iRefIdxTemp][2] = cMvPredSolid[iRefList][iRefIdxTemp][2];
+        }
+#endif
       }
 
       // GPB list 1, save the best MvpIdx, RefIdx and Cost
+#if GDR_ENABLED
+      allOk = (slice.getPicHeader()->getMvdL1ZeroFlag() && iRefList == 1 && (biPDistTemp < bestBiPDist));
+
+      if (isEncodeGdrClean)
+      {
+        if (biPDistTempOk)
+        {
+          allOk = (bestBiPDistOk) ? (slice.getPicHeader()->getMvdL1ZeroFlag() && iRefList == 1 && (biPDistTemp < bestBiPDist)) : true;
+        }
+        else
+        {
+          allOk = false;
+        }
+      }
+#endif
+
+#if GDR_ENABLED
+      if (allOk)
+#else
       if ( slice.getPicHeader()->getMvdL1ZeroFlag() && iRefList==1 && biPDistTemp < bestBiPDist )
+#endif
       {
         bestBiPDist = biPDistTemp;
         bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
         bestBiPRefIdxL1 = iRefIdxTemp;
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          bestBiPDistOk = biPDistTempOk;
+        }
+#endif
       }
 
       // Update bits
@@ -4794,6 +6903,13 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         {
           int iList1ToList0Idx = slice.getList1IdxToList0Idx( iRefIdxTemp );
           ::memcpy( cMvTemp[1][iRefIdxTemp], cMvTemp[0][iList1ToList0Idx], sizeof(Mv)*3 );
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            ::memcpy(cMvTempSolid[1][iRefIdxTemp], cMvTempSolid[0][iList1ToList0Idx], sizeof(bool) * 3);
+            uiCostTempOk = uiCostTempL0Ok[iList1ToList0Idx];
+          }
+#endif
           uiCostTemp = uiCostTempL0[iList1ToList0Idx];
 
           uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[iList1ToList0Idx] );
@@ -4804,52 +6920,249 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         }
         else
         {
+#if GDR_ENABLED
+          xAffineMotionEstimation(pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], cMvTempSolid[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp
+            , aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[eRefPicList], bAnyClean
+#else
           xAffineMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp
                                    , aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[eRefPicList]
+#endif
           );
+
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            int mvp_idx = aaiMvpIdx[iRefList][iRefIdxTemp];
+            PelUnitBuf     tmpBuf = m_tmpAffiStorage.getBuf(UnitAreaRelative(*pu.cu, pu));
+            const Picture *refPic = pu.cu->slice->getRefPic((RefPicList)iRefList, iRefIdxTemp);
+
+
+            cMvPredSolid[iRefList][iRefIdxTemp][0] = affiAMVPInfoTemp[eRefPicList].mvSolidLT[mvp_idx];
+            cMvPredSolid[iRefList][iRefIdxTemp][1] = affiAMVPInfoTemp[eRefPicList].mvSolidRT[mvp_idx];
+            cMvPredSolid[iRefList][iRefIdxTemp][2] = affiAMVPInfoTemp[eRefPicList].mvSolidLB[mvp_idx];
+
+            cMvTempSolid[iRefList][iRefIdxTemp][0] = cMvPredSolid[iRefList][iRefIdxTemp][0];
+            cMvTempSolid[iRefList][iRefIdxTemp][1] = cMvPredSolid[iRefList][iRefIdxTemp][1];
+            cMvTempSolid[iRefList][iRefIdxTemp][2] = cMvPredSolid[iRefList][iRefIdxTemp][2];
+
+            bool isSubPuYYClean = xPredAffineBlk(COMPONENT_Y, pu, refPic, cMvTemp[iRefList][iRefIdxTemp], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+            bool isSubPuCbClean = (isSubPuYYClean) ? xPredAffineBlk(COMPONENT_Cb, pu, refPic, cMvTemp[iRefList][iRefIdxTemp], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)) : false;
+
+            cMvTempValid[iRefList][iRefIdxTemp][0] = isSubPuYYClean && isSubPuCbClean;
+            cMvTempValid[iRefList][iRefIdxTemp][1] = isSubPuYYClean && isSubPuCbClean;
+            cMvTempValid[iRefList][iRefIdxTemp][2] = isSubPuYYClean && isSubPuCbClean;
+
+            uiCostTempOk = true;
+            uiCostTempOk = uiCostTempOk && cMvPredSolid[iRefList][iRefIdxTemp][0] && cMvPredSolid[iRefList][iRefIdxTemp][1];
+            uiCostTempOk = uiCostTempOk && ((mvNum > 2) ? cMvPredSolid[iRefList][iRefIdxTemp][2] : true);
+            uiCostTempOk = uiCostTempOk && cMvTempSolid[iRefList][iRefIdxTemp][0] && cMvTempSolid[iRefList][iRefIdxTemp][1] && ((mvNum > 2) ? cMvTempSolid[iRefList][iRefIdxTemp][2] : true);
+            uiCostTempOk = uiCostTempOk && cMvTempValid[iRefList][iRefIdxTemp][0] && cMvTempValid[iRefList][iRefIdxTemp][1] && ((mvNum > 2) ? cMvTempValid[iRefList][iRefIdxTemp][2] : true);
+          }
+#endif
         }
       }
       else
       {
+#if GDR_ENABLED
+        xAffineMotionEstimation(pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], cMvTempSolid[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp
+          , aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[eRefPicList], bAnyClean
+        );
+#else
         xAffineMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp
                                  , aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[eRefPicList]
         );
+#endif
+
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          int mvp_idx = aaiMvpIdx[iRefList][iRefIdxTemp];
+          PelUnitBuf     tmpBuf = m_tmpAffiStorage.getBuf(UnitAreaRelative(*pu.cu, pu));
+          const Picture *refPic = pu.cu->slice->getRefPic((RefPicList)iRefList, iRefIdxTemp);
+
+          cMvPredSolid[iRefList][iRefIdxTemp][0] = affiAMVPInfoTemp[eRefPicList].mvSolidLT[mvp_idx];
+          cMvPredSolid[iRefList][iRefIdxTemp][1] = affiAMVPInfoTemp[eRefPicList].mvSolidRT[mvp_idx];
+          cMvPredSolid[iRefList][iRefIdxTemp][2] = affiAMVPInfoTemp[eRefPicList].mvSolidLB[mvp_idx];
+
+          cMvTempSolid[iRefList][iRefIdxTemp][0] = cMvPredSolid[iRefList][iRefIdxTemp][0];
+          cMvTempSolid[iRefList][iRefIdxTemp][1] = cMvPredSolid[iRefList][iRefIdxTemp][1];
+          cMvTempSolid[iRefList][iRefIdxTemp][2] = cMvPredSolid[iRefList][iRefIdxTemp][2];
+
+          bool isSubPuYYClean = xPredAffineBlk(COMPONENT_Y, pu, refPic, cMvTemp[iRefList][iRefIdxTemp], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+          bool isSubPuCbClean = (isSubPuYYClean) ? xPredAffineBlk(COMPONENT_Cb, pu, refPic, cMvTemp[iRefList][iRefIdxTemp], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)) : false;
+
+          cMvTempValid[iRefList][iRefIdxTemp][0] = isSubPuYYClean && isSubPuCbClean;
+          cMvTempValid[iRefList][iRefIdxTemp][1] = isSubPuYYClean && isSubPuCbClean;
+          cMvTempValid[iRefList][iRefIdxTemp][2] = isSubPuYYClean && isSubPuCbClean;
+
+          uiCostTempOk = true;
+          uiCostTempOk = uiCostTempOk && cMvPredSolid[iRefList][iRefIdxTemp][0] && cMvPredSolid[iRefList][iRefIdxTemp][1];
+          uiCostTempOk = uiCostTempOk && ((mvNum > 2) ? cMvPredSolid[iRefList][iRefIdxTemp][2] : true);
+          uiCostTempOk = uiCostTempOk && cMvTempSolid[iRefList][iRefIdxTemp][0] && cMvTempSolid[iRefList][iRefIdxTemp][1] && ((mvNum > 2) ? cMvTempSolid[iRefList][iRefIdxTemp][2] : true);
+          uiCostTempOk = uiCostTempOk && cMvTempValid[iRefList][iRefIdxTemp][0] && cMvTempValid[iRefList][iRefIdxTemp][1] && ((mvNum > 2) ? cMvTempValid[iRefList][iRefIdxTemp][2] : true);
+        }
+#endif
       }
       if(pu.cu->cs->sps->getUseBcw() && pu.cu->BcwIdx == BCW_DEFAULT && pu.cu->slice->isInterB())
       {
         m_uniMotions.setReadModeAffine(true, (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType);
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          m_uniMotions.copyAffineMvFrom(
+            cMvTemp[iRefList][iRefIdxTemp],
+            cMvTempSolid[iRefList][iRefIdxTemp],
+            uiCostTemp - m_pcRdCost->getCost(uiBitsTemp),
+            (uint8_t)iRefList,
+            (uint8_t)iRefIdxTemp,
+            pu.cu->affineType,
+            aaiMvpIdx[iRefList][iRefIdxTemp]
+          );
+        }
+        else
+        {
+          m_uniMotions.copyAffineMvFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType
+            , aaiMvpIdx[iRefList][iRefIdxTemp]
+          );
+        }
+#else
         m_uniMotions.copyAffineMvFrom(cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, pu.cu->affineType
                                       , aaiMvpIdx[iRefList][iRefIdxTemp]
         );
+#endif
       }
       // Set best AMVP Index
       xCopyAffineAMVPInfo( affiAMVPInfoTemp[eRefPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp] );
+#if GDR_ENABLED
+      if ( pu.cu->imv != 2 || !m_pcEncCfg->getUseAffineAmvrEncOpt() )
+      {
+        xCheckBestAffineMVP( pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
+        if (isEncodeGdrClean)
+        {
+          int mvp_idx = aaiMvpIdx[iRefList][iRefIdxTemp];
+          // Re-read the predictor's per-corner solid flags (LT/RT/LB) for the MVP index just passed through xCheckBestAffineMVP.
+          cMvPredSolid[iRefList][iRefIdxTemp][0] = affiAMVPInfoTemp[eRefPicList].mvSolidLT[mvp_idx];
+          cMvPredSolid[iRefList][iRefIdxTemp][1] = affiAMVPInfoTemp[eRefPicList].mvSolidRT[mvp_idx];
+          cMvPredSolid[iRefList][iRefIdxTemp][2] = affiAMVPInfoTemp[eRefPicList].mvSolidLB[mvp_idx];
+          // Mirror each corner's own flag (index 0/1/2), matching the other refresh sites in this function.
+          cMvTempSolid[iRefList][iRefIdxTemp][0] = cMvPredSolid[iRefList][iRefIdxTemp][0];
+          cMvTempSolid[iRefList][iRefIdxTemp][1] = cMvPredSolid[iRefList][iRefIdxTemp][1];
+          cMvTempSolid[iRefList][iRefIdxTemp][2] = cMvPredSolid[iRefList][iRefIdxTemp][2];
+          // A fully valid MV set is narrowed, corner by corner, to the refreshed predictor flags.
+          if (cMvTempValid[iRefList][iRefIdxTemp][0] && cMvTempValid[iRefList][iRefIdxTemp][1] && cMvTempValid[iRefList][iRefIdxTemp][2])
+          {
+            cMvTempValid[iRefList][iRefIdxTemp][0] = cMvPredSolid[iRefList][iRefIdxTemp][0];
+            cMvTempValid[iRefList][iRefIdxTemp][1] = cMvPredSolid[iRefList][iRefIdxTemp][1];
+            cMvTempValid[iRefList][iRefIdxTemp][2] = cMvPredSolid[iRefList][iRefIdxTemp][2];
+          }
+          // The cost is marked ok only if predictor, MV and validity flags all hold; index 2 counts only when mvNum > 2.
+          uiCostTempOk = true;
+          uiCostTempOk = uiCostTempOk && cMvPredSolid[iRefList][iRefIdxTemp][0] && cMvPredSolid[iRefList][iRefIdxTemp][1];
+          uiCostTempOk = uiCostTempOk && ((mvNum > 2) ? cMvPredSolid[iRefList][iRefIdxTemp][2] : true);
+          uiCostTempOk = uiCostTempOk && cMvTempSolid[iRefList][iRefIdxTemp][0] && cMvTempSolid[iRefList][iRefIdxTemp][1] && ((mvNum > 2) ? cMvTempSolid[iRefList][iRefIdxTemp][2] : true);
+          uiCostTempOk = uiCostTempOk && cMvTempValid[iRefList][iRefIdxTemp][0] && cMvTempValid[iRefList][iRefIdxTemp][1] && ((mvNum > 2) ? cMvTempValid[iRefList][iRefIdxTemp][2] : true);
+        }
+      }
+#else
       if ( pu.cu->imv != 2 || !m_pcEncCfg->getUseAffineAmvrEncOpt() )
-      xCheckBestAffineMVP( pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
+      {
+        xCheckBestAffineMVP( pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
+      }
+#endif
 
       if ( iRefList == 0 )
       {
         uiCostTempL0[iRefIdxTemp] = uiCostTemp;
         uiBitsTempL0[iRefIdxTemp] = uiBitsTemp;
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          uiCostTempL0Ok[iRefIdxTemp] = uiCostTempOk;
+        }
+#endif
       }
       DTRACE( g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d, uiCost[iRefList]=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCostTemp, uiCost[iRefList] );
+#if GDR_ENABLED
+      allOk = (uiCostTemp < uiCost[iRefList]);
+
+      if (isEncodeGdrClean)
+      {
+        if (uiCostTempOk)
+        {
+          allOk = (uiCostOk[iRefList]) ? (uiCostTemp < uiCost[iRefList]) : true;
+        }
+        else
+        {
+          allOk = false;
+        }
+      }
+#endif
+
+#if GDR_ENABLED
+      if (allOk)
+#else
       if ( uiCostTemp < uiCost[iRefList] )
+#endif
       {
         uiCost[iRefList] = uiCostTemp;
         uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction
 
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          uiCostOk[iRefList] = uiCostTempOk;
+        }
+#endif
         // set best motion
         ::memcpy( aacMv[iRefList], cMvTemp[iRefList][iRefIdxTemp], sizeof(Mv) * 3 );
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          ::memcpy(aacMvSolid[iRefList], cMvTempSolid[iRefList][iRefIdxTemp], sizeof(bool) * 3);
+          ::memcpy(aacMvValid[iRefList], cMvTempValid[iRefList][iRefIdxTemp], sizeof(bool) * 3);
+        }
+#endif
         iRefIdx[iRefList] = iRefIdxTemp;
       }
 
+
+#if GDR_ENABLED
+      // Only list-1 references for which getList1IdxToList0Idx() is negative may update the
+      // "valid list 1" record (costValidList1 / mvValidList1 / refIdxValidList1). This precondition
+      // is structural and must hold whether or not GDR-clean encoding is active.
+      const bool isList1OnlyRef = (iRefList == 1) && (slice.getList1IdxToList0Idx(iRefIdxTemp) < 0);
+      allOk = isList1OnlyRef && (uiCostTemp < costValidList1);
+
+      if (isEncodeGdrClean)
+      {
+        // In GDR-clean mode the candidate cost must be flagged ok (uiCostTempOk). An ok candidate
+        // replaces a stored record that is not ok outright; otherwise the cost comparison decides.
+        // The structural precondition is kept in both cases so list-0 motion never lands in the
+        // list-1 record.
+        allOk = uiCostTempOk && isList1OnlyRef && (!costValidList1Ok || uiCostTemp < costValidList1);
+      }
+#endif
+
+
+#if GDR_ENABLED
+      if (allOk)
+#else
       if ( iRefList == 1 && uiCostTemp < costValidList1 && slice.getList1IdxToList0Idx( iRefIdxTemp ) < 0 )
+#endif
       {
         costValidList1 = uiCostTemp;
         bitsValidList1 = uiBitsTemp;
 
         // set motion
         memcpy( mvValidList1, cMvTemp[iRefList][iRefIdxTemp], sizeof(Mv)*3 );
+
+#if GDR_ENABLED
+        if (isEncodeGdrClean)   // keep the GDR flags of the saved list-1 candidate in step with its cost and MVs
+        {
+          costValidList1Ok = uiCostTempOk;
+          ::memcpy(mvValidList1Solid, cMvTempSolid[iRefList][iRefIdxTemp], sizeof(bool) * 3);   // per-control-point solid flags
+          ::memcpy(mvValidList1Valid, cMvTempValid[iRefList][iRefIdxTemp], sizeof(bool) * 3);   // per-control-point valid flags (source is cMvTempValid, not cMvTempSolid)
+        }
+#endif
         refIdxValidList1 = iRefIdxTemp;
       }
     } // End refIdx loop
@@ -4858,9 +7171,15 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   if ( pu.cu->affineType == AFFINEMODEL_4PARAM )
   {
     ::memcpy( mvAffine4Para, cMvTemp, sizeof( cMvTemp ) );
+#if GDR_ENABLED
+    ::memcpy(mvAffine4ParaSolid, cMvTempSolid, sizeof(cMvTempSolid));
+#endif
     if ( pu.cu->imv == 0 && ( !pu.cu->cs->sps->getUseBcw() || bcwIdx == BCW_DEFAULT ) )
     {
       AffineMVInfo *affMVInfo = m_affMVList + m_affMVListIdx;
+#if GDR_ENABLED
+      AffineMVInfoSolid *affMVInfoSolid = m_affMVListSolid + m_affMVListIdx;
+#endif
 
       //check;
       int j = 0;
@@ -4872,10 +7191,19 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
           break;
         }
       }
+#if GDR_ENABLED
+      if (j < m_affMVListSize)
+      {
+        affMVInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize));
+        affMVInfoSolid = m_affMVListSolid + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize));
+      }
+      ::memcpy(affMVInfo->affMVs, cMvTemp, sizeof(cMvTemp));
+      ::memcpy(affMVInfoSolid->affMVsSolid, cMvTempSolid, sizeof(cMvTempSolid));
+#else
       if (j < m_affMVListSize)
         affMVInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize));
-
       ::memcpy(affMVInfo->affMVs, cMvTemp, sizeof(cMvTemp));
+#endif
 
       if (j == m_affMVListSize)
       {
@@ -4903,6 +7231,15 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
     ::memcpy( cMvPredBi,   cMvPred,   sizeof(cMvPred)   );
     ::memcpy( aaiMvpIdxBi, aaiMvpIdx, sizeof(aaiMvpIdx) );
 
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      ::memcpy(cMvBiSolid, aacMvSolid, sizeof(cMvBiSolid));
+      ::memcpy(cMvBiValid, aacMvValid, sizeof(cMvBiValid));
+      ::memcpy(cMvPredBiSolid, cMvPredSolid, sizeof(cMvPredSolid));
+    }
+#endif
+
     uint32_t uiMotBits[2];
     bool doBiPred = true;
 
@@ -4920,6 +7257,33 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       ::memcpy( cMvBi[1],                      pcMvTemp, sizeof(Mv)*3 );
       ::memcpy( cMvTemp[1][bestBiPRefIdxL1],   pcMvTemp, sizeof(Mv)*3 );
       iRefIdxBi[1] = bestBiPRefIdxL1;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        PelUnitBuf     tmpBuf = m_tmpAffiStorage.getBuf(UnitAreaRelative(*pu.cu, pu));
+        const Picture *refPic = pu.cu->slice->getRefPic((RefPicList)REF_PIC_LIST_1, iRefIdxBi[1]);
+
+        cMvPredBiSolid[1][bestBiPRefIdxL1][0] = affiAMVPInfoTemp[REF_PIC_LIST_1].mvSolidLT[bestBiPMvpL1];
+        cMvPredBiSolid[1][bestBiPRefIdxL1][1] = affiAMVPInfoTemp[REF_PIC_LIST_1].mvSolidRT[bestBiPMvpL1];
+        cMvPredBiSolid[1][bestBiPRefIdxL1][2] = affiAMVPInfoTemp[REF_PIC_LIST_1].mvSolidLB[bestBiPMvpL1];
+
+        cMvBiSolid[1][0] = affiAMVPInfoTemp[REF_PIC_LIST_1].mvSolidLT[bestBiPMvpL1];
+        cMvBiSolid[1][1] = affiAMVPInfoTemp[REF_PIC_LIST_1].mvSolidRT[bestBiPMvpL1];
+        cMvBiSolid[1][2] = affiAMVPInfoTemp[REF_PIC_LIST_1].mvSolidLB[bestBiPMvpL1];
+
+
+        bool isSubPuYYClean = xPredAffineBlk(COMPONENT_Y, pu, refPic, cMvTemp[1][bestBiPRefIdxL1], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+        bool isSubPuCbClean = (isSubPuYYClean) ? xPredAffineBlk(COMPONENT_Cb, pu, refPic, cMvTemp[1][bestBiPRefIdxL1], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)) : false;
+
+        cMvBiValid[1][0] = isSubPuYYClean && isSubPuCbClean;
+        cMvBiValid[1][1] = isSubPuYYClean && isSubPuCbClean;
+        cMvBiValid[1][2] = isSubPuYYClean && isSubPuCbClean;
+
+        cMvTempSolid[1][bestBiPRefIdxL1][0] = affiAMVPInfoTemp[REF_PIC_LIST_1].mvSolidLT[bestBiPMvpL1];
+        cMvTempSolid[1][bestBiPRefIdxL1][1] = affiAMVPInfoTemp[REF_PIC_LIST_1].mvSolidRT[bestBiPMvpL1];
+        cMvTempSolid[1][bestBiPRefIdxL1][2] = affiAMVPInfoTemp[REF_PIC_LIST_1].mvSolidLB[bestBiPMvpL1];
+      }
+#endif
 
       if( m_pcEncCfg->getMCTSEncConstraint() )
       {
@@ -4942,6 +7306,26 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       PU::setAllAffineMv( pu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1);
       pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
 
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        PelUnitBuf     tmpBuf = m_tmpAffiStorage.getBuf(UnitAreaRelative(*pu.cu, pu));
+        const Picture *refPic = pu.cu->slice->getRefPic((RefPicList)REF_PIC_LIST_1, pu.refIdx[REF_PIC_LIST_1]);
+
+        pu.mvAffiSolid[REF_PIC_LIST_1][0] = cMvBiSolid[REF_PIC_LIST_1][0];
+        pu.mvAffiSolid[REF_PIC_LIST_1][1] = cMvBiSolid[REF_PIC_LIST_1][1];
+        pu.mvAffiSolid[REF_PIC_LIST_1][2] = cMvBiSolid[REF_PIC_LIST_1][2];
+
+
+        bool isSubPuYYClean = xPredAffineBlk(COMPONENT_Y, pu, refPic, cMvBi[1], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+        bool isSubPuCbClean = (isSubPuYYClean) ? xPredAffineBlk(COMPONENT_Cb, pu, refPic, cMvBi[1], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)) : false;
+
+        pu.mvAffiValid[REF_PIC_LIST_1][0] = cMvBiValid[REF_PIC_LIST_1][0] = isSubPuYYClean && isSubPuCbClean;
+        pu.mvAffiValid[REF_PIC_LIST_1][1] = cMvBiValid[REF_PIC_LIST_1][1] = isSubPuYYClean && isSubPuCbClean;
+        pu.mvAffiValid[REF_PIC_LIST_1][2] = cMvBiValid[REF_PIC_LIST_1][2] = isSubPuYYClean && isSubPuCbClean;
+      }
+#endif
+
       PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getBuf( UnitAreaRelative(*pu.cu, pu) );
       motionCompensation( pu, predBufTmp, REF_PIC_LIST_1 );
 
@@ -4977,13 +7361,33 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       iNumIter = 1;
     }
 
-    for ( int iIter = 0; iIter < iNumIter; iIter++ )
-    {
-      // Set RefList
-      int iRefList = iIter % 2;
-      if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 )
-      {
+    for ( int iIter = 0; iIter < iNumIter; iIter++ )
+    {
+      // Set RefList
+      int iRefList = iIter % 2;
+      if ( m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode()==FASTINTERSEARCH_MODE2 )
+      {
+#if GDR_ENABLED
+        allOk = (uiCost[0] <= uiCost[1]);
+
+        if (isEncodeGdrClean)
+        {
+          if (uiCostOk[0])
+          {
+            allOk = (uiCostOk[1]) ? (uiCost[0] <= uiCost[1]) : true;
+          }
+          else
+          {
+            allOk = false;
+          }
+        }
+#endif
+
+#if GDR_ENABLED
+        if (allOk)
+#else
         if( uiCost[0] <= uiCost[1] )
+#endif
         {
           iRefList = 1;
         }
@@ -5006,6 +7410,25 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       {
         PU::setAllAffineMv( pu, aacMv[1-iRefList][0], aacMv[1-iRefList][1], aacMv[1-iRefList][2], RefPicList(1-iRefList));
         pu.refIdx[1-iRefList] = iRefIdx[1-iRefList];
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          PelUnitBuf     tmpBuf = m_tmpAffiStorage.getBuf(UnitAreaRelative(*pu.cu, pu));
+          const Picture *refPic = pu.cu->slice->getRefPic((RefPicList)(1 - iRefList), pu.refIdx[1 - iRefList]);
+
+          pu.mvAffiSolid[1 - iRefList][0] = aacMvSolid[1 - iRefList][0];
+          pu.mvAffiSolid[1 - iRefList][1] = aacMvSolid[1 - iRefList][1];
+          pu.mvAffiSolid[1 - iRefList][2] = aacMvSolid[1 - iRefList][2];
+
+
+          bool isSubPuYYClean = xPredAffineBlk(COMPONENT_Y, pu, refPic, aacMv[1 - iRefList], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+          bool isSubPuCbClean = (isSubPuYYClean) ? xPredAffineBlk(COMPONENT_Cb, pu, refPic, aacMv[1 - iRefList], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)) : false;
+
+          pu.mvAffiValid[1 - iRefList][0] = aacMvValid[1 - iRefList][0] = isSubPuYYClean && isSubPuCbClean;
+          pu.mvAffiValid[1 - iRefList][1] = aacMvValid[1 - iRefList][1] = isSubPuYYClean && isSubPuCbClean;
+          pu.mvAffiValid[1 - iRefList][2] = aacMvValid[1 - iRefList][2] = isSubPuYYClean && isSubPuCbClean;
+        }
+#endif
 
         PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getBuf( UnitAreaRelative(*pu.cu, pu) );
         motionCompensation( pu, predBufTmp, RefPicList(1 - iRefList) );
@@ -5049,20 +7472,129 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
         uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
 
         // call Affine ME
+#if GDR_ENABLED
+        xAffineMotionEstimation(pu, origBuf, eRefPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], cMvTempSolid[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp,
+          aaiMvpIdxBi[iRefList][iRefIdxTemp], aacAffineAMVPInfo[iRefList][iRefIdxTemp], bAnyClean,
+          true);
+#else
         xAffineMotionEstimation( pu, origBuf, eRefPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp,
                                  aaiMvpIdxBi[iRefList][iRefIdxTemp], aacAffineAMVPInfo[iRefList][iRefIdxTemp],
           true );
+#endif
+
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          int mvp_idx = aaiMvpIdxBi[iRefList][iRefIdxTemp];   // bi-pred MVP index, the one handed to xAffineMotionEstimation above
+          PelUnitBuf     tmpBuf = m_tmpAffiStorage.getBuf(UnitAreaRelative(*pu.cu, pu));
+          const Picture *refPic = pu.cu->slice->getRefPic((RefPicList)iRefList, iRefIdxTemp);
+          // Refresh the bi-pred predictor's per-corner solid flags (LT/RT/LB) from the AMVP table used by the ME call.
+          cMvPredBiSolid[iRefList][iRefIdxTemp][0] = aacAffineAMVPInfo[iRefList][iRefIdxTemp].mvSolidLT[mvp_idx];
+          cMvPredBiSolid[iRefList][iRefIdxTemp][1] = aacAffineAMVPInfo[iRefList][iRefIdxTemp].mvSolidRT[mvp_idx];
+          cMvPredBiSolid[iRefList][iRefIdxTemp][2] = aacAffineAMVPInfo[iRefList][iRefIdxTemp].mvSolidLB[mvp_idx];
+          // All three MV flags mirror the bi-pred predictor flags (not the uni-pred cMvPredSolid table).
+          cMvTempSolid[iRefList][iRefIdxTemp][0] = cMvPredBiSolid[iRefList][iRefIdxTemp][0];
+          cMvTempSolid[iRefList][iRefIdxTemp][1] = cMvPredBiSolid[iRefList][iRefIdxTemp][1];
+          cMvTempSolid[iRefList][iRefIdxTemp][2] = cMvPredBiSolid[iRefList][iRefIdxTemp][2];
+          // xPredAffineBlk's bool result is treated as "prediction is clean" (per the variable names - confirm
+          // against its definition). Cb is evaluated only when Y passed; Cr is not evaluated here.
+          bool isSubPuYYClean = xPredAffineBlk(COMPONENT_Y, pu, refPic, cMvTemp[iRefList][iRefIdxTemp], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+          bool isSubPuCbClean = (isSubPuYYClean) ? xPredAffineBlk(COMPONENT_Cb, pu, refPic, cMvTemp[iRefList][iRefIdxTemp], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)) : false;
+          // The same combined result is stored for every control point.
+          cMvTempValid[iRefList][iRefIdxTemp][0] = isSubPuYYClean && isSubPuCbClean;
+          cMvTempValid[iRefList][iRefIdxTemp][1] = isSubPuYYClean && isSubPuCbClean;
+          cMvTempValid[iRefList][iRefIdxTemp][2] = isSubPuYYClean && isSubPuCbClean;
+          // The cost is marked ok only if predictor, MV and validity flags all hold; index 2 counts only when mvNum > 2.
+          uiCostTempOk = true;
+          uiCostTempOk = uiCostTempOk && cMvPredBiSolid[iRefList][iRefIdxTemp][0] && cMvPredBiSolid[iRefList][iRefIdxTemp][1];
+          uiCostTempOk = uiCostTempOk && ((mvNum > 2) ? cMvPredBiSolid[iRefList][iRefIdxTemp][2] : true);
+          uiCostTempOk = uiCostTempOk && cMvTempSolid[iRefList][iRefIdxTemp][0] && cMvTempSolid[iRefList][iRefIdxTemp][1] && ((mvNum > 2) ? cMvTempSolid[iRefList][iRefIdxTemp][2] : true);
+          uiCostTempOk = uiCostTempOk && cMvTempValid[iRefList][iRefIdxTemp][0] && cMvTempValid[iRefList][iRefIdxTemp][1] && ((mvNum > 2) ? cMvTempValid[iRefList][iRefIdxTemp][2] : true);
+        }
+#endif
+
         xCopyAffineAMVPInfo( aacAffineAMVPInfo[iRefList][iRefIdxTemp], affiAMVPInfoTemp[eRefPicList] );
+#if GDR_ENABLED
+        if ( pu.cu->imv != 2 || !m_pcEncCfg->getUseAffineAmvrEncOpt() )
+        {
+          xCheckBestAffineMVP( pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
+          if (isEncodeGdrClean)
+          {
+            int mvp_idx = aaiMvpIdxBi[iRefList][iRefIdxTemp];
+
+            cMvPredBiSolid[iRefList][iRefIdxTemp][0] = affiAMVPInfoTemp[eRefPicList].mvSolidLT[mvp_idx];
+            cMvPredBiSolid[iRefList][iRefIdxTemp][1] = affiAMVPInfoTemp[eRefPicList].mvSolidRT[mvp_idx];
+            cMvPredBiSolid[iRefList][iRefIdxTemp][2] = affiAMVPInfoTemp[eRefPicList].mvSolidLB[mvp_idx];
+
+            cMvTempSolid[iRefList][iRefIdxTemp][0] = cMvPredBiSolid[iRefList][iRefIdxTemp][0];
+            cMvTempSolid[iRefList][iRefIdxTemp][1] = cMvPredBiSolid[iRefList][iRefIdxTemp][1];
+            cMvTempSolid[iRefList][iRefIdxTemp][2] = cMvPredBiSolid[iRefList][iRefIdxTemp][2];
+
+            if (cMvTempValid[iRefList][iRefIdxTemp][0] && cMvTempValid[iRefList][iRefIdxTemp][1] && cMvTempValid[iRefList][iRefIdxTemp][2])
+            {
+              cMvTempValid[iRefList][iRefIdxTemp][0] = cMvPredBiSolid[iRefList][iRefIdxTemp][0];
+              cMvTempValid[iRefList][iRefIdxTemp][1] = cMvPredBiSolid[iRefList][iRefIdxTemp][1];
+              cMvTempValid[iRefList][iRefIdxTemp][2] = cMvPredBiSolid[iRefList][iRefIdxTemp][2];
+            }
+
+            uiCostTempOk = true;
+            uiCostTempOk = uiCostTempOk && cMvPredBiSolid[iRefList][iRefIdxTemp][0] && cMvPredBiSolid[iRefList][iRefIdxTemp][1];
+            uiCostTempOk = uiCostTempOk && ((mvNum > 2) ? cMvPredBiSolid[iRefList][iRefIdxTemp][2] : true);
+            uiCostTempOk = uiCostTempOk && cMvTempSolid[iRefList][iRefIdxTemp][0] && cMvTempSolid[iRefList][iRefIdxTemp][1] && ((mvNum > 2) ? cMvTempSolid[iRefList][iRefIdxTemp][2] : true);
+            uiCostTempOk = uiCostTempOk && cMvTempValid[iRefList][iRefIdxTemp][0] && cMvTempValid[iRefList][iRefIdxTemp][1] && ((mvNum > 2) ? cMvTempValid[iRefList][iRefIdxTemp][2] : true);
+          }
+        }
+#else
         if ( pu.cu->imv != 2 || !m_pcEncCfg->getUseAffineAmvrEncOpt() )
-        xCheckBestAffineMVP( pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
+        {
+          xCheckBestAffineMVP(pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp],
+                              cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp,
+                              uiCostTemp);
+        }
+#endif
+
+#if GDR_ENABLED
+        allOk = (uiCostTemp < uiCostBi);
+
+        if (isEncodeGdrClean)
+        {
+          if (uiCostTempOk)
+          {
+            allOk = (uiCostBiOk) ? (uiCostTemp < uiCostBi) : true;
+          }
+          else
+          {
+            allOk = false;
+          }
+        }
+#endif
+
+
 
+#if GDR_ENABLED
+        if (allOk)
+#else
         if ( uiCostTemp < uiCostBi )
+#endif
         {
           bChanged = true;
           ::memcpy( cMvBi[iRefList], cMvTemp[iRefList][iRefIdxTemp], sizeof(Mv)*3 );
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            ::memcpy(cMvBiSolid[iRefList], cMvTempSolid[iRefList][iRefIdxTemp], sizeof(bool) * 3);
+            ::memcpy(cMvBiValid[iRefList], cMvTempValid[iRefList][iRefIdxTemp], sizeof(bool) * 3);
+          }
+#endif
           iRefIdxBi[iRefList] = iRefIdxTemp;
 
           uiCostBi            = uiCostTemp;
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            uiCostBiOk = uiCostTempOk;
+          }
+#endif
           uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList];
           uiMotBits[iRefList] -= ((pu.cu->slice->getSPS()->getUseBcw() == true) ? bcwIdxBits : 0);
           uiBits[2]           = uiBitsTemp;
@@ -5072,6 +7604,28 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
             //  Set motion
             PU::setAllAffineMv( pu, cMvBi[iRefList][0], cMvBi[iRefList][1], cMvBi[iRefList][2], eRefPicList);
             pu.refIdx[eRefPicList] = iRefIdxBi[eRefPicList];
+
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              bool isSubPuYYClean;
+              bool isSubPuCbClean;
+              PelUnitBuf     tmpBuf = m_tmpAffiStorage.getBuf(UnitAreaRelative(*pu.cu, pu));
+              const Picture *refPic = pu.cu->slice->getRefPic((RefPicList)iRefList, pu.refIdx[eRefPicList]);
+
+              pu.mvAffiSolid[eRefPicList][0] = cMvBiSolid[iRefList][0];
+              pu.mvAffiSolid[eRefPicList][1] = cMvBiSolid[iRefList][1];
+              pu.mvAffiSolid[eRefPicList][2] = cMvBiSolid[iRefList][2];
+
+              isSubPuYYClean = xPredAffineBlk(COMPONENT_Y, pu, refPic, cMvBi[iRefList], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+              isSubPuCbClean = (isSubPuYYClean) ? xPredAffineBlk(COMPONENT_Cb, pu, refPic, cMvBi[iRefList], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)) : false;
+
+              pu.mvAffiValid[eRefPicList][0] = cMvBiValid[iRefList][0] = isSubPuYYClean && isSubPuCbClean;
+              pu.mvAffiValid[eRefPicList][1] = cMvBiValid[iRefList][1] = isSubPuYYClean && isSubPuCbClean;
+              pu.mvAffiValid[eRefPicList][2] = cMvBiValid[iRefList][2] = isSubPuYYClean && isSubPuCbClean;
+            }
+#endif
+
             PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getBuf( UnitAreaRelative(*pu.cu, pu) );
             motionCompensation( pu, predBufTmp, eRefPicList );
           }
@@ -5080,15 +7634,89 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
 
       if ( !bChanged )
       {
+#if GDR_ENABLED
+        allOk = ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred);
+
+        if (isEncodeGdrClean)
+        {
+          if (uiCostBiOk)
+          {
+            allOk = (uiCostOk[0] && uiCostOk[1]) ? ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred) : true;
+          }
+          else
+          {
+            allOk = false;
+          }
+        }
+#endif
+
+#if GDR_ENABLED
+        if (allOk)
+#else
         if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred)
+#endif
         {
           xCopyAffineAMVPInfo( aacAffineAMVPInfo[0][iRefIdxBi[0]], affiAMVPInfoTemp[REF_PIC_LIST_0] );
           xCheckBestAffineMVP( pu, affiAMVPInfoTemp[REF_PIC_LIST_0], REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], uiBits[2], uiCostBi );
+#if GDR_ENABLED
+          if (isEncodeGdrClean)
+          {
+            int mvp_idx = aaiMvpIdxBi[0][iRefIdxBi[0]];
+
+            cMvPredBiSolid[REF_PIC_LIST_0][iRefIdxBi[0]][0] = affiAMVPInfoTemp[REF_PIC_LIST_0].mvSolidLT[mvp_idx];
+            cMvPredBiSolid[REF_PIC_LIST_0][iRefIdxBi[0]][1] = affiAMVPInfoTemp[REF_PIC_LIST_0].mvSolidRT[mvp_idx];
+            cMvPredBiSolid[REF_PIC_LIST_0][iRefIdxBi[0]][2] = affiAMVPInfoTemp[REF_PIC_LIST_0].mvSolidLB[mvp_idx];
+
+            cMvBiSolid[REF_PIC_LIST_0][0] = cMvPredBiSolid[REF_PIC_LIST_0][iRefIdxBi[0]][0];
+            cMvBiSolid[REF_PIC_LIST_0][1] = cMvPredBiSolid[REF_PIC_LIST_0][iRefIdxBi[0]][1];
+            cMvBiSolid[REF_PIC_LIST_0][2] = cMvPredBiSolid[REF_PIC_LIST_0][iRefIdxBi[0]][2];
+
+            if (cMvBiValid[REF_PIC_LIST_0][0] && cMvBiValid[REF_PIC_LIST_0][1] && cMvBiValid[REF_PIC_LIST_0][2])
+            {
+              cMvBiValid[REF_PIC_LIST_0][0] = cMvPredBiSolid[REF_PIC_LIST_0][iRefIdxBi[0]][0];
+              cMvBiValid[REF_PIC_LIST_0][1] = cMvPredBiSolid[REF_PIC_LIST_0][iRefIdxBi[0]][1];
+              cMvBiValid[REF_PIC_LIST_0][2] = cMvPredBiSolid[REF_PIC_LIST_0][iRefIdxBi[0]][2];
+            }
+
+            uiCostBiOk = true;
+            uiCostBiOk = uiCostBiOk && cMvPredBiSolid[REF_PIC_LIST_0][iRefIdxBi[0]][0] && cMvPredBiSolid[REF_PIC_LIST_0][iRefIdxBi[0]][1];
+            uiCostBiOk = uiCostBiOk && ((mvNum > 2) ? cMvPredBiSolid[REF_PIC_LIST_0][iRefIdxBi[0]][2] : true);
+            uiCostBiOk = uiCostBiOk && cMvBiSolid[0][0] && cMvBiSolid[0][1] && ((mvNum > 2) ? cMvBiSolid[0][2] : true);
+            uiCostBiOk = uiCostBiOk && cMvBiValid[0][0] && cMvBiValid[0][1] && ((mvNum > 2) ? cMvBiValid[0][2] : true);
+          }
+#endif
 
           if ( !slice.getPicHeader()->getMvdL1ZeroFlag() )
           {
             xCopyAffineAMVPInfo( aacAffineAMVPInfo[1][iRefIdxBi[1]], affiAMVPInfoTemp[REF_PIC_LIST_1] );
             xCheckBestAffineMVP( pu, affiAMVPInfoTemp[REF_PIC_LIST_1], REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], uiBits[2], uiCostBi );
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              int mvp_idx = aaiMvpIdxBi[1][iRefIdxBi[1]];
+
+              cMvPredBiSolid[REF_PIC_LIST_1][iRefIdxBi[1]][0] = affiAMVPInfoTemp[REF_PIC_LIST_1].mvSolidLT[mvp_idx];
+              cMvPredBiSolid[REF_PIC_LIST_1][iRefIdxBi[1]][1] = affiAMVPInfoTemp[REF_PIC_LIST_1].mvSolidRT[mvp_idx];
+              cMvPredBiSolid[REF_PIC_LIST_1][iRefIdxBi[1]][2] = affiAMVPInfoTemp[REF_PIC_LIST_1].mvSolidLB[mvp_idx];
+
+              cMvBiSolid[REF_PIC_LIST_1][0] = cMvPredBiSolid[REF_PIC_LIST_1][iRefIdxBi[1]][0];
+              cMvBiSolid[REF_PIC_LIST_1][1] = cMvPredBiSolid[REF_PIC_LIST_1][iRefIdxBi[1]][1];
+              cMvBiSolid[REF_PIC_LIST_1][2] = cMvPredBiSolid[REF_PIC_LIST_1][iRefIdxBi[1]][2];
+
+              if (cMvBiValid[REF_PIC_LIST_1][0] && cMvBiValid[REF_PIC_LIST_1][1] && cMvBiValid[REF_PIC_LIST_1][2])
+              {
+                cMvBiValid[REF_PIC_LIST_1][0] = cMvPredBiSolid[REF_PIC_LIST_1][iRefIdxBi[1]][0];
+                cMvBiValid[REF_PIC_LIST_1][1] = cMvPredBiSolid[REF_PIC_LIST_1][iRefIdxBi[1]][1];
+                cMvBiValid[REF_PIC_LIST_1][2] = cMvPredBiSolid[REF_PIC_LIST_1][iRefIdxBi[1]][2];
+              }
+
+              uiCostBiOk = true;
+              uiCostBiOk = uiCostBiOk && cMvPredBiSolid[REF_PIC_LIST_1][iRefIdxBi[1]][0] && cMvPredBiSolid[REF_PIC_LIST_1][iRefIdxBi[1]][1];
+              uiCostBiOk = uiCostBiOk && ((mvNum > 2) ? cMvPredBiSolid[REF_PIC_LIST_1][iRefIdxBi[1]][2] : true);
+              uiCostBiOk = uiCostBiOk && cMvBiSolid[1][0] && cMvBiSolid[1][1] && ((mvNum > 2) ? cMvBiSolid[1][2] : true);
+              uiCostBiOk = uiCostBiOk && cMvBiValid[1][0] && cMvBiValid[1][1] && ((mvNum > 2) ? cMvBiValid[1][2] : true);
+            }
+#endif
           }
         }
         break;
@@ -5120,6 +7748,15 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   iRefIdx[1] = refIdxValidList1;
   uiBits[1]  = bitsValidList1;
   uiCost[1]  = costValidList1;
+
+#if GDR_ENABLED // restore the list-1 GDR flags together with the refIdx/bits/cost restored just above
+  if (isEncodeGdrClean)
+  {
+    memcpy(aacMvSolid[1], mvValidList1Solid, sizeof(bool) * 3); // three solid flags, one per control point
+    memcpy(aacMvValid[1], mvValidList1Valid, sizeof(bool) * 3); // three valid flags, one per control point
+    uiCostOk[1] = costValidList1Ok;                             // OK flag that belongs to costValidList1
+  }
+#endif // GDR_ENABLED
   if (pu.cs->pps->getWPBiPred() == true && tryBipred && (bcwIdx != BCW_DEFAULT))
   {
     CHECK(iRefIdxBi[0]<0, "Invalid picture reference index");
@@ -5131,15 +7768,46 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
     {
       uiCostBi = MAX_UINT;
       enforceBcwPred = false;
+#if GDR_ENABLED
+      uiCostBiOk = false;
+#endif
     }
   }
   if( enforceBcwPred )
   {
     uiCost[0] = uiCost[1] = MAX_UINT;
+#if GDR_ENABLED
+    uiCostOk[0] = uiCostOk[1] = false;
+#endif
   }
 
   // Affine ME result set
+#if GDR_ENABLED // decide between bi, list 0 and list 1; these flags replace the plain cost comparisons below
+  bool BiOk = (uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]); // default (non-GDR-clean) rule: bi wins, ties included
+  // For a GDR-clean PU the decision also depends on the OK flag attached to each cost.
+  if (isEncodeGdrClean)
+  {
+    if (uiCostBiOk)
+      BiOk = (uiCostOk[0] && uiCostOk[1]) ? (uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) : true; // costs compared only if both uni costs are OK; otherwise bi is taken outright
+    else
+      BiOk = false; // bi cost not OK: bi is never selected
+  }
+
+  bool L0ok = (uiCost[0] <= uiCost[1]); // default rule: list 0 wins ties against list 1
+  if (isEncodeGdrClean)
+  {
+    if (uiCostOk[0])
+      L0ok = (uiCostOk[1]) ? (uiCost[0] <= uiCost[1]) : true; // costs compared only if list 1 is OK too
+    else
+      L0ok = false; // list 0 not OK: selection falls to the final (list 1) branch
+  }
+#endif // GDR_ENABLED
+
+#if GDR_ENABLED
+  if (BiOk)
+#else
   if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) // Bi
+#endif
   {
     lastMode = 2;
     affineCost = uiCostBi;
@@ -5149,6 +7817,48 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
     pu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0];
     pu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
 
+#if GDR_ENABLED // bi-prediction selected: record per-list solid/valid flags on the PU
+    if (isEncodeGdrClean)
+    {
+      PelUnitBuf     tmpBuf = m_tmpAffiStorage.getBuf(UnitAreaRelative(*pu.cu, pu)); // scratch destination for the check predictions below
+      const Picture *refPic0 = (pu.refIdx[REF_PIC_LIST_0] < 0) ? nullptr : pu.cu->slice->getRefPic((RefPicList)REF_PIC_LIST_0, pu.refIdx[REF_PIC_LIST_0]);
+      const Picture *refPic1 = (pu.refIdx[REF_PIC_LIST_1] < 0) ? nullptr : pu.cu->slice->getRefPic((RefPicList)REF_PIC_LIST_1, pu.refIdx[REF_PIC_LIST_1]);
+      // List 0: solid flags are copied from the bi-search result.
+      pu.mvAffiSolid[REF_PIC_LIST_0][0] = cMvBiSolid[REF_PIC_LIST_0][0];
+      pu.mvAffiSolid[REF_PIC_LIST_0][1] = cMvBiSolid[REF_PIC_LIST_0][1];
+      pu.mvAffiSolid[REF_PIC_LIST_0][2] = cMvBiSolid[REF_PIC_LIST_0][2];
+      // Both flags stay false when there is no list-0 reference picture.
+      bool isSubPuYYClean0 = false;
+      bool isSubPuCbClean0 = false;
+      if (refPic0) {
+        isSubPuYYClean0 = xPredAffineBlk(COMPONENT_Y, pu, refPic0, cMvBi[0], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Y)); // bool result used as the luma flag
+        isSubPuCbClean0 = (isSubPuYYClean0) ? xPredAffineBlk(COMPONENT_Cb, pu, refPic0, cMvBi[0], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)) : false; // Cb is evaluated only when the luma flag is set
+      }
+      // All three control points receive the same valid flag (luma AND Cb).
+      pu.mvAffiValid[REF_PIC_LIST_0][0] = cMvBiValid[REF_PIC_LIST_0][0] = isSubPuYYClean0 && isSubPuCbClean0;
+      pu.mvAffiValid[REF_PIC_LIST_0][1] = cMvBiValid[REF_PIC_LIST_0][1] = isSubPuYYClean0 && isSubPuCbClean0;
+      pu.mvAffiValid[REF_PIC_LIST_0][2] = cMvBiValid[REF_PIC_LIST_0][2] = isSubPuYYClean0 && isSubPuCbClean0;
+
+      // List 1: same procedure as for list 0.
+      pu.mvAffiSolid[REF_PIC_LIST_1][0] = cMvBiSolid[REF_PIC_LIST_1][0];
+      pu.mvAffiSolid[REF_PIC_LIST_1][1] = cMvBiSolid[REF_PIC_LIST_1][1];
+      pu.mvAffiSolid[REF_PIC_LIST_1][2] = cMvBiSolid[REF_PIC_LIST_1][2];
+      // Both flags stay false when there is no list-1 reference picture.
+      bool isSubPuYYClean1 = false;
+      bool isSubPuCbClean1 = false;
+      if (refPic1)
+      {
+        isSubPuYYClean1 = xPredAffineBlk(COMPONENT_Y, pu, refPic1, cMvBi[1], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Y)); // bool result used as the luma flag
+        isSubPuCbClean1 = (isSubPuYYClean1) ? xPredAffineBlk(COMPONENT_Cb, pu, refPic1, cMvBi[1], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)) : false; // Cb is evaluated only when the luma flag is set
+      }
+      // All three control points receive the same valid flag (luma AND Cb).
+      pu.mvAffiValid[REF_PIC_LIST_1][0] = cMvBiValid[REF_PIC_LIST_1][0] = isSubPuYYClean1 && isSubPuCbClean1;
+      pu.mvAffiValid[REF_PIC_LIST_1][1] = cMvBiValid[REF_PIC_LIST_1][1] = isSubPuYYClean1 && isSubPuCbClean1;
+      pu.mvAffiValid[REF_PIC_LIST_1][2] = cMvBiValid[REF_PIC_LIST_1][2] = isSubPuYYClean1 && isSubPuCbClean1;
+    }
+#endif // GDR_ENABLED
+
+
     for ( int verIdx = 0; verIdx < mvNum; verIdx++ )
     {
       pu.mvdAffi[REF_PIC_LIST_0][verIdx] = cMvBi[0][verIdx] - cMvPredBi[0][iRefIdxBi[0]][verIdx];
@@ -5160,13 +7870,30 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       }
     }
 
-
     pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]];
     pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]];
     pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdxBi[1][iRefIdxBi[1]];
     pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdxBi[1]];
+
+#if GDR_ENABLED // bi-prediction selected: derive pu.mvpSolid for both lists from the chosen MVP candidates
+    if (isEncodeGdrClean)
+    {
+      pu.mvpSolid[REF_PIC_LIST_0] = affiAMVPInfoTemp[0].mvSolidLT[pu.mvpIdx[0]] && affiAMVPInfoTemp[0].mvSolidRT[pu.mvpIdx[0]]; // LT and RT are always required
+      pu.mvpSolid[REF_PIC_LIST_1] = affiAMVPInfoTemp[1].mvSolidLT[pu.mvpIdx[1]] && affiAMVPInfoTemp[1].mvSolidRT[pu.mvpIdx[1]];
+      // NOTE(review): assumes affiAMVPInfoTemp[] still holds the candidate lists for iRefIdxBi[] at this point - confirm.
+      if (pu.cu->affineType == AFFINEMODEL_6PARAM)
+      {
+        pu.mvpSolid[REF_PIC_LIST_0] = pu.mvpSolid[REF_PIC_LIST_0] && affiAMVPInfoTemp[0].mvSolidLB[pu.mvpIdx[0]]; // the 6-parameter model additionally requires LB
+        pu.mvpSolid[REF_PIC_LIST_1] = pu.mvpSolid[REF_PIC_LIST_1] && affiAMVPInfoTemp[1].mvSolidLB[pu.mvpIdx[1]];
+      }
+    }
+#endif // GDR_ENABLED
   }
+#if GDR_ENABLED
+  else if (L0ok) // List 0
+#else
   else if ( uiCost[0] <= uiCost[1] ) // List 0
+#endif
   {
     lastMode = 0;
     affineCost = uiCost[0];
@@ -5174,6 +7901,27 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
     PU::setAllAffineMv( pu, aacMv[0][0], aacMv[0][1], aacMv[0][2], REF_PIC_LIST_0);
     pu.refIdx[REF_PIC_LIST_0] = iRefIdx[0];
 
+#if GDR_ENABLED // list 0 selected: record solid/valid flags for list 0 on the PU
+    if (isEncodeGdrClean)
+    {
+      bool isSubPuYYClean; // luma flag, assigned from xPredAffineBlk below
+      bool isSubPuCbClean; // Cb flag, assigned from xPredAffineBlk below
+      PelUnitBuf     tmpBuf = m_tmpAffiStorage.getBuf(UnitAreaRelative(*pu.cu, pu)); // scratch destination for the check predictions
+      const Picture *refPic = pu.cu->slice->getRefPic((RefPicList)REF_PIC_LIST_0, pu.refIdx[REF_PIC_LIST_0]);
+      // Solid flags are copied from the uni-directional list-0 search result.
+      pu.mvAffiSolid[0][0] = aacMvSolid[0][0];
+      pu.mvAffiSolid[0][1] = aacMvSolid[0][1];
+      pu.mvAffiSolid[0][2] = aacMvSolid[0][2];
+      // The Cb prediction is evaluated only when the luma call returned true.
+      isSubPuYYClean = xPredAffineBlk(COMPONENT_Y, pu, refPic, aacMv[0], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+      isSubPuCbClean = (isSubPuYYClean) ? xPredAffineBlk(COMPONENT_Cb, pu, refPic, aacMv[0], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)) : false;
+      // All three control points receive the same valid flag (luma AND Cb).
+      pu.mvAffiValid[0][0] = aacMvValid[0][0] = isSubPuYYClean && isSubPuCbClean;
+      pu.mvAffiValid[0][1] = aacMvValid[0][1] = isSubPuYYClean && isSubPuCbClean;
+      pu.mvAffiValid[0][2] = aacMvValid[0][2] = isSubPuYYClean && isSubPuCbClean;
+    }
+#endif // GDR_ENABLED
+
     for ( int verIdx = 0; verIdx < mvNum; verIdx++ )
     {
       pu.mvdAffi[REF_PIC_LIST_0][verIdx] = aacMv[0][verIdx] - cMvPred[0][iRefIdx[0]][verIdx];
@@ -5185,6 +7933,17 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
 
     pu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]];
     pu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]];
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      pu.mvpSolid[REF_PIC_LIST_0] = affiAMVPInfoTemp[0].mvSolidLT[pu.mvpIdx[0]] && affiAMVPInfoTemp[0].mvSolidRT[pu.mvpIdx[0]];
+
+      if (pu.cu->affineType == AFFINEMODEL_6PARAM)
+      {
+        pu.mvpSolid[REF_PIC_LIST_0] = pu.mvpSolid[REF_PIC_LIST_0] && affiAMVPInfoTemp[0].mvSolidLB[pu.mvpIdx[0]];
+      }
+    }
+#endif
   }
   else
   {
@@ -5194,6 +7953,27 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
     PU::setAllAffineMv( pu, aacMv[1][0], aacMv[1][1], aacMv[1][2], REF_PIC_LIST_1);
     pu.refIdx[REF_PIC_LIST_1] = iRefIdx[1];
 
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      bool isSubPuYYClean;
+      bool isSubPuCbClean;
+      PelUnitBuf     tmpBuf = m_tmpAffiStorage.getBuf(UnitAreaRelative(*pu.cu, pu));
+      const Picture *refPic = pu.cu->slice->getRefPic((RefPicList)REF_PIC_LIST_1, pu.refIdx[REF_PIC_LIST_1]);
+
+      pu.mvAffiSolid[1][0] = aacMvSolid[1][0];
+      pu.mvAffiSolid[1][1] = aacMvSolid[1][1];
+      pu.mvAffiSolid[1][2] = aacMvSolid[1][2];
+
+      isSubPuYYClean = xPredAffineBlk(COMPONENT_Y, pu, refPic, aacMv[1], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+      isSubPuCbClean = (isSubPuYYClean) ? xPredAffineBlk(COMPONENT_Cb, pu, refPic, aacMv[1], tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)) : false;
+
+      pu.mvAffiValid[1][0] = aacMvValid[1][0] = isSubPuYYClean && isSubPuCbClean;
+      pu.mvAffiValid[1][1] = aacMvValid[1][1] = isSubPuYYClean && isSubPuCbClean;
+      pu.mvAffiValid[1][2] = aacMvValid[1][2] = isSubPuYYClean && isSubPuCbClean;
+    }
+#endif
+
     for ( int verIdx = 0; verIdx < mvNum; verIdx++ )
     {
       pu.mvdAffi[REF_PIC_LIST_1][verIdx] = aacMv[1][verIdx] - cMvPred[1][iRefIdx[1]][verIdx];
@@ -5205,6 +7985,17 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
 
     pu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]];
     pu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]];
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      pu.mvpSolid[REF_PIC_LIST_1] = affiAMVPInfoTemp[1].mvSolidLT[pu.mvpIdx[1]] && affiAMVPInfoTemp[1].mvSolidRT[pu.mvpIdx[1]];
+
+      if (pu.cu->affineType == AFFINEMODEL_6PARAM)
+      {
+        pu.mvpSolid[REF_PIC_LIST_1] = pu.mvpSolid[REF_PIC_LIST_1] && affiAMVPInfoTemp[1].mvSolidLB[pu.mvpIdx[1]];
+      }
+    }
+#endif
   }
   if( bcwIdx != BCW_DEFAULT )
   {
@@ -5285,6 +8076,11 @@ void solveEqual(double dEqualCoeff[7][7], int iOrder, double *dAffinePara)
 
 void InterSearch::xCheckBestAffineMVP( PredictionUnit &pu, AffineAMVPInfo &affineAMVPInfo, RefPicList eRefPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost )
 {
+#if GDR_ENABLED // GDR-aware selection is active only when isEncodeGdrClean (derived below) is true
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0)); // SPS GDR flag AND encoder AND ((in GDR interval AND luma top-right position is clean) OR no vertical virtual boundaries)
+#endif // GDR_ENABLED
+
   if ( affineAMVPInfo.numCand < 2 )
   {
     return;
@@ -5318,7 +8114,39 @@ void InterSearch::xCheckBestAffineMVP( PredictionUnit &pu, AffineAMVPInfo &affin
     int iMvBits = xCalcAffineMVBits( pu, acMv, tmpPredMv );
     iMvBits += m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];
 
+#if GDR_ENABLED // GDR-aware acceptance test; allOk replaces the plain (iMvBits < iBestMvBits) check below
+    bool allOk = (iMvBits < iBestMvBits); // default rule when the PU is not GDR-clean
+    if (isEncodeGdrClean)
+    {
+      bool curOk = affineAMVPInfo.mvSolidLT[iMVPIdx] && affineAMVPInfo.mvSolidRT[iMVPIdx]; // candidate under test: LT and RT solid
+      if (pu.cu->affineType == AFFINEMODEL_6PARAM)
+      {
+        curOk = curOk && affineAMVPInfo.mvSolidLB[iMVPIdx]; // the 6-parameter model also needs LB
+      }
+      // Same solidity test for the current best candidate.
+      bool best_ok = affineAMVPInfo.mvSolidLT[iBestMVPIdx] && affineAMVPInfo.mvSolidRT[iBestMVPIdx];
+      if (pu.cu->affineType == AFFINEMODEL_6PARAM)
+      {
+        best_ok = best_ok && affineAMVPInfo.mvSolidLB[iBestMVPIdx]; // fix: LB of the best belongs to best_ok (was folded into curOk)
+      }
+      // A solid candidate replaces a non-solid best outright; two solid ones are compared by bits.
+      if (curOk)
+      {
+        allOk = (best_ok) ? (iMvBits < iBestMvBits) : true;
+      }
+      else
+      {
+        allOk = false; // a non-solid candidate is never accepted
+      }
+    }
+#endif // GDR_ENABLED
+
+
+#if GDR_ENABLED
+    if (allOk)
+#else
     if (iMvBits < iBestMvBits)
+#endif
     {
       iBestMvBits = iMvBits;
       iBestMVPIdx = iMVPIdx;
@@ -5337,6 +8165,21 @@ void InterSearch::xCheckBestAffineMVP( PredictionUnit &pu, AffineAMVPInfo &affin
   }
 }
 
+#if GDR_ENABLED // GDR build: the signature gains acMvSolid and rbCleanCandExist compared with the #else variant
+void InterSearch::xAffineMotionEstimation(PredictionUnit& pu,
+  PelUnitBuf&     origBuf,
+  RefPicList      eRefPicList,
+  Mv              acMvPred[3],
+  int             iRefIdxPred,
+  Mv              acMv[3],
+  bool            acMvSolid[3],      // in/out: per-control-point solid flags; on exit taken from aamvpi.mvSolidLT/RT/LB[mvpIdx]
+  uint32_t&       ruiBits,
+  Distortion&     ruiCost,
+  int&            mvpIdx,
+  const AffineAMVPInfo& aamvpi,
+  bool&           rbCleanCandExist,  // out: set to true when a refinement candidate is accepted while isEncodeGdrClean
+  bool            bBi)
+#else
 void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
                                            PelUnitBuf&     origBuf,
                                            RefPicList      eRefPicList,
@@ -5348,13 +8191,25 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
                                            int&            mvpIdx,
                                            const AffineAMVPInfo& aamvpi,
                                            bool            bBi)
+#endif
 {
+#if GDR_ENABLED
+  if (pu.cu->cs->sps->getUseBcw() && pu.cu->BcwIdx != BCW_DEFAULT && !bBi && xReadBufferedAffineUniMv(pu, eRefPicList, iRefIdxPred, acMvPred, acMv, acMvSolid, ruiBits, ruiCost
+    , mvpIdx, aamvpi
+  ))
+#else
   if( pu.cu->cs->sps->getUseBcw() && pu.cu->BcwIdx != BCW_DEFAULT && !bBi && xReadBufferedAffineUniMv(pu, eRefPicList, iRefIdxPred, acMvPred, acMv, ruiBits, ruiCost
       , mvpIdx, aamvpi
   ) )
+#endif
   {
     return;
   }
+#if GDR_ENABLED
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool acMvValid[3];
+#endif
 
   uint32_t dirBits = ruiBits - m_auiMVPIdxCost[mvpIdx][aamvpi.numCand];
   int bestMvpIdx   = mvpIdx;
@@ -5391,6 +8246,11 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
   // Set start Mv position, use input mv as started search mv
   Mv acMvTemp[3];
   ::memcpy( acMvTemp, acMv, sizeof(Mv)*3 );
+
+#if GDR_ENABLED
+  bool acMvTempSolid[3];
+  ::memcpy(acMvTempSolid, acMvSolid, sizeof(bool) * 3);
+#endif
   // Set delta mv
   // malloc buffer
   int iParaNum = pu.cu->affineType ? 7 : 5;
@@ -5406,6 +8266,13 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
 
   Distortion uiCostBest = std::numeric_limits<Distortion>::max();
   uint32_t uiBitsBest = 0;
+#if GDR_ENABLED // OK flags that accompany the costs of this search; all start as true
+  bool uiCostBestOk = true; // OK flag of uiCostBest
+  bool uiCostTempOk = true; // OK flag of the per-iteration uiCostTemp
+  bool costTempOk = true;   // OK flag of costTemp in the control-point refinement steps
+
+  bool allOk = true;        // acceptance decision used by the iteration loop
+#endif // GDR_ENABLED
 
   // do motion compensation with origin mv
   if( m_pcEncCfg->getMCTSEncConstraint() )
@@ -5433,7 +8300,11 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
   {
     acMvTemp[2].roundAffinePrecInternal2Amvr(pu.cu->imv);
   }
+#if GDR_ENABLED
+  bool YYOk = xPredAffineBlk(COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cs->slice->clpRng(COMPONENT_Y));
+#else
   xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cs->slice->clpRng( COMPONENT_Y ) );
+#endif
 
   // get error
   uiCostBest = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
@@ -5454,6 +8325,25 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
     uiBitsBest += xCalcAffineMVBits( pu, acMvTemp, acMvPred );
     DTRACE( g_trace_ctx, D_COMMON, " (%d) yy uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest );
   }
+
+#if GDR_ENABLED // derive the OK flag for the starting MV's cost
+  if (isEncodeGdrClean)
+  {
+    acMvSolid[0] = aamvpi.mvSolidLT[mvpIdx]; // solid flags follow the MVP candidate at mvpIdx
+    acMvSolid[1] = aamvpi.mvSolidRT[mvpIdx];
+    acMvSolid[2] = aamvpi.mvSolidLB[mvpIdx];
+    // Only the luma prediction result (YYOk) is checked here; the Cb flag is fixed to true.
+    bool isSubPuYYClean = YYOk;
+    bool isSubPuCbClean = true;
+    // All three control points receive the same valid flag.
+    acMvValid[0] = isSubPuYYClean && isSubPuCbClean;
+    acMvValid[1] = isSubPuYYClean && isSubPuCbClean;
+    acMvValid[2] = isSubPuYYClean && isSubPuCbClean;
+    // NOTE(review): index 2 (LB) is required here regardless of affine type, while other sites gate it on mvNum > 2 / AFFINEMODEL_6PARAM - confirm intended.
+    uiCostBestOk = (acMvSolid[0] && acMvSolid[1] && acMvSolid[2]) && (acMvValid[0] && acMvValid[1] && acMvValid[2]);
+  }
+#endif // GDR_ENABLED
+
   uiCostBest = (Distortion)( floor( fWeight * (double)uiCostBest ) + (double)m_pcRdCost->getCost( uiBitsBest ) );
 
   DTRACE( g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest, uiCostBest );
@@ -5617,7 +8507,11 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
       }
     }
 
+#if GDR_ENABLED
+    bool YYOk = xPredAffineBlk(COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+#else
     xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ) );
+#endif
 
     // get error
     Distortion uiCostTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
@@ -5637,12 +8531,53 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
     {
       uiBitsTemp += xCalcAffineMVBits( pu, acMvTemp, acMvPred );
     }
+#if GDR_ENABLED
+    if (isEncodeGdrClean)
+    {
+      acMvSolid[0] = aamvpi.mvSolidLT[bestMvpIdx];
+      acMvSolid[1] = aamvpi.mvSolidRT[bestMvpIdx];
+      acMvSolid[2] = aamvpi.mvSolidLB[bestMvpIdx];
+
+      bool isSubPuYYClean = YYOk;
+      bool isSubPuCbClean = true;
+
+      acMvValid[0] = isSubPuYYClean && isSubPuCbClean;
+      acMvValid[1] = isSubPuYYClean && isSubPuCbClean;
+      acMvValid[2] = isSubPuYYClean && isSubPuCbClean;
+
+      uiCostTempOk = (acMvSolid[0] && acMvSolid[1] && acMvSolid[2]) && (acMvValid[0] && acMvValid[1] && acMvValid[2]);
+    }
+#endif
+
     uiCostTemp = (Distortion)( floor( fWeight * (double)uiCostTemp ) + (double)m_pcRdCost->getCost( uiBitsTemp ) );
 
     // store best cost and mv
+#if GDR_ENABLED // GDR-aware acceptance of this iteration's result
+    allOk = (uiCostTemp < uiCostBest); // default rule when the PU is not GDR-clean
+    if (isEncodeGdrClean)
+    {
+      if (uiCostTempOk)
+      {
+        allOk = (uiCostBestOk) ? (uiCostTemp < uiCostBest) : true; // an OK result replaces a non-OK best outright
+      }
+      else
+      {
+        allOk = false; // a non-OK result is never accepted
+      }
+    }
+
+    if (allOk)
+#else
     if ( uiCostTemp < uiCostBest )
+#endif // GDR_ENABLED
     {
       uiCostBest = uiCostTemp;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        uiCostBestOk = uiCostTempOk;
+      }
+#endif
       uiBitsBest = uiBitsTemp;
       memcpy( acMv, acMvTemp, sizeof(Mv) * 3 );
       mvpIdx = bestMvpIdx;
@@ -5651,7 +8586,30 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
 
   auto checkCPMVRdCost = [&](Mv ctrlPtMv[3])
   {
+#if GDR_ENABLED
+    bool YYOk = xPredAffineBlk(COMPONENT_Y, pu, refPic, ctrlPtMv, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+#else
     xPredAffineBlk(COMPONENT_Y, pu, refPic, ctrlPtMv, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+#endif
+
+#if GDR_ENABLED // derive costTempOk for the control-point MVs tested by this lambda
+    if (isEncodeGdrClean)
+    {
+      acMvSolid[0] = aamvpi.mvSolidLT[bestMvpIdx]; // solid flags follow the MVP candidate at bestMvpIdx
+      acMvSolid[1] = aamvpi.mvSolidRT[bestMvpIdx];
+      acMvSolid[2] = aamvpi.mvSolidLB[bestMvpIdx];
+      // Only the luma prediction result (YYOk) is checked; the Cb check is disabled (see the commented-out call).
+      bool isSubPuYYClean = YYOk;
+      bool isSubPuCbClean = true; // (isSubPuYYClean) ? xPredAffineBlk(COMPONENT_Cb, pu, refPic, ctrlPtMv, tmpBuf, false, pu.cu->slice->clpRng(COMPONENT_Cb)) : false;
+      // All three control points receive the same valid flag.
+      acMvValid[0] = isSubPuYYClean && isSubPuCbClean;
+      acMvValid[1] = isSubPuYYClean && isSubPuCbClean;
+      acMvValid[2] = isSubPuYYClean && isSubPuCbClean;
+      // costTempOk requires every solid flag and every valid flag to be set.
+      costTempOk = (acMvSolid[0] && acMvSolid[1] && acMvSolid[2]) && (acMvValid[0] && acMvValid[1] && acMvValid[2]);
+    }
+#endif // GDR_ENABLED
+
     // get error
     Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
     // get cost with mv
@@ -5660,9 +8618,33 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
     bitsTemp += xCalcAffineMVBits( pu, ctrlPtMv, acMvPred );
     costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp));
     // store best cost and mv
+#if GDR_ENABLED
+    bool allOk = (costTemp < uiCostBest);
+    if (isEncodeGdrClean)
+    {
+      if (costTempOk)
+      {
+        allOk = (uiCostBestOk) ? (costTemp < uiCostBest) : true;
+      }
+      else
+      {
+        allOk = false;
+      }
+    }
+
+    if (allOk)
+#else
     if (costTemp < uiCostBest)
+#endif
     {
       uiCostBest = costTemp;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        uiCostBestOk = costTempOk;
+        rbCleanCandExist = true;
+      }
+#endif
       uiBitsBest = bitsTemp;
       ::memcpy(acMv, ctrlPtMv, sizeof(Mv) * 3);
     }
@@ -5686,7 +8668,9 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
         acMvTemp[j] = mvPredTmp[j];
 
         if (j)
+        {
           acMvTemp[j] += dMv;
+        }
 
         checkCPMVRdCost(acMvTemp);
       }
@@ -5741,15 +8725,62 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
           {
             acMvTemp[j].set(centerMv[j].getHor() + (testPos[i][0] << mvShift), centerMv[j].getVer() + (testPos[i][1] << mvShift));
             clipMv( acMvTemp[j], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps, *pu.cs->pps );
+#if GDR_ENABLED
+            bool YYOk = xPredAffineBlk(COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+#else
             xPredAffineBlk(COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
+#endif
+
+#if GDR_ENABLED
+            if (isEncodeGdrClean)
+            {
+              acMvSolid[0] = aamvpi.mvSolidLT[bestMvpIdx];
+              acMvSolid[1] = aamvpi.mvSolidRT[bestMvpIdx];
+              acMvSolid[2] = aamvpi.mvSolidLB[bestMvpIdx];
+
+              bool isSubPuYYClean = YYOk;
+              bool isSubPuCbClean = true;
+
+              acMvValid[0] = isSubPuYYClean && isSubPuCbClean;
+              acMvValid[1] = isSubPuYYClean && isSubPuCbClean;
+              acMvValid[2] = isSubPuYYClean && isSubPuCbClean;
+
+              costTempOk = (acMvSolid[0] && acMvSolid[1] && acMvSolid[2]) && (acMvValid[0] && acMvValid[1] && acMvValid[2]);
+            }
+#endif
+
             Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
             uint32_t bitsTemp = ruiBits;
             bitsTemp += xCalcAffineMVBits(pu, acMvTemp, acMvPred);
             costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp));
 
+#if GDR_ENABLED
+            bool allOk = (costTemp < uiCostBest);
+            if (isEncodeGdrClean)
+            {
+              if (costTempOk)
+              {
+                allOk = (uiCostBestOk) ? (costTemp < uiCostBest) : true;
+              }
+              else
+              {
+                allOk = false;
+              }
+            }
+
+            if (allOk)
+#else
             if (costTemp < uiCostBest)
+#endif
             {
               uiCostBest = costTemp;
+#if GDR_ENABLED
+              if (isEncodeGdrClean)
+              {
+                uiCostBestOk = costTempOk;
+                rbCleanCandExist = true;
+              }
+#endif
               uiBitsBest = bitsTemp;
               ::memcpy(acMv, acMvTemp, sizeof(Mv) * 3);
               modelChange = true;
@@ -5769,6 +8800,12 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
   acMvPred[1] = aamvpi.mvCandRT[mvpIdx];
   acMvPred[2] = aamvpi.mvCandLB[mvpIdx];
 
+#if GDR_ENABLED
+  acMvSolid[0] = aamvpi.mvSolidLT[mvpIdx];
+  acMvSolid[1] = aamvpi.mvSolidRT[mvpIdx];
+  acMvSolid[2] = aamvpi.mvSolidLB[mvpIdx];
+#endif
+
   ruiBits = uiBitsBest;
   ruiCost = uiCostBest;
   DTRACE( g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiBitsBest, uiCostBest );
@@ -5786,6 +8823,12 @@ void InterSearch::xEstimateAffineAMVP( PredictionUnit&  pu,
   int        iBestIdx = 0;
   Distortion uiBestCost = std::numeric_limits<Distortion>::max();
 
+#if GDR_ENABLED
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool uiBestCostOk = false;
+#endif
+
   // Fill the MV Candidates
   PU::fillAffineMvpCand( pu, eRefPicList, iRefIdx, affineAMVPInfo );
   CHECK( affineAMVPInfo.numCand == 0, "Assertion failed." );
@@ -5798,9 +8841,36 @@ void InterSearch::xEstimateAffineAMVP( PredictionUnit&  pu,
   {
     Mv mv[3] = { affineAMVPInfo.mvCandLT[i], affineAMVPInfo.mvCandRT[i], affineAMVPInfo.mvCandLB[i] };
 
+#if GDR_ENABLED // GDR build: the template cost call also reports an OK flag through its last argument
+    bool uiTmpCostOk = true;
+    Distortion uiTmpCost = xGetAffineTemplateCost(pu, origBuf, predBuf, mv, i, AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdx, uiTmpCostOk);
+    uiTmpCostOk = uiTmpCostOk && affineAMVPInfo.mvSolidLT[i] && affineAMVPInfo.mvSolidRT[i]; // candidate i must have solid LT and RT
+    uiTmpCostOk = uiTmpCostOk && ((pu.cu->affineType == AFFINEMODEL_6PARAM) ? affineAMVPInfo.mvSolidLB[i] : true); // LB is required only for the 6-parameter model
+#else
     Distortion uiTmpCost = xGetAffineTemplateCost( pu, origBuf, predBuf, mv, i, AMVP_MAX_NUM_CANDS, eRefPicList, iRefIdx );
+#endif // GDR_ENABLED
+
+#if GDR_ENABLED // GDR-aware acceptance; allOk replaces the plain (uiBestCost > uiTmpCost) check below
+    bool allOk = uiBestCost > uiTmpCost; // default rule when the PU is not GDR-clean
+
+    if (isEncodeGdrClean)
+    {
+      if (uiTmpCostOk)
+      {
+        allOk = uiBestCostOk ? (uiBestCost > uiTmpCost) : true; // an OK candidate replaces a non-OK best outright
+      }
+      else
+      {
+        allOk = false; // a non-OK candidate is never accepted
+      }
+    }
+#endif // GDR_ENABLED
 
+#if GDR_ENABLED
+    if (allOk)
+#else
     if ( uiBestCost > uiTmpCost )
+#endif
     {
       uiBestCost = uiTmpCost;
       bestMvLT = affineAMVPInfo.mvCandLT[i];
@@ -5808,6 +8878,12 @@ void InterSearch::xEstimateAffineAMVP( PredictionUnit&  pu,
       bestMvLB = affineAMVPInfo.mvCandLB[i];
       iBestIdx  = i;
       *puiDistBiP = uiTmpCost;
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        uiBestCostOk = uiTmpCostOk;
+      }
+#endif
     }
   }
 
@@ -5818,6 +8894,10 @@ void InterSearch::xEstimateAffineAMVP( PredictionUnit&  pu,
 
   pu.mvpIdx[eRefPicList] = iBestIdx;
   pu.mvpNum[eRefPicList] = affineAMVPInfo.numCand;
+
+#if GDR_ENABLED
+  pu.mvpSolid[eRefPicList] = uiBestCostOk;
+#endif
   DTRACE( g_trace_ctx, D_COMMON, "#estAffi=%d \n", affineAMVPInfo.numCand );
 }
 
@@ -5828,6 +8908,24 @@ void InterSearch::xCopyAffineAMVPInfo (AffineAMVPInfo& src, AffineAMVPInfo& dst)
   ::memcpy( dst.mvCandLT, src.mvCandLT, sizeof(Mv)*src.numCand );
   ::memcpy( dst.mvCandRT, src.mvCandRT, sizeof(Mv)*src.numCand );
   ::memcpy( dst.mvCandLB, src.mvCandLB, sizeof(Mv)*src.numCand );
+
+#if GDR_ENABLED // copy the GDR per-candidate arrays as well; only the first src.numCand entries of each are copied
+  ::memcpy(dst.mvSolidLT, src.mvSolidLT, sizeof(bool)*src.numCand); // solid flags for LT / RT / LB
+  ::memcpy(dst.mvSolidRT, src.mvSolidRT, sizeof(bool)*src.numCand);
+  ::memcpy(dst.mvSolidLB, src.mvSolidLB, sizeof(bool)*src.numCand);
+  // valid flags for LT / RT / LB
+  ::memcpy(dst.mvValidLT, src.mvValidLT, sizeof(bool)*src.numCand);
+  ::memcpy(dst.mvValidRT, src.mvValidRT, sizeof(bool)*src.numCand);
+  ::memcpy(dst.mvValidLB, src.mvValidLB, sizeof(bool)*src.numCand);
+  // MvpType entries for LT / RT / LB
+  ::memcpy(dst.mvTypeLT, src.mvTypeLT, sizeof(MvpType)*src.numCand);
+  ::memcpy(dst.mvTypeRT, src.mvTypeRT, sizeof(MvpType)*src.numCand);
+  ::memcpy(dst.mvTypeLB, src.mvTypeLB, sizeof(MvpType)*src.numCand);
+  // Position entries for LT / RT / LB; NOTE(review): raw memcpy assumes Position is trivially copyable - confirm.
+  ::memcpy(dst.mvPosLT, src.mvPosLT, sizeof(Position)*src.numCand);
+  ::memcpy(dst.mvPosRT, src.mvPosRT, sizeof(Position)*src.numCand);
+  ::memcpy(dst.mvPosLB, src.mvPosLB, sizeof(Position)*src.numCand);
+#endif // GDR_ENABLED
 }
 
 
@@ -5852,17 +8950,18 @@ void InterSearch::xExtDIFUpSamplingH(CPelBuf* pattern, bool useAltHpelIf)
   int halfFilterSize = (filterSize>>1);
   const Pel *srcPtr = pattern->buf - halfFilterSize*srcStride - 1;
 
-  const ChromaFormat chFmt = m_currChromaFormat;
-
-  m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[0][0], intStride, width + 1, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, 0, false, useAltHpelIf);
+  m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[0][0], intStride, width + 1, height + filterSize,
+                 0 << MV_FRACTIONAL_BITS_DIFF, false, clpRng, 0, false, useAltHpelIf);
   if (!m_skipFracME)
   {
-    m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[2][0], intStride, width + 1, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, 0, false, useAltHpelIf);
+    m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[2][0], intStride, width + 1, height + filterSize,
+                   2 << MV_FRACTIONAL_BITS_DIFF, false, clpRng, 0, false, useAltHpelIf);
   }
 
   intPtr = m_filteredBlockTmp[0][0] + halfFilterSize * intStride + 1;
   dstPtr = m_filteredBlock[0][0][0];
-  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, 0, false, useAltHpelIf);
+  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF,
+                 false, true, clpRng, 0, false, useAltHpelIf);
   if (m_skipFracME)
   {
     return;
@@ -5870,21 +8969,21 @@ void InterSearch::xExtDIFUpSamplingH(CPelBuf* pattern, bool useAltHpelIf)
 
   intPtr = m_filteredBlockTmp[0][0] + (halfFilterSize - 1) * intStride + 1;
   dstPtr = m_filteredBlock[2][0][0];
-  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, 0, false, useAltHpelIf);
+  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF,
+                 false, true, clpRng, 0, false, useAltHpelIf);
 
   intPtr = m_filteredBlockTmp[2][0] + halfFilterSize * intStride;
   dstPtr = m_filteredBlock[0][2][0];
-  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, 0, false, useAltHpelIf);
+  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF,
+                 false, true, clpRng, 0, false, useAltHpelIf);
 
   intPtr = m_filteredBlockTmp[2][0] + (halfFilterSize - 1) * intStride;
   dstPtr = m_filteredBlock[2][2][0];
-  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, 0, false, useAltHpelIf);
+  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF,
+                 false, true, clpRng, 0, false, useAltHpelIf);
 }
 
 
-
-
-
 /**
 * \brief Generate quarter-sample interpolated blocks
 *
@@ -5910,8 +9009,6 @@ void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef )
 
   int extHeight = (halfPelRef.getVer() == 0) ? height + filterSize : height + filterSize-1;
 
-  const ChromaFormat chFmt = m_currChromaFormat;
-
   // Horizontal filter 1/4
   srcPtr = pattern->buf - halfFilterSize * srcStride - 1;
   intPtr = m_filteredBlockTmp[1][0];
@@ -5923,7 +9020,8 @@ void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef )
   {
     srcPtr += 1;
   }
-  m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 1 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng);
+  m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 1 << MV_FRACTIONAL_BITS_DIFF,
+                 false, clpRng);
 
   // Horizontal filter 3/4
   srcPtr = pattern->buf - halfFilterSize*srcStride - 1;
@@ -5936,7 +9034,8 @@ void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef )
   {
     srcPtr += 1;
   }
-  m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 3 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng);
+  m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 3 << MV_FRACTIONAL_BITS_DIFF,
+                 false, clpRng);
 
   // Generate @ 1,1
   intPtr = m_filteredBlockTmp[1][0] + (halfFilterSize-1) * intStride;
@@ -5945,12 +9044,14 @@ void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef )
   {
     intPtr += intStride;
   }
-  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false,
+                 true, clpRng);
 
   // Generate @ 3,1
   intPtr = m_filteredBlockTmp[1][0] + (halfFilterSize-1) * intStride;
   dstPtr = m_filteredBlock[3][1][0];
-  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false,
+                 true, clpRng);
 
   if (halfPelRef.getVer() != 0)
   {
@@ -5961,7 +9062,8 @@ void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef )
     {
       intPtr += intStride;
     }
-    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2 << MV_FRACTIONAL_BITS_DIFF,
+                   false, true, clpRng);
 
     // Generate @ 2,3
     intPtr = m_filteredBlockTmp[3][0] + (halfFilterSize - 1) * intStride;
@@ -5970,19 +9072,22 @@ void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef )
     {
       intPtr += intStride;
     }
-    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2 << MV_FRACTIONAL_BITS_DIFF,
+                   false, true, clpRng);
   }
   else
   {
     // Generate @ 0,1
     intPtr = m_filteredBlockTmp[1][0] + halfFilterSize * intStride;
     dstPtr = m_filteredBlock[0][1][0];
-    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0 << MV_FRACTIONAL_BITS_DIFF,
+                   false, true, clpRng);
 
     // Generate @ 0,3
     intPtr = m_filteredBlockTmp[3][0] + halfFilterSize * intStride;
     dstPtr = m_filteredBlock[0][3][0];
-    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0 << MV_FRACTIONAL_BITS_DIFF,
+                   false, true, clpRng);
   }
 
   if (halfPelRef.getHor() != 0)
@@ -5998,7 +9103,8 @@ void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef )
     {
       intPtr += intStride;
     }
-    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF,
+                   false, true, clpRng);
 
     // Generate @ 3,2
     intPtr = m_filteredBlockTmp[2][0] + (halfFilterSize - 1) * intStride;
@@ -6011,7 +9117,8 @@ void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef )
     {
       intPtr += intStride;
     }
-    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF,
+                   false, true, clpRng);
   }
   else
   {
@@ -6022,7 +9129,8 @@ void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef )
     {
       intPtr += intStride;
     }
-    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF,
+                   false, true, clpRng);
 
     // Generate @ 3,0
     intPtr = m_filteredBlockTmp[0][0] + (halfFilterSize - 1) * intStride + 1;
@@ -6031,7 +9139,8 @@ void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef )
     {
       intPtr += intStride;
     }
-    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+    m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF,
+                   false, true, clpRng);
   }
 
   // Generate @ 1,3
@@ -6041,12 +9150,14 @@ void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef )
   {
     intPtr += intStride;
   }
-  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false,
+                 true, clpRng);
 
   // Generate @ 3,3
   intPtr = m_filteredBlockTmp[3][0] + (halfFilterSize - 1) * intStride;
   dstPtr = m_filteredBlock[3][3][0];
-  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng);
+  m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false,
+                 true, clpRng);
 }
 
 
@@ -6128,16 +9239,15 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti
       )
     {
       {
-        {
-          const bool  chroma_cbf = TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth );
-          if (!(cu.sbtInfo && (currDepth == 0 || (currDepth == 1 && currTU.noResidual))))
-          m_CABACEstimator->cbf_comp( cs, chroma_cbf, currArea.blocks[COMPONENT_Cb], currDepth );
-        }
-        {
-          const bool  chroma_cbf = TU::getCbfAtDepth( currTU, COMPONENT_Cr, currDepth );
-          if (!(cu.sbtInfo && (currDepth == 0 || (currDepth == 1 && currTU.noResidual))))
-          m_CABACEstimator->cbf_comp( cs, chroma_cbf, currArea.blocks[COMPONENT_Cr], currDepth, TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ) );
-        }
+        const bool chroma_cbf = TU::getCbfAtDepth(currTU, COMPONENT_Cb, currDepth);
+        if (!(cu.sbtInfo && (currDepth == 0 || (currDepth == 1 && currTU.noResidual))))
+          m_CABACEstimator->cbf_comp(cs, chroma_cbf, currArea.blocks[COMPONENT_Cb], currDepth);
+      }
+      {
+        const bool chroma_cbf = TU::getCbfAtDepth(currTU, COMPONENT_Cr, currDepth);
+        if (!(cu.sbtInfo && (currDepth == 0 || (currDepth == 1 && currTU.noResidual))))
+          m_CABACEstimator->cbf_comp(cs, chroma_cbf, currArea.blocks[COMPONENT_Cr], currDepth,
+                                     TU::getCbfAtDepth(currTU, COMPONENT_Cb, currDepth));
       }
     }
 
@@ -6180,7 +9290,9 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti
         partitioner.splitCurrArea( PartSplit( cu.getSbtTuSplit() ), cs );
       }
       else
+      {
         THROW( "Implicit TU split not available!" );
+      }
 
       do
       {
@@ -6529,9 +9641,13 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
     {
       const ComponentID compID    = ComponentID(c);
       if (compID == COMPONENT_Y && !luma)
+      {
         continue;
+      }
       if (compID != COMPONENT_Y && !chroma)
+      {
         continue;
+      }
       const CompArea&   compArea  = tu.blocks[compID];
       const int channelBitDepth   = sps.getBitDepth(toChannelType(compID));
 
@@ -6552,8 +9668,8 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
       }
       else
       {
-      trModes.push_back( TrMode( 0, true ) ); //DCT2
-      nNumTransformCands = 1;
+        trModes.push_back(TrMode(0, true));   // DCT2
+        nNumTransformCands = 1;
       }
       //for a SBT-no-residual TU, the RDO process should be called once, in order to get the RD cost
       if( tsAllowed && !tu.noResidual )
@@ -6598,149 +9714,149 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
       const int numTransformCandidates = nNumTransformCands;
       for( int transformMode = 0; transformMode < numTransformCandidates; transformMode++ )
       {
-          const bool isFirstMode  = transformMode == 0;
-          // copy the original residual into the residual buffer
-          if (colorTransFlag)
-          {
-            csFull->getResiBuf(compArea).copyFrom(colorTransResidual.bufs[compID]);
-          }
-          else
+        const bool isFirstMode = transformMode == 0;
+        // copy the original residual into the residual buffer
+        if (colorTransFlag)
+        {
+          csFull->getResiBuf(compArea).copyFrom(colorTransResidual.bufs[compID]);
+        }
+        else
           csFull->getResiBuf(compArea).copyFrom(cs.getOrgResiBuf(compArea));
 
-          m_CABACEstimator->getCtx() = ctxStart;
-          m_CABACEstimator->resetBits();
+        m_CABACEstimator->getCtx() = ctxStart;
+        m_CABACEstimator->resetBits();
 
+        if (!(m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless()))
+        {
+          if (bestTU.mtsIdx[compID] == MTS_SKIP && m_pcEncCfg->getUseTransformSkipFast())
           {
-            if (!(m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless()))
-            {
-            if (bestTU.mtsIdx[compID] == MTS_SKIP && m_pcEncCfg->getUseTransformSkipFast())
-            {
-              continue;
-            }
-            if( !trModes[transformMode].second )
-            {
-              continue;
-            }
-            }
-            tu.mtsIdx[compID] = trModes[transformMode].first;
+            continue;
+          }
+          if (!trModes[transformMode].second)
+          {
+            continue;
           }
-          QpParam cQP(tu, compID);  // note: uses tu.transformSkip[compID]
+        }
+        tu.mtsIdx[compID] = trModes[transformMode].first;
+        QpParam cQP(tu, compID);   // note: uses tu.transformSkip[compID]
 
 #if RDOQ_CHROMA_LAMBDA
-          m_pcTrQuant->selectLambda(compID);
+        m_pcTrQuant->selectLambda(compID);
 #endif
-          if (slice.getLmcsEnabledFlag() && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag())
-          {
-            double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)(tu.getChromaAdj());
-            m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cRescale*cRescale));
-          }
-          if ( sps.getJointCbCrEnabledFlag() && isChroma( compID ) && ( tu.cu->cs->slice->getSliceQp() > 18 ) )
-          {
-            m_pcTrQuant->setLambda( 1.05 * m_pcTrQuant->getLambda() );
-          }
+        if (slice.getLmcsEnabledFlag() && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag())
+        {
+          double cRescale = (double) (1 << CSCALE_FP_PREC) / (double) (tu.getChromaAdj());
+          m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cRescale * cRescale));
+        }
+        if (sps.getJointCbCrEnabledFlag() && isChroma(compID) && (tu.cu->cs->slice->getSliceQp() > 18))
+        {
+          m_pcTrQuant->setLambda(1.05 * m_pcTrQuant->getLambda());
+        }
 
-          TCoeff     currAbsSum = 0;
-          uint64_t   currCompFracBits = 0;
-          Distortion currCompDist = 0;
-          double     currCompCost = 0;
-          uint64_t   nonCoeffFracBits = 0;
-          Distortion nonCoeffDist = 0;
-          double     nonCoeffCost = 0;
+        TCoeff     currAbsSum       = 0;
+        uint64_t   currCompFracBits = 0;
+        Distortion currCompDist     = 0;
+        double     currCompCost     = 0;
+        uint64_t   nonCoeffFracBits = 0;
+        Distortion nonCoeffDist     = 0;
+        double     nonCoeffCost     = 0;
 
-          if (!colorTransFlag && slice.getLmcsEnabledFlag() && isChroma(compID) && slice.getPicHeader()->getLmcsChromaResidualScaleFlag() && tu.blocks[compID].width*tu.blocks[compID].height > 4)
+        if (!colorTransFlag && slice.getLmcsEnabledFlag() && isChroma(compID)
+            && slice.getPicHeader()->getLmcsChromaResidualScaleFlag()
+            && tu.blocks[compID].width * tu.blocks[compID].height > 4)
+        {
+          PelBuf resiBuf = csFull->getResiBuf(compArea);
+          resiBuf.scaleSignal(tu.getChromaAdj(), 1, tu.cu->cs->slice->clpRng(compID));
+        }
+        if (nNumTransformCands > 1)
+        {
+          if (transformMode == 0)
           {
-            PelBuf resiBuf = csFull->getResiBuf(compArea);
-            resiBuf.scaleSignal(tu.getChromaAdj(), 1, tu.cu->cs->slice->clpRng(compID));
+            m_pcTrQuant->transformNxN(tu, compID, cQP, &trModes, m_pcEncCfg->getMTSInterMaxCand());
+            tu.mtsIdx[compID] = trModes[0].first;
           }
-          if( nNumTransformCands > 1 )
+          if (!(m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless() && tu.mtsIdx[compID] == 0))
           {
-            if( transformMode == 0 )
-            {
-              m_pcTrQuant->transformNxN( tu, compID, cQP, &trModes, m_pcEncCfg->getMTSInterMaxCand() );
-              tu.mtsIdx[compID] = trModes[0].first;
-            }
-            if (!(m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless() && tu.mtsIdx[compID] == 0))
-            {
-              m_pcTrQuant->transformNxN( tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx(), true );
-            }
+            m_pcTrQuant->transformNxN(tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx(), true);
           }
-          else
+        }
+        else
+        {
+          m_pcTrQuant->transformNxN(tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx());
+        }
+
+        if (isFirstMode || (currAbsSum == 0))
+        {
+          const CPelBuf zeroBuf(m_pTempPel, compArea);
+          const CPelBuf orgResi = colorTransFlag ? colorTransResidual.bufs[compID] : csFull->getOrgResiBuf(compArea);
+
           {
-            m_pcTrQuant->transformNxN( tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx() );
+            nonCoeffDist = m_pcRdCost->getDistPart(zeroBuf, orgResi, channelBitDepth, compID,
+                                                   DF_SSE);   // initialized with zero residual distortion
           }
 
-          if (isFirstMode || (currAbsSum == 0))
+          if (!tu.noResidual)
           {
-            const CPelBuf zeroBuf(m_pTempPel, compArea);
-            const CPelBuf orgResi = colorTransFlag ? colorTransResidual.bufs[compID] : csFull->getOrgResiBuf(compArea);
-
-            {
-              nonCoeffDist = m_pcRdCost->getDistPart( zeroBuf, orgResi, channelBitDepth, compID, DF_SSE ); // initialized with zero residual distortion
-            }
-
-            if( !tu.noResidual )
-            {
             const bool prevCbf = ( compID == COMPONENT_Cr ? tu.cbf[COMPONENT_Cb] : false );
             m_CABACEstimator->cbf_comp( *csFull, false, compArea, currDepth, prevCbf );
+          }
 
-            }
-
-            nonCoeffFracBits = m_CABACEstimator->getEstFracBits();
+          nonCoeffFracBits = m_CABACEstimator->getEstFracBits();
 #if WCG_EXT
-            if( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() )
-            {
-              nonCoeffCost   = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist, false);
-            }
-            else
-#endif
-              if (cs.slice->getSPS()->getUseColorTrans())
-              {
-                nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist, false);
-              }
-              else
-              {
-                nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist);
-              }
+          if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())
+          {
+            nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist, false);
           }
-
-          if ((puiZeroDist != NULL) && isFirstMode)
+          else
+#endif
+            if (cs.slice->getSPS()->getUseColorTrans())
           {
-            *puiZeroDist += nonCoeffDist; // initialized with zero residual distortion
+            nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist, false);
           }
-          if (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless() && tu.mtsIdx[compID] == 0)
+          else
           {
-            currAbsSum = 0;
+            nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist);
           }
+        }
 
-          if (currAbsSum > 0) //if non-zero coefficients are present, a residual needs to be derived for further prediction
-          {
-            if (isFirstMode)
-            {
-              m_CABACEstimator->getCtx() = ctxStart;
-              m_CABACEstimator->resetBits();
-            }
+        if ((puiZeroDist != NULL) && isFirstMode)
+        {
+          *puiZeroDist += nonCoeffDist;   // initialized with zero residual distortion
+        }
+        if (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless() && tu.mtsIdx[compID] == 0)
+        {
+          currAbsSum = 0;
+        }
 
-            const bool prevCbf = ( compID == COMPONENT_Cr ? tu.cbf[COMPONENT_Cb] : false );
-            m_CABACEstimator->cbf_comp( *csFull, true, compArea, currDepth, prevCbf );
-            if( compID == COMPONENT_Cr )
-            {
-              const int cbfMask = ( tu.cbf[COMPONENT_Cb] ? 2 : 0 ) + 1;
-              m_CABACEstimator->joint_cb_cr( tu, cbfMask );
-            }
+        if (currAbsSum
+            > 0)   // if non-zero coefficients are present, a residual needs to be derived for further prediction
+        {
+          if (isFirstMode)
+          {
+            m_CABACEstimator->getCtx() = ctxStart;
+            m_CABACEstimator->resetBits();
+          }
 
-            CUCtx cuCtx;
-            cuCtx.isDQPCoded = true;
-            cuCtx.isChromaQpAdjCoded = true;
-            m_CABACEstimator->residual_coding(tu, compID, &cuCtx);
-            m_CABACEstimator->mts_idx(cu, &cuCtx);
+          const bool prevCbf = (compID == COMPONENT_Cr ? tu.cbf[COMPONENT_Cb] : false);
+          m_CABACEstimator->cbf_comp(*csFull, true, compArea, currDepth, prevCbf);
+          if (compID == COMPONENT_Cr)
+          {
+            const int cbfMask = (tu.cbf[COMPONENT_Cb] ? 2 : 0) + 1;
+            m_CABACEstimator->joint_cb_cr(tu, cbfMask);
+          }
 
-            if (compID == COMPONENT_Y && tu.mtsIdx[compID] > MTS_SKIP && !cuCtx.mtsLastScanPos)
-            {
-              currCompCost = MAX_DOUBLE;
-            }
-            else
-            {
+          CUCtx cuCtx;
+          cuCtx.isDQPCoded         = true;
+          cuCtx.isChromaQpAdjCoded = true;
+          m_CABACEstimator->residual_coding(tu, compID, &cuCtx);
+          m_CABACEstimator->mts_idx(cu, &cuCtx);
 
+          if (compID == COMPONENT_Y && tu.mtsIdx[compID] > MTS_SKIP && !cuCtx.mtsLastScanPos)
+          {
+            currCompCost = MAX_DOUBLE;
+          }
+          else
+          {
             currCompFracBits = m_CABACEstimator->getEstFracBits();
 
             PelBuf resiBuf = csFull->getResiBuf(compArea);
@@ -6759,53 +9875,53 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 #else
             currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDist);
 #endif
-            }
-          }
-          else if( transformMode > 0 )
-          {
-            currCompCost = MAX_DOUBLE;
           }
-          else
+        }
+        else if (transformMode > 0)
+        {
+          currCompCost = MAX_DOUBLE;
+        }
+        else
+        {
+          currCompFracBits = nonCoeffFracBits;
+          currCompDist     = nonCoeffDist;
+          currCompCost     = nonCoeffCost;
+
+          tu.cbf[compID] = 0;
+        }
+
+        // evaluate
+        if ((currCompCost < minCost[compID]) || (transformMode == 1 && currCompCost == minCost[compID]))
+        {
+          // copy component
+          if (isFirstMode && ((nonCoeffCost < currCompCost) || (currAbsSum == 0)))   // check for forced null
           {
+            tu.getCoeffs(compID).fill(0);
+            csFull->getResiBuf(compArea).fill(0);
+            tu.cbf[compID] = 0;
+
+            currAbsSum       = 0;
             currCompFracBits = nonCoeffFracBits;
             currCompDist     = nonCoeffDist;
             currCompCost     = nonCoeffCost;
-
-            tu.cbf[compID] = 0;
           }
 
-          // evaluate
-          if( ( currCompCost < minCost[compID] ) || ( transformMode == 1 && currCompCost == minCost[compID] ) )
-          {
-            // copy component
-            if (isFirstMode && ((nonCoeffCost < currCompCost) || (currAbsSum == 0))) // check for forced null
-            {
-              tu.getCoeffs( compID ).fill( 0 );
-              csFull->getResiBuf( compArea ).fill( 0 );
-              tu.cbf[compID]   = 0;
-
-              currAbsSum       = 0;
-              currCompFracBits = nonCoeffFracBits;
-              currCompDist     = nonCoeffDist;
-              currCompCost     = nonCoeffCost;
-            }
-
-            uiSingleDistComp[compID] = currCompDist;
-            uiSingleFracBits[compID] = currCompFracBits;
-            minCost[compID]          = currCompCost;
+          uiSingleDistComp[compID] = currCompDist;
+          uiSingleFracBits[compID] = currCompFracBits;
+          minCost[compID]          = currCompCost;
 
-              bestTU.copyComponentFrom( tu, compID );
-              saveCS.getResiBuf( compArea ).copyFrom( csFull->getResiBuf( compArea ) );
-          }
-          if( tu.noResidual )
-          {
-            CHECK( currCompFracBits > 0 || currAbsSum, "currCompFracBits > 0 when tu noResidual" );
-          }
+          bestTU.copyComponentFrom(tu, compID);
+          saveCS.getResiBuf(compArea).copyFrom(csFull->getResiBuf(compArea));
+        }
+        if (tu.noResidual)
+        {
+          CHECK(currCompFracBits > 0 || currAbsSum, "currCompFracBits > 0 when tu noResidual");
+        }
       }
 
-        // copy component
-        tu.copyComponentFrom( bestTU, compID );
-        csFull->getResiBuf( compArea ).copyFrom( saveCS.getResiBuf( compArea ) );
+      // copy component
+      tu.copyComponentFrom(bestTU, compID);
+      csFull->getResiBuf(compArea).copyFrom(saveCS.getResiBuf(compArea));
       if (colorTransFlag && (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING || !slice.isLossless()))
       {
         m_pcTrQuant->lambdaAdjustColorTrans(false);
@@ -6867,8 +9983,8 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
         }
         else
         {
-        orgResiCb[0].copyFrom(cs.getOrgResiBuf(cbArea));
-        orgResiCr[0].copyFrom(cs.getOrgResiBuf(crArea));
+          orgResiCb[0].copyFrom(cs.getOrgResiBuf(cbArea));
+          orgResiCr[0].copyFrom(cs.getOrgResiBuf(crArea));
         }
         if (!colorTransFlag && reshape)
         {
@@ -6889,185 +10005,203 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
         std::vector<TrMode> trModes;
         if (checkDCTOnly || checkTSOnly)
         {
-          numTransformCands = 1;
-        }
-
-        if (!checkTSOnly)
-        {
-          trModes.push_back(TrMode(0, true)); // DCT2
-        }
-        if (tsAllowed && !checkDCTOnly)
-        {
-          trModes.push_back(TrMode(1, true));//TS
-        }
-        for (int modeId = 0; modeId < numTransformCands; modeId++)
-        {
-          if (modeId && !cbfDCT2)
-          {
-            continue;
-          }
-          if (!trModes[modeId].second)
-          {
-            continue;
-          }
-        TCoeff     currAbsSum       = 0;
-        uint64_t   currCompFracBits = 0;
-        Distortion currCompDistCb   = 0;
-        Distortion currCompDistCr   = 0;
-        double     currCompCost     = 0;
-
-        tu.jointCbCr = (uint8_t) cbfMask;
-          // encoder bugfix: initialize mtsIdx for chroma under JointCbCrMode.
-        tu.mtsIdx[codeCompId]  = trModes[modeId].first;
-        tu.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
-        int         codedCbfMask = 0;
-        if (colorTransFlag && (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING || !slice.isLossless()))
-        {
-          m_pcTrQuant->lambdaAdjustColorTrans(true);
-          m_pcTrQuant->selectLambda(codeCompId);
-        }
-        else
-        {
-          m_pcTrQuant->selectLambda(codeCompId);
-        }
-        // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks
-        const int    absIct = abs( TU::getICTMode(tu) );
-        const double lfact  = ( absIct == 1 || absIct == 3 ? 0.8 : 0.5 );
-        m_pcTrQuant->setLambda( lfact * m_pcTrQuant->getLambda() );
-        if ( checkJointCbCr && (tu.cu->cs->slice->getSliceQp() > 18))
-        {
-          m_pcTrQuant->setLambda( 1.05 * m_pcTrQuant->getLambda() );
-        }
-
-        m_CABACEstimator->getCtx() = ctxStart;
-        m_CABACEstimator->resetBits();
-
-        PelBuf cbResi = csFull->getResiBuf(cbArea);
-        PelBuf crResi = csFull->getResiBuf(crArea);
-        cbResi.copyFrom(orgResiCb[cbfMask]);
-        crResi.copyFrom(orgResiCr[cbfMask]);
-
-        if ( reshape )
-        {
-          double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)(tu.getChromaAdj());
-          m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cRescale*cRescale));
-        }
-
-        Distortion currCompDistY = MAX_UINT64;
-        QpParam qpCbCr(tu, codeCompId);
-
-        tu.getCoeffs(otherCompId).fill(0);   // do we need that?
-        TU::setCbfAtDepth(tu, otherCompId, tu.depth, false);
-
-        PelBuf &codeResi   = (codeCompId == COMPONENT_Cr ? crResi : cbResi);
-        TCoeff  compAbsSum = 0;
-        if (numTransformCands > 1)
-        {
-          if (modeId == 0)
-          {
-            m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, &trModes, m_pcEncCfg->getMTSInterMaxCand());
-            tu.mtsIdx[codeCompId] = trModes[modeId].first;
-            tu.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
-          }
-          m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, compAbsSum, m_CABACEstimator->getCtx(), true);
-        }
-        else
-        m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, compAbsSum, m_CABACEstimator->getCtx());
-        if (compAbsSum > 0)
-        {
-          m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr);
-          codedCbfMask += (codeCompId == COMPONENT_Cb ? 2 : 1);
-        }
-        else
-        {
-          codeResi.fill(0);
-        }
-
-        if (tu.jointCbCr == 3 && codedCbfMask == 2)
-        {
-          codedCbfMask = 3;
-          TU::setCbfAtDepth(tu, COMPONENT_Cr, tu.depth, true);
+          numTransformCands = 1;
         }
-        if (codedCbfMask && tu.jointCbCr != codedCbfMask)
+
+        if (!checkTSOnly)
         {
-          codedCbfMask = 0;
+          trModes.push_back(TrMode(0, true)); // DCT2
         }
-        currAbsSum = codedCbfMask;
-
-        if (!tu.mtsIdx[codeCompId])
+        if (tsAllowed && !checkDCTOnly)
         {
-          cbfDCT2 = (currAbsSum > 0);
+          trModes.push_back(TrMode(1, true));//TS
         }
-        if (currAbsSum > 0)
+        for (int modeId = 0; modeId < numTransformCands; modeId++)
         {
-          m_CABACEstimator->cbf_comp(cs, codedCbfMask >> 1, cbArea, currDepth, false);
-          m_CABACEstimator->cbf_comp(cs, codedCbfMask & 1, crArea, currDepth, codedCbfMask >> 1);
-          m_CABACEstimator->joint_cb_cr(tu, codedCbfMask);
-          if (codedCbfMask >> 1)
-            m_CABACEstimator->residual_coding(tu, COMPONENT_Cb);
-          if (codedCbfMask & 1)
-            m_CABACEstimator->residual_coding(tu, COMPONENT_Cr);
-          currCompFracBits = m_CABACEstimator->getEstFracBits();
+          if (modeId && !cbfDCT2)
+          {
+            continue;
+          }
+          if (!trModes[modeId].second)
+          {
+            continue;
+          }
+          TCoeff     currAbsSum       = 0;
+          uint64_t   currCompFracBits = 0;
+          Distortion currCompDistCb   = 0;
+          Distortion currCompDistCr   = 0;
+          double     currCompCost     = 0;
+
+          tu.jointCbCr = (uint8_t) cbfMask;
+          // encoder bugfix: initialize mtsIdx for chroma under JointCbCrMode.
+          tu.mtsIdx[codeCompId]  = trModes[modeId].first;
+          tu.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
+          int codedCbfMask       = 0;
+          if (colorTransFlag && (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING || !slice.isLossless()))
+          {
+            m_pcTrQuant->lambdaAdjustColorTrans(true);
+            m_pcTrQuant->selectLambda(codeCompId);
+          }
+          else
+          {
+            m_pcTrQuant->selectLambda(codeCompId);
+          }
+          // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both
+          // chroma blocks
+          const int    absIct = abs(TU::getICTMode(tu));
+          const double lfact  = (absIct == 1 || absIct == 3 ? 0.8 : 0.5);
+          m_pcTrQuant->setLambda(lfact * m_pcTrQuant->getLambda());
+          if (checkJointCbCr && (tu.cu->cs->slice->getSliceQp() > 18))
+          {
+            m_pcTrQuant->setLambda(1.05 * m_pcTrQuant->getLambda());
+          }
+
+          m_CABACEstimator->getCtx() = ctxStart;
+          m_CABACEstimator->resetBits();
+
+          PelBuf cbResi = csFull->getResiBuf(cbArea);
+          PelBuf crResi = csFull->getResiBuf(crArea);
+          cbResi.copyFrom(orgResiCb[cbfMask]);
+          crResi.copyFrom(orgResiCr[cbfMask]);
 
-          m_pcTrQuant->invTransformICT(tu, cbResi, crResi);
-          if (!colorTransFlag && reshape)
+          if (reshape)
           {
-            cbResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cb));
-            crResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cr));
+            double cRescale = (double) (1 << CSCALE_FP_PREC) / (double) (tu.getChromaAdj());
+            m_pcTrQuant->setLambda(m_pcTrQuant->getLambda() / (cRescale * cRescale));
           }
 
-          if (colorTransFlag)
+          Distortion currCompDistY = MAX_UINT64;
+          QpParam    qpCbCr(tu, codeCompId);
+
+          tu.getCoeffs(otherCompId).fill(0);   // do we need that?
+          TU::setCbfAtDepth(tu, otherCompId, tu.depth, false);
+
+          PelBuf &codeResi   = (codeCompId == COMPONENT_Cr ? crResi : cbResi);
+          TCoeff  compAbsSum = 0;
+          if (numTransformCands > 1)
           {
-            PelUnitBuf     orgResidual = orgResi->subBuf(relativeUnitArea);
-            PelUnitBuf     invColorTransResidual = m_colorTransResiBuf[2].getBuf(relativeUnitArea);
-            csFull->getResiBuf(currArea).colorSpaceConvert(invColorTransResidual, false, slice.clpRng(COMPONENT_Y));
-            if (reshape)
+            if (modeId == 0)
             {
-              invColorTransResidual.bufs[1].scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cb));
-              invColorTransResidual.bufs[2].scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cr));
+              m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, &trModes, m_pcEncCfg->getMTSInterMaxCand());
+              tu.mtsIdx[codeCompId]  = trModes[modeId].first;
+              tu.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
             }
-
-            currCompDistY = m_pcRdCost->getDistPart(orgResidual.bufs[COMPONENT_Y], invColorTransResidual.bufs[COMPONENT_Y], sps.getBitDepth(toChannelType(COMPONENT_Y)), COMPONENT_Y, DF_SSE);
-            currCompDistCb = m_pcRdCost->getDistPart(orgResidual.bufs[COMPONENT_Cb], invColorTransResidual.bufs[COMPONENT_Cb], sps.getBitDepth(toChannelType(COMPONENT_Cb)), COMPONENT_Cb, DF_SSE);
-            currCompDistCr = m_pcRdCost->getDistPart(orgResidual.bufs[COMPONENT_Cr], invColorTransResidual.bufs[COMPONENT_Cr], sps.getBitDepth(toChannelType(COMPONENT_Cr)), COMPONENT_Cr, DF_SSE);
-            currCompCost = m_pcRdCost->calcRdCost(uiSingleFracBits[COMPONENT_Y] + currCompFracBits, currCompDistY + currCompDistCr + currCompDistCb, false);
+            m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, compAbsSum, m_CABACEstimator->getCtx(), true);
+          }
+          else
+          {
+            m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, compAbsSum, m_CABACEstimator->getCtx());
+          }
+          if (compAbsSum > 0)
+          {
+            m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr);
+            codedCbfMask += (codeCompId == COMPONENT_Cb ? 2 : 1);
           }
           else
           {
-          currCompDistCb = m_pcRdCost->getDistPart(csFull->getOrgResiBuf(cbArea), cbResi, channelBitDepth, COMPONENT_Cb, DF_SSE);
-          currCompDistCr = m_pcRdCost->getDistPart(csFull->getOrgResiBuf(crArea), crResi, channelBitDepth, COMPONENT_Cr, DF_SSE);
+            codeResi.fill(0);
+          }
+
+          if (tu.jointCbCr == 3 && codedCbfMask == 2)
+          {
+            codedCbfMask = 3;
+            TU::setCbfAtDepth(tu, COMPONENT_Cr, tu.depth, true);
+          }
+          if (codedCbfMask && tu.jointCbCr != codedCbfMask)
+          {
+            codedCbfMask = 0;
+          }
+          currAbsSum = codedCbfMask;
+
+          if (!tu.mtsIdx[codeCompId])
+          {
+            cbfDCT2 = (currAbsSum > 0);
+          }
+          if (currAbsSum > 0)
+          {
+            m_CABACEstimator->cbf_comp(cs, codedCbfMask >> 1, cbArea, currDepth, false);
+            m_CABACEstimator->cbf_comp(cs, codedCbfMask & 1, crArea, currDepth, codedCbfMask >> 1);
+            m_CABACEstimator->joint_cb_cr(tu, codedCbfMask);
+            if (codedCbfMask >> 1)
+            {
+              m_CABACEstimator->residual_coding(tu, COMPONENT_Cb);
+            }
+            if (codedCbfMask & 1)
+            {
+              m_CABACEstimator->residual_coding(tu, COMPONENT_Cr);
+            }
+            currCompFracBits = m_CABACEstimator->getEstFracBits();
+
+            m_pcTrQuant->invTransformICT(tu, cbResi, crResi);
+            if (!colorTransFlag && reshape)
+            {
+              cbResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cb));
+              crResi.scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cr));
+            }
+
+            if (colorTransFlag)
+            {
+              PelUnitBuf orgResidual           = orgResi->subBuf(relativeUnitArea);
+              PelUnitBuf invColorTransResidual = m_colorTransResiBuf[2].getBuf(relativeUnitArea);
+              csFull->getResiBuf(currArea).colorSpaceConvert(invColorTransResidual, false, slice.clpRng(COMPONENT_Y));
+              if (reshape)
+              {
+                invColorTransResidual.bufs[1].scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cb));
+                invColorTransResidual.bufs[2].scaleSignal(tu.getChromaAdj(), 0, tu.cu->cs->slice->clpRng(COMPONENT_Cr));
+              }
+
+              currCompDistY =
+                m_pcRdCost->getDistPart(orgResidual.bufs[COMPONENT_Y], invColorTransResidual.bufs[COMPONENT_Y],
+                                        sps.getBitDepth(toChannelType(COMPONENT_Y)), COMPONENT_Y, DF_SSE);
+              currCompDistCb =
+                m_pcRdCost->getDistPart(orgResidual.bufs[COMPONENT_Cb], invColorTransResidual.bufs[COMPONENT_Cb],
+                                        sps.getBitDepth(toChannelType(COMPONENT_Cb)), COMPONENT_Cb, DF_SSE);
+              currCompDistCr =
+                m_pcRdCost->getDistPart(orgResidual.bufs[COMPONENT_Cr], invColorTransResidual.bufs[COMPONENT_Cr],
+                                        sps.getBitDepth(toChannelType(COMPONENT_Cr)), COMPONENT_Cr, DF_SSE);
+              currCompCost = m_pcRdCost->calcRdCost(uiSingleFracBits[COMPONENT_Y] + currCompFracBits,
+                                                    currCompDistY + currCompDistCr + currCompDistCb, false);
+            }
+            else
+            {
+              currCompDistCb =
+                m_pcRdCost->getDistPart(csFull->getOrgResiBuf(cbArea), cbResi, channelBitDepth, COMPONENT_Cb, DF_SSE);
+              currCompDistCr =
+                m_pcRdCost->getDistPart(csFull->getOrgResiBuf(crArea), crResi, channelBitDepth, COMPONENT_Cr, DF_SSE);
 #if WCG_EXT
-          currCompCost   = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb, false);
+              currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb, false);
 #else
-          currCompCost   = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb);
+              currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb);
 #endif
+            }
           }
-        }
-        else
-          currCompCost = MAX_DOUBLE;
-
-        // evaluate
-        if( currCompCost < minCostCbCr )
-        {
-          uiSingleDistComp[COMPONENT_Cb] = currCompDistCb;
-          uiSingleDistComp[COMPONENT_Cr] = currCompDistCr;
-          if (colorTransFlag)
+          else
           {
-            uiSingleDistComp[COMPONENT_Y] = currCompDistY;
+            currCompCost = MAX_DOUBLE;
           }
-          minCostCbCr                    = currCompCost;
+
+          // evaluate
+          if (currCompCost < minCostCbCr)
           {
-            bestTU.copyComponentFrom(tu, COMPONENT_Cb);
-            bestTU.copyComponentFrom(tu, COMPONENT_Cr);
-            saveCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
-            saveCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
+            uiSingleDistComp[COMPONENT_Cb] = currCompDistCb;
+            uiSingleDistComp[COMPONENT_Cr] = currCompDistCr;
+            if (colorTransFlag)
+            {
+              uiSingleDistComp[COMPONENT_Y] = currCompDistY;
+            }
+            minCostCbCr = currCompCost;
+            {
+              bestTU.copyComponentFrom(tu, COMPONENT_Cb);
+              bestTU.copyComponentFrom(tu, COMPONENT_Cr);
+              saveCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
+              saveCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
+            }
           }
-        }
 
-        if (colorTransFlag && (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING || !slice.isLossless()))
-        {
-          m_pcTrQuant->lambdaAdjustColorTrans(false);
-        }
+          if (colorTransFlag && (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING || !slice.isLossless()))
+          {
+            m_pcTrQuant->lambdaAdjustColorTrans(false);
+          }
         }
       }
       // copy component
@@ -7083,27 +10217,36 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
     {
       static const ComponentID cbf_getComp[MAX_NUM_COMPONENT] = { COMPONENT_Cb, COMPONENT_Cr, COMPONENT_Y };
       for( unsigned c = isChromaEnabled(tu.chromaFormat)?0 : 2; c < MAX_NUM_COMPONENT; c++)
-    {
-      const ComponentID compID = cbf_getComp[c];
-      if (compID == COMPONENT_Y && !luma)
-        continue;
-      if (compID != COMPONENT_Y && !chroma)
-        continue;
-      if( tu.blocks[compID].valid() )
       {
-        const bool prevCbf = ( compID == COMPONENT_Cr ? TU::getCbfAtDepth( tu, COMPONENT_Cb, currDepth ) : false );
-        m_CABACEstimator->cbf_comp( *csFull, TU::getCbfAtDepth( tu, compID, currDepth ), tu.blocks[compID], currDepth, prevCbf );
+        const ComponentID compID = cbf_getComp[c];
+        if (compID == COMPONENT_Y && !luma)
+        {
+          continue;
+        }
+        if (compID != COMPONENT_Y && !chroma)
+        {
+          continue;
+        }
+        if (tu.blocks[compID].valid())
+        {
+          const bool prevCbf = (compID == COMPONENT_Cr ? TU::getCbfAtDepth(tu, COMPONENT_Cb, currDepth) : false);
+          m_CABACEstimator->cbf_comp(*csFull, TU::getCbfAtDepth(tu, compID, currDepth), tu.blocks[compID], currDepth,
+                                     prevCbf);
+        }
       }
     }
-    }
 
     for (uint32_t ch = 0; ch < numValidComp; ch++)
     {
       const ComponentID compID = ComponentID(ch);
       if (compID == COMPONENT_Y && !luma)
+      {
         continue;
+      }
       if (compID != COMPONENT_Y && !chroma)
+      {
         continue;
+      }
       if (tu.blocks[compID].valid())
       {
         if( compID == COMPONENT_Cr )
@@ -7142,7 +10285,9 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
     }
     else
 #endif
-    csFull->cost      = m_pcRdCost->calcRdCost(csFull->fracBits, csFull->dist);
+    {
+      csFull->cost = m_pcRdCost->calcRdCost(csFull->fracBits, csFull->dist);
+    }
   } // check full
 
   // code sub-blocks
@@ -7162,14 +10307,13 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
       partitioner.splitCurrArea( PartSplit( cu.getSbtTuSplit() ), cs );
     }
     else
+    {
       THROW( "Implicit TU split not available!" );
+    }
 
     do
     {
-      xEstimateInterResidualQT(*csSplit, partitioner, bCheckFull ? nullptr : puiZeroDist
-        , luma, chroma
-        , orgResi
-      );
+      xEstimateInterResidualQT(*csSplit, partitioner, bCheckFull ? nullptr : puiZeroDist, luma, chroma, orgResi);
 
       csSplit->cost = m_pcRdCost->calcRdCost( csSplit->fracBits, csSplit->dist );
     } while( partitioner.nextPart( *csSplit ) );
@@ -7189,26 +10333,23 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
         }
       }
 
+      for (auto &currTU: csSplit->traverseTUs(currArea, partitioner.chType))
       {
-
-        for( auto &currTU : csSplit->traverseTUs( currArea, partitioner.chType ) )
-        {
-          TU::setCbfAtDepth   ( currTU, COMPONENT_Y,  currDepth, compCbf[ COMPONENT_Y  ] );
-          if( currArea.chromaFormat != CHROMA_400 )
-          {
-            TU::setCbfAtDepth ( currTU, COMPONENT_Cb, currDepth, compCbf[ COMPONENT_Cb ] );
-            TU::setCbfAtDepth ( currTU, COMPONENT_Cr, currDepth, compCbf[ COMPONENT_Cr ] );
-          }
-        }
-
-        anyCbfSet    = compCbf[ COMPONENT_Y  ];
+        TU::setCbfAtDepth(currTU, COMPONENT_Y, currDepth, compCbf[COMPONENT_Y]);
         if( currArea.chromaFormat != CHROMA_400 )
         {
-          anyCbfSet |= compCbf[ COMPONENT_Cb ];
-          anyCbfSet |= compCbf[ COMPONENT_Cr ];
+          TU::setCbfAtDepth(currTU, COMPONENT_Cb, currDepth, compCbf[COMPONENT_Cb]);
+          TU::setCbfAtDepth(currTU, COMPONENT_Cr, currDepth, compCbf[COMPONENT_Cr]);
         }
       }
 
+      anyCbfSet = compCbf[COMPONENT_Y];
+      if (currArea.chromaFormat != CHROMA_400)
+      {
+        anyCbfSet |= compCbf[COMPONENT_Cb];
+        anyCbfSet |= compCbf[COMPONENT_Cr];
+      }
+
       m_CABACEstimator->getCtx() = ctxStart;
       m_CABACEstimator->resetBits();
 
@@ -7288,9 +10429,13 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
     {
       const ComponentID compID = ComponentID(comp);
       if (compID == COMPONENT_Y && !luma)
+      {
         continue;
+      }
       if (compID != COMPONENT_Y && !chroma)
+      {
         continue;
+      }
       CPelBuf reco = cs.getRecoBuf (compID);
       CPelBuf org  = cs.getOrgBuf  (compID);
 #if WCG_EXT
@@ -7308,11 +10453,16 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
           distortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
         }
         else
-        distortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE_WTD, &orgLuma );
+        {
+          distortion +=
+            m_pcRdCost->getDistPart(org, reco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
+        }
       }
       else
 #endif
-      distortion += m_pcRdCost->getDistPart( org, reco, sps.getBitDepth( toChannelType( compID ) ), compID, DF_SSE );
+      {
+        distortion += m_pcRdCost->getDistPart(org, reco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE);
+      }
     }
 
     m_CABACEstimator->resetBits();
@@ -7341,12 +10491,16 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
       tmpPred.copyFrom(cs.getPredBuf(COMPONENT_Y));
 
       if (!cu.firstPU->ciipFlag && !CU::isIBC(cu))
+      {
         tmpPred.rspSignal(m_pcReshape->getFwdLUT());
+      }
       cs.getResiBuf(COMPONENT_Y).rspSignal(m_pcReshape->getFwdLUT());
       cs.getResiBuf(COMPONENT_Y).subtract(tmpPred);
     }
     else
-    cs.getResiBuf().bufs[0].subtract(cs.getPredBuf().bufs[0]);
+    {
+      cs.getResiBuf().bufs[0].subtract(cs.getPredBuf().bufs[0]);
+    }
   }
   if (chroma && isChromaEnabled(cs.pcv->chrFormat))
   {
@@ -7447,101 +10601,104 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
     cs.dist = 0;
     cs.cost = 0;
 
-  if (colorTransFlag)
-  {
-    cs.getOrgResiBuf().bufs[0].copyFrom(orgResidual.bufs[0]);
-    cs.getOrgResiBuf().bufs[1].copyFrom(orgResidual.bufs[1]);
-    cs.getOrgResiBuf().bufs[2].copyFrom(orgResidual.bufs[2]);
+    if (colorTransFlag)
+    {
+      cs.getOrgResiBuf().bufs[0].copyFrom(orgResidual.bufs[0]);
+      cs.getOrgResiBuf().bufs[1].copyFrom(orgResidual.bufs[1]);
+      cs.getOrgResiBuf().bufs[2].copyFrom(orgResidual.bufs[2]);
 
-    memset(m_pTempPel, 0, sizeof(Pel) * localUnitArea.blocks[0].area());
-    zeroDistortion = 0;
-    for (int compIdx = 0; compIdx < 3; compIdx++)
+      memset(m_pTempPel, 0, sizeof(Pel) * localUnitArea.blocks[0].area());
+      zeroDistortion = 0;
+      for (int compIdx = 0; compIdx < 3; compIdx++)
+      {
+        ComponentID   componentID = (ComponentID) compIdx;
+        const CPelBuf zeroBuf(m_pTempPel, localUnitArea.blocks[compIdx]);
+        zeroDistortion += m_pcRdCost->getDistPart(zeroBuf, orgResidual.bufs[compIdx],
+                                                  sps.getBitDepth(toChannelType(componentID)), componentID, DF_SSE);
+      }
+      xEstimateInterResidualQT(cs, partitioner, NULL, luma, chroma, &orgResidual);
+    }
+    else
     {
-      ComponentID componentID = (ComponentID)compIdx;
-      const CPelBuf zeroBuf(m_pTempPel, localUnitArea.blocks[compIdx]);
-      zeroDistortion += m_pcRdCost->getDistPart(zeroBuf, orgResidual.bufs[compIdx], sps.getBitDepth(toChannelType(componentID)), componentID, DF_SSE);
+      zeroDistortion = 0;
+      if (luma)
+      {
+        cs.getOrgResiBuf().bufs[0].copyFrom(orgResidual.bufs[0]);
+      }
+      if (chroma && isChromaEnabled(cs.pcv->chrFormat))
+      {
+        cs.getOrgResiBuf().bufs[1].copyFrom(orgResidual.bufs[1]);
+        cs.getOrgResiBuf().bufs[2].copyFrom(orgResidual.bufs[2]);
+      }
+      xEstimateInterResidualQT(cs, partitioner, &zeroDistortion, luma, chroma);
     }
-    xEstimateInterResidualQT(cs, partitioner, NULL, luma, chroma, &orgResidual);
-  }
-  else
-  {
-    zeroDistortion = 0;
-  if (luma)
-  {
-    cs.getOrgResiBuf().bufs[0].copyFrom(orgResidual.bufs[0]);
-  }
-  if (chroma && isChromaEnabled(cs.pcv->chrFormat))
-  {
-    cs.getOrgResiBuf().bufs[1].copyFrom(orgResidual.bufs[1]);
-    cs.getOrgResiBuf().bufs[2].copyFrom(orgResidual.bufs[2]);
-  }
-  xEstimateInterResidualQT(cs, partitioner, &zeroDistortion, luma, chroma);
-  }
-  TransformUnit &firstTU = *cs.getTU( partitioner.chType );
+    TransformUnit &firstTU = *cs.getTU(partitioner.chType);
 
-  cu.rootCbf = false;
-  m_CABACEstimator->resetBits();
-  m_CABACEstimator->rqt_root_cbf( cu );
-  const uint64_t  zeroFracBits = m_CABACEstimator->getEstFracBits();
-  double zeroCost;
-  {
-#if WCG_EXT
-    if( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() )
+    cu.rootCbf = false;
+    m_CABACEstimator->resetBits();
+    m_CABACEstimator->rqt_root_cbf(cu);
+    const uint64_t zeroFracBits = m_CABACEstimator->getEstFracBits();
+    double         zeroCost;
     {
-      zeroCost = m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion, false );
-    }
-    else
+#if WCG_EXT
+      if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled())
+      {
+        zeroCost = m_pcRdCost->calcRdCost(zeroFracBits, zeroDistortion, false);
+      }
+      else
 #endif
-    zeroCost = m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion );
-  }
-
-  const int  numValidTBlocks   = ::getNumberValidTBlocks( *cs.pcv );
-  for (uint32_t i = 0; i < numValidTBlocks; i++)
-  {
-    cu.rootCbf |= TU::getCbfAtDepth(firstTU, ComponentID(i), 0);
-  }
+      {
+        zeroCost = m_pcRdCost->calcRdCost(zeroFracBits, zeroDistortion);
+      }
+    }
 
-  // -------------------------------------------------------
-  // If a block full of 0's is efficient, then just use 0's.
-  // The costs at this point do not include header bits.
+    const int numValidTBlocks = ::getNumberValidTBlocks(*cs.pcv);
+    for (uint32_t i = 0; i < numValidTBlocks; i++)
+    {
+      cu.rootCbf |= TU::getCbfAtDepth(firstTU, ComponentID(i), 0);
+    }
 
-  if (zeroCost < cs.cost || !cu.rootCbf)
-  {
-    cs.cost = zeroCost;
-    cu.colorTransform = false;
-    cu.sbtInfo = 0;
-    cu.rootCbf = false;
+    // -------------------------------------------------------
+    // If a block full of 0's is efficient, then just use 0's.
+    // The costs at this point do not include header bits.
 
-    cs.clearTUs();
+    if (zeroCost < cs.cost || !cu.rootCbf)
+    {
+      cs.cost           = zeroCost;
+      cu.colorTransform = false;
+      cu.sbtInfo        = 0;
+      cu.rootCbf        = false;
 
-    // add new "empty" TU(s) spanning the whole CU
-    cs.addEmptyTUs( partitioner );
-  }
-  if (!iter)
-  {
-    rootCbfFirstColorSpace = cu.rootCbf;
-  }
-  if (cs.cost < bestCost)
-  {
-    bestIter = iter;
+      cs.clearTUs();
 
-    if (iter != (numAllowedColorSpace - 1))
+      // add new "empty" TU(s) spanning the whole CU
+      cs.addEmptyTUs(partitioner);
+    }
+    if (!iter)
+    {
+      rootCbfFirstColorSpace = cu.rootCbf;
+    }
+    if (cs.cost < bestCost)
     {
-      bestCost = cs.cost;
-      bestColorTrans = cu.colorTransform;
-      bestRootCbf = cu.rootCbf;
-      bestsbtInfo = cu.sbtInfo;
+      bestIter = iter;
 
-      saveCS.clearTUs();
-      for (const auto &ptu : cs.tus)
+      if (iter != (numAllowedColorSpace - 1))
       {
-        TransformUnit &tu = saveCS.addTU(*ptu, ptu->chType);
-        tu = *ptu;
+        bestCost       = cs.cost;
+        bestColorTrans = cu.colorTransform;
+        bestRootCbf    = cu.rootCbf;
+        bestsbtInfo    = cu.sbtInfo;
+
+        saveCS.clearTUs();
+        for (const auto &ptu: cs.tus)
+        {
+          TransformUnit &tu = saveCS.addTU(*ptu, ptu->chType);
+          tu                = *ptu;
+        }
+        saveCS.getResiBuf(curUnitArea).copyFrom(cs.getResiBuf(curUnitArea));
       }
-      saveCS.getResiBuf(curUnitArea).copyFrom(cs.getResiBuf(curUnitArea));
     }
   }
-  }
 
   if (bestIter != (numAllowedColorSpace - 1))
   {
@@ -7586,7 +10743,9 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
       tmpPred.copyFrom(cs.getPredBuf(COMPONENT_Y));
 
       if (!cu.firstPU->ciipFlag && !CU::isIBC(cu))
+      {
         tmpPred.rspSignal(m_pcReshape->getFwdLUT());
+      }
 
       cs.getRecoBuf(COMPONENT_Y).reconstruct(tmpPred, cs.getResiBuf(COMPONENT_Y), cs.slice->clpRng(COMPONENT_Y));
     }
@@ -7612,9 +10771,13 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
   {
     const ComponentID compID = ComponentID(comp);
     if (compID == COMPONENT_Y && !luma)
+    {
       continue;
+    }
     if (compID != COMPONENT_Y && !chroma)
+    {
       continue;
+    }
     CPelBuf reco = cs.getRecoBuf (compID);
     CPelBuf org  = cs.getOrgBuf  (compID);
 
@@ -7633,7 +10796,9 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
         finalDistortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
       }
       else
+      {
         finalDistortion += m_pcRdCost->getDistPart(org, reco, sps.getBitDepth(toChannelType(compID)), compID, DF_SSE_WTD, &orgLuma);
+      }
     }
     else
 #endif
@@ -7715,11 +10880,19 @@ double InterSearch::xGetMEDistortionWeight(uint8_t bcwIdx, RefPicList eRefPicLis
     return 0.5;
   }
 }
+#if GDR_ENABLED
+bool InterSearch::xReadBufferedUniMv(PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, bool& rcMvSolid, uint32_t& ruiBits, Distortion& ruiCost)
+#else
 bool InterSearch::xReadBufferedUniMv(PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost)
+#endif
 {
   if (m_uniMotions.isReadMode((uint32_t)eRefPicList, (uint32_t)iRefIdx))
   {
+#if GDR_ENABLED
+    m_uniMotions.copyTo(rcMv, rcMvSolid, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx);
+#else
     m_uniMotions.copyTo(rcMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx);
+#endif
 
     Mv pred = pcMvPred;
     pred.changeTransPrecInternal2Amvr(pu.cu->imv);
@@ -7737,13 +10910,23 @@ bool InterSearch::xReadBufferedUniMv(PredictionUnit& pu, RefPicList eRefPicList,
   return false;
 }
 
+#if GDR_ENABLED
+bool InterSearch::xReadBufferedAffineUniMv(PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], bool acMvSolid[3], uint32_t& ruiBits, Distortion& ruiCost
+  , int& mvpIdx, const AffineAMVPInfo& aamvpi
+)
+#else
 bool InterSearch::xReadBufferedAffineUniMv(PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost
   , int& mvpIdx, const AffineAMVPInfo& aamvpi
 )
+#endif
 {
   if (m_uniMotions.isReadModeAffine((uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType))
   {
+#if GDR_ENABLED
+    m_uniMotions.copyAffineMvTo(acMv, acMvSolid, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType, mvpIdx);
+#else
     m_uniMotions.copyAffineMvTo(acMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx, pu.cu->affineType, mvpIdx);
+#endif
     m_pcRdCost->setCostScale(0);
     acMvPred[0] = aamvpi.mvCandLT[mvpIdx];
     acMvPred[1] = aamvpi.mvCandRT[mvpIdx];
@@ -7765,6 +10948,7 @@ bool InterSearch::xReadBufferedAffineUniMv(PredictionUnit& pu, RefPicList eRefPi
   }
   return false;
 }
+
 void InterSearch::initWeightIdxBits()
 {
   for (int n = 0; n < BCW_NUM; ++n)
@@ -7808,20 +10992,45 @@ uint32_t InterSearch::xDetermineBestMvp( PredictionUnit& pu, Mv acMvTemp[3], int
 {
   bool mvpUpdated  = false;
   uint32_t minBits = std::numeric_limits<uint32_t>::max();
+#if GDR_ENABLED
+  const CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
+
   for ( int i = 0; i < aamvpi.numCand; i++ )
   {
     Mv mvPred[3] = { aamvpi.mvCandLT[i], aamvpi.mvCandRT[i], aamvpi.mvCandLB[i] };
     uint32_t candBits = m_auiMVPIdxCost[i][aamvpi.numCand];
     candBits += xCalcAffineMVBits( pu, acMvTemp, mvPred );
 
+#if GDR_ENABLED
+    bool isSolid = true; // stays true (no candidate filtering) unless isEncodeGdrClean is set
+    if (isEncodeGdrClean)
+    {
+      isSolid = aamvpi.mvSolidLT[i] && aamvpi.mvSolidRT[i]; // LT and RT predictors must both be flagged solid
+      if (pu.cu->affineType == AFFINEMODEL_6PARAM)
+      {
+        isSolid = isSolid && aamvpi.mvSolidLB[i]; // the 6-parameter model additionally requires the LB predictor
+      }
+    }
+
+    if ((candBits < minBits) && isSolid) // a non-solid candidate is never selected, however cheap
+#else
     if ( candBits < minBits )
+#endif
     {
       minBits    = candBits;
       mvpIdx     = i;
       mvpUpdated = true;
     }
   }
+
+#if GDR_ENABLED
+  mvpUpdated = true; // do not check mvp update for GDR; this makes the CHECK below a no-op. NOTE(review): if every candidate is rejected as non-solid, mvpIdx is never written here and the returned minBits is still the uint32_t maximum - confirm callers tolerate that
+#endif
+
   CHECK( !mvpUpdated, "xDetermineBestMvp() error" );
+
   return minBits;
 }
 
@@ -7833,11 +11042,22 @@ void InterSearch::symmvdCheckBestMvp(
   AMVPInfo amvpInfo[2][33],
   int32_t bcwIdx,
   Mv cMvPredSym[2],
+#if GDR_ENABLED
+  bool cMvPredSymSolid[2],
+#endif
   int32_t mvpIdxSym[2],
   Distortion& bestCost,
   bool skip
 )
 {
+#if GDR_ENABLED
+  CodingStructure &cs = *pu.cs;
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+  bool bestCostOk = true;
+  bool costOk = true;
+  bool allOk;
+#endif
+
   RefPicList tarRefList = (RefPicList)(1 - curRefList);
   int32_t refIdxCur = pu.cu->slice->getSymRefIdx(curRefList);
   int32_t refIdxTar = pu.cu->slice->getSymRefIdx(tarRefList);
@@ -7880,7 +11100,9 @@ void InterSearch::symmvdCheckBestMvp(
     for (int j = 0; j < amvpTar.numCand; j++)
     {
       if (skipMvpIdx[curRefList] == i && skipMvpIdx[tarRefList] == j)
+      {
         continue;
+      }
 
       cTarMvField.setMvField(curMv.getSymmvdMv(amvpCur.mvCand[i], amvpTar.mvCand[j]), refIdxTar);
 
@@ -7913,11 +11135,48 @@ void InterSearch::symmvdCheckBestMvp(
       bits += m_auiMVPIdxCost[i][AMVP_MAX_NUM_CANDS];
       bits += m_auiMVPIdxCost[j][AMVP_MAX_NUM_CANDS];
       cost += m_pcRdCost->getCost(bits);
+#if GDR_ENABLED
+      if (isEncodeGdrClean)
+      {
+        bool curSolid = amvpCur.mvSolid[i];
+        bool tarSolid = amvpTar.mvSolid[j];
+        costOk = curSolid && tarSolid;
+      }
+#endif
+
+
+#if GDR_ENABLED
+      allOk = (cost < bestCost);
+      if (isEncodeGdrClean)
+      {
+        if (costOk)
+        {
+          allOk = (bestCostOk) ? (cost < bestCost) : true;
+        }
+        else
+        {
+          allOk = false;
+        }
+      }
+#endif
+
+#if GDR_ENABLED
+      if (allOk)
+#else
       if (cost < bestCost)
+#endif
       {
         bestCost = cost;
         cMvPredSym[curRefList] = amvpCur.mvCand[i];
         cMvPredSym[tarRefList] = amvpTar.mvCand[j];
+#if GDR_ENABLED
+        if (isEncodeGdrClean)
+        {
+          bestCostOk = costOk;
+          cMvPredSymSolid[curRefList] = amvpCur.mvSolid[i];
+          cMvPredSymSolid[tarRefList] = amvpTar.mvSolid[j];
+        }
+#endif
         mvpIdxSym[curRefList] = i;
         mvpIdxSym[tarRefList] = j;
       }
@@ -7972,7 +11231,9 @@ bool InterSearch::searchBv(PredictionUnit& pu, int xPos, int yPos, int width, in
 
   // Don't search the above CTU row
   if (refTopY >> ctuSizeLog2 < yPos >> ctuSizeLog2)
+  {
     return false;
+  }
 
   // Don't search the below CTU row
   if (refBottomY >> ctuSizeLog2 > yPos >> ctuSizeLog2)
@@ -8017,22 +11278,32 @@ bool InterSearch::searchBv(PredictionUnit& pu, int xPos, int yPos, int width, in
       int offset64y = (refPosCol.y >> (ctuSizeLog2 - 1)) << (ctuSizeLog2 - 1);
       const Position refPosCol64x64 = {offset64x, offset64y};
       if (pu.cs->isDecomp(refPosCol64x64, toChannelType(COMPONENT_Y)))
+      {
         return false;
+      }
       if (refPosCol64x64 == pu.Y().topLeft())
+      {
         return false;
+      }
     }
   }
   else
+  {
     return false;
+  }
 
   // in the same CTU, or valid area from left CTU. Check if the reference block is already coded
   const Position refPosLT = pu.Y().topLeft().offset(xBv, yBv);
   const Position refPosBR = pu.Y().bottomRight().offset(xBv, yBv);
   const ChannelType      chType = toChannelType(COMPONENT_Y);
   if (!pu.cs->isDecomp(refPosBR, chType))
+  {
     return false;
+  }
   if (!pu.cs->isDecomp(refPosLT, chType))
+  {
     return false;
+  }
   return true;
 }
 
diff --git a/source/Lib/EncoderLib/InterSearch.h b/source/Lib/EncoderLib/InterSearch.h
index 5e9df41e918a82227cb4068a5054889422913892..1296420265ed62999fd1a21ef9f2df60c1f3136a 100644
--- a/source/Lib/EncoderLib/InterSearch.h
+++ b/source/Lib/EncoderLib/InterSearch.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -77,6 +77,12 @@ struct AffineMVInfo
   int x, y, w, h;
 };
 
+#if GDR_ENABLED 
+struct AffineMVInfoSolid   // GDR-only side table: InterSearch keeps these in m_affMVListSolid, indexed slot-for-slot with m_affMVList
+{
+  bool  affMVsSolid[2][33][3];   // presumably one flag per [ref list][ref idx][control point] - TODO confirm against AffineMVInfo
+};
+#endif
 struct BlkUniMvInfo
 {
   Mv uniMvs[2][33];
@@ -93,6 +99,11 @@ typedef struct
   Distortion affineCost[3];
   bool affine4ParaAvail;
   bool affine6ParaAvail;
+
+#if GDR_ENABLED
+  bool acMvAffine4ParaSolid[2][3];
+  bool acMvAffine6ParaSolid[2][3];
+#endif
 } EncAffineMotion;
 
 /// encoder search class
@@ -118,6 +129,9 @@ private:
   bool            m_affineModeSelected;
   std::unordered_map< Position, std::unordered_map< Size, BlkRecord> > m_ctuRecord;
   AffineMVInfo       *m_affMVList;
+#if GDR_ENABLED  
+  AffineMVInfoSolid  *m_affMVListSolid;
+#endif
   int             m_affMVListIdx;
   int             m_affMVListSize;
   int             m_affMVListMaxSize;
@@ -126,6 +140,9 @@ private:
   int             m_uniMvListSize;
   int             m_uniMvListMaxSize;
   Distortion      m_hevcCost;
+#if GDR_ENABLED  
+  bool            m_hevcCostOk;
+#endif
   EncAffineMotion m_affineMotion;
   PatentBvCand    m_defaultCachedBvs;
 protected:
@@ -206,38 +223,65 @@ public:
 
   void setTempBuffers               (CodingStructure ****pSlitCS, CodingStructure ****pFullCS, CodingStructure **pSaveCS );
   void resetCtuRecord               ()             { m_ctuRecord.clear(); }
-#if ENABLE_SPLIT_PARALLELISM
-  void copyState                    ( const InterSearch& other );
-#endif
   void setAffineModeSelected        ( bool flag) { m_affineModeSelected = flag; }
   void resetAffineMVList() { m_affMVListIdx = 0; m_affMVListSize = 0; }
+#if GDR_ENABLED // Fetch the entry held idx slots behind the write cursor m_affMVListIdx (idx 0 = the slot just before it); GDR builds also return its AffineMVInfoSolid
+  void savePrevAffMVInfo(int idx, AffineMVInfo &tmpMVInfo, AffineMVInfoSolid &tmpMVInfoSolid, bool& isSaved)
+#else
   void savePrevAffMVInfo(int idx, AffineMVInfo &tmpMVInfo, bool& isSaved)
+#endif // isSaved is set to false, and the output entries are left unwritten, when the list holds fewer than idx + 1 entries
   {
     if (m_affMVListSize > idx)
     {
       tmpMVInfo = m_affMVList[(m_affMVListIdx - 1 - idx + m_affMVListMaxSize) % m_affMVListMaxSize];
+#if GDR_ENABLED
+      tmpMVInfoSolid = m_affMVListSolid[(m_affMVListIdx - 1 - idx + m_affMVListMaxSize) % m_affMVListMaxSize]; // same slot index as the tmpMVInfo read above
+#endif
       isSaved = true;
     }
     else
       isSaved = false;
   }
+#if GDR_ENABLED
+  void addAffMVInfo(AffineMVInfo &tmpMVInfo, AffineMVInfoSolid &tmpMVInfoSolid)
+#else
   void addAffMVInfo(AffineMVInfo &tmpMVInfo)
+#endif
   {
     int j = 0;
     AffineMVInfo *prevInfo = nullptr;
+#if GDR_ENABLED
+    AffineMVInfoSolid *prevInfoSolid = nullptr;
+#endif
     for (; j < m_affMVListSize; j++)
     {
       prevInfo = m_affMVList + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize));
+#if GDR_ENABLED
+      prevInfoSolid = m_affMVListSolid + ((m_affMVListIdx - j - 1 + m_affMVListMaxSize) % (m_affMVListMaxSize));
+#endif
       if ((tmpMVInfo.x == prevInfo->x) && (tmpMVInfo.y == prevInfo->y) && (tmpMVInfo.w == prevInfo->w) && (tmpMVInfo.h == prevInfo->h))
       {
         break;
       }
     }
+#if GDR_ENABLED
     if (j < m_affMVListSize)
+    {
       *prevInfo = tmpMVInfo;
+      *prevInfoSolid = tmpMVInfoSolid;
+    }
+#else
+    if (j < m_affMVListSize)
+    {
+      *prevInfo = tmpMVInfo;
+    }
+#endif
     else
     {
       m_affMVList[m_affMVListIdx] = tmpMVInfo;
+#if GDR_ENABLED
+      m_affMVListSolid[m_affMVListIdx] = tmpMVInfoSolid;
+#endif
       m_affMVListIdx = (m_affMVListIdx + 1) % m_affMVListMaxSize;
       m_affMVListSize = std::min(m_affMVListSize + 1, m_affMVListMaxSize);
     }
@@ -315,13 +359,21 @@ public:
     }
   }
   void resetSavedAffineMotion();
+#if GDR_ENABLED
+  void storeAffineMotion(Mv acAffineMv[2][3], bool acAffineMvSolid[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int bcwIdx);
+#else
   void storeAffineMotion( Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int bcwIdx );
+#endif
   bool searchBv(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xBv, int yBv, int ctuSize);
   void setClipMvInSubPic(bool flag) { m_clipMvInSubPic = flag; }
 protected:
 
   /// sub-function for motion vector refinement used in fractional-pel accuracy
+#if GDR_ENABLED
+  Distortion  xPatternRefinement(const PredictionUnit& pu, RefPicList eRefPicList, int iRefIdx, const CPelBuf* pcPatternKey, Mv baseRefMv, int iFrac, Mv& rcMvFrac, bool bAllowUseOfHadamard, bool& rbCleanCandExist);
+#else
   Distortion  xPatternRefinement    ( const CPelBuf* pcPatternKey, Mv baseRefMv, int iFrac, Mv& rcMvFrac, bool bAllowUseOfHadamard );
+#endif
 
    typedef struct
    {
@@ -403,6 +455,20 @@ protected:
                                     Distortion*           puiDistBiP = NULL
                                   );
 
+ #if GDR_ENABLED
+  // GDR variant: the parameter list starts with the prediction unit, whereas the non-GDR
+  // declaration starts with eRefPicList.
+  void xCheckBestMVP              ( PredictionUnit& pu,
+                                    RefPicList      eRefPicList,
+                                    Mv              cMv,
+                                    Mv&             rcMvPred,
+                                    int&            riMVPIdx,
+                                    AMVPInfo&       amvpInfo,
+                                    uint32_t&       ruiBits,
+                                    Distortion&     ruiCost,
+                                    const uint8_t   imv
+                                  );
+#else
   void xCheckBestMVP              ( RefPicList  eRefPicList,
                                     Mv          cMv,
                                     Mv&         rcMvPred,
@@ -413,6 +479,7 @@ protected:
                                     ,
                                     const uint8_t  imv
                                   );
+#endif
 
   Distortion xGetTemplateCost     ( const PredictionUnit& pu,
                                     PelUnitBuf&           origBuf,
@@ -435,6 +502,22 @@ protected:
   // motion estimation
   // -------------------------------------------------------------------------------------------------------------------
 
+#if GDR_ENABLED
+  void xMotionEstimation          ( PredictionUnit&       pu,
+                                    PelUnitBuf&           origBuf,
+                                    RefPicList            eRefPicList,
+                                    Mv&                   rcMvPred,                                    
+                                    int                   iRefIdxPred,
+                                    Mv&                   rcMv,
+                                    bool&                 rcMvSolid,
+                                    int&                  riMVPIdx,
+                                    uint32_t&             ruiBits,
+                                    Distortion&           ruiCost,
+                                    const AMVPInfo&       amvpInfo,
+                                    bool&                 rbCleanCandExist,
+                                    bool                  bBi = false
+                                  );
+#else
   void xMotionEstimation          ( PredictionUnit&       pu,
                                     PelUnitBuf&           origBuf,
                                     RefPicList            eRefPicList,
@@ -447,7 +530,7 @@ protected:
                                     const AMVPInfo&       amvpInfo,
                                     bool                  bBi = false
                                   );
-
+#endif
   void xTZSearch                  ( const PredictionUnit& pu,
                                     RefPicList            eRefPicList,
                                     int                   iRefIdxPred,
@@ -473,6 +556,10 @@ protected:
                                     const int             iSrchRng,
                                     SearchRange&          sr
                                   , IntTZSearchStruct &  cStruct
+#if GDR_ENABLED
+                                  , RefPicList eRefPicList
+                                  , int iRefIdx
+#endif
                                   );
 
   void xPatternSearchFast         ( const PredictionUnit& pu,
@@ -498,6 +585,11 @@ protected:
                                     Distortion&         ruiCost,
                                     const AMVPInfo&     amvpInfo,
                                     double              fWeight
+#if GDR_ENABLED
+                                    ,RefPicList         eRefPicList
+                                    ,int                iRefIdxPred
+                                    , bool&             rbCleanCandExist
+#endif
                                   );
 
   void xPatternSearchFracDIF      ( const PredictionUnit& pu,
@@ -508,6 +600,9 @@ protected:
                                     Mv&                   rcMvHalf,
                                     Mv&                   rcMvQter,
                                     Distortion&           ruiCost
+#if GDR_ENABLED
+                                    ,bool&                rbCleanCandExist
+#endif
                                   );
 
   void xPredAffineInterSearch     ( PredictionUnit&       pu,
@@ -516,13 +611,35 @@ protected:
                                     uint32_t&                 lastMode,
                                     Distortion&           affineCost,
                                     Mv                    hevcMv[2][33]
+#if GDR_ENABLED
+                                  , bool                  hevcMvSolid[2][33]
+#endif
                                   , Mv                    mvAffine4Para[2][33][3]
+#if GDR_ENABLED
+                                  , bool                  mvAffine4ParaSolid[2][33][3]
+#endif
                                   , int                   refIdx4Para[2]
                                   , uint8_t               bcwIdx = BCW_DEFAULT
                                   , bool                  enforceBcwPred = false
                                   , uint32_t              bcwIdxBits = 0
                                   );
 
+#if GDR_ENABLED
+  void xAffineMotionEstimation    ( PredictionUnit& pu,
+                                    PelUnitBuf&     origBuf,
+                                    RefPicList      eRefPicList,
+                                    Mv              acMvPred[3],                                    
+                                    int             iRefIdxPred,
+                                    Mv              acMv[3],
+                                    bool            acMvSolid[3],
+                                    uint32_t&       ruiBits,
+                                    Distortion&     ruiCost,
+                                    int&            mvpIdx,
+                                    const AffineAMVPInfo& aamvpi,
+                                    bool&           rbCleanCandExist,
+                                    bool            bBi = false
+  );
+#else
   void xAffineMotionEstimation    ( PredictionUnit& pu,
                                     PelUnitBuf&     origBuf,
                                     RefPicList      eRefPicList,
@@ -535,6 +652,7 @@ protected:
                                     const AffineAMVPInfo& aamvpi,
                                     bool            bBi = false
                                   );
+#endif
 
   void xEstimateAffineAMVP        ( PredictionUnit&  pu,
                                     AffineAMVPInfo&  affineAMVPInfo,
@@ -545,23 +663,50 @@ protected:
                                     Distortion*      puiDistBiP
                                   );
 
+#if GDR_ENABLED
+  Distortion xGetAffineTemplateCost(PredictionUnit& pu, PelUnitBuf& origBuf, PelUnitBuf& predBuf, Mv acMvCand[3], int iMVPIdx, int iMVPNum, RefPicList eRefPicList, int iRefIdx, bool& rbOk);
+#else
   Distortion xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf& origBuf, PelUnitBuf& predBuf, Mv acMvCand[3], int iMVPIdx, int iMVPNum, RefPicList eRefPicList, int iRefIdx );
+#endif
 
   void xCopyAffineAMVPInfo        ( AffineAMVPInfo& src, AffineAMVPInfo& dst );
   void xCheckBestAffineMVP        ( PredictionUnit &pu, AffineAMVPInfo &affineAMVPInfo, RefPicList eRefPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost );
 
   Distortion xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField , int bcwIdx );
 
+#if GDR_ENABLED
+  // GDR variant: the non-GDR parameter list followed by an additional bool& rbOk.
+  Distortion xSymmeticRefineMvSearch( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred
+    , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int searchPattern
+    , int nSearchStepShift, uint32_t uiMaxSearchRounds, int bcwIdx
+    , bool& rbOk );
+#else
   Distortion xSymmeticRefineMvSearch( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred
     , RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion uiMinCost, int searchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds , int bcwIdx );
+#endif
 
+#if GDR_ENABLED
+  bool xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred,
+  RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int bcwIdx, bool& ruiCostOk );
+#else
   void xSymmetricMotionEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList eRefPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int bcwIdx );
+#endif
 
+#if GDR_ENABLED
+  bool xReadBufferedAffineUniMv(PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], bool acMvSolid[3], uint32_t& ruiBits, Distortion& ruiCost
+    , int& mvpIdx, const AffineAMVPInfo& aamvpi
+  );
+#else
   bool xReadBufferedAffineUniMv   ( PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost
                                     , int& mvpIdx, const AffineAMVPInfo& aamvpi
   );
+#endif
   double xGetMEDistortionWeight   ( uint8_t bcwIdx, RefPicList eRefPicList);
+#if GDR_ENABLED
+  bool xReadBufferedUniMv         (PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, bool& rcMvSolid, uint32_t& ruiBits, Distortion& ruiCost);
+#else
   bool xReadBufferedUniMv         ( PredictionUnit& pu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost);
+#endif
 
   void xClipMv                    ( Mv& rcMv, const struct Position& pos, const struct Size& size, const class SPS& sps, const class PPS& pps );
 
@@ -577,6 +722,9 @@ public:
     AMVPInfo amvpInfo[2][33],
     int32_t bcwIdx,
     Mv cMvPredSym[2],
+#if GDR_ENABLED
+    bool cMvPredSymSolid[2],
+#endif
     int32_t mvpIdxSym[2],
     Distortion& bestCost,
     bool skip = false
diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp
index 063ef8b3af12195d427cc82260aa1f4114c003ad..0bd6d5ee8ea83d77a3bc72c399a32c2ef59dc0c2 100644
--- a/source/Lib/EncoderLib/IntraSearch.cpp
+++ b/source/Lib/EncoderLib/IntraSearch.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -68,8 +68,6 @@ IntraSearch::IntraSearch()
   {
     m_pSharedPredTransformSkip[ch] = nullptr;
   }
-  m_truncBinBits = nullptr;
-  m_escapeNumBins = nullptr;
   m_minErrorIndexMap = nullptr;
   for (unsigned i = 0; i < (MAXPLTSIZE + 1); i++)
   {
@@ -163,21 +161,6 @@ void IntraSearch::destroy()
   m_tmpStorageLCU.destroy();
   m_colorTransResiBuf.destroy();
   m_isInitialized = false;
-  if (m_truncBinBits != nullptr)
-  {
-    for (unsigned i = 0; i < m_symbolSize; i++)
-    {
-      delete[] m_truncBinBits[i];
-      m_truncBinBits[i] = nullptr;
-    }
-    delete[] m_truncBinBits;
-    m_truncBinBits = nullptr;
-  }
-  if (m_escapeNumBins != nullptr)
-  {
-    delete[] m_escapeNumBins;
-    m_escapeNumBins = nullptr;
-  }
   if (m_indexError[0] != nullptr)
   {
     for (unsigned i = 0; i < (MAXPLTSIZE + 1); i++)
@@ -310,20 +293,6 @@ void IntraSearch::init( EncCfg*        pcEncCfg,
   m_isInitialized = true;
   if (pcEncCfg->getPLTMode())
   {
-    m_symbolSize = (1 << bitDepthY); // pixel values are within [0, SymbolSize-1] with size SymbolSize
-    if (m_truncBinBits == nullptr)
-    {
-      m_truncBinBits = new uint16_t*[m_symbolSize];
-      for (unsigned i = 0; i < m_symbolSize; i++)
-      {
-        m_truncBinBits[i] = new uint16_t[m_symbolSize + 1];
-      }
-    }
-    if (m_escapeNumBins == nullptr)
-    {
-      m_escapeNumBins = new uint16_t[m_symbolSize];
-    }
-    initTBCTable(bitDepthY);
     if (m_indexError[0] == nullptr)
     {
       for (unsigned i = 0; i < (MAXPLTSIZE + 1); i++)
@@ -367,12 +336,197 @@ double IntraSearch::findInterCUCost( CodingUnit &cu )
   return COST_UNKNOWN;
 }
 
+#if GDR_ENABLED
+// Estimates how many reconstructed samples of the row above the block, counted from the block's
+// left edge, intra prediction with direction luma_dirMode may reference.
+//   pu           - prediction unit; the block size is read from Cb() if isChroma is set, else from Y()
+//   luma_dirMode - intra direction index to evaluate
+//   isChroma     - selects the chroma block size and forces the 2 * w estimate in the default branch
+// Returns w + 1 for PLANAR_IDX, w for DC_IDX / HOR_IDX / VER_IDX, and w, w + 1 or 2 * w for every
+// other mode, where w is the width of the selected component. The PDPC scale is evaluated only for
+// positive angular modes, the single case in which it can change the result.
+int IntraSearch::getNumTopRecons(PredictionUnit &pu, int luma_dirMode, bool isChroma)
+{
+  const int w = isChroma ? pu.Cb().width  : pu.Y().width;
+  const int h = isChroma ? pu.Cb().height : pu.Y().height;
+
+  // Angle magnitude per |intraPredAngleMode|; only its sign-carrying product is used below.
+  static const int angTable[32] = { 0,    1,    2,    3,    4,    6,     8,   10,   12,   14,   16,   18,   20,   23,   26,   29,   32,   35,   39,  45,  51,  57,  64,  73,  86, 102, 128, 171, 256, 341, 512, 1024 };
+  // Inverse-angle table indexed by |intraPredAngleMode|; entry 0 is 0.
+  static const int invAngTable[32] = {
+    0,   16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565,
+    512, 468,   420,  364,  321,  287,  256,  224,  191,  161,  128,  96,  64,  48,  32,  16
+  };   // (512 * 32) / Angle
+
+  // Only modes strictly between DC_IDX and NUM_LUMA_MODE carry a prediction angle.
+  const bool isAngular          = (luma_dirMode > DC_IDX && luma_dirMode < NUM_LUMA_MODE);
+  const int  predModeIntra      = getModifiedWideAngle(w, h, luma_dirMode);
+  const bool isModeVer          = predModeIntra >= DIA_IDX;
+  const int  intraPredAngleMode = isModeVer ? predModeIntra - VER_IDX : -(predModeIntra - HOR_IDX);
+  const int  absAngMode         = abs(intraPredAngleMode);
+
+  // Signed prediction angle; it stays 0 for non-angular modes.
+  const int signAng        = intraPredAngleMode < 0 ? -1 : 1;
+  const int absAng         = isAngular ? angTable[absAngMode] : 0;
+  const int intraPredAngle = signAng * absAng;
+
+  // 1.0 derive PDPC (considered only when pu.multiRefIdx is 0)
+  bool applyPDPC = (pu.multiRefIdx == 0);
+  if (isAngular)
+  {
+    if (intraPredAngleMode < 0)
+    {
+      applyPDPC = false;
+    }
+    else if (intraPredAngleMode > 0)
+    {
+      // The scale is derived only where it is consumed. Here absAngMode >= 1, so invAngle >= 16 and
+      // 3 * invAngle - 2 is positive; for absAngMode == 0 the table holds 0 and the floorLog2()
+      // argument would be -2.
+      const int invAngle     = invAngTable[absAngMode];
+      const int sideSize     = isModeVer ? h : w;
+      const int maxScale     = 2;
+      const int angularScale = std::min(maxScale, floorLog2(sideSize) - (floorLog2(3 * invAngle - 2) - 8));
+
+      applyPDPC &= (angularScale >= 0);
+    }
+  }
+
+  // 2.0 calculate number of recons
+  int numOfTopRecons = w;
+  switch (luma_dirMode)
+  {
+  case PLANAR_IDX:
+    // same count with and without PDPC
+    numOfTopRecons = w + 1;
+    break;
+
+  case DC_IDX:
+  case HOR_IDX:
+  case VER_IDX:
+    // same count with and without PDPC
+    numOfTopRecons = w;
+    break;
+
+  default:
+    // 2..66
+    // note: There should be a way to reduce the number of top recons, in case of non PDPC
+    applyPDPC |= isChroma;
+
+    if (isModeVer)
+    {
+      // modes at or beyond DIA_IDX: only a negative angle without PDPC stays at w + 1
+      if (intraPredAngle < 0 && !applyPDPC)
+      {
+        numOfTopRecons = w + 1;
+      }
+      else
+      {
+        numOfTopRecons = w + w;
+      }
+    }
+    else
+    {
+      // modes below DIA_IDX: the sign of the angle does not change the count
+      numOfTopRecons = applyPDPC ? (w + w) : w;
+    }
+    break;
+  }
+
+  return numOfTopRecons;
+}
+
+// Checks whether luma intra direction luma_dirMode is usable for pu during a GDR interval.
+// The mode is rejected when the virtual boundary X position at index 0 lies strictly between
+// the block's left edge and the end of the top reference span reported by getNumTopRecons().
+// Always returns true when the picture header is not in a GDR interval.
+bool IntraSearch::isValidIntraPredLuma(PredictionUnit &pu, int luma_dirMode)
+{
+  PicHeader *picHeader = pu.cs->picHeader;
+
+  if (!picHeader->getInGdrInterval())
+  {
+    return true;
+  }
+
+  const int leftX     = pu.Y().x;
+  const int boundaryX = picHeader->getVirtualBoundariesPosX(0);
+  const int spanEndX  = leftX + getNumTopRecons(pu, luma_dirMode, false);
+
+  // recon is out of boundary when the boundary falls inside the open range (leftX, spanEndX)
+  const bool crossesBoundary = (leftX < boundaryX) && (boundaryX < spanEndX);
+
+  return !crossesBoundary;
+}
+
+// Checks whether chroma intra mode chroma_dirMode is usable for pu during a GDR interval.
+//   pu             - prediction unit; position and size are taken from its Cb() block
+//   luma_dirMode   - direction evaluated when chroma_dirMode is DM_CHROMA_IDX
+//   chroma_dirMode - chroma mode under test
+// The Cb geometry is scaled to luma units and the mode is rejected when the virtual boundary X
+// position at index 0 lies strictly between the block's left edge and the end of the top
+// reference span. Always returns true when the picture header is not in a GDR interval.
+bool IntraSearch::isValidIntraPredChroma(PredictionUnit &pu, int luma_dirMode, int chroma_dirMode)
+{
+  CodingStructure *cs        = pu.cs;
+  PicHeader       *picHeader = cs->picHeader;
+
+  if (!picHeader->getInGdrInterval())
+  {
+    return true;
+  }
+
+  // note: chroma coordinates
+  const int cbX = pu.Cb().x;
+  const int cbW = pu.Cb().width;
+  const int cbH = pu.Cb().height;
+
+  const int shiftX = getComponentScaleX(COMPONENT_Cb, cs->area.chromaFormat);
+  const int shiftY = getComponentScaleY(COMPONENT_Cb, cs->area.chromaFormat);
+
+  // the same block expressed in luma samples
+  const int lumaX = cbX << shiftX;
+  const int lumaW = cbW << shiftX;
+  const int lumaH = cbH << shiftY;
+
+  const int boundaryX = picHeader->getVirtualBoundariesPosX(0);
+
+  // count num of recons on the top, in luma samples
+  int numOfTopRecons = lumaW;
+  switch (chroma_dirMode)
+  {
+  case LM_CHROMA_IDX:
+  case MDLM_L_IDX:
+    numOfTopRecons = lumaW;
+    break;
+
+  case MDLM_T_IDX:
+    // note: could reduce the actual number
+    numOfTopRecons = lumaW + lumaH;
+    break;
+
+  case DM_CHROMA_IDX:
+    // evaluate luma_dirMode on the chroma block (isChroma = true)
+    numOfTopRecons = getNumTopRecons(pu, luma_dirMode, true) << shiftX;
+    break;
+
+  default:
+    numOfTopRecons = getNumTopRecons(pu, chroma_dirMode, true) << shiftX;
+    break;
+  }
+
+  // check if recon is out of boundary
+  const bool crossesBoundary = (lumaX < boundaryX) && (boundaryX < lumaX + numOfTopRecons);
+  return !crossesBoundary;
+}
+#endif
+
 bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, const double bestCostSoFar, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst, CodingStructure* bestCS)
 {
   CodingStructure       &cs            = *cu.cs;
   const SPS             &sps           = *cs.sps;
-  const uint32_t             uiWidthBit    = floorLog2(partitioner.currArea().lwidth() );
-  const uint32_t             uiHeightBit   =                   floorLog2(partitioner.currArea().lheight());
+  const uint32_t         logWidth      = floorLog2(partitioner.currArea().lwidth());
+  const uint32_t         logHeight     = floorLog2(partitioner.currArea().lheight());
 
   // Lambda calculation at equivalent Qp of 4 is recommended because at that Qp, the quantization divisor is 1.
   const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda( ) * FRAC_BITS_SCALE;
@@ -435,7 +589,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
     //reset the variables used for the tests
     m_regIntraRDListWithCosts.clear();
     int numTotalPartsHor = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT));
-    int numTotalPartsVer = (int)width  >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT)); 
+    int numTotalPartsVer = (int)width  >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT));
     m_ispTestedModes[0].init( numTotalPartsHor, numTotalPartsVer );
     //the total number of subpartitions is modified to take into account the cases where LFNST cannot be combined with ISP due to size restrictions
     numTotalPartsHor = sps.getUseLFNST() && CU::canUseLfnstWithISP(cu.Y(), HOR_INTRA_SUBPARTITIONS) ? numTotalPartsHor : 0;
@@ -447,16 +601,19 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
   }
 
   const bool testBDPCM = sps.getBDPCMEnabledFlag() && CU::bdpcmAllowed(cu, ComponentID(partitioner.chType)) && cu.mtsFlag == 0 && cu.lfnstIdx == 0;
-  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiHadModeList;
-  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList;
-  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList;
+  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> hadModeList;
+  static_vector<double, FAST_UDI_MAX_RDMODE_NUM>   candCostList;
+  static_vector<double, FAST_UDI_MAX_RDMODE_NUM>   candHadList;
 
   auto &pu = *cu.firstPU;
+#if GDR_ENABLED
+  const bool isEncodeGdrClean = cs.sps->getGDREnabledFlag() && cs.pcv->isEncoder && ((cs.picHeader->getInGdrInterval() && cs.isClean(pu.Y().topRight(), CHANNEL_TYPE_LUMA)) || (cs.picHeader->getNumVerVirtualBoundaries() == 0));
+#endif
   bool validReturn = false;
   {
-    CandHadList.clear();
-    CandCostList.clear();
-    uiHadModeList.clear();
+    candHadList.clear();
+    candCostList.clear();
+    hadModeList.clear();
 
     CHECK(pu.cu != &cu, "PU is not contained in the CU");
 
@@ -467,10 +624,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
     const bool testMip = mipAllowed && !(cu.lwidth() > (8 * cu.lheight()) || cu.lheight() > (8 * cu.lwidth()));
     const bool supportedMipBlkSize = pu.lwidth() <= MIP_MAX_WIDTH && pu.lheight() <= MIP_MAX_HEIGHT;
 
-    static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiRdModeList;
+    static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> rdModeList;
 
-    int numModesForFullRD = 3;
-    numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[uiWidthBit - MIN_CU_LOG2][uiHeightBit - MIN_CU_LOG2];
+    int numModesForFullRD = g_intraModeNumFastUseMPM2D[logWidth - MIN_CU_LOG2][logHeight - MIN_CU_LOG2];
 
 #if INTRA_FULL_SEARCH
     numModesForFullRD = numModesAvailable;
@@ -478,12 +634,12 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 
     if (isSecondColorSpace)
     {
-      uiRdModeList.clear();
+      rdModeList.clear();
       if (m_numSavedRdModeFirstColorSpace[m_savedRdModeIdx] > 0)
       {
         for (int i = 0; i < m_numSavedRdModeFirstColorSpace[m_savedRdModeIdx]; i++)
         {
-          uiRdModeList.push_back(m_savedRdModeFirstColorSpace[m_savedRdModeIdx][i]);
+          rdModeList.push_back(m_savedRdModeFirstColorSpace[m_savedRdModeIdx][i]);
         }
       }
       else
@@ -547,23 +703,23 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 
           //===== init pattern for luma prediction =====
           initIntraPatternChType(cu, pu.Y(), true);
-          bool bSatdChecked[NUM_INTRA_MODE];
-          memset(bSatdChecked, 0, sizeof(bSatdChecked));
+          bool satdChecked[NUM_INTRA_MODE];
+          std::fill_n(satdChecked, NUM_INTRA_MODE, false);
 
           if (!LFNSTLoadFlag)
           {
             for (int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++)
             {
-              uint32_t   uiMode    = modeIdx;
+              uint32_t   mode      = modeIdx;
               Distortion minSadHad = 0;
 
               // Skip checking extended Angular modes in the first round of SATD
-              if (uiMode > DC_IDX && (uiMode & 1))
+              if (mode > DC_IDX && (mode & 1))
               {
                 continue;
               }
 
-              bSatdChecked[uiMode] = true;
+              satdChecked[mode] = true;
 
               pu.intraDir[0] = modeIdx;
 
@@ -580,46 +736,76 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
               m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
               m_CABACEstimator->getCtx() = SubCtx( Ctx::MultiRefLineIdx, ctxStartMrlIdx );
 
-              uint64_t fracModeBits = xFracModeBitsIntra(pu, uiMode, CHANNEL_TYPE_LUMA);
+              uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA);
 
               double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass;
 
-              DTRACE(g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, uiMode);
+              DTRACE(g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, mode);
 
-              updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, uiMode), cost, uiRdModeList,
-                             CandCostList, numModesForFullRD);
-              updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, uiMode), double(minSadHad),
-                             uiHadModeList, CandHadList, numHadCand);
+#if GDR_ENABLED
+              if (isEncodeGdrClean)
+              {
+                if (isValidIntraPredLuma(pu, mode))
+                {
+                  updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), cost, rdModeList,
+                                 candCostList, numModesForFullRD);
+                  updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), double(minSadHad),
+                                 hadModeList, candHadList, numHadCand);
+                }
+              }
+              else
+              {
+                updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), cost, rdModeList, candCostList,
+                               numModesForFullRD);
+                updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), double(minSadHad), hadModeList,
+                               candHadList, numHadCand);
+              }
+#else
+              updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), cost, rdModeList, candCostList,
+                             numModesForFullRD);
+              updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), double(minSadHad), hadModeList,
+                             candHadList, numHadCand);
+#endif
             }
             if (!sps.getUseMIP() && LFNSTSaveFlag)
             {
               // save found best modes
-              m_uiSavedNumRdModesLFNST = numModesForFullRD;
-              m_uiSavedRdModeListLFNST = uiRdModeList;
-              m_dSavedModeCostLFNST    = CandCostList;
+              m_savedNumRdModesLFNST = numModesForFullRD;
+              m_savedRdModeListLFNST = rdModeList;
+              m_savedModeCostLFNST   = candCostList;
               // PBINTRA fast
-              m_uiSavedHadModeListLFNST = uiHadModeList;
-              m_dSavedHadListLFNST      = CandHadList;
+              m_savedHadModeListLFNST   = hadModeList;
+              m_savedHadListLFNST       = candHadList;
               LFNSTSaveFlag             = false;
             }
           }   // NSSTFlag
           if (!sps.getUseMIP() && LFNSTLoadFlag)
           {
             // restore saved modes
-            numModesForFullRD = m_uiSavedNumRdModesLFNST;
-            uiRdModeList      = m_uiSavedRdModeListLFNST;
-            CandCostList      = m_dSavedModeCostLFNST;
+            numModesForFullRD = m_savedNumRdModesLFNST;
+            rdModeList        = m_savedRdModeListLFNST;
+            candCostList      = m_savedModeCostLFNST;
             // PBINTRA fast
-            uiHadModeList = m_uiSavedHadModeListLFNST;
-            CandHadList   = m_dSavedHadListLFNST;
+            hadModeList = m_savedHadModeListLFNST;
+            candHadList = m_savedHadListLFNST;
           }   // !LFNSTFlag
 
           if (!(sps.getUseMIP() && LFNSTLoadFlag))
           {
-            static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> parentCandList = uiRdModeList;
+            static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> parentCandList = rdModeList;
 
             // Second round of SATD for extended Angular modes
+#if GDR_ENABLED
+            int nn = numModesForFullRD;
+            if (isEncodeGdrClean)
+            {
+              nn = std::min((int)numModesForFullRD, (int)parentCandList.size());
+            }
+
+            for (int modeIdx = 0; modeIdx < nn; modeIdx++)
+#else
             for (int modeIdx = 0; modeIdx < numModesForFullRD; modeIdx++)
+#endif
             {
               unsigned parentMode = parentCandList[modeIdx].modeId;
               if (parentMode > (DC_IDX + 1) && parentMode < (NUM_LUMA_MODE - 1))
@@ -628,7 +814,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
                 {
                   unsigned mode = parentMode + subModeIdx;
 
-                  if (!bSatdChecked[mode])
+                  if (!satdChecked[mode])
                   {
                     pu.intraDir[0] = mode;
 
@@ -652,12 +838,32 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 
                     double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass;
 
-                    updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), cost, uiRdModeList,
-                                   CandCostList, numModesForFullRD);
+#if GDR_ENABLED
+                    if (isEncodeGdrClean)
+                    {
+                      if (isValidIntraPredLuma(pu, mode))
+                      {
+                        updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), cost, rdModeList,
+                                       candCostList, numModesForFullRD);
+                        updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), double(minSadHad),
+                                       hadModeList, candHadList, numHadCand);
+                      }
+                    }
+                    else
+                    {
+                      updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), cost, rdModeList,
+                                     candCostList, numModesForFullRD);
+                      updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), double(minSadHad),
+                                     hadModeList, candHadList, numHadCand);
+                    }
+#else
+                    updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), cost, rdModeList,
+                                   candCostList, numModesForFullRD);
                     updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, mode), double(minSadHad),
-                                   uiHadModeList, CandHadList, numHadCand);
+                                   hadModeList, candHadList, numHadCand);
+#endif
 
-                    bSatdChecked[mode] = true;
+                    satdChecked[mode] = true;
                   }
                 }
               }
@@ -665,7 +871,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
             if (saveDataForISP)
             {
               // we save the regular intra modes list
-              m_ispCandListHor = uiRdModeList;
+              m_ispCandListHor = rdModeList;
             }
             pu.multiRefIdx    = 1;
             const int numMPMs = NUM_MOST_PROBABLE_MODES;
@@ -703,31 +909,57 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
                   uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA);
 
                   double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass;
-                  updateCandList(ModeInfo(false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode), cost, uiRdModeList,
-                                 CandCostList, numModesForFullRD);
+#if GDR_ENABLED
+                  if (isEncodeGdrClean)
+                  {
+                    if (isValidIntraPredLuma(pu, mode))
+                    {
+                      updateCandList(ModeInfo(false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode), cost,
+                                     rdModeList, candCostList, numModesForFullRD);
+                      updateCandList(ModeInfo(false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode),
+                                     double(minSadHad), hadModeList, candHadList, numHadCand);
+                    }
+                  }
+                  else
+                  {
+                    updateCandList(ModeInfo(false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode), cost, rdModeList,
+                                   candCostList, numModesForFullRD);
+                    updateCandList(ModeInfo(false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode),
+                                   double(minSadHad), hadModeList, candHadList, numHadCand);
+                  }
+#else
+                  updateCandList(ModeInfo(false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode), cost, rdModeList,
+                                 candCostList, numModesForFullRD);
                   updateCandList(ModeInfo(false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, mode), double(minSadHad),
-                                 uiHadModeList, CandHadList, numHadCand);
+                                 hadModeList, candHadList, numHadCand);
+#endif
                 }
               }
             }
-            CHECKD(uiRdModeList.size() != numModesForFullRD, "Error: RD mode list size");
+#if GDR_ENABLED
+            if (!isEncodeGdrClean)
+            {
+              CHECKD(rdModeList.size() != numModesForFullRD, "Error: RD mode list size");
+            }
+#else
+            CHECKD(rdModeList.size() != numModesForFullRD, "Error: RD mode list size");
+#endif
 
             if (LFNSTSaveFlag && testMip
                 && !allowLfnstWithMip(cu.firstPU->lumaSize()))   // save a different set for the next run
             {
               // save found best modes
-              m_uiSavedRdModeListLFNST = uiRdModeList;
-              m_dSavedModeCostLFNST    = CandCostList;
+              m_savedRdModeListLFNST = rdModeList;
+              m_savedModeCostLFNST   = candCostList;
               // PBINTRA fast
-              m_uiSavedHadModeListLFNST = uiHadModeList;
-              m_dSavedHadListLFNST      = CandHadList;
-              m_uiSavedNumRdModesLFNST =
-                g_aucIntraModeNumFast_UseMPM_2D[uiWidthBit - MIN_CU_LOG2][uiHeightBit - MIN_CU_LOG2];
-              m_uiSavedRdModeListLFNST.resize(m_uiSavedNumRdModesLFNST);
-              m_dSavedModeCostLFNST.resize(m_uiSavedNumRdModesLFNST);
+              m_savedHadModeListLFNST   = hadModeList;
+              m_savedHadListLFNST       = candHadList;
+              m_savedNumRdModesLFNST    = g_intraModeNumFastUseMPM2D[logWidth - MIN_CU_LOG2][logHeight - MIN_CU_LOG2];
+              m_savedRdModeListLFNST.resize(m_savedNumRdModesLFNST);
+              m_savedModeCostLFNST.resize(m_savedNumRdModesLFNST);
               // PBINTRA fast
-              m_uiSavedHadModeListLFNST.resize(3);
-              m_dSavedHadListLFNST.resize(3);
+              m_savedHadModeListLFNST.resize(3);
+              m_savedHadListLFNST.resize(3);
               LFNSTSaveFlag = false;
             }
             //*** Derive MIP candidates using Hadamard
@@ -739,11 +971,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
               for (uint32_t uiModeFull = 0; uiModeFull < numModesFull; uiModeFull++)
               {
                 const bool     isTransposed = (uiModeFull >= transpOff ? true : false);
-                const uint32_t uiMode       = (isTransposed ? uiModeFull - transpOff : uiModeFull);
+                const uint32_t mode         = (isTransposed ? uiModeFull - transpOff : uiModeFull);
 
                 numModesForFullRD++;
-                uiRdModeList.push_back(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, uiMode));
-                CandCostList.push_back(0);
+                rdModeList.push_back(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mode));
+                candCostList.push_back(0);
               }
             }
             else if (testMip)
@@ -761,10 +993,10 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
               for (uint32_t uiModeFull = 0; uiModeFull < numModesFull; uiModeFull++)
               {
                 const bool     isTransposed = (uiModeFull >= transpOff ? true : false);
-                const uint32_t uiMode       = (isTransposed ? uiModeFull - transpOff : uiModeFull);
+                const uint32_t mode         = (isTransposed ? uiModeFull - transpOff : uiModeFull);
 
                 pu.mipTransposedFlag           = isTransposed;
-                pu.intraDir[CHANNEL_TYPE_LUMA] = uiMode;
+                pu.intraDir[CHANNEL_TYPE_LUMA] = mode;
                 predIntraMip(COMPONENT_Y, piPred, pu);
 
                 // Use the min between SAD and HAD as the cost criterion
@@ -774,44 +1006,63 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 
                 m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
 
-                uint64_t fracModeBits = xFracModeBitsIntra(pu, uiMode, CHANNEL_TYPE_LUMA);
+                uint64_t fracModeBits = xFracModeBitsIntra(pu, mode, CHANNEL_TYPE_LUMA);
 
                 double cost            = double(minSadHad) + double(fracModeBits) * sqrtLambdaForFirstPass;
                 mipHadCost[uiModeFull] = cost;
                 DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMIP: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost,
                        uiModeFull);
 
-                updateCandList(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, uiMode), cost, uiRdModeList,
-                               CandCostList, numModesForFullRD + 1);
-                updateCandList(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, uiMode),
-                               0.8 * double(minSadHad), uiHadModeList, CandHadList, numHadCand);
+#if GDR_ENABLED
+                if (isEncodeGdrClean)
+                {
+                  if (isValidIntraPredLuma(pu, mode))
+                  {
+                    updateCandList(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mode), cost, rdModeList,
+                                   candCostList, numModesForFullRD + 1);
+                    updateCandList(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mode),
+                                   0.8 * double(minSadHad), hadModeList, candHadList, numHadCand);
+                  }
+                }
+                else
+                {
+                  updateCandList(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mode), cost, rdModeList,
+                                 candCostList, numModesForFullRD + 1);
+                  updateCandList(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mode),
+                                 0.8 * double(minSadHad), hadModeList, candHadList, numHadCand);
+                }
+#else
+                updateCandList(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mode), cost, rdModeList,
+                               candCostList, numModesForFullRD + 1);
+                updateCandList(ModeInfo(true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mode), 0.8 * double(minSadHad),
+                               hadModeList, candHadList, numHadCand);
+#endif
               }
 
               const double thresholdHadCost = 1.0 + 1.4 / sqrt((double) (pu.lwidth() * pu.lheight()));
-              reduceHadCandList(uiRdModeList, CandCostList, numModesForFullRD, thresholdHadCost, mipHadCost, pu,
-                                fastMip);
+              reduceHadCandList(rdModeList, candCostList, numModesForFullRD, thresholdHadCost, mipHadCost, pu, fastMip);
             }
             if (sps.getUseMIP() && LFNSTSaveFlag)
             {
               // save found best modes
-              m_uiSavedNumRdModesLFNST = numModesForFullRD;
-              m_uiSavedRdModeListLFNST = uiRdModeList;
-              m_dSavedModeCostLFNST    = CandCostList;
+              m_savedNumRdModesLFNST = numModesForFullRD;
+              m_savedRdModeListLFNST = rdModeList;
+              m_savedModeCostLFNST   = candCostList;
               // PBINTRA fast
-              m_uiSavedHadModeListLFNST = uiHadModeList;
-              m_dSavedHadListLFNST      = CandHadList;
+              m_savedHadModeListLFNST   = hadModeList;
+              m_savedHadListLFNST       = candHadList;
               LFNSTSaveFlag             = false;
             }
           }
           else   // if( sps.getUseMIP() && LFNSTLoadFlag)
           {
             // restore saved modes
-            numModesForFullRD = m_uiSavedNumRdModesLFNST;
-            uiRdModeList      = m_uiSavedRdModeListLFNST;
-            CandCostList      = m_dSavedModeCostLFNST;
+            numModesForFullRD = m_savedNumRdModesLFNST;
+            rdModeList        = m_savedRdModeListLFNST;
+            candCostList      = m_savedModeCostLFNST;
             // PBINTRA fast
-            uiHadModeList = m_uiSavedHadModeListLFNST;
-            CandHadList   = m_dSavedHadListLFNST;
+            hadModeList = m_savedHadModeListLFNST;
+            candHadList = m_savedHadListLFNST;
           }
 
           if (m_pcEncCfg->getFastUDIUseMPMEnabled())
@@ -828,16 +1079,38 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
               bool     mostProbableModeIncluded = false;
               ModeInfo mostProbableMode( false, false, 0, NOT_INTRA_SUBPARTITIONS, uiPreds[j] );
 
+#if GDR_ENABLED
+              int nn = numModesForFullRD;
+              if (isEncodeGdrClean)
+              {
+                nn = std::min((int) numModesForFullRD, (int) rdModeList.size());
+              }
+
+              for (int i = 0; i < nn; i++)
+#else
               for (int i = 0; i < numModesForFullRD; i++)
+#endif
+              {
+                mostProbableModeIncluded |= (mostProbableMode == rdModeList[i]);
+              }
+#if GDR_ENABLED
+              if (!isEncodeGdrClean)
               {
-                mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]);
+                if (!mostProbableModeIncluded)
+                {
+                  numModesForFullRD++;
+                  rdModeList.push_back(mostProbableMode);
+                  candCostList.push_back(0);
+                }
               }
+#else
               if (!mostProbableModeIncluded)
               {
                 numModesForFullRD++;
-                uiRdModeList.push_back(mostProbableMode);
-                CandCostList.push_back(0);
+                rdModeList.push_back(mostProbableMode);
+                candCostList.push_back(0);
               }
+#endif
             }
             if (saveDataForISP)
             {
@@ -851,10 +1124,20 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
                 {
                   mostProbableModeIncluded |= (mostProbableMode == m_ispCandListHor[i]);
                 }
+#if GDR_ENABLED
+                if (!isEncodeGdrClean)
+                {
+                  if (!mostProbableModeIncluded)
+                  {
+                    m_ispCandListHor.push_back(mostProbableMode);
+                  }
+                }
+#else
                 if (!mostProbableModeIncluded)
                 {
                   m_ispCandListHor.push_back(mostProbableMode);
                 }
+#endif
               }
             }
           }
@@ -867,7 +1150,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
         {
           // Store the modes to be checked with RD
           m_savedNumRdModes[lfnstIdx] = numModesForFullRD;
-          std::copy_n(uiRdModeList.begin(), numModesForFullRD, m_savedRdModeList[lfnstIdx]);
+          std::copy_n(rdModeList.begin(), numModesForFullRD, m_savedRdModeList[lfnstIdx]);
         }
       }
       else   // mtsUsage = 2 (here we potentially reduce the number of modes that will be full-RD checked)
@@ -883,7 +1166,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
           {
             if (m_modeCostStore[lfnstIdx][i] <= thresholdSkipMode * m_bestModeCostStore[lfnstIdx])
             {
-              uiRdModeList.push_back(m_savedRdModeList[lfnstIdx][i]);
+              rdModeList.push_back(m_savedRdModeList[lfnstIdx][i]);
               numModesForFullRD++;
             }
           }
@@ -893,29 +1176,36 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
         {
           // Restore the modes to be checked with RD
           numModesForFullRD = m_savedNumRdModes[lfnstIdx];
-          uiRdModeList.resize(numModesForFullRD);
-          std::copy_n(m_savedRdModeList[lfnstIdx], m_savedNumRdModes[lfnstIdx], uiRdModeList.begin());
-          CandCostList.resize(numModesForFullRD);
+          rdModeList.resize(numModesForFullRD);
+          std::copy_n(m_savedRdModeList[lfnstIdx], m_savedNumRdModes[lfnstIdx], rdModeList.begin());
+          candCostList.resize(numModesForFullRD);
         }
       }
 
-      CHECK(numModesForFullRD != uiRdModeList.size(), "Inconsistent state!");
+#if GDR_ENABLED
+      if (!isEncodeGdrClean)
+      {
+        CHECK(numModesForFullRD != rdModeList.size(), "Inconsistent state!");
+      }
+#else
+      CHECK(numModesForFullRD != rdModeList.size(), "Inconsistent state!");
+#endif
 
       // after this point, don't use numModesForFullRD
 
       // PBINTRA fast
-      if (m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable
+      if (m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && rdModeList.size() < numModesAvailable
           && !cs.slice->getDisableSATDForRD() && (mtsUsageFlag != 2 || lfnstIdx > 0))
       {
         double   pbintraRatio = (lfnstIdx > 0) ? 1.25 : PBINTRA_RATIO;
         int      maxSize      = -1;
         ModeInfo bestMipMode;
         int      bestMipIdx = -1;
-        for (int idx = 0; idx < uiRdModeList.size(); idx++)
+        for (int idx = 0; idx < rdModeList.size(); idx++)
         {
-          if (uiRdModeList[idx].mipFlg)
+          if (rdModeList[idx].mipFlg)
           {
-            bestMipMode = uiRdModeList[idx];
+            bestMipMode = rdModeList[idx];
             bestMipIdx  = idx;
             break;
           }
@@ -923,19 +1213,19 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
         const int numHadCand = 3;
         for (int k = numHadCand - 1; k >= 0; k--)
         {
-          if (CandHadList.size() < (k + 1) || CandHadList[k] > cs.interHad * pbintraRatio)
+          if (candHadList.size() < (k + 1) || candHadList[k] > cs.interHad * pbintraRatio)
           {
             maxSize = k;
           }
         }
         if (maxSize > 0)
         {
-          uiRdModeList.resize(std::min<size_t>(uiRdModeList.size(), maxSize));
+          rdModeList.resize(std::min<size_t>(rdModeList.size(), maxSize));
           if (bestMipIdx >= 0)
           {
-            if (uiRdModeList.size() <= bestMipIdx)
+            if (rdModeList.size() <= bestMipIdx)
             {
-              uiRdModeList.push_back(bestMipMode);
+              rdModeList.push_back(bestMipMode);
             }
           }
           if (saveDataForISP)
@@ -960,17 +1250,29 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
       }
     }
 
-    int numNonISPModes = (int)uiRdModeList.size();
+    int numNonISPModes = (int) rdModeList.size();
 
     if ( testISP )
     {
       // we reserve positions for ISP in the common full RD list
+#if GDR_ENABLED
+      if (!isEncodeGdrClean)
+      {
+        const int maxNumRDModesISP = sps.getUseLFNST() ? 16 * NUM_LFNST_NUM_PER_SET : 16;
+        m_curIspLfnstIdx = 0;
+        for (int i = 0; i < maxNumRDModesISP; i++)
+        {
+          rdModeList.push_back(ModeInfo(false, false, 0, INTRA_SUBPARTITIONS_RESERVED, 0));
+        }
+      }
+#else
       const int maxNumRDModesISP = sps.getUseLFNST() ? 16 * NUM_LFNST_NUM_PER_SET : 16;
       m_curIspLfnstIdx = 0;
       for (int i = 0; i < maxNumRDModesISP; i++)
       {
-        uiRdModeList.push_back( ModeInfo( false, false, 0, INTRA_SUBPARTITIONS_RESERVED, 0 ) );
+        rdModeList.push_back(ModeInfo(false, false, 0, INTRA_SUBPARTITIONS_RESERVED, 0));
       }
+#endif
     }
 
     //===== check modes (using r-d costs) =====
@@ -989,7 +1291,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
     csBest->picture = cs.picture;
 
     // just to be sure
-    numModesForFullRD = ( int ) uiRdModeList.size();
+    numModesForFullRD = (int) rdModeList.size();
     TUIntraSubPartitioner subTuPartitioner( partitioner );
     if ( testISP )
     {
@@ -998,7 +1300,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
     }
     int bestLfnstIdx = cu.lfnstIdx;
 
-    for (int mode = isSecondColorSpace ? 0 : -2 * int(testBDPCM); mode < (int)uiRdModeList.size(); mode++)
+    for (int mode = isSecondColorSpace ? 0 : -2 * int(testBDPCM); mode < (int) rdModeList.size(); mode++)
     {
       // set CU/PU to luma prediction mode
       ModeInfo uiOrgMode;
@@ -1015,9 +1317,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
       else
       {
         cu.bdpcmMode = 0;
-        uiOrgMode = uiRdModeList[mode];
+        uiOrgMode    = rdModeList[mode];
       }
-      if (!cu.bdpcmMode && uiRdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED)
+      if (!cu.bdpcmMode && rdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED)
       {
         if (mode == numNonISPModes)   // the list needs to be sorted only once
         {
@@ -1030,13 +1332,13 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
             break;
           }
         }
-        xGetNextISPMode(uiRdModeList[mode], (mode > 0 ? &uiRdModeList[mode - 1] : nullptr), Size(width, height));
-        if (uiRdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED)
+        xGetNextISPMode(rdModeList[mode], (mode > 0 ? &rdModeList[mode - 1] : nullptr), Size(width, height));
+        if (rdModeList[mode].ispMod == INTRA_SUBPARTITIONS_RESERVED)
         {
           continue;
         }
         cu.lfnstIdx = m_curIspLfnstIdx;
-        uiOrgMode   = uiRdModeList[mode];
+        uiOrgMode   = rdModeList[mode];
       }
       cu.mipFlag                     = uiOrgMode.mipFlg;
       pu.mipTransposedFlag           = uiOrgMode.mipTrFlg;
@@ -1084,9 +1386,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
         }
         else
         {
-          tmpValidReturn = xRecurIntraCodingLumaQT(
-            *csTemp, partitioner, uiBestPUMode.ispMod ? bestCurrentCost : MAX_DOUBLE, -1, TU_NO_ISP,
-            uiBestPUMode.ispMod, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst);
+          tmpValidReturn = xRecurIntraCodingLumaQT(*csTemp, partitioner, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst);
         }
       }
 
@@ -1330,7 +1630,11 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
       {
         int mode = chromaCandModes[idx];
         satdModeList[idx] = mode;
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+        if (PU::isLMCMode(mode) && (!PU::isLMCModeEnabled(pu, mode) || cu.slice->getDisableLmChromaCheck()))
+#else
         if (PU::isLMCMode(mode) && !PU::isLMCModeEnabled(pu, mode))
+#endif
         {
           continue;
         }
@@ -1417,21 +1721,25 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
       Distortion baseDist = cs.dist;
       bool testBDPCM = true;
       testBDPCM = testBDPCM && CU::bdpcmAllowed(cu, COMPONENT_Cb) && cu.ispMode == 0 && cu.mtsFlag == 0 && cu.lfnstIdx == 0;
-      for (int32_t uiMode = uiMinMode - (2 * int(testBDPCM)); uiMode < uiMaxMode; uiMode++)
+      for (int32_t mode = uiMinMode - (2 * int(testBDPCM)); mode < uiMaxMode; mode++)
       {
         int chromaIntraMode;
 
-        if (uiMode < 0)
+        if (mode < 0)
         {
-            cu.bdpcmModeChroma = -uiMode;
-            chromaIntraMode = cu.bdpcmModeChroma == 2 ? chromaCandModes[1] : chromaCandModes[2];
+          cu.bdpcmModeChroma = -mode;
+          chromaIntraMode    = cu.bdpcmModeChroma == 2 ? chromaCandModes[1] : chromaCandModes[2];
         }
         else
         {
-          chromaIntraMode = chromaCandModes[uiMode];
+          chromaIntraMode = chromaCandModes[mode];
 
           cu.bdpcmModeChroma = 0;
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+          if( PU::isLMCMode( chromaIntraMode ) && ( !PU::isLMCModeEnabled( pu, chromaIntraMode ) || cu.slice->getDisableLmChromaCheck() ) )
+#else
           if( PU::isLMCMode( chromaIntraMode ) && ! PU::isLMCModeEnabled( pu, chromaIntraMode ) )
+#endif
           {
             continue;
           }
@@ -1464,7 +1772,17 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
         double    dCost   = m_pcRdCost->calcRdCost( fracBits, uiDist - baseDist );
 
         //----- compare -----
+#if GDR_ENABLED
+        bool allOk = (dCost < dBestCost);
+        if (m_pcEncCfg->getGdrEnabled())
+        {
+          allOk = allOk && dBestCost && isValidIntraPredChroma(pu, (int)PU::getCoLocatedIntraLumaMode(pu), chromaIntraMode);
+        }
+
+        if (allOk)
+#else
         if( dCost < dBestCost )
+#endif
         {
           if( lumaUsesISP && dCost < bestCostSoFar )
           {
@@ -1868,7 +2186,7 @@ void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& partitione
       {
         if (lossless)
         {
-          rate += m_escapeNumBins[curPel[comp]];
+          rate += getEpExGolombNumBins(curPel[comp], 5);
         }
         else
         {
@@ -1881,7 +2199,7 @@ void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& partitione
           {
             error += tmpErr * tmpErr;
           }
-          rate += m_escapeNumBins[paPixelValue[comp]];   // encode quantized escape color
+          rate += getEpExGolombNumBins(paPixelValue[comp], 5);   // encode quantized escape color
         }
       }
       double rdCost = (double)error + m_pcRdCost->getLambda()*(double)rate;
@@ -2188,7 +2506,7 @@ double IntraSearch::rateDistOptPLT(
       rdCost = MAX_DOUBLE;
       return rdCost;
     }
-    rdCost += m_pcRdCost->getLambda()*(m_truncBinBits[(runIndex > refIndex) ? runIndex - 1 : runIndex][(scanPos == 0) ? (indexMaxValue + 1) : indexMaxValue] << SCALE_BITS);
+    rdCost += m_pcRdCost->getLambda()*(getTruncBinBits((runIndex > refIndex) ? runIndex - 1 : runIndex, (scanPos == 0) ? (indexMaxValue + 1) : indexMaxValue)  << SCALE_BITS);
   }
   rdCost += m_indexError[runIndex][m_scanOrder[scanPos].idx] * (1 << SCALE_BITS);
   if (scanPos > 0)
@@ -2206,6 +2524,7 @@ double IntraSearch::rateDistOptPLT(
   }
   return rdCost;
 }
+
 uint32_t IntraSearch::getEpExGolombNumBins(uint32_t symbol, uint32_t count)
 {
   uint32_t numBins = 0;
@@ -2257,26 +2576,6 @@ uint32_t IntraSearch::getTruncBinBits(uint32_t symbol, uint32_t maxSymbol)
   return idxCodeBit;
 }
 
-void IntraSearch::initTBCTable(int bitDepth)
-{
-  for (uint32_t i = 0; i < m_symbolSize; i++)
-  {
-    memset(m_truncBinBits[i], 0, sizeof(uint16_t)*(m_symbolSize + 1));
-  }
-  for (uint32_t i = 0; i < (m_symbolSize + 1); i++)
-  {
-    for (uint32_t j = 0; j < i; j++)
-    {
-      m_truncBinBits[j][i] = getTruncBinBits(j, i);
-    }
-  }
-  memset(m_escapeNumBins, 0, sizeof(uint16_t)*m_symbolSize);
-  for (uint32_t i = 0; i < m_symbolSize; i++)
-  {
-    m_escapeNumBins[i] = getEpExGolombNumBins(i, 5);
-  }
-}
-
 void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, uint32_t yPos, uint32_t xPos, ComponentID compBegin, uint32_t numComp)
 {
   CodingUnit    &cu = *cs.getCU(partitioner.chType);
@@ -2401,9 +2700,10 @@ void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner,
 
   TransformUnit &tu = *cs.getTU(partitioner.chType);
   QpParam cQP(tu, compBegin);
-  int qp = cQP.Qp(true) - 12;
+  int qp = cQP.Qp(true) - 6*(channelBitDepth_L - 8);
   qp = (qp < 0) ? 0 : ((qp > 56) ? 56 : qp);
   int errorLimit = g_paletteQuant[qp];
+
   if (lossless)
   {
     errorLimit = 0;
@@ -3356,7 +3656,7 @@ void IntraSearch::xIntraCodingACTTUBlock(TransformUnit &tu, const ComponentID &c
 {
   if (!tu.blocks[compID].valid())
   {
-    CHECK(1, "tu does not exist");
+    THROW("tu does not exist");
   }
 
   CodingStructure     &cs = *tu.cs;
@@ -3614,13 +3914,10 @@ bool IntraSearch::xIntraCodingLumaISP(CodingStructure& cs, Partitioner& partitio
   return !earlySkipISP;
 }
 
-
-bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &partitioner, const double bestCostSoFar, const int subTuIdx, const PartSplit ispType, const bool ispIsCurrentWinner, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst )
+bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &partitioner, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst )
 {
-        int   subTuCounter = subTuIdx;
   const UnitArea &currArea = partitioner.currArea();
   const CodingUnit     &cu = *cs.getCU( currArea.lumaPos(), partitioner.chType );
-        bool  earlySkipISP = false;
   uint32_t currDepth       = partitioner.currTrDepth;
   const SPS &sps           = *cs.sps;
   bool bCheckFull          = true;
@@ -3629,11 +3926,8 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
   bCheckSplit              = partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
   const Slice           &slice = *cs.slice;
 
-  if( cu.ispMode )
-  {
-    bCheckSplit = partitioner.canSplit( ispType, cs );
-    bCheckFull = !bCheckSplit;
-  }
+  CHECK(cu.ispMode != NOT_INTRA_SUBPARTITIONS, "Use the function xIntraCodingLumaISP for ISP cases.");
+
   uint32_t    numSig           = 0;
 
   double     dSingleCost                        = MAX_DOUBLE;
@@ -3750,8 +4044,6 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
     bool    cbfBestModeValid = false;
     bool    cbfDCT2  = true;
 
-    double bestDCT2cost = MAX_DOUBLE;
-    double threshold = m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && nNumTransformCands > 1 ? 1 + 1.4 / sqrt( cu.lwidth() * cu.lheight() ) : 1;
     for( int modeId = firstCheckId; modeId <= ( sps.getUseLFNST() ? lastCheckId : ( nNumTransformCands - 1 ) ); modeId++ )
     {
       uint8_t transformIndex = modeId;
@@ -3779,12 +4071,6 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
           {
             continue;
           }
-          // we compare the DCT-II cost against the best ISP cost so far (except for TS)
-          if (m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && trModes[modeId].first != MTS_DCT2_DCT2
-              && (trModes[modeId].first != MTS_SKIP || !tsAllowed) && bestDCT2cost > bestCostSoFar * threshold)
-          {
-            continue;
-          }
         }
         tu.mtsIdx[COMPONENT_Y] = trModes[modeId].first;
       }
@@ -3813,10 +4099,6 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
           default0Save1Load2 = 2;
         }
       }
-      if( cu.ispMode )
-      {
-        default0Save1Load2 = 0;
-      }
       if( sps.getUseLFNST() )
       {
         if( cu.mtsFlag )
@@ -3893,6 +4175,7 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
       }
 
       cuCtx.mtsLastScanPos = false;
+      cuCtx.violatesMtsCoeffConstraint = false;
       //----- determine rate and r-d cost -----
       if( ( sps.getUseLFNST() ? ( modeId == lastCheckId && modeId != 0 && checkTransformSkip ) : ( trModes[ modeId ].first != 0 ) ) && !TU::getCbfAtDepth( tu, COMPONENT_Y, currDepth ) )
       {
@@ -3903,20 +4186,13 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
         }
         else
         {
-          singleTmpFracBits = xGetIntraFracBitsQT(*csFull, partitioner, true, false, subTuCounter, ispType, &cuCtx);
+          singleTmpFracBits = xGetIntraFracBitsQT(*csFull, partitioner, true, false, -1, TU_NO_ISP, &cuCtx);
           singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
         }
       }
       else
       {
-        if( cu.ispMode && m_pcRdCost->calcRdCost( csFull->fracBits, csFull->dist + singleDistTmpLuma ) > bestCostSoFar )
-        {
-          earlySkipISP = true;
-        }
-        else
-        {
-          singleTmpFracBits = xGetIntraFracBitsQT( *csFull, partitioner, true, false, subTuCounter, ispType, &cuCtx );
-        }
+        singleTmpFracBits = xGetIntraFracBitsQT(*csFull, partitioner, true, false, -1, TU_NO_ISP, &cuCtx);
         if (tu.mtsIdx[COMPONENT_Y] > MTS_SKIP)
         {
           if (!cuCtx.mtsLastScanPos)
@@ -3934,10 +4210,6 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
         }
       }
 
-      if ( !cu.ispMode && nNumTransformCands > 1 && modeId == firstCheckId )
-      {
-        bestDCT2cost = singleCostTmp;
-      }
 
       if (singleCostTmp < dSingleCost)
       {
@@ -4037,53 +4309,19 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
       partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
     }
 
-    if( cu.ispMode )
-    {
-      partitioner.splitCurrArea( ispType, *csSplit );
-    }
     do
     {
-      bool tmpValidReturnSplit = xRecurIntraCodingLumaQT( *csSplit, partitioner, bestCostSoFar, subTuCounter, ispType, false, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId );
-      subTuCounter += subTuCounter != -1 ? 1 : 0;
+      bool tmpValidReturnSplit = xRecurIntraCodingLumaQT( *csSplit, partitioner, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId ); // forward the MTS range unchanged; moreProbMTSIdxFirst keeps its default
       if( sps.getUseLFNST() && !tmpValidReturnSplit )
       {
         splitIsSelected = false;
         break;
       }
 
-      if( !cu.ispMode )
-      {
-        csSplit->setDecomp( partitioner.currArea().Y() );
-      }
-      else if( CU::isISPFirst( cu, partitioner.currArea().Y(), COMPONENT_Y ) )
-      {
-        csSplit->setDecomp( cu.Y() );
-      }
+      csSplit->setDecomp(partitioner.currArea().Y());
+
+      uiSplitCbfLuma |= TU::getCbfAtDepth(*csSplit->getTU(partitioner.currArea().lumaPos(), partitioner.chType, -1), COMPONENT_Y, partitioner.currTrDepth);
 
-      uiSplitCbfLuma |= TU::getCbfAtDepth( *csSplit->getTU( partitioner.currArea().lumaPos(), partitioner.chType, subTuCounter - 1 ), COMPONENT_Y, partitioner.currTrDepth );
-      if( cu.ispMode )
-      {
-        //exit condition if the accumulated cost is already larger than the best cost so far (no impact in RD performance)
-        if( csSplit->cost > bestCostSoFar )
-        {
-          earlySkipISP    = true;
-          splitIsSelected = false;
-          break;
-        }
-        else
-        {
-          //more restrictive exit condition
-          bool tuIsDividedInRows = CU::divideTuInRows( cu );
-          int nSubPartitions = tuIsDividedInRows ? cu.lheight() >> floorLog2(cu.firstTU->lheight()) : cu.lwidth() >> floorLog2(cu.firstTU->lwidth());
-          double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91;
-          if( subTuCounter < nSubPartitions && csSplit->cost > bestCostSoFar*threshold )
-          {
-            earlySkipISP    = true;
-            splitIsSelected = false;
-            break;
-          }
-        }
-      }
     } while( partitioner.nextPart( *csSplit ) );
 
     partitioner.exitCurrSplit();
@@ -4108,7 +4346,7 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
       cuCtx.mtsLastScanPos = false;
 
       //----- determine rate and r-d cost -----
-      csSplit->fracBits = xGetIntraFracBitsQT( *csSplit, partitioner, true, false, cu.ispMode ? 0 : -1, ispType, &cuCtx );
+      csSplit->fracBits = xGetIntraFracBitsQT( *csSplit, partitioner, true, false, -1, TU_NO_ISP, &cuCtx );
 
       //--- update cost ---
       csSplit->cost     = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist);
@@ -4125,16 +4363,8 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
       // otherwise this would've happened in useSubStructure
       cs.picture->getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y()));
       cs.picture->getPredBuf(currArea.Y()).copyFrom(cs.getPredBuf(currArea.Y()));
-
-      if( cu.ispMode && earlySkipISP )
-      {
-        cs.cost = MAX_DOUBLE;
-      }
-      else
-      {
-        cs.cost = m_pcRdCost->calcRdCost( cs.fracBits, cs.dist );
-        retVal = true;
-      }
+      cs.cost = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);
+      retVal = true;
     }
   }
   return retVal;
@@ -4245,6 +4475,7 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti
 
     const bool tsAllowed = TU::isTSAllowed(tu, COMPONENT_Y);
     const bool mtsAllowed = CU::isMTSAllowed(cu, COMPONENT_Y);
+    const bool lossless = m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless();
     std::vector<TrMode> trModes;
 
     if (sps.getUseLFNST())
@@ -4261,7 +4492,7 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti
     }
     else
     {
-      if (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless())
+      if (lossless)
       {
         nNumTransformCands = 1;
         CHECK(!tsAllowed && !cu.bdpcmMode, "transform skip should be enabled for LS");
@@ -4316,8 +4547,13 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti
     bool    cbfDCT2 = true;
     if (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING || !slice.isLossless())
     m_pcRdCost->lambdaAdjustColorTrans(true, COMPONENT_Y);
-    for (int modeId = firstCheckId; modeId <= ((m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless()) ? (nNumTransformCands - 1) : lastCheckId); modeId++)
+    for (int modeIndex = firstCheckId; sps.getUseLFNST() || modeIndex < trModes.size(); modeIndex++)
     {
+      const int modeId = sps.getUseLFNST() ? modeIndex : trModes[modeIndex].first;
+      if (modeId > lastCheckId)
+      {
+        break;
+      }
       uint8_t transformIndex = modeId;
       csFull->getResiBuf(tu.Y()).copyFrom(csFull->getOrgResiBuf(tu.Y()));
 
@@ -4337,18 +4573,18 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti
       }
       else
       {
-        if (!(m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless()))
+        if (!lossless)
         {
           if (!cbfDCT2 || (m_pcEncCfg->getUseTransformSkipFast() && bestLumaModeId == 1))
           {
             break;
           }
-          if (!trModes[modeId].second)
+          if (!trModes[modeIndex].second)
           {
             continue;
           }
         }
-        tu.mtsIdx[COMPONENT_Y] = trModes[modeId].first;
+        tu.mtsIdx[COMPONENT_Y] = modeId;
       }
 
       singleDistTmpLuma = 0;
@@ -4428,7 +4664,7 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti
       cuCtx.isDQPCoded = true;
       cuCtx.isChromaQpAdjCoded = true;
       //----- determine rate and r-d cost -----
-      if ((sps.getUseLFNST() ? (modeId == lastCheckId && modeId != 0 && checkTransformSkip) : (trModes[modeId].first != 0)) && !TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth))
+      if ((sps.getUseLFNST() ? (modeId == lastCheckId && modeId != 0 && checkTransformSkip) : (modeId != 0)) && !TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth))
       {
         //In order not to code TS flag when cbf is zero, the case for TS with cbf being zero is forbidden.
         if (m_pcEncCfg->getCostMode() != COST_LOSSLESS_CODING || !slice.isLossless())
@@ -4473,8 +4709,8 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti
         }
         else
         {
-          bestLumaModeId = trModes[modeId].first;
-          if (trModes[modeId].first == 0)
+          bestLumaModeId = modeId;
+          if (modeId == 0)
           {
             cbfDCT2 = TU::getCbfAtDepth(tu, COMPONENT_Y, currDepth);
           }
@@ -4548,7 +4784,7 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti
       bool        cbfDCT2 = true;
 
       trModes.clear();
-      if (m_pcEncCfg->getCostMode() == COST_LOSSLESS_CODING && slice.isLossless())
+      if (lossless)
       {
         numTransformCands = 1;
         CHECK(!tsAllowed && !cu.bdpcmModeChroma, "transform skip should be enabled for LS");
@@ -5438,9 +5674,9 @@ ChromaCbfs IntraSearch::xRecurIntraChromaCodingQT( CodingStructure &cs, Partitio
   return cbfs;
 }
 
-uint64_t IntraSearch::xFracModeBitsIntra(PredictionUnit &pu, const uint32_t &uiMode, const ChannelType &chType)
+uint64_t IntraSearch::xFracModeBitsIntra(PredictionUnit &pu, const uint32_t &mode, const ChannelType &chType)
 {
-  uint32_t orgMode = uiMode;
+  uint32_t orgMode = mode;
 
   if (!pu.ciipFlag)
   std::swap(orgMode, pu.intraDir[chType]);
@@ -5910,7 +6146,7 @@ bool IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost,
   m_ispCandListVer.clear();
 
   // we sort the normal intra modes according to their full RD costs
-  std::sort(m_regIntraRDListWithCosts.begin(), m_regIntraRDListWithCosts.end(), ModeInfoWithCost::compareModeInfoWithCost);
+  std::stable_sort(m_regIntraRDListWithCosts.begin(), m_regIntraRDListWithCosts.end(), ModeInfoWithCost::compareModeInfoWithCost);
 
   // we get the best angle from the regular intra list
   int bestNormalIntraAngle = -1;
diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h
index f911e0fe2db4c5473a9436646c4dec63097fc290..67253de40bbcc4470c81735c5280c7793551593c 100644
--- a/source/Lib/EncoderLib/IntraSearch.h
+++ b/source/Lib/EncoderLib/IntraSearch.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -363,11 +363,11 @@ private:
   int                                                m_numSavedRdModeFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2];
   int                                                m_savedRdModeIdx;
 
-  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_uiSavedRdModeListLFNST;
-  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_uiSavedHadModeListLFNST;
-  uint32_t                                         m_uiSavedNumRdModesLFNST;
-  static_vector<double,   FAST_UDI_MAX_RDMODE_NUM> m_dSavedModeCostLFNST;
-  static_vector<double,   FAST_UDI_MAX_RDMODE_NUM> m_dSavedHadListLFNST;
+  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_savedRdModeListLFNST;
+  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_savedHadModeListLFNST;
+  uint32_t                                         m_savedNumRdModesLFNST;
+  static_vector<double, FAST_UDI_MAX_RDMODE_NUM>   m_savedModeCostLFNST;
+  static_vector<double, FAST_UDI_MAX_RDMODE_NUM>   m_savedHadListLFNST;
 
   PelStorage      m_tmpStorageLCU;
   PelStorage      m_colorTransResiBuf;
@@ -385,9 +385,6 @@ protected:
   CtxCache*       m_CtxCache;
 
   bool            m_isInitialized;
-  uint32_t        m_symbolSize;
-  uint16_t**      m_truncBinBits;
-  uint16_t*       m_escapeNumBins;
   bool            m_bestEscape;
   double*         m_indexError[MAXPLTSIZE + 1];
   uint8_t*        m_minErrorIndexMap; // store the best index in terms of distortion for each pixel
@@ -433,13 +430,18 @@ public:
   bool estIntraPredLumaQT(CodingUnit &cu, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false, CodingStructure* bestCS = NULL);
   void estIntraPredChromaQT       ( CodingUnit &cu, Partitioner& pm, const double maxCostAllowed = MAX_DOUBLE );
   void PLTSearch                  ( CodingStructure &cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp);
-  uint64_t xFracModeBitsIntra     (PredictionUnit &pu, const uint32_t &uiMode, const ChannelType &compID);
+  uint64_t xFracModeBitsIntra(PredictionUnit &pu, const uint32_t &mode, const ChannelType &compID);
   void invalidateBestModeCost     () { for( int i = 0; i < NUM_LFNST_NUM_PER_SET; i++ ) m_bestModeCostValid[ i ] = false; };
 
   void sortRdModeListFirstColorSpace(ModeInfo mode, double cost, char bdpcmMode, ModeInfo* rdModeList, double* rdCostList, char* bdpcmModeList, int& candNum);
   void invalidateBestRdModeFirstColorSpace();
   void setSavedRdModeIdx(int idx) { m_savedRdModeIdx = idx; }
 
+#if GDR_ENABLED
+  int  getNumTopRecons(PredictionUnit &pu, int luma_dirMode, bool isChroma);
+  bool isValidIntraPredLuma(PredictionUnit &pu, int luma_dirMode);
+  bool isValidIntraPredChroma(PredictionUnit &pu, int luma_dirMode, int chroma_dirMode);
+#endif
 protected:
 
   // -------------------------------------------------------------------------------------------------------------------
@@ -463,7 +465,7 @@ protected:
   void xIntraCodingACTTUBlock(TransformUnit &tu, const ComponentID &compID, Distortion& ruiDist, std::vector<TrMode>* trModes = nullptr, const bool loadTr = false);
 
   ChromaCbfs xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE,                          const PartSplit ispType = TU_NO_ISP );
-  bool       xRecurIntraCodingLumaQT  ( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, const bool ispIsCurrentWinner = false, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false );
+  bool       xRecurIntraCodingLumaQT  ( CodingStructure &cs, Partitioner& pm, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false );
   bool       xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner& pm, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false);
   bool       xIntraCodingLumaISP      ( CodingStructure& cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE );
 
@@ -476,7 +478,6 @@ protected:
   void     deriveIndexMap         (CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp, PLTScanMode pltScanMode, double& dCost, bool* idxExist);
   bool     deriveSubblockIndexMap(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, PLTScanMode pltScanMode, int minSubPos, int maxSubPos, const BinFracBits& fracBitsPltRunType, const BinFracBits* fracBitsPltIndexINDEX, const BinFracBits* fracBitsPltIndexCOPY, const double minCost, bool useRotate);
   double   rateDistOptPLT         (bool RunType, uint8_t RunIndex, bool prevRunType, uint8_t prevRunIndex, uint8_t aboveRunIndex, bool& prevCodedRunType, int& prevCodedRunPos, int scanPos, uint32_t width, int dist, int indexMaxValue, const BinFracBits* IndexfracBits, const BinFracBits& TypefracBits);
-  void     initTBCTable           (int bitDepth);
   uint32_t getTruncBinBits        (uint32_t symbol, uint32_t maxSymbol);
   uint32_t getEpExGolombNumBins   (uint32_t symbol, uint32_t count);
   void xGetNextISPMode                    ( ModeInfo& modeInfo, const ModeInfo* lastMode, const Size cuSize );
diff --git a/source/Lib/EncoderLib/NALwrite.cpp b/source/Lib/EncoderLib/NALwrite.cpp
index 49cc9aa6d4588e0454071e88a65cb0df21633606..799e5c6c54bc10270059385c412423cb2466a7ce 100644
--- a/source/Lib/EncoderLib/NALwrite.cpp
+++ b/source/Lib/EncoderLib/NALwrite.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -48,7 +48,7 @@ static const uint8_t emulation_prevention_three_byte = 3;
 
 void writeNalUnitHeader(ostream& out, OutputNALUnit& nalu)       // nal_unit_header()
 {
-OutputBitstream bsNALUHeader;
+  OutputBitstream bsNALUHeader;
   int forbiddenZero = 0;
   bsNALUHeader.write(forbiddenZero, 1);   // forbidden_zero_bit
   int nuhReservedZeroBit = 0;
diff --git a/source/Lib/EncoderLib/NALwrite.h b/source/Lib/EncoderLib/NALwrite.h
index d7024292673f9c47c4e429cbc386098d705dc9cc..477cbb7b2d420bac4e88d54872007faf6c0ba968 100644
--- a/source/Lib/EncoderLib/NALwrite.h
+++ b/source/Lib/EncoderLib/NALwrite.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/RateCtrl.cpp b/source/Lib/EncoderLib/RateCtrl.cpp
index 18a9580ede2b79832805ef19bad1846839066169..0e33acb633a8223f5ee920a26770bf68fd5106b0 100644
--- a/source/Lib/EncoderLib/RateCtrl.cpp
+++ b/source/Lib/EncoderLib/RateCtrl.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -402,120 +402,117 @@ void EncRCGOP::create( EncRCSeq* encRCSeq, int numPic )
     }
     else if (encRCSeq->getAdaptiveBits() == 2 && encRCSeq->getGOPSize() == 16 )  // for GOP size = 16, random access case
     {
-      {
-        const double qdfParaLev2A = 0.5847;
-        const double qdfParaLev2B = -0.0782;
-        const double qdfParaLev3A = 0.5468;
-        const double qdfParaLev3B = -0.1364;
-        const double qdfParaLev4A = 0.6539;
-        const double qdfParaLev4B = -0.203;
-        const double qdfParaLev5A = 0.8623;
-        const double qdfParaLev5B = -0.4676;
-        double qdfLev1Lev2 = Clip3(0.12, 0.9, qdfParaLev2A * encRCSeq->getPicPara(2).m_skipRatio + qdfParaLev2B);
-        double qdfLev1Lev3 = Clip3(0.13, 0.9, qdfParaLev3A * encRCSeq->getPicPara(3).m_skipRatio + qdfParaLev3B);
-        double qdfLev1Lev4 = Clip3(0.15, 0.9, qdfParaLev4A * encRCSeq->getPicPara(4).m_skipRatio + qdfParaLev4B);
-        double qdfLev1Lev5 = Clip3(0.20, 0.9, qdfParaLev5A * encRCSeq->getPicPara(5).m_skipRatio + qdfParaLev5B);
-        double qdfLev2Lev3 = Clip3(0.09, 0.9, qdfLev1Lev3 * (1 - qdfLev1Lev2));
-        double qdfLev2Lev4 = Clip3(0.12, 0.9, qdfLev1Lev4 * (1 - qdfLev1Lev2));
-        double qdfLev2Lev5 = Clip3(0.14, 0.9, qdfLev1Lev5 * (1 - qdfLev1Lev2));
-        double qdfLev3Lev4 = Clip3(0.06, 0.9, qdfLev1Lev4 * (1 - qdfLev1Lev3));
-        double qdfLev3Lev5 = Clip3(0.09, 0.9, qdfLev1Lev5 * (1 - qdfLev1Lev3));
-        double qdfLev4Lev5 = Clip3(0.10, 0.9, qdfLev1Lev5 * (1 - qdfLev1Lev4));
-
-        double lambdaLev1 = 1 / (1 + 2 * (qdfLev1Lev2 + 2 * qdfLev1Lev3 + 4 * qdfLev1Lev4 + 8 * qdfLev1Lev5));
-        double lambdaLev2 = 1 / (1 + (3 * qdfLev2Lev3 + 5 * qdfLev2Lev4 + 8 * qdfLev2Lev5));
-        double lambdaLev3 = 1 / (1 + 2 * qdfLev3Lev4 + 4 * qdfLev3Lev5);
-        double lambdaLev4 = 1 / (1 + 2 * qdfLev4Lev5);
-        double lambdaLev5 = 1 / (1.0);
-
-        lambdaRatio[0] = 1.0;
-        lambdaRatio[1] = lambdaLev2 / lambdaLev1;
-        lambdaRatio[2] = lambdaLev3 / lambdaLev1;
-        lambdaRatio[3] = lambdaLev4 / lambdaLev1;
-        lambdaRatio[4] = lambdaLev5 / lambdaLev1;
-        lambdaRatio[5] = lambdaLev5 / lambdaLev1;
-        lambdaRatio[6] = lambdaLev4 / lambdaLev1;
-        lambdaRatio[7] = lambdaLev5 / lambdaLev1;
-        lambdaRatio[8] = lambdaLev5 / lambdaLev1;
-        lambdaRatio[9] = lambdaLev3 / lambdaLev1;
-        lambdaRatio[10] = lambdaLev4 / lambdaLev1;
-        lambdaRatio[11] = lambdaLev5 / lambdaLev1;
-        lambdaRatio[12] = lambdaLev5 / lambdaLev1;
-        lambdaRatio[13] = lambdaLev4 / lambdaLev1;
-        lambdaRatio[14] = lambdaLev5 / lambdaLev1;
-        lambdaRatio[15] = lambdaLev5 / lambdaLev1;
-      }
+      const double qdfParaLev2A = 0.5847;
+      const double qdfParaLev2B = -0.0782;
+      const double qdfParaLev3A = 0.5468;
+      const double qdfParaLev3B = -0.1364;
+      const double qdfParaLev4A = 0.6539;
+      const double qdfParaLev4B = -0.203;
+      const double qdfParaLev5A = 0.8623;
+      const double qdfParaLev5B = -0.4676;
+      double       qdfLev1Lev2  = Clip3(0.12, 0.9, qdfParaLev2A * encRCSeq->getPicPara(2).m_skipRatio + qdfParaLev2B);
+      double       qdfLev1Lev3  = Clip3(0.13, 0.9, qdfParaLev3A * encRCSeq->getPicPara(3).m_skipRatio + qdfParaLev3B);
+      double       qdfLev1Lev4  = Clip3(0.15, 0.9, qdfParaLev4A * encRCSeq->getPicPara(4).m_skipRatio + qdfParaLev4B);
+      double       qdfLev1Lev5  = Clip3(0.20, 0.9, qdfParaLev5A * encRCSeq->getPicPara(5).m_skipRatio + qdfParaLev5B);
+      double       qdfLev2Lev3  = Clip3(0.09, 0.9, qdfLev1Lev3 * (1 - qdfLev1Lev2));
+      double       qdfLev2Lev4  = Clip3(0.12, 0.9, qdfLev1Lev4 * (1 - qdfLev1Lev2));
+      double       qdfLev2Lev5  = Clip3(0.14, 0.9, qdfLev1Lev5 * (1 - qdfLev1Lev2));
+      double       qdfLev3Lev4  = Clip3(0.06, 0.9, qdfLev1Lev4 * (1 - qdfLev1Lev3));
+      double       qdfLev3Lev5  = Clip3(0.09, 0.9, qdfLev1Lev5 * (1 - qdfLev1Lev3));
+      double       qdfLev4Lev5  = Clip3(0.10, 0.9, qdfLev1Lev5 * (1 - qdfLev1Lev4));
+
+      double lambdaLev1 = 1 / (1 + 2 * (qdfLev1Lev2 + 2 * qdfLev1Lev3 + 4 * qdfLev1Lev4 + 8 * qdfLev1Lev5));
+      double lambdaLev2 = 1 / (1 + (3 * qdfLev2Lev3 + 5 * qdfLev2Lev4 + 8 * qdfLev2Lev5));
+      double lambdaLev3 = 1 / (1 + 2 * qdfLev3Lev4 + 4 * qdfLev3Lev5);
+      double lambdaLev4 = 1 / (1 + 2 * qdfLev4Lev5);
+      double lambdaLev5 = 1 / (1.0);
+
+      lambdaRatio[0]  = 1.0;
+      lambdaRatio[1]  = lambdaLev2 / lambdaLev1;
+      lambdaRatio[2]  = lambdaLev3 / lambdaLev1;
+      lambdaRatio[3]  = lambdaLev4 / lambdaLev1;
+      lambdaRatio[4]  = lambdaLev5 / lambdaLev1;
+      lambdaRatio[5]  = lambdaLev5 / lambdaLev1;
+      lambdaRatio[6]  = lambdaLev4 / lambdaLev1;
+      lambdaRatio[7]  = lambdaLev5 / lambdaLev1;
+      lambdaRatio[8]  = lambdaLev5 / lambdaLev1;
+      lambdaRatio[9]  = lambdaLev3 / lambdaLev1;
+      lambdaRatio[10] = lambdaLev4 / lambdaLev1;
+      lambdaRatio[11] = lambdaLev5 / lambdaLev1;
+      lambdaRatio[12] = lambdaLev5 / lambdaLev1;
+      lambdaRatio[13] = lambdaLev4 / lambdaLev1;
+      lambdaRatio[14] = lambdaLev5 / lambdaLev1;
+      lambdaRatio[15] = lambdaLev5 / lambdaLev1;
     }
     else if (encRCSeq->getAdaptiveBits() == 2 && encRCSeq->getGOPSize() == 32 )  // for GOP size = 32, random access case
     {
-      {
-        const double qdfParaLev2A = 0.7534;
-        const double qdfParaLev2B = -0.0303;
-        const double qdfParaLev3A = 0.7044;
-        const double qdfParaLev3B = -0.0445;
-        const double qdfParaLev4A = 0.7084;
-        const double qdfParaLev4B = -0.1401;
-        const double qdfParaLev5A = 0.8844;
-        const double qdfParaLev5B = -0.3676;
-        const double qdfParaLev6A = 1.2336;
-        const double qdfParaLev6B = -0.7511;
-
-        double qdfLev1Lev2 = Clip3(0.12, 0.9, qdfParaLev2A * encRCSeq->getPicPara(2).m_skipRatio + qdfParaLev2B);
-        double qdfLev1Lev3 = Clip3(0.13, 0.9, qdfParaLev3A * encRCSeq->getPicPara(3).m_skipRatio + qdfParaLev3B);
-        double qdfLev1Lev4 = Clip3(0.15, 0.9, qdfParaLev4A * encRCSeq->getPicPara(4).m_skipRatio + qdfParaLev4B);
-        double qdfLev1Lev5 = Clip3(0.20, 0.9, qdfParaLev5A * encRCSeq->getPicPara(5).m_skipRatio + qdfParaLev5B);
-        double qdfLev1Lev6 = Clip3(0.25, 0.9, qdfParaLev6A * encRCSeq->getPicPara(6).m_skipRatio + qdfParaLev6B);
-        double qdfLev2Lev3 = Clip3(0.09, 0.9, qdfLev1Lev3 * (1 - qdfLev1Lev2));
-        double qdfLev2Lev4 = Clip3(0.12, 0.9, qdfLev1Lev4 * (1 - qdfLev1Lev2));
-        double qdfLev2Lev5 = Clip3(0.14, 0.9, qdfLev1Lev5 * (1 - qdfLev1Lev2));
-        double qdfLev2Lev6 = Clip3(0.16, 0.9, qdfLev1Lev6 * (1 - qdfLev1Lev2));
-        double qdfLev3Lev4 = Clip3(0.06, 0.9, qdfLev1Lev4 * (1 - qdfLev1Lev3));
-        double qdfLev3Lev5 = Clip3(0.09, 0.9, qdfLev1Lev5 * (1 - qdfLev1Lev3));
-        double qdfLev3Lev6 = Clip3(0.10, 0.9, qdfLev1Lev6 * (1 - qdfLev1Lev3));
-        double qdfLev4Lev5 = Clip3(0.10, 0.9, qdfLev1Lev5 * (1 - qdfLev1Lev4));
-        double qdfLev4Lev6 = Clip3(0.10, 0.9, qdfLev1Lev6 * (1 - qdfLev1Lev4));
-        double qdfLev5Lev6 = Clip3(0.12, 0.9, qdfLev1Lev6 * (1 - qdfLev1Lev5));
-
-         double lambdaLev1 = 1 / (1 + 2 * qdfLev1Lev2 + 4 * qdfLev1Lev3 + 6 * qdfLev1Lev4 + 8 * qdfLev1Lev5 + 10 * qdfLev1Lev6);
-         double lambdaLev2 = 1 / (1 + 3 * qdfLev2Lev3 + 5 * qdfLev2Lev4 + 8 * qdfLev2Lev5 + 9 * qdfLev2Lev6);
-         double lambdaLev3 = 1 / (1 + 2 * qdfLev3Lev4 + 4 * qdfLev3Lev5 + 6 * qdfLev3Lev6);
-         double lambdaLev4 = 1 / (1 + 2 * qdfLev4Lev5 + 4 * qdfLev4Lev6);
-         double lambdaLev5 = 1 / (1 + 2 * qdfLev5Lev6);
-         double lambdaLev6 = 1 / (1.0);
-
-         lambdaRatio[0] = 1.0;
-         lambdaRatio[1]  = lambdaLev2 / lambdaLev1;
-         lambdaRatio[2]  = lambdaLev3 / lambdaLev1;
-         lambdaRatio[3]  = lambdaLev4 / lambdaLev1;
-         lambdaRatio[4]  = lambdaLev5 / lambdaLev1;
-         lambdaRatio[5]  = lambdaLev6 / lambdaLev1;
-         lambdaRatio[6]  = lambdaLev6 / lambdaLev1;
-         lambdaRatio[7]  = lambdaLev5 / lambdaLev1;
-         lambdaRatio[8]  = lambdaLev6 / lambdaLev1;
-         lambdaRatio[9]  = lambdaLev6 / lambdaLev1;
-         lambdaRatio[10] = lambdaLev4 / lambdaLev1;
-         lambdaRatio[11] = lambdaLev5 / lambdaLev1;
-         lambdaRatio[12] = lambdaLev6 / lambdaLev1;
-         lambdaRatio[13] = lambdaLev6 / lambdaLev1;
-         lambdaRatio[14] = lambdaLev5 / lambdaLev1;
-         lambdaRatio[15] = lambdaLev6 / lambdaLev1;
-         lambdaRatio[16] = lambdaLev6 / lambdaLev1;
-         lambdaRatio[17] = lambdaLev3 / lambdaLev1;
-         lambdaRatio[18] = lambdaLev4 / lambdaLev1;
-         lambdaRatio[19] = lambdaLev5 / lambdaLev1;
-         lambdaRatio[20] = lambdaLev6 / lambdaLev1;
-         lambdaRatio[21] = lambdaLev6 / lambdaLev1;
-         lambdaRatio[22] = lambdaLev5 / lambdaLev1;
-         lambdaRatio[23] = lambdaLev6 / lambdaLev1;
-         lambdaRatio[24] = lambdaLev6 / lambdaLev1;
-         lambdaRatio[25] = lambdaLev4 / lambdaLev1;
-         lambdaRatio[26] = lambdaLev5 / lambdaLev1;
-         lambdaRatio[27] = lambdaLev6 / lambdaLev1;
-         lambdaRatio[28] = lambdaLev6 / lambdaLev1;
-         lambdaRatio[29] = lambdaLev5 / lambdaLev1;
-         lambdaRatio[30] = lambdaLev6 / lambdaLev1;
-         lambdaRatio[31] = lambdaLev6 / lambdaLev1;
-      }
+      const double qdfParaLev2A = 0.7534;
+      const double qdfParaLev2B = -0.0303;
+      const double qdfParaLev3A = 0.7044;
+      const double qdfParaLev3B = -0.0445;
+      const double qdfParaLev4A = 0.7084;
+      const double qdfParaLev4B = -0.1401;
+      const double qdfParaLev5A = 0.8844;
+      const double qdfParaLev5B = -0.3676;
+      const double qdfParaLev6A = 1.2336;
+      const double qdfParaLev6B = -0.7511;
+
+      double qdfLev1Lev2 = Clip3(0.12, 0.9, qdfParaLev2A * encRCSeq->getPicPara(2).m_skipRatio + qdfParaLev2B);
+      double qdfLev1Lev3 = Clip3(0.13, 0.9, qdfParaLev3A * encRCSeq->getPicPara(3).m_skipRatio + qdfParaLev3B);
+      double qdfLev1Lev4 = Clip3(0.15, 0.9, qdfParaLev4A * encRCSeq->getPicPara(4).m_skipRatio + qdfParaLev4B);
+      double qdfLev1Lev5 = Clip3(0.20, 0.9, qdfParaLev5A * encRCSeq->getPicPara(5).m_skipRatio + qdfParaLev5B);
+      double qdfLev1Lev6 = Clip3(0.25, 0.9, qdfParaLev6A * encRCSeq->getPicPara(6).m_skipRatio + qdfParaLev6B);
+      double qdfLev2Lev3 = Clip3(0.09, 0.9, qdfLev1Lev3 * (1 - qdfLev1Lev2));
+      double qdfLev2Lev4 = Clip3(0.12, 0.9, qdfLev1Lev4 * (1 - qdfLev1Lev2));
+      double qdfLev2Lev5 = Clip3(0.14, 0.9, qdfLev1Lev5 * (1 - qdfLev1Lev2));
+      double qdfLev2Lev6 = Clip3(0.16, 0.9, qdfLev1Lev6 * (1 - qdfLev1Lev2));
+      double qdfLev3Lev4 = Clip3(0.06, 0.9, qdfLev1Lev4 * (1 - qdfLev1Lev3));
+      double qdfLev3Lev5 = Clip3(0.09, 0.9, qdfLev1Lev5 * (1 - qdfLev1Lev3));
+      double qdfLev3Lev6 = Clip3(0.10, 0.9, qdfLev1Lev6 * (1 - qdfLev1Lev3));
+      double qdfLev4Lev5 = Clip3(0.10, 0.9, qdfLev1Lev5 * (1 - qdfLev1Lev4));
+      double qdfLev4Lev6 = Clip3(0.10, 0.9, qdfLev1Lev6 * (1 - qdfLev1Lev4));
+      double qdfLev5Lev6 = Clip3(0.12, 0.9, qdfLev1Lev6 * (1 - qdfLev1Lev5));
+
+      double lambdaLev1 =
+        1 / (1 + 2 * qdfLev1Lev2 + 4 * qdfLev1Lev3 + 6 * qdfLev1Lev4 + 8 * qdfLev1Lev5 + 10 * qdfLev1Lev6);
+      double lambdaLev2 = 1 / (1 + 3 * qdfLev2Lev3 + 5 * qdfLev2Lev4 + 8 * qdfLev2Lev5 + 9 * qdfLev2Lev6);
+      double lambdaLev3 = 1 / (1 + 2 * qdfLev3Lev4 + 4 * qdfLev3Lev5 + 6 * qdfLev3Lev6);
+      double lambdaLev4 = 1 / (1 + 2 * qdfLev4Lev5 + 4 * qdfLev4Lev6);
+      double lambdaLev5 = 1 / (1 + 2 * qdfLev5Lev6);
+      double lambdaLev6 = 1 / (1.0);
+
+      lambdaRatio[0]  = 1.0;
+      lambdaRatio[1]  = lambdaLev2 / lambdaLev1;
+      lambdaRatio[2]  = lambdaLev3 / lambdaLev1;
+      lambdaRatio[3]  = lambdaLev4 / lambdaLev1;
+      lambdaRatio[4]  = lambdaLev5 / lambdaLev1;
+      lambdaRatio[5]  = lambdaLev6 / lambdaLev1;
+      lambdaRatio[6]  = lambdaLev6 / lambdaLev1;
+      lambdaRatio[7]  = lambdaLev5 / lambdaLev1;
+      lambdaRatio[8]  = lambdaLev6 / lambdaLev1;
+      lambdaRatio[9]  = lambdaLev6 / lambdaLev1;
+      lambdaRatio[10] = lambdaLev4 / lambdaLev1;
+      lambdaRatio[11] = lambdaLev5 / lambdaLev1;
+      lambdaRatio[12] = lambdaLev6 / lambdaLev1;
+      lambdaRatio[13] = lambdaLev6 / lambdaLev1;
+      lambdaRatio[14] = lambdaLev5 / lambdaLev1;
+      lambdaRatio[15] = lambdaLev6 / lambdaLev1;
+      lambdaRatio[16] = lambdaLev6 / lambdaLev1;
+      lambdaRatio[17] = lambdaLev3 / lambdaLev1;
+      lambdaRatio[18] = lambdaLev4 / lambdaLev1;
+      lambdaRatio[19] = lambdaLev5 / lambdaLev1;
+      lambdaRatio[20] = lambdaLev6 / lambdaLev1;
+      lambdaRatio[21] = lambdaLev6 / lambdaLev1;
+      lambdaRatio[22] = lambdaLev5 / lambdaLev1;
+      lambdaRatio[23] = lambdaLev6 / lambdaLev1;
+      lambdaRatio[24] = lambdaLev6 / lambdaLev1;
+      lambdaRatio[25] = lambdaLev4 / lambdaLev1;
+      lambdaRatio[26] = lambdaLev5 / lambdaLev1;
+      lambdaRatio[27] = lambdaLev6 / lambdaLev1;
+      lambdaRatio[28] = lambdaLev6 / lambdaLev1;
+      lambdaRatio[29] = lambdaLev5 / lambdaLev1;
+      lambdaRatio[30] = lambdaLev6 / lambdaLev1;
+      lambdaRatio[31] = lambdaLev6 / lambdaLev1;
     }
     else
     {
@@ -1235,7 +1232,7 @@ double EncRCPic::calAverageLambda()
       {
         totalSSE += m_LCUs[i].m_actualSSE;
         totalPixels += m_LCUs[i].m_numberOfPixel;
-       }
+      }
     }
   }
 
diff --git a/source/Lib/EncoderLib/RateCtrl.h b/source/Lib/EncoderLib/RateCtrl.h
index 078c27125017676e874ff296bd96a84bc2c640a5..3c17d6be5aeb3319a4eb68cd8b560f88a29df815 100644
--- a/source/Lib/EncoderLib/RateCtrl.h
+++ b/source/Lib/EncoderLib/RateCtrl.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/SEIEncoder.cpp b/source/Lib/EncoderLib/SEIEncoder.cpp
index 2190f454f9f4de1425ffea51fda8a816f156b15f..9ae6a87d8b7ee8244f91e05834236ccd331be61c 100644
--- a/source/Lib/EncoderLib/SEIEncoder.cpp
+++ b/source/Lib/EncoderLib/SEIEncoder.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@
 #include "CommonLib/SEI.h"
 #include "EncGOP.h"
 #include "EncLib.h"
+#include <fstream>
 
 uint32_t calcMD5(const CPelUnitBuf& pic, PictureHash &digest, const BitDepths &bitDepths);
 uint32_t calcCRC(const CPelUnitBuf& pic, PictureHash &digest, const BitDepths &bitDepths);
@@ -93,10 +94,10 @@ void SEIEncoder::initSEIBufferingPeriod(SEIBufferingPeriod *bufferingPeriodSEI,
   {
     for(int j=0; j < bufferingPeriodSEI->m_bpCpbCnt; j++)
     {
-      bufferingPeriodSEI->m_initialCpbRemovalDelay[j][i][0] = uiInitialCpbRemovalDelay;
-      bufferingPeriodSEI->m_initialCpbRemovalDelay[j][i][1] = uiInitialCpbRemovalDelay;
-      bufferingPeriodSEI->m_initialCpbRemovalOffset[j][i][0] = uiInitialCpbRemovalDelay;
-      bufferingPeriodSEI->m_initialCpbRemovalOffset[j][i][1] = uiInitialCpbRemovalDelay;
+      bufferingPeriodSEI->m_initialCpbRemovalDelay[i][j][0] = uiInitialCpbRemovalDelay;
+      bufferingPeriodSEI->m_initialCpbRemovalDelay[i][j][1] = uiInitialCpbRemovalDelay;
+      bufferingPeriodSEI->m_initialCpbRemovalOffset[i][j][0] = uiInitialCpbRemovalDelay;
+      bufferingPeriodSEI->m_initialCpbRemovalOffset[i][j][1] = uiInitialCpbRemovalDelay;
     }
   }
   // We don't set concatenation_flag here. max_initial_removal_delay_for_concatenation depends on the usage scenario.
@@ -397,7 +398,7 @@ void SEIEncoder::initSEISampleAspectRatioInfo(SEISampleAspectRatioInfo* seiSampl
 //! Note: The SEI message structures input into this function will become part of the scalable nesting SEI and will be
 //!       automatically freed, when the nesting SEI is disposed.
 //  either targetOLS or targetLayer should be active, call with empty vector for the inactive mode
-void SEIEncoder::initSEIScalableNesting(SEIScalableNesting *scalableNestingSEI, SEIMessages &nestedSEIs, const std::vector<int> &targetOLSs, const std::vector<int> &targetLayers, const std::vector<uint16_t> &subpictureIDs)
+void SEIEncoder::initSEIScalableNesting(SEIScalableNesting *scalableNestingSEI, SEIMessages &nestedSEIs, const std::vector<int> &targetOLSs, const std::vector<int> &targetLayers, const std::vector<uint16_t> &subpictureIDs, uint16_t maxSubpicIdInPic)
 {
   CHECK(!(m_isInitialized), "Scalable Nesting SEI already initialized ");
   CHECK(!(scalableNestingSEI != NULL), "No Scalable Nesting SEI object passed");
@@ -442,8 +443,8 @@ void SEIEncoder::initSEIScalableNesting(SEIScalableNesting *scalableNestingSEI,
     scalableNestingSEI->m_snSubpicFlag = 1;
     scalableNestingSEI->m_snNumSubpics = (uint32_t) subpictureIDs.size();
     scalableNestingSEI->m_snSubpicId   = subpictureIDs;
-    scalableNestingSEI->m_snSubpicIdLen = max(1, ceilLog2((*std::max_element(subpictureIDs.begin(), subpictureIDs.end())) + 1));
-    CHECK ( scalableNestingSEI->m_snSubpicIdLen > 15, "Subpicture ID too large. Length must be <= 15 bits");
+    scalableNestingSEI->m_snSubpicIdLen = max(1, ceilLog2(maxSubpicIdInPic + 1));
+    CHECK ( scalableNestingSEI->m_snSubpicIdLen > 16, "Subpicture ID too large. Length must be <= 16 bits");
   }
   scalableNestingSEI->m_nestedSEIs.clear();
   for (SEIMessages::iterator it = nestedSEIs.begin(); it != nestedSEIs.end(); it++)
@@ -460,9 +461,7 @@ void SEIEncoder::initDecodedPictureHashSEI(SEIDecodedPictureHash *decodedPicture
   CHECK(!(decodedPictureHashSEI!=NULL), "Unspecified error");
 
   decodedPictureHashSEI->method = m_pcCfg->getDecodedPictureHashSEIType();
-#if FIX_TICKET_1405
   decodedPictureHashSEI->singleCompFlag = (m_pcCfg->getChromaFormatIdc() == 0);
-#endif
   switch (m_pcCfg->getDecodedPictureHashSEIType())
   {
     case HASHTYPE_MD5:
@@ -493,6 +492,21 @@ void SEIEncoder::initSEIDependentRAPIndication(SEIDependentRAPIndication *seiDep
   CHECK(!(seiDependentRAPIndication!=NULL), "Unspecified error");
 }
 
+void SEIEncoder::initSEIExtendedDrapIndication(SEIExtendedDrapIndication *sei)
+{
+  CHECK(!(m_isInitialized), "Extended DRAP SEI already initialized");
+  CHECK(!(sei!=NULL), "Need a seiExtendedDrapIndication for initialization (got nullptr)");
+  sei->m_edrapIndicationRapIdMinus1 = 0;
+  sei->m_edrapIndicationLeadingPicturesDecodableFlag = false;
+  sei->m_edrapIndicationReservedZero12Bits = 0;
+  sei->m_edrapIndicationNumRefRapPicsMinus1 = 0;
+  sei->m_edrapIndicationRefRapId.resize(sei->m_edrapIndicationNumRefRapPicsMinus1 + 1);
+  for (int i = 0; i <= sei->m_edrapIndicationNumRefRapPicsMinus1; i++)
+  {
+    sei->m_edrapIndicationRefRapId[i] = 0;
+  }
+}
+
 
 template <typename T>
 static void readTokenValue(T            &returnedValue, /// value returned
@@ -562,6 +576,143 @@ static void readTokenValueAndValidate(T            &returnedValue, /// value ret
   }
 }
 
+void SEIEncoder::readAnnotatedRegionSEI(std::istream &fic, SEIAnnotatedRegions *seiAnnoRegion, bool &failed)
+{
+  readTokenValue(seiAnnoRegion->m_hdr.m_cancelFlag, failed, fic, "SEIArCancelFlag");
+  if (!seiAnnoRegion->m_hdr.m_cancelFlag)
+  {
+    readTokenValue(seiAnnoRegion->m_hdr.m_notOptimizedForViewingFlag, failed, fic, "SEIArNotOptForViewingFlag");
+    readTokenValue(seiAnnoRegion->m_hdr.m_trueMotionFlag, failed, fic, "SEIArTrueMotionFlag");
+    readTokenValue(seiAnnoRegion->m_hdr.m_occludedObjectFlag, failed, fic, "SEIArOccludedObjsFlag");
+    readTokenValue(seiAnnoRegion->m_hdr.m_partialObjectFlagPresentFlag, failed, fic, "SEIArPartialObjsFlagPresentFlag");
+    readTokenValue(seiAnnoRegion->m_hdr.m_objectLabelPresentFlag, failed, fic, "SEIArObjLabelPresentFlag");
+    readTokenValue(seiAnnoRegion->m_hdr.m_objectConfidenceInfoPresentFlag, failed, fic, "SEIArObjConfInfoPresentFlag");
+    if (seiAnnoRegion->m_hdr.m_objectConfidenceInfoPresentFlag)
+    {
+      readTokenValueAndValidate<uint32_t>(seiAnnoRegion->m_hdr.m_objectConfidenceLength, failed, fic, "SEIArObjDetConfLength", uint32_t(0), uint32_t(255));
+    }
+    if (seiAnnoRegion->m_hdr.m_objectLabelPresentFlag)
+    {
+      readTokenValue(seiAnnoRegion->m_hdr.m_objectLabelLanguagePresentFlag, failed, fic, "SEIArObjLabelLangPresentFlag");
+      if (seiAnnoRegion->m_hdr.m_objectLabelLanguagePresentFlag)
+      {
+        readTokenValue(seiAnnoRegion->m_hdr.m_annotatedRegionsObjectLabelLang, failed, fic, "SEIArLabelLanguage");
+      }
+      uint32_t numLabelUpdates=0;
+      readTokenValueAndValidate<uint32_t>(numLabelUpdates, failed, fic, "SEIArNumLabelUpdates", uint32_t(0), uint32_t(255));
+      seiAnnoRegion->m_annotatedLabels.resize(numLabelUpdates);
+      for (auto it=seiAnnoRegion->m_annotatedLabels.begin(); it!=seiAnnoRegion->m_annotatedLabels.end(); it++)
+      {
+        SEIAnnotatedRegions::AnnotatedRegionLabel &ar=it->second;
+        readTokenValueAndValidate(it->first, failed, fic, "SEIArLabelIdc[c]", uint32_t(0), uint32_t(255));
+        bool cancelFlag;
+        readTokenValue(cancelFlag, failed, fic, "SEIArLabelCancelFlag[c]");
+        ar.labelValid=!cancelFlag;
+        if (ar.labelValid)
+        {
+          readTokenValue(ar.label, failed, fic, "SEIArLabel[c]");
+        }
+      }
+    }
+
+    uint32_t numObjectUpdates=0;
+    readTokenValueAndValidate<uint32_t>(numObjectUpdates, failed, fic, "SEIArNumObjUpdates", uint32_t(0), uint32_t(255));
+    seiAnnoRegion->m_annotatedRegions.resize(numObjectUpdates);
+    for (auto it=seiAnnoRegion->m_annotatedRegions.begin(); it!=seiAnnoRegion->m_annotatedRegions.end(); it++)
+    {
+      SEIAnnotatedRegions::AnnotatedRegionObject &ar = it->second;
+      readTokenValueAndValidate(it->first, failed, fic, "SEIArObjIdx[c]", uint32_t(0), uint32_t(255));
+      readTokenValue(ar.objectCancelFlag, failed, fic, "SEIArObjCancelFlag[c]");
+      ar.objectLabelValid=false;
+      ar.boundingBoxValid=false;
+      if (!ar.objectCancelFlag)
+      {
+        if (seiAnnoRegion->m_hdr.m_objectLabelPresentFlag)
+        {
+          readTokenValue(ar.objectLabelValid, failed, fic, "SEIArObjLabelUpdateFlag[c]");
+          if (ar.objectLabelValid)
+          {
+            readTokenValueAndValidate<uint32_t>(ar.objLabelIdx, failed, fic, "SEIArObjectLabelIdc[c]", uint32_t(0), uint32_t(255));
+          }
+          readTokenValue(ar.boundingBoxValid, failed, fic, "SEIArBoundBoxUpdateFlag[c]");
+          if (ar.boundingBoxValid)
+          {
+            readTokenValueAndValidate<uint32_t>(ar.boundingBoxTop, failed, fic, "SEIArObjTop[c]", uint32_t(0), uint32_t(0x7fffffff));
+            readTokenValueAndValidate<uint32_t>(ar.boundingBoxLeft, failed, fic, "SEIArObjLeft[c]", uint32_t(0), uint32_t(0x7fffffff));
+            readTokenValueAndValidate<uint32_t>(ar.boundingBoxWidth, failed, fic, "SEIArObjWidth[c]", uint32_t(0), uint32_t(0x7fffffff));
+            readTokenValueAndValidate<uint32_t>(ar.boundingBoxHeight, failed, fic, "SEIArObjHeight[c]", uint32_t(0), uint32_t(0x7fffffff));
+            if (seiAnnoRegion->m_hdr.m_partialObjectFlagPresentFlag)
+            {
+              readTokenValue(ar.partialObjectFlag, failed, fic, "SEIArObjPartUpdateFlag[c]");
+            }
+            if (seiAnnoRegion->m_hdr.m_objectConfidenceInfoPresentFlag)
+            {
+              readTokenValueAndValidate<uint32_t>(ar.objectConfidence, failed, fic, "SEIArObjDetConf[c]", uint32_t(0), uint32_t(1<<seiAnnoRegion->m_hdr.m_objectConfidenceLength)-1);
+            }
+          }
+          //Compare with existing attributes to decide whether it's a static object
+          //First check whether it's an existing object (or) new object
+          auto destIt = m_pcCfg->m_arObjects.find(it->first);
+          //New object
+          if (destIt == m_pcCfg->m_arObjects.end())
+          {
+            //New object arrived, needs to be appended to the map of tracked objects
+            m_pcCfg->m_arObjects[it->first] = ar;
+          }
+          //Existing object
+          else
+          {
+            // Size remains the same
+            if(m_pcCfg->m_arObjects[it->first].boundingBoxWidth == ar.boundingBoxWidth &&
+              m_pcCfg->m_arObjects[it->first].boundingBoxHeight == ar.boundingBoxHeight)
+              {
+                if(m_pcCfg->m_arObjects[it->first].boundingBoxTop == ar.boundingBoxTop &&
+                  m_pcCfg->m_arObjects[it->first].boundingBoxLeft == ar.boundingBoxLeft)
+                  {
+                    ar.boundingBoxValid = 0;
+                  }
+              }
+          }
+        }
+      }
+    }
+  }
+}
+
+bool SEIEncoder::initSEIAnnotatedRegions(SEIAnnotatedRegions* SEIAnnoReg, int currPOC)
+{
+  assert(m_isInitialized);
+  assert(SEIAnnoReg != NULL);
+
+  // reading external Annotated Regions Information SEI message parameters from file
+  if (!m_pcCfg->getAnnotatedRegionSEIFileRoot().empty())
+  {
+    bool failed = false;
+    // build the per-picture file name by appending the POC number as a suffix: "<root>_<poc>.txt"
+    std::string AnnoRegionSEIFileWithPoc(m_pcCfg->getAnnotatedRegionSEIFileRoot());
+    {
+      std::stringstream suffix;
+      suffix << "_" << currPOC << ".txt";
+      AnnoRegionSEIFileWithPoc += suffix.str();
+    }
+    std::ifstream fic(AnnoRegionSEIFileWithPoc.c_str());
+    if (!fic.good() || !fic.is_open())
+    {
+      std::cerr << "No Annotated Regions SEI parameters file " << AnnoRegionSEIFileWithPoc << " for POC " << currPOC << std::endl;
+      return false;
+    }
+    //Read annotated region SEI parameters from the cfg file
+    readAnnotatedRegionSEI(fic, SEIAnnoReg, failed);
+    if (failed)
+    {
+      std::cerr << "Error while reading Annotated Regions SEI parameters file '" << AnnoRegionSEIFileWithPoc << "'" << std::endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+  return true;
+}
+
+
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
 void SEIEncoder::initSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics *seiAltTransCharacteristics)
 {
@@ -660,6 +811,230 @@ void SEIEncoder::initSEIContentColourVolume(SEIContentColourVolume *seiContentCo
     seiContentColourVolume->m_ccvAvgLuminanceValue = (uint32_t)(10000000 * m_pcCfg->getCcvSEIAvgLuminanceValue());
   }
 }
+
+void SEIEncoder::initSEIScalabilityDimensionInfo(SEIScalabilityDimensionInfo *sei)
+{
+  CHECK(!(m_isInitialized), "Scalability dimension information SEI already initialized");
+  CHECK(!(sei != NULL), "Need a seiScalabilityDimensionInfo for initialization (got nullptr)");
+  sei->m_sdiMaxLayersMinus1 = m_pcCfg->getSdiSEIMaxLayersMinus1();
+  sei->m_sdiMultiviewInfoFlag = m_pcCfg->getSdiSEIMultiviewInfoFlag();
+  sei->m_sdiAuxiliaryInfoFlag = m_pcCfg->getSdiSEIAuxiliaryInfoFlag();
+  if (sei->m_sdiMultiviewInfoFlag || sei->m_sdiAuxiliaryInfoFlag)
+  {
+    if (sei->m_sdiMultiviewInfoFlag)
+    {
+      sei->m_sdiViewIdLenMinus1 = m_pcCfg->getSdiSEIViewIdLenMinus1();
+    }
+    sei->m_sdiLayerId.resize(sei->m_sdiMaxLayersMinus1 + 1);
+    for (int i = 0; i <= sei->m_sdiMaxLayersMinus1; i++)
+    {
+      sei->m_sdiLayerId[i] = m_pcCfg->getSdiSEILayerId(i);
+      sei->m_sdiViewIdVal.resize(sei->m_sdiMaxLayersMinus1 + 1);
+      if (sei->m_sdiMultiviewInfoFlag)
+      {
+        sei->m_sdiViewIdVal[i] = m_pcCfg->getSdiSEIViewIdVal(i);
+      }
+      sei->m_sdiAuxId.resize(sei->m_sdiMaxLayersMinus1 + 1);
+      if (sei->m_sdiAuxiliaryInfoFlag)
+      {
+        sei->m_sdiAuxId[i] = m_pcCfg->getSdiSEIAuxId(i);
+        sei->m_sdiNumAssociatedPrimaryLayersMinus1.resize(sei->m_sdiMaxLayersMinus1 + 1);
+        sei->m_sdiAssociatedPrimaryLayerIdx.resize(sei->m_sdiMaxLayersMinus1 + 1);
+        if (sei->m_sdiAuxId[i] > 0)
+        {
+          sei->m_sdiNumAssociatedPrimaryLayersMinus1[i] = m_pcCfg->getSdiSEINumAssociatedPrimaryLayersMinus1(i);
+          sei->m_sdiAssociatedPrimaryLayerIdx[i].resize(sei->m_sdiNumAssociatedPrimaryLayersMinus1[i] + 1);
+          for (int j = 0; j <= sei->m_sdiNumAssociatedPrimaryLayersMinus1[i]; j++)
+          {
+            sei->m_sdiAssociatedPrimaryLayerIdx[i][j] = 0;
+          }
+        }
+      }
+    }
+    sei->m_sdiNumViews = 1;
+    if (sei->m_sdiMultiviewInfoFlag)
+    {
+      for (int i = 1; i <= sei->m_sdiMaxLayersMinus1; i++)
+      {
+        bool newViewFlag = true;
+        for (int j = 0; j < i; j++)
+        {
+          if (sei->m_sdiViewIdVal[i] == sei->m_sdiViewIdVal[j])
+          {
+            newViewFlag = false;
+          }
+        }
+        if (newViewFlag)
+        {
+          sei->m_sdiNumViews++;
+        }
+      }
+    }
+  }
+}
+
+void SEIEncoder::initSEIMultiviewAcquisitionInfo(SEIMultiviewAcquisitionInfo *sei)
+{
+  CHECK(!(m_isInitialized), "Multiview acquisition information SEI already initialized");
+  CHECK(!(sei != NULL), "Need a seiMultiviewAcquisitionInfo for initialization (got nullptr)");
+  sei->m_maiIntrinsicParamFlag        = m_pcCfg->getMaiSEIIntrinsicParamFlag();
+  sei->m_maiExtrinsicParamFlag        = m_pcCfg->getMaiSEIExtrinsicParamFlag();
+  sei->m_maiNumViewsMinus1            = m_pcCfg->getMaiSEINumViewsMinus1();
+  if (sei->m_maiIntrinsicParamFlag)
+  {
+    sei->m_maiIntrinsicParamsEqualFlag  = m_pcCfg->getMaiSEIIntrinsicParamsEqualFlag();
+    sei->m_maiPrecFocalLength           = m_pcCfg->getMaiSEIPrecFocalLength();
+    sei->m_maiPrecPrincipalPoint        = m_pcCfg->getMaiSEIPrecPrincipalPoint();
+    sei->m_maiPrecSkewFactor            = m_pcCfg->getMaiSEIPrecSkewFactor();
+    int numViews = sei->m_maiIntrinsicParamsEqualFlag ? 1 : sei->m_maiNumViewsMinus1 + 1;
+    sei->m_maiSignFocalLengthX       .resize( numViews );
+    sei->m_maiExponentFocalLengthX   .resize( numViews );
+    sei->m_maiMantissaFocalLengthX   .resize( numViews );
+    sei->m_maiSignFocalLengthY       .resize( numViews );
+    sei->m_maiExponentFocalLengthY   .resize( numViews );
+    sei->m_maiMantissaFocalLengthY   .resize( numViews );
+    sei->m_maiSignPrincipalPointX    .resize( numViews );
+    sei->m_maiExponentPrincipalPointX.resize( numViews );
+    sei->m_maiMantissaPrincipalPointX.resize( numViews );
+    sei->m_maiSignPrincipalPointY    .resize( numViews );
+    sei->m_maiExponentPrincipalPointY.resize( numViews );
+    sei->m_maiMantissaPrincipalPointY.resize( numViews );
+    sei->m_maiSignSkewFactor         .resize( numViews );
+    sei->m_maiExponentSkewFactor     .resize( numViews );
+    sei->m_maiMantissaSkewFactor     .resize( numViews );
+    for( int i = 0; i  <=  ( sei->m_maiIntrinsicParamsEqualFlag ? 0 : sei->m_maiNumViewsMinus1 ); i++ )
+    {
+      sei->m_maiSignFocalLengthX       [i] = m_pcCfg->getMaiSEISignFocalLengthX(i);
+      sei->m_maiExponentFocalLengthX   [i] = m_pcCfg->getMaiSEIExponentFocalLengthX(i);
+      sei->m_maiMantissaFocalLengthX   [i] = m_pcCfg->getMaiSEIMantissaFocalLengthX(i);
+      sei->m_maiSignFocalLengthY       [i] = m_pcCfg->getMaiSEISignFocalLengthY(i);
+      sei->m_maiExponentFocalLengthY   [i] = m_pcCfg->getMaiSEIExponentFocalLengthY(i);
+      sei->m_maiMantissaFocalLengthY   [i] = m_pcCfg->getMaiSEIMantissaFocalLengthY(i);
+      sei->m_maiSignPrincipalPointX    [i] = m_pcCfg->getMaiSEISignPrincipalPointX(i);
+      sei->m_maiExponentPrincipalPointX[i] = m_pcCfg->getMaiSEIExponentPrincipalPointX(i);
+      sei->m_maiMantissaPrincipalPointX[i] = m_pcCfg->getMaiSEIMantissaPrincipalPointX(i);
+      sei->m_maiSignPrincipalPointY    [i] = m_pcCfg->getMaiSEISignPrincipalPointY(i);
+      sei->m_maiExponentPrincipalPointY[i] = m_pcCfg->getMaiSEIExponentPrincipalPointY(i);
+      sei->m_maiMantissaPrincipalPointY[i] = m_pcCfg->getMaiSEIMantissaPrincipalPointY(i);
+      sei->m_maiSignSkewFactor         [i] = m_pcCfg->getMaiSEISignSkewFactor(i);
+      sei->m_maiExponentSkewFactor     [i] = m_pcCfg->getMaiSEIExponentSkewFactor(i);
+      sei->m_maiMantissaSkewFactor     [i] = m_pcCfg->getMaiSEIMantissaSkewFactor(i);
+    }
+  }
+  if (sei->m_maiExtrinsicParamFlag)
+  {
+    sei->m_maiPrecRotationParam = m_pcCfg->getMaiSEIPrecRotationParam();
+    sei->m_maiPrecTranslationParam = m_pcCfg->getMaiSEIPrecTranslationParam();
+    sei->m_maiSignR.resize(sei->m_maiNumViewsMinus1 + 1);
+    sei->m_maiExponentR.resize(sei->m_maiNumViewsMinus1 + 1);
+    sei->m_maiMantissaR.resize(sei->m_maiNumViewsMinus1 + 1);
+    sei->m_maiSignT.resize(sei->m_maiNumViewsMinus1 + 1);
+    sei->m_maiExponentT.resize(sei->m_maiNumViewsMinus1 + 1);
+    sei->m_maiMantissaT.resize(sei->m_maiNumViewsMinus1 + 1);
+    for (int i = 0; i <= sei->m_maiNumViewsMinus1; i++)
+    {
+      sei->m_maiSignR[i].resize(3);
+      sei->m_maiExponentR[i].resize(3);
+      sei->m_maiMantissaR[i].resize(3);
+      sei->m_maiSignT[i].resize(3);
+      sei->m_maiExponentT[i].resize(3);
+      sei->m_maiMantissaT[i].resize(3);
+      for (int j = 0; j < 3; j++)
+      {
+        sei->m_maiSignR[i][j].resize(3);
+        sei->m_maiExponentR[i][j].resize(3);
+        sei->m_maiMantissaR[i][j].resize(3);
+        for (int k = 0; k < 3; k++)
+        {
+          sei->m_maiSignR[i][j][k] = 0;
+          sei->m_maiExponentR[i][j][k] = 0;
+          sei->m_maiMantissaR[i][j][k] = 0;
+        }
+        sei->m_maiSignT[i][j] = 0;
+        sei->m_maiExponentT[i][j] = 0;
+        sei->m_maiMantissaT[i][j] = 0;
+      }
+    }
+  }
+}
+
+#if JVET_W0078_MVP_SEI 
+void SEIEncoder::initSEIMultiviewViewPosition(SEIMultiviewViewPosition *sei)
+{
+  CHECK(!(m_isInitialized), "Multiview view position SEI already initialized");
+  CHECK(!(sei != NULL), "Need a seiMultiviewViewPosition for initialization (got nullptr)");
+  sei->m_mvpNumViewsMinus1 = m_pcCfg->getMvpSEINumViewsMinus1();
+
+  int numViews = sei->m_mvpNumViewsMinus1 + 1;
+  sei->m_mvpViewPosition.resize(numViews);
+  for (int i = 0; i <= sei->m_mvpNumViewsMinus1; i++)
+  {
+    sei->m_mvpViewPosition[i] = m_pcCfg->getMvpSEIViewPosition(i);
+  }
+}
+#endif
+
+void SEIEncoder::initSEIAlphaChannelInfo(SEIAlphaChannelInfo *sei)
+{
+  CHECK(!(m_isInitialized), "Alpha channel information SEI already initialized");
+  CHECK(!(sei != NULL), "Need a seiAlphaChannelInfo for initialization (got nullptr)");
+  sei->m_aciCancelFlag = m_pcCfg->getAciSEICancelFlag();
+  sei->m_aciUseIdc = m_pcCfg->getAciSEIUseIdc();
+  sei->m_aciBitDepthMinus8 = m_pcCfg->getAciSEIBitDepthMinus8();
+  sei->m_aciTransparentValue = m_pcCfg->getAciSEITransparentValue();
+  sei->m_aciOpaqueValue = m_pcCfg->getAciSEIOpaqueValue();
+  sei->m_aciIncrFlag = m_pcCfg->getAciSEIIncrFlag();
+  sei->m_aciClipFlag = m_pcCfg->getAciSEIClipFlag();
+  sei->m_aciClipTypeFlag = m_pcCfg->getAciSEIClipTypeFlag();
+}
+
+void SEIEncoder::initSEIDepthRepresentationInfo(SEIDepthRepresentationInfo *sei)
+{
+  CHECK(!(m_isInitialized), "Depth representation information SEI already initialized");
+  CHECK(!(sei != NULL), "Need a seiDepthRepresentationInfo for initialization (got nullptr)");
+  sei->m_driZNearFlag = m_pcCfg->getDriSEIZNearFlag();
+  sei->m_driZFarFlag = m_pcCfg->getDriSEIZFarFlag();
+  sei->m_driDMinFlag = m_pcCfg->getDriSEIDMinFlag();
+  sei->m_driDMaxFlag = m_pcCfg->getDriSEIDMaxFlag();
+  sei->m_driZNear = m_pcCfg->getDriSEIZNear();
+  sei->m_driZFar = m_pcCfg->getDriSEIZFar();
+  sei->m_driDMin = m_pcCfg->getDriSEIDMin();
+  sei->m_driDMax = m_pcCfg->getDriSEIDMax();
+  sei->m_driDisparityRefViewId = m_pcCfg->getDriSEIDisparityRefViewId();
+  sei->m_driDepthRepresentationType = m_pcCfg->getDriSEIDepthRepresentationType();
+  sei->m_driDepthNonlinearRepresentationNumMinus1 = m_pcCfg->getDriSEINonlinearNumMinus1();
+  sei->m_driDepthNonlinearRepresentationModel.resize(sei->m_driDepthNonlinearRepresentationNumMinus1 + 1);
+  for(int i = 0; i < (sei->m_driDepthNonlinearRepresentationNumMinus1 + 1); i++)
+  {
+    sei->m_driDepthNonlinearRepresentationModel[i] = m_pcCfg->getDriSEINonlinearModel(i);
+  }
+}
+
+void SEIEncoder::initSEIColourTransformInfo(SEIColourTransformInfo* seiCTI)
+{
+  CHECK(!(m_isInitialized), "Unspecified error");
+  CHECK(!(seiCTI != NULL), "Unspecified error");
+
+  //  Set SEI message parameters read from command line options
+  seiCTI->m_id = m_pcCfg->getCtiSEIId();
+  seiCTI->m_signalInfoFlag = m_pcCfg->getCtiSEISignalInfoFlag();
+  seiCTI->m_fullRangeFlag = m_pcCfg->getCtiSEIFullRangeFlag();
+  seiCTI->m_primaries = m_pcCfg->getCtiSEIPrimaries();
+  seiCTI->m_transferFunction = m_pcCfg->getCtiSEITransferFunction();
+  seiCTI->m_matrixCoefs = m_pcCfg->getCtiSEIMatrixCoefs();
+  seiCTI->m_crossComponentFlag = m_pcCfg->getCtiSEICrossComponentFlag();
+  seiCTI->m_crossComponentInferred = m_pcCfg->getCtiSEICrossComponentInferred();
+  seiCTI->m_numberChromaLutMinus1 = m_pcCfg->getCtiSEINbChromaLut() - 1;
+  seiCTI->m_chromaOffset = m_pcCfg->getCtiSEIChromaOffset();
+
+  seiCTI->m_bitdepth = m_pcCfg->getBitDepth(CHANNEL_TYPE_LUMA);
+
+  for (int i = 0; i < MAX_NUM_COMPONENT; i++) {
+    seiCTI->m_lut[i] = m_pcCfg->getCtiSEILut(i);
+  }
+  seiCTI->m_log2NumberOfPointsPerLut = floorLog2(seiCTI->m_lut[0].numLutValues - 1);
+}
+
 void SEIEncoder::initSEISubpictureLevelInfo(SEISubpicureLevelInfo *sei, const SPS *sps)
 {
   const EncCfgParam::CfgSEISubpictureLevel &cfgSubPicLevel = m_pcCfg->getSubpicureLevelInfoSEICfg();
diff --git a/source/Lib/EncoderLib/SEIEncoder.h b/source/Lib/EncoderLib/SEIEncoder.h
index 0fc26901da38b2de45d0a9cc25a7dd667c791223..c608bb57adefdcb84f2d4f88f54fb830b843905d 100644
--- a/source/Lib/EncoderLib/SEIEncoder.h
+++ b/source/Lib/EncoderLib/SEIEncoder.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -68,11 +68,12 @@ public:
   void initSEIFramePacking(SEIFramePacking *sei, int currPicNum);
   void initSEIParameterSetsInclusionIndication(SEIParameterSetsInclusionIndication* sei);
   void initSEIDependentRAPIndication(SEIDependentRAPIndication *sei);
+  void initSEIExtendedDrapIndication(SEIExtendedDrapIndication *sei);
   void initSEIBufferingPeriod(SEIBufferingPeriod *sei, bool noLeadingPictures);
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
   void initSEIAlternativeTransferCharacteristics(SEIAlternativeTransferCharacteristics *sei);
 #endif
-  void initSEIScalableNesting(SEIScalableNesting *scalableNestingSEI, SEIMessages &nestedSEIs, const std::vector<int> &targetOLSs, const std::vector<int> &targetLayers, const std::vector<uint16_t> &subpictureIDs);
+  void initSEIScalableNesting(SEIScalableNesting *scalableNestingSEI, SEIMessages &nestedSEIs, const std::vector<int> &targetOLSs, const std::vector<int> &targetLayers, const std::vector<uint16_t> &subpictureIDs, uint16_t maxSubpicIdInPic);
   void initDecodedPictureHashSEI(SEIDecodedPictureHash *sei, PelUnitBuf& pic, std::string &rHashString, const BitDepths &bitDepths);
   void initSEIErp(SEIEquirectangularProjection *sei);
   void initSEISphereRotation(SEISphereRotation *sei);
@@ -86,6 +87,16 @@ public:
   void initSEIContentLightLevel(SEIContentLightLevelInfo *sei);
   void initSEIAmbientViewingEnvironment(SEIAmbientViewingEnvironment *sei);
   void initSEIContentColourVolume(SEIContentColourVolume *sei);
+  void initSEIScalabilityDimensionInfo(SEIScalabilityDimensionInfo *sei);
+  void initSEIMultiviewAcquisitionInfo(SEIMultiviewAcquisitionInfo *sei);
+  void initSEIAlphaChannelInfo(SEIAlphaChannelInfo *sei);
+  void initSEIDepthRepresentationInfo(SEIDepthRepresentationInfo *sei);
+  bool initSEIAnnotatedRegions(SEIAnnotatedRegions *sei, int currPOC);
+  void initSEIColourTransformInfo(SEIColourTransformInfo* sei);
+  void readAnnotatedRegionSEI(std::istream &fic, SEIAnnotatedRegions *seiAnnoRegion, bool &failed);
+#if JVET_W0078_MVP_SEI
+  void initSEIMultiviewViewPosition(SEIMultiviewViewPosition *sei);
+#endif
 private:
   EncCfg* m_pcCfg;
   EncLib* m_pcEncLib;
diff --git a/source/Lib/EncoderLib/SEIwrite.cpp b/source/Lib/EncoderLib/SEIwrite.cpp
index e2897955574e0580ba05e006eb49b5d5d222e28c..9719068e0ca0e26cde175cc377dd2b5658e85edd 100644
--- a/source/Lib/EncoderLib/SEIwrite.cpp
+++ b/source/Lib/EncoderLib/SEIwrite.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -77,9 +77,15 @@ void SEIWriter::xWriteSEIpayloadData(OutputBitstream &bs, const SEI& sei, HRD &h
   case SEI::DEPENDENT_RAP_INDICATION:
     xWriteSEIDependentRAPIndication(*static_cast<const SEIDependentRAPIndication*>(&sei));
     break;
+  case SEI::EXTENDED_DRAP_INDICATION:
+    xWriteSEIEdrapIndication(*static_cast<const SEIExtendedDrapIndication*>(&sei));
+    break;
   case SEI::FRAME_PACKING:
     xWriteSEIFramePacking(*static_cast<const SEIFramePacking*>(&sei));
     break;
+  case SEI::DISPLAY_ORIENTATION:
+    xWriteSEIDisplayOrientation(*static_cast<const SEIDisplayOrientation*>(&sei));
+    break;
   case SEI::PARAMETER_SETS_INCLUSION_INDICATION:
     xWriteSEIParameterSetsInclusionIndication(*static_cast<const SEIParameterSetsInclusionIndication*>(&sei));
     break;
@@ -106,6 +112,23 @@ void SEIWriter::xWriteSEIpayloadData(OutputBitstream &bs, const SEI& sei, HRD &h
   case SEI::GENERALIZED_CUBEMAP_PROJECTION:
     xWriteSEIGeneralizedCubemapProjection(*static_cast<const SEIGeneralizedCubemapProjection*>(&sei));
     break;
+  case SEI::SCALABILITY_DIMENSION_INFO:
+    xWriteSEIScalabilityDimensionInfo(*static_cast<const SEIScalabilityDimensionInfo*>(&sei));
+    break;
+  case SEI::MULTIVIEW_ACQUISITION_INFO:
+    xWriteSEIMultiviewAcquisitionInfo(*static_cast<const SEIMultiviewAcquisitionInfo*>(&sei));
+    break;
+#if JVET_W0078_MVP_SEI 
+  case SEI::MULTIVIEW_VIEW_POSITION:
+    xWriteSEIMultiviewViewPosition(*static_cast<const SEIMultiviewViewPosition*>(&sei));
+    break;
+#endif
+  case SEI::ALPHA_CHANNEL_INFO:
+    xWriteSEIAlphaChannelInfo(*static_cast<const SEIAlphaChannelInfo*>(&sei));
+    break;
+  case SEI::DEPTH_REPRESENTATION_INFO:
+    xWriteSEIDepthRepresentationInfo(*static_cast<const SEIDepthRepresentationInfo*>(&sei));
+    break;
   case SEI::USER_DATA_REGISTERED_ITU_T_T35:
     xWriteSEIUserDataRegistered(*static_cast<const SEIUserDataRegistered*>(&sei));
     break;
@@ -121,12 +144,23 @@ void SEIWriter::xWriteSEIpayloadData(OutputBitstream &bs, const SEI& sei, HRD &h
   case SEI::CONTENT_COLOUR_VOLUME:
     xWriteSEIContentColourVolume(*static_cast<const SEIContentColourVolume*>(&sei));
     break;
+  case SEI::COLOUR_TRANSFORM_INFO:
+    xWriteSEIColourTransformInfo(*static_cast<const SEIColourTransformInfo*>(&sei));
+    break;
   case SEI::SUBPICTURE_LEVEL_INFO:
     xWriteSEISubpictureLevelInfo(*static_cast<const SEISubpicureLevelInfo*>(&sei));
     break;
   case SEI::SAMPLE_ASPECT_RATIO_INFO:
     xWriteSEISampleAspectRatioInfo(*static_cast<const SEISampleAspectRatioInfo*>(&sei));
     break;
+  case SEI::ANNOTATED_REGIONS:
+    xWriteSEIAnnotatedRegions(*static_cast<const SEIAnnotatedRegions*>(&sei));
+    break;
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  case SEI::CONSTRAINED_RASL_ENCODING:
+    xWriteSEIConstrainedRaslIndication(*static_cast<const SEIConstrainedRaslIndication*>(&sei));
+    break;
+#endif
   default:
     THROW("Trying to write unhandled SEI message");
     break;
@@ -228,10 +262,8 @@ void SEIWriter::xWriteSEIDecodedPictureHash(const SEIDecodedPictureHash& sei)
   if (traceString != 0) //use of this variable is needed to avoid a compiler error with G++ 4.6.1
   {
     WRITE_CODE(sei.method, 8, "dph_sei_hash_type");
-#if FIX_TICKET_1405
     WRITE_CODE(sei.singleCompFlag, 1, "dph_sei_single_component_flag");
     WRITE_CODE(0, 7, "dph_sei_reserved_zero_7bits");
-#endif
     for(uint32_t i=0; i<uint32_t(sei.m_pictureHash.hash.size()); i++)
     {
       WRITE_CODE(sei.m_pictureHash.hash[i], 8, traceString);
@@ -255,10 +287,11 @@ void SEIWriter::xWriteSEIDecodingUnitInfo(const SEIDecodingUnitInfo& sei, const
         WRITE_CODE( sei.m_duSptCpbRemovalDelayIncrement[i], bp.getDuCpbRemovalDelayIncrementLength(), "du_spt_cpb_removal_delay_increment[i]");
     }
   }
-  if (bp.m_decodingUnitDpbDuParamsInPicTimingSeiFlag)
+  if (!bp.m_decodingUnitDpbDuParamsInPicTimingSeiFlag)
   {
     WRITE_FLAG(sei.m_dpbOutputDuDelayPresentFlag, "dpb_output_du_delay_present_flag");
   }
+ 
   if(sei.m_dpbOutputDuDelayPresentFlag)
   {
     WRITE_CODE(sei.m_picSptDpbOutputDuDelay, bp.getDpbOutputDelayDuLength(), "pic_spt_dpb_output_du_delay");
@@ -328,13 +361,13 @@ void SEIWriter::xWriteSEIBufferingPeriod(const SEIBufferingPeriod& sei)
       {
         for( int j = 0; j < sei.m_bpCpbCnt; j ++ )
         {
-          WRITE_CODE( sei.m_initialCpbRemovalDelay[j][i][nalOrVcl],  sei.m_initialCpbRemovalDelayLength,           "initial_cpb_removal_delay[j][i][nalOrVcl]" );
-          WRITE_CODE( sei.m_initialCpbRemovalOffset[j][i][nalOrVcl], sei.m_initialCpbRemovalDelayLength,           "initial_cpb_removal_delay_offset[j][i][nalOrVcl]" );
+          WRITE_CODE( sei.m_initialCpbRemovalDelay[i][j][nalOrVcl],  sei.m_initialCpbRemovalDelayLength,           "initial_cpb_removal_delay[i][j][nalOrVcl]" );
+          WRITE_CODE( sei.m_initialCpbRemovalOffset[i][j][nalOrVcl], sei.m_initialCpbRemovalDelayLength,           "initial_cpb_removal_delay_offset[i][j][nalOrVcl]" );
         }
       }
     }
   }
-  if (sei.m_bpMaxSubLayers-1 > 0) 
+  if (sei.m_bpMaxSubLayers-1 > 0)
   {
     WRITE_FLAG(sei.m_sublayerDpbOutputOffsetsPresentFlag, "bp_sublayer_dpb_output_offsets_present_flag");
   }
@@ -453,11 +486,7 @@ void SEIWriter::xWriteSEIPictureTiming(const SEIPictureTiming& sei, const SEIBuf
       }
     }
   }
-#if !JVET_S0175_ASPECT5
-  WRITE_UVLC( sei.m_ptDisplayElementalPeriodsMinus1,          "pt_display_elemental_periods_minus1" );
-#else
   WRITE_CODE( sei.m_ptDisplayElementalPeriodsMinus1, 8,       "pt_display_elemental_periods_minus1" );
-#endif
 }
 
 void SEIWriter::xWriteSEIFrameFieldInfo(const SEIFrameFieldInfo& sei)
@@ -479,11 +508,7 @@ void SEIWriter::xWriteSEIFrameFieldInfo(const SEIFrameFieldInfo& sei)
     {
       WRITE_FLAG( sei.m_topFieldFirstFlag ? 1 : 0,            "ffi_top_field_first_flag" );
     }
-#if !JVET_S0175_ASPECT5
-    WRITE_UVLC( sei.m_displayElementalPeriodsMinus1,          "ffi_display_elemental_periods_minus1" );
-#else
     WRITE_CODE( sei.m_displayElementalPeriodsMinus1, 8,       "ffi_display_elemental_periods_minus1" );
-#endif
   }
   WRITE_CODE( sei.m_sourceScanType, 2,                        "ffi_source_scan_type" );
   WRITE_FLAG( sei.m_duplicateFlag ? 1 : 0,                    "ffi_duplicate_flag" );
@@ -494,6 +519,18 @@ void SEIWriter::xWriteSEIDependentRAPIndication(const SEIDependentRAPIndication&
   // intentionally empty
 }
 
+void SEIWriter::xWriteSEIEdrapIndication(const SEIExtendedDrapIndication& sei)
+{
+  WRITE_CODE( sei.m_edrapIndicationRapIdMinus1, 16,        "edrap_rap_id_minus1" );
+  WRITE_FLAG( sei.m_edrapIndicationLeadingPicturesDecodableFlag ? 1 : 0, "edrap_leading_pictures_decodable_flag" );
+  WRITE_CODE( sei.m_edrapIndicationReservedZero12Bits, 12, "edrap_reserved_zero_12bits" );
+  WRITE_CODE( sei.m_edrapIndicationNumRefRapPicsMinus1, 3, "edrap_num_ref_rap_pics_minus1" );
+  for (int i = 0; i <= sei.m_edrapIndicationNumRefRapPicsMinus1; i++)
+  {
+    WRITE_CODE( sei.m_edrapIndicationRefRapId[i], 16, "edrap_ref_rap_id[i]" );
+  }
+}
+
 void SEIWriter::xWriteSEIScalableNesting(OutputBitstream& bs, const SEIScalableNesting& sei)
 {
   CHECK (sei.m_nestedSEIs.size()<1, "There must be at lease one SEI message nested in the scalable nesting SEI.")
@@ -585,6 +622,18 @@ void SEIWriter::xWriteSEIFramePacking(const SEIFramePacking& sei)
 }
 
 
+void SEIWriter::xWriteSEIDisplayOrientation(const SEIDisplayOrientation& sei)
+{
+  WRITE_FLAG(sei.m_doCancelFlag, "display_orientation_cancel_flag");
+
+  if (sei.m_doCancelFlag == 0)
+  {
+    WRITE_FLAG(sei.m_doPersistenceFlag, "display_orientation_persistence_flag");
+    WRITE_CODE(sei.m_doTransformType, 3, "display_orientation_transform_type");
+    WRITE_CODE(0, 3, "display_orientation_reserved_zero_3bits");
+  }
+}
+
 void SEIWriter::xWriteSEIParameterSetsInclusionIndication(const SEIParameterSetsInclusionIndication& sei)
 {
   WRITE_FLAG(sei.m_selfContainedClvsFlag, "psii_self_contained_clvs_flag");
@@ -608,6 +657,96 @@ void SEIWriter::xWriteSEIMasteringDisplayColourVolume(const SEIMasteringDisplayC
   WRITE_CODE( sei.values.minLuminance,     32,  "mdcv_min_display_mastering_luminance" );
 }
 
+void SEIWriter::xWriteSEIAnnotatedRegions(const SEIAnnotatedRegions &sei)   // writes header flags, then label updates, then object updates
+{
+  WRITE_FLAG(sei.m_hdr.m_cancelFlag, "ar_cancel_flag");
+  if (!sei.m_hdr.m_cancelFlag)   // nothing else is written when the message cancels
+  {
+    WRITE_FLAG(sei.m_hdr.m_notOptimizedForViewingFlag, "ar_not_optimized_for_viewing_flag");
+    WRITE_FLAG(sei.m_hdr.m_trueMotionFlag, "ar_true_motion_flag");
+    WRITE_FLAG(sei.m_hdr.m_occludedObjectFlag, "ar_occluded_object_flag");
+    WRITE_FLAG(sei.m_hdr.m_partialObjectFlagPresentFlag, "ar_partial_object_flag_present_flag");
+    WRITE_FLAG(sei.m_hdr.m_objectLabelPresentFlag, "ar_object_label_present_flag");
+    WRITE_FLAG(sei.m_hdr.m_objectConfidenceInfoPresentFlag, "ar_object_confidence_info_present_flag");
+    if (sei.m_hdr.m_objectConfidenceInfoPresentFlag)
+    {
+      assert(sei.m_hdr.m_objectConfidenceLength <= 16 && sei.m_hdr.m_objectConfidenceLength>0);   // length is coded minus 1 in 4 bits, so it must be 1..16
+      WRITE_CODE((sei.m_hdr.m_objectConfidenceLength - 1), 4, "ar_object_confidence_length_minus_1");
+    }
+    if (sei.m_hdr.m_objectLabelPresentFlag)
+    {
+      WRITE_FLAG(sei.m_hdr.m_objectLabelLanguagePresentFlag, "ar_object_label_language_present_flag");
+      if (sei.m_hdr.m_objectLabelLanguagePresentFlag)
+      {
+        xWriteByteAlign();   // the language string starts on a byte boundary
+        assert(sei.m_hdr.m_annotatedRegionsObjectLabelLang.size()<256);
+        for (uint32_t j = 0; j < sei.m_hdr.m_annotatedRegionsObjectLabelLang.size(); j++)
+        {
+          char ch = sei.m_hdr.m_annotatedRegionsObjectLabelLang[j];
+          WRITE_CODE(ch, 8, "ar_object_label_language");
+        }
+        WRITE_CODE('\0', 8, "ar_label_language");   // terminating NUL byte of the language string
+      }
+    }
+    WRITE_UVLC((uint32_t)sei.m_annotatedLabels.size(), "ar_num_label_updates");   // NOTE(review): written even when m_objectLabelPresentFlag is 0 - confirm against the reader
+    assert(sei.m_annotatedLabels.size()<256);
+    for(auto it=sei.m_annotatedLabels.begin(); it!=sei.m_annotatedLabels.end(); it++)   // one entry per label: key is the label index
+    {
+      assert(it->first < 256);
+      WRITE_UVLC(it->first, "ar_label_idx[]");
+      const SEIAnnotatedRegions::AnnotatedRegionLabel &ar=it->second;
+      WRITE_FLAG(!ar.labelValid, "ar_label_cancel_flag");   // the cancel flag is the inverse of labelValid
+      if (ar.labelValid)
+      {
+        xWriteByteAlign();   // the label string starts on a byte boundary
+        assert(ar.label.size()<256);
+        for (uint32_t j = 0; j < ar.label.size(); j++)
+        {
+          char ch = ar.label[j];
+          WRITE_CODE(ch, 8, "ar_label[]");
+        }
+        WRITE_CODE('\0', 8, "ar_label[]");   // terminating NUL byte of the label string
+      }
+    }
+    WRITE_UVLC((uint32_t)sei.m_annotatedRegions.size(), "ar_num_object_updates");
+    assert(sei.m_annotatedRegions.size()<256);
+    for (auto it=sei.m_annotatedRegions.begin(); it!=sei.m_annotatedRegions.end(); it++)   // one entry per object: key is the object index
+    {
+      const SEIAnnotatedRegions::AnnotatedRegionObject &ar = it->second;
+      WRITE_UVLC(it->first, "ar_object_idx");
+      WRITE_FLAG(ar.objectCancelFlag, "ar_object_cancel_flag");
+      if (!ar.objectCancelFlag)
+      {
+        if (sei.m_hdr.m_objectLabelPresentFlag)   // label references exist only when the header enables object labels
+        {
+          WRITE_FLAG(ar.objectLabelValid, "ar_object_label_update_flag");
+          if (ar.objectLabelValid)
+          {
+            assert(ar.objLabelIdx<256);
+            WRITE_UVLC(ar.objLabelIdx, "ar_object_label_idx");
+          }
+        }
+        WRITE_FLAG(ar.boundingBoxValid, "ar_object_bounding_box_update_flag");
+        if (ar.boundingBoxValid)
+        {
+          WRITE_CODE(ar.boundingBoxTop,   16, "ar_bounding_box_top");
+          WRITE_CODE(ar.boundingBoxLeft,  16, "ar_bounding_box_left");
+          WRITE_CODE(ar.boundingBoxWidth, 16, "ar_bounding_box_width");
+          WRITE_CODE(ar.boundingBoxHeight,16, "ar_bounding_box_height");
+          if (sei.m_hdr.m_partialObjectFlagPresentFlag)
+          {
+            WRITE_UVLC(ar.partialObjectFlag, "ar_partial_object_flag");   // NOTE(review): a flag written with WRITE_UVLC - confirm against the reader and the SEI syntax
+          }
+          if (sei.m_hdr.m_objectConfidenceInfoPresentFlag)
+          {
+            assert(ar.objectConfidence < (1<<sei.m_hdr.m_objectConfidenceLength));   // value must fit in the signalled bit length
+            WRITE_CODE(ar.objectConfidence, sei.m_hdr.m_objectConfidenceLength, "ar_object_confidence");
+          }
+        }
+      }
+    }
+  }
+}
 void SEIWriter::xWriteByteAlign()
 {
   if( m_pcBitIf->getNumberOfWrittenBits() % 8 != 0)
@@ -752,6 +891,245 @@ void SEIWriter::xWriteSEIGeneralizedCubemapProjection(const SEIGeneralizedCubema
   }
 }
 
+void SEIWriter::xWriteSEIScalabilityDimensionInfo(const SEIScalabilityDimensionInfo &sei)   // writes the layer count, the two info flags, then per-layer data
+{
+  WRITE_CODE(sei.m_sdiMaxLayersMinus1, 6,                           "sdi_max_layers_minus1");
+  WRITE_FLAG(sei.m_sdiMultiviewInfoFlag,                            "sdi_multiview_info_flag");
+  WRITE_FLAG(sei.m_sdiAuxiliaryInfoFlag,                            "sdi_auxiliary_info_flag");
+  if (sei.m_sdiMultiviewInfoFlag || sei.m_sdiAuxiliaryInfoFlag)   // per-layer data is written only if at least one info flag is set
+  {
+    if (sei.m_sdiMultiviewInfoFlag)
+    {
+      WRITE_CODE(sei.m_sdiViewIdLenMinus1, 4,                              "sdi_view_id_len_minus1");   // bit length (minus 1) of each view id written below
+    }
+    for (int i = 0; i <= sei.m_sdiMaxLayersMinus1; i++)   // inclusive bound: MaxLayersMinus1 + 1 layers
+    {
+      WRITE_CODE(sei.m_sdiLayerId[i], 6,                                         "sdi_layer_id");
+      if (sei.m_sdiMultiviewInfoFlag)
+      {
+        WRITE_CODE(sei.m_sdiViewIdVal[i], sei.m_sdiViewIdLenMinus1 + 1,       "sdi_view_id_val");   // variable length taken from sdi_view_id_len_minus1
+      }
+      if (sei.m_sdiAuxiliaryInfoFlag)
+      {
+        WRITE_CODE(sei.m_sdiAuxId[i], 8,                           "sdi_aux_id");
+        if (sei.m_sdiAuxId[i] > 0)   // associated primary layers are listed only for a non-zero aux id
+        {
+          WRITE_CODE(sei.m_sdiNumAssociatedPrimaryLayersMinus1[i], 6,          "sdi_num_associated_primary_layers_minus1");
+          for (int j = 0; j <= sei.m_sdiNumAssociatedPrimaryLayersMinus1[i]; j++)
+          {
+            WRITE_CODE(sei.m_sdiAssociatedPrimaryLayerIdx[i][j], 6,               "sdi_associated_primary_layer_idx");
+          }
+        }
+      }
+    }
+  }
+}
+
+void SEIWriter::xWriteSEIMultiviewAcquisitionInfo(const SEIMultiviewAcquisitionInfo& sei)   // writes presence flags, view count, then intrinsic and extrinsic camera parameters
+{
+  WRITE_FLAG( ( sei.m_maiIntrinsicParamFlag ? 1 : 0 ), "intrinsic_param_flag" );
+  WRITE_FLAG( ( sei.m_maiExtrinsicParamFlag ? 1 : 0 ), "extrinsic_param_flag" );
+  WRITE_UVLC(   sei.m_maiNumViewsMinus1               , "num_views_minus1"           );
+  if( sei.m_maiIntrinsicParamFlag )
+  {
+    WRITE_FLAG( ( sei.m_maiIntrinsicParamsEqualFlag ? 1 : 0 ), "intrinsic_params_equal_flag" );
+    WRITE_UVLC(   sei.m_maiPrecFocalLength                   , "prec_focal_length"           );
+    WRITE_UVLC(   sei.m_maiPrecPrincipalPoint                , "prec_principal_point"        );
+    WRITE_UVLC(   sei.m_maiPrecSkewFactor                    , "prec_skew_factor"            );
+    // One parameter set if all views share intrinsics, otherwise one per view; each value is sign, 6-bit exponent, then a mantissa whose length comes from sei.
+    for( int i = 0; i  <=  ( sei.m_maiIntrinsicParamsEqualFlag ? 0 : sei.m_maiNumViewsMinus1 ); i++ )
+    {
+      WRITE_FLAG( ( sei.m_maiSignFocalLengthX       [i] ? 1 : 0 ),                                         "sign_focal_length_x"        );
+      WRITE_CODE(   sei.m_maiExponentFocalLengthX   [i]          , 6                                  ,    "exponent_focal_length_x"    );
+      WRITE_CODE(   sei.m_maiMantissaFocalLengthX   [i]          , sei.getMantissaFocalLengthXLen( i ),    "mantissa_focal_length_x"    );
+      WRITE_FLAG( ( sei.m_maiSignFocalLengthY       [i] ? 1 : 0 ),                                         "sign_focal_length_y"        );
+      WRITE_CODE(   sei.m_maiExponentFocalLengthY   [i]          , 6                                  ,    "exponent_focal_length_y"    );
+      WRITE_CODE(   sei.m_maiMantissaFocalLengthY   [i]          , sei.getMantissaFocalLengthYLen( i ),    "mantissa_focal_length_y"    );
+      WRITE_FLAG( ( sei.m_maiSignPrincipalPointX    [i] ? 1 : 0 ),                                         "sign_principal_point_x"     );
+      WRITE_CODE(   sei.m_maiExponentPrincipalPointX[i]          , 6,                                      "exponent_principal_point_x" );
+      WRITE_CODE(   sei.m_maiMantissaPrincipalPointX[i]          , sei.getMantissaPrincipalPointXLen( i ), "mantissa_principal_point_x" );
+      WRITE_FLAG( ( sei.m_maiSignPrincipalPointY    [i] ? 1 : 0 ),                                         "sign_principal_point_y"     );
+      WRITE_CODE(   sei.m_maiExponentPrincipalPointY[i]          , 6,                                      "exponent_principal_point_y" );
+      WRITE_CODE(   sei.m_maiMantissaPrincipalPointY[i]          , sei.getMantissaPrincipalPointYLen( i ), "mantissa_principal_point_y" );
+      WRITE_FLAG( ( sei.m_maiSignSkewFactor         [i] ? 1 : 0 ),                                         "sign_skew_factor"           );
+      WRITE_CODE(   sei.m_maiExponentSkewFactor     [i]          , 6,                                      "exponent_skew_factor"       );
+      WRITE_CODE(   sei.m_maiMantissaSkewFactor     [i]          , sei.getMantissaSkewFactorLen( i )  ,    "mantissa_skew_factor"       );
+    }
+  }
+  if( sei.m_maiExtrinsicParamFlag )
+  {
+    WRITE_UVLC( sei.m_maiPrecRotationParam   , "prec_rotation_param"    );
+    WRITE_UVLC( sei.m_maiPrecTranslationParam, "prec_translation_param" );
+    for( int i = 0; i  <=  sei.m_maiNumViewsMinus1; i++ )   // extrinsics are always written per view
+    {
+      for( int j = 0; j  <=  2; j++ )  /* row */
+      {
+        for( int k = 0; k  <=  2; k++ )  /* column */
+        {
+          WRITE_FLAG( ( sei.m_maiSignR    [i][j][k] ? 1 : 0 ),                                "sign_r"     );
+          WRITE_CODE(   sei.m_maiExponentR[i][j][k]          , 6,                             "exponent_r" );
+          WRITE_CODE(   sei.m_maiMantissaR[i][j][k]          , sei.getMantissaRLen( i,j,k ) , "mantissa_r" );
+        }
+        WRITE_FLAG( ( sei.m_maiSignT    [i][j] ? 1 : 0 ),                          "sign_t"     );   // the row's translation entry follows its three rotation entries
+        WRITE_CODE(   sei.m_maiExponentT[i][j]          , 6,                       "exponent_t" );
+        WRITE_CODE(   sei.m_maiMantissaT[i][j]          , sei.getMantissaTLen( i,j ),"mantissa_t" );
+      }
+    }
+  }
+}
+
+#if JVET_W0078_MVP_SEI
+void SEIWriter::xWriteSEIMultiviewViewPosition(const SEIMultiviewViewPosition& sei)   // writes the view count, then one position per view
+{
+  WRITE_UVLC(sei.m_mvpNumViewsMinus1, "num_views_minus1");   // view count is coded minus 1
+  for (int i = 0; i <= sei.m_mvpNumViewsMinus1; i++)   // inclusive bound: NumViewsMinus1 + 1 entries
+  {
+    WRITE_UVLC(sei.m_mvpViewPosition[i], "view_position");
+  }
+}
+#endif
+
+void SEIWriter::xWriteSEIAlphaChannelInfo( const SEIAlphaChannelInfo& sei)   // writes the cancel flag and, unless cancelled, the alpha channel parameters
+{
+  WRITE_FLAG( ( sei.m_aciCancelFlag ? 1 : 0 ), "alpha_channel_cancel_flag" );
+  if( !sei.m_aciCancelFlag )
+  {
+    WRITE_CODE( sei.m_aciUseIdc, 3, "alpha_channel_use_idc" );
+    WRITE_CODE( sei.m_aciBitDepthMinus8, 3, "alpha_channel_bit_depth_minus8" );
+    WRITE_CODE( sei.m_aciTransparentValue, sei.m_aciBitDepthMinus8+9, "alpha_transparent_value" );   // both sample values use bit_depth_minus8 + 9 bits
+    WRITE_CODE( sei.m_aciOpaqueValue, sei.m_aciBitDepthMinus8+9, "alpha_opaque_value" );
+    WRITE_FLAG( ( sei.m_aciIncrFlag ? 1 : 0 ), "alpha_channel_incr_flag" );
+    WRITE_FLAG( ( sei.m_aciClipFlag ? 1 : 0 ), "alpha_channel_clip_flag" );
+    if( sei.m_aciClipFlag )
+    {
+      WRITE_FLAG( ( sei.m_aciClipTypeFlag ? 1 : 0 ), "alpha_channel_clip_type_flag" );   // the clip type is present only when clipping is enabled
+    }
+  }
+}
+
+void SEIWriter::xWriteSEIDepthRepresentationInfo( const SEIDepthRepresentationInfo& sei)   // presence flags, type, optional range values, optional model table
+{
+  WRITE_FLAG(sei.m_driZNearFlag ? 1 : 0, "z_near_flag");
+  WRITE_FLAG(sei.m_driZFarFlag ? 1 : 0, "z_far_flag");
+  WRITE_FLAG(sei.m_driDMinFlag ? 1 : 0, "d_min_flag");
+  WRITE_FLAG(sei.m_driDMaxFlag ? 1 : 0, "d_max_flag");
+  WRITE_UVLC(sei.m_driDepthRepresentationType, "depth_representation_type");
+  if (sei.m_driDMinFlag || sei.m_driDMaxFlag)
+  {
+    WRITE_UVLC(sei.m_driDisparityRefViewId, "disparity_ref_view_id");   // written only when a disparity bound is present
+  }
+  if (sei.m_driZNearFlag)
+  {
+    xWriteSEIDepthRepInfoElement(sei.m_driZNear);
+  }
+  if (sei.m_driZFarFlag)
+  {
+    xWriteSEIDepthRepInfoElement(sei.m_driZFar);
+  }
+  if (sei.m_driDMinFlag)
+  {
+    xWriteSEIDepthRepInfoElement(sei.m_driDMin);
+  }
+  if (sei.m_driDMaxFlag)
+  {
+    xWriteSEIDepthRepInfoElement(sei.m_driDMax);
+  }
+  if (sei.m_driDepthRepresentationType != 3)
+  {
+    return;   // only representation type 3 carries the model table below
+  }
+  WRITE_UVLC(sei.m_driDepthNonlinearRepresentationNumMinus1, "depth_nonlinear_representation_num_minus1");
+  for (int idx = 0; idx <= sei.m_driDepthNonlinearRepresentationNumMinus1; idx++)   // NumMinus1 + 1 model entries
+  {
+    WRITE_UVLC(sei.m_driDepthNonlinearRepresentationModel.at(idx),"depth_nonlinear_representation_model[ i ]");
+  }
+}
+
+void SEIWriter::xWriteSEIDepthRepInfoElement( double f )   // writes f as sign, 7-bit biased exponent, mantissa length and mantissa
+{
+  uint32_t x_sign, x_exp, x_mantissa,x_mantissa_len;
+  if (f < 0)
+  {
+    f = f * (-1);
+    x_sign = 1;
+  }
+  else
+  {
+    x_sign = 0;
+  }
+  int exponent=0;
+  if(f >= 1)
+  {
+    while(f>=2)
+    {
+      exponent++;
+      f = f/2;
+    }
+  }
+  else
+  {
+    while (f > 0 && f < 1)   // f == 0 can never be doubled up to 1; without the guard this loop never terminates
+    {
+      exponent++;
+      f = f*2;
+    }
+    exponent = (f > 0) ? -exponent : -31;   // zero is coded as da_exponent == 0 with an all-zero mantissa
+  }
+  // From here on f is in [1, 2) for any non-zero input; its fractional part becomes the mantissa bits.
+  int i;
+  f = f -1;
+  double s = 1;
+  char s_mantissa[32];
+  double thr=1.0/(4.0*(1<<30));
+  // thr == 2^-32: a remainder below it cannot set any of the (at most) 32 mantissa bits.
+  if (f>=thr)
+  {
+    for(i=0;i<32;i++)
+    {
+      s /= 2;
+      if(f>=s)
+      {
+        f = f-s;
+        s_mantissa[i]=1;
+        // Stop at the last set bit so the mantissa is as short as possible.
+        if (f<thr)
+          break;
+      }
+      else
+      {
+        s_mantissa[i]=0;
+      }
+    }
+    // i is the index of the last bit produced when the loop broke early, or 32 when it ran to completion.
+    if (i<32)
+      x_mantissa_len=i+1;
+    else
+      x_mantissa_len=32;
+
+    x_mantissa=0;
+    // Pack the collected bits MSB-first into x_mantissa.
+    for(i=0;i<(int)x_mantissa_len;i++)
+    {
+      if (s_mantissa[i]==1)
+        x_mantissa += (1u)<<(x_mantissa_len-1-i) ;
+    }
+
+  }
+  else
+  {
+    x_mantissa=0;
+    x_mantissa_len=1;
+  }
+  // The exponent is stored with a bias of 31 and must fit in 7 bits.
+  assert(exponent>=-31 && exponent<= (1<<7)-32);
+  x_exp=exponent+31;
+
+  WRITE_FLAG( x_sign,                          "da_sign_flag" );
+  WRITE_CODE( x_exp, 7 ,                       "da_exponent" );
+  WRITE_CODE( x_mantissa_len-1, 5 ,            "da_mantissa_len_minus1" );
+  WRITE_CODE( x_mantissa, x_mantissa_len ,     "da_mantissa" );
+}
+
 void SEIWriter::xWriteSEISubpictureLevelInfo(const SEISubpicureLevelInfo &sei)
 {
   CHECK(sei.m_numRefLevels < 1, "SEISubpicureLevelInfo: numRefLevels must be greater than zero");
@@ -922,4 +1300,65 @@ void SEIWriter::xWriteSEIContentColourVolume(const SEIContentColourVolume &sei)
   }
 }
 
+void SEIWriter::xWriteSEIColourTransformInfo(const SEIColourTransformInfo& sei)
+{
+  // Cancel and persistence flags are not read from sei; both are always written as 0.
+  bool cancelFlag      = false;
+  bool persistenceFlag = false;
+
+  WRITE_UVLC(sei.m_id, "colour_transform_id");
+  WRITE_FLAG(cancelFlag, "colour_transform_cancel_flag");
+  if (cancelFlag)
+  {
+    return;
+  }
+
+  WRITE_FLAG(persistenceFlag, "colour_transform_persistence_flag");
+  WRITE_FLAG(sei.m_signalInfoFlag, "colour_transform_video_signal_info_present_flag");
+  if (sei.m_signalInfoFlag)
+  {
+    WRITE_FLAG(sei.m_fullRangeFlag, "colour_transform_full_range_flag");
+    WRITE_CODE(sei.m_primaries, 8, "colour_transform_primaries");
+    WRITE_CODE(sei.m_transferFunction, 8, "colour_transform_transfer_function");
+    WRITE_CODE(sei.m_matrixCoefs, 8, "colour_transform_matrix_coefficients");
+  }
+  WRITE_CODE(sei.m_bitdepth - 8, 4, "colour_transform_bit_depth_minus8");
+  WRITE_CODE(sei.m_log2NumberOfPointsPerLut - 1, 3, "colour_transform_log2_number_of_points_per_lut_minus1");
+  WRITE_FLAG(sei.m_crossComponentFlag, "colour_transform_cross_comp_flag");
+  if (sei.m_crossComponentFlag)
+  {
+    WRITE_FLAG(sei.m_crossComponentInferred, "colour_transform_cross_comp_inferred");
+  }
+
+  // Every LUT entry and the chroma offset are written with the same code length.
+  const uint16_t lutBits = 2 + sei.m_bitdepth - sei.m_log2NumberOfPointsPerLut;
+  for (uint32_t k = 0; k < sei.m_lut[0].numLutValues; k++)
+  {
+    WRITE_CODE(sei.m_lut[0].lutValues[k], lutBits, "colour_transform_lut[0][i]");
+  }
+  if (sei.m_crossComponentFlag != 0 && sei.m_crossComponentInferred != 0)
+  {
+    WRITE_CODE(sei.m_chromaOffset, lutBits, "colour_transform_chroma_offset");   // inferred cross-component case: an offset replaces the chroma LUTs
+    return;
+  }
+  WRITE_FLAG(sei.m_numberChromaLutMinus1, "colour_transform_number_chroma_lut_minus1");
+  for (uint32_t k = 0; k < sei.m_lut[1].numLutValues; k++)
+  {
+    WRITE_CODE(sei.m_lut[1].lutValues[k], lutBits, "colour_transform_lut[1][i]");
+  }
+  if (sei.m_numberChromaLutMinus1 == 1)
+  {
+    for (uint32_t k = 0; k < sei.m_lut[2].numLutValues; k++)
+    {
+      WRITE_CODE(sei.m_lut[2].lutValues[k], lutBits, "colour_transform_lut[2][i]");
+    }
+  }
+}
+
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+void SEIWriter::xWriteSEIConstrainedRaslIndication(const SEIConstrainedRaslIndication& /*sei*/)
+{
+  // intentionally empty: this function writes no payload bits and does not use its argument
+}
+#endif
 //! \}
diff --git a/source/Lib/EncoderLib/SEIwrite.h b/source/Lib/EncoderLib/SEIwrite.h
index 912e0ee872704cef589b1ac6b1b93ddd7f65001b..4145c03cf4adf0fd154f3b1d641c7517d920ef5c 100644
--- a/source/Lib/EncoderLib/SEIwrite.h
+++ b/source/Lib/EncoderLib/SEIwrite.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -59,8 +59,10 @@ protected:
   void xWriteSEIPictureTiming(const SEIPictureTiming& sei, const SEIBufferingPeriod& bp, const uint32_t temporalId);
   void xWriteSEIFrameFieldInfo(const SEIFrameFieldInfo& sei);
   void xWriteSEIDependentRAPIndication(const SEIDependentRAPIndication& sei);
+  void xWriteSEIEdrapIndication(const SEIExtendedDrapIndication& sei);
   void xWriteSEIScalableNesting(OutputBitstream& bs, const SEIScalableNesting& sei);
   void xWriteSEIFramePacking(const SEIFramePacking& sei);
+  void xWriteSEIDisplayOrientation(const SEIDisplayOrientation& sei);
   void xWriteSEIParameterSetsInclusionIndication(const SEIParameterSetsInclusionIndication& sei);
   void xWriteSEIMasteringDisplayColourVolume( const SEIMasteringDisplayColourVolume& sei);
 #if U0033_ALTERNATIVE_TRANSFER_CHARACTERISTICS_SEI
@@ -71,14 +73,26 @@ protected:
   void xWriteSEIOmniViewport                      (const SEIOmniViewport& sei);
   void xWriteSEIRegionWisePacking                 (const SEIRegionWisePacking &sei);
   void xWriteSEIGeneralizedCubemapProjection      (const SEIGeneralizedCubemapProjection &sei);
+  void xWriteSEIScalabilityDimensionInfo          (const SEIScalabilityDimensionInfo& sei);
+  void xWriteSEIMultiviewAcquisitionInfo          (const SEIMultiviewAcquisitionInfo& sei);
+#if JVET_W0078_MVP_SEI 
+  void xWriteSEIMultiviewViewPosition             (const SEIMultiviewViewPosition& sei);
+#endif
+  void xWriteSEIAlphaChannelInfo                  (const SEIAlphaChannelInfo& sei);
+  void xWriteSEIDepthRepresentationInfo           (const SEIDepthRepresentationInfo& sei);
+  void xWriteSEIDepthRepInfoElement               (double f);
   void xWriteSEISubpictureLevelInfo               (const SEISubpicureLevelInfo &sei);
   void xWriteSEISampleAspectRatioInfo             (const SEISampleAspectRatioInfo &sei);
-
+#if JVET_W0133_CONSTRAINED_RASL_ENCODING
+  void xWriteSEIConstrainedRaslIndication         (const SEIConstrainedRaslIndication &sei);
+#endif
   void xWriteSEIUserDataRegistered(const SEIUserDataRegistered& sei);
   void xWriteSEIFilmGrainCharacteristics(const SEIFilmGrainCharacteristics& sei);
   void xWriteSEIContentLightLevelInfo(const SEIContentLightLevelInfo& sei);
   void xWriteSEIAmbientViewingEnvironment(const SEIAmbientViewingEnvironment& sei);
   void xWriteSEIContentColourVolume(const SEIContentColourVolume &sei);
+  void xWriteSEIColourTransformInfo(const SEIColourTransformInfo& sei);
+  void xWriteSEIAnnotatedRegions                  (const SEIAnnotatedRegions& sei);
   void xWriteSEIpayloadData(OutputBitstream &bs, const SEI& sei, HRD &hrd, const uint32_t temporalId);
   void xWriteByteAlign();
 protected:
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index e57585fa193d71ae5359fbdf1801c2c28b62a11f..389e69af44907518046ad6e818170f85e4a42046 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -131,13 +131,13 @@ void VLCWriter::xWriteCode     ( uint32_t uiCode, uint32_t uiLength )
 void VLCWriter::xWriteUvlc     ( uint32_t uiCode )
 {
   uint32_t uiLength = 1;
-  uint32_t uiTemp = ++uiCode;
+  uint32_t temp     = ++uiCode;
 
-  CHECK( !uiTemp, "Integer overflow" );
+  CHECK(!temp, "Integer overflow");
 
-  while( 1 != uiTemp )
+  while (1 != temp)
   {
-    uiTemp >>= 1;
+    temp >>= 1;
     uiLength += 2;
   }
   // Take care of cases where uiLength > 32
@@ -284,9 +284,8 @@ void HLSWriter::codePPS( const PPS* pcPPS )
     WRITE_UVLC(conf.getWindowBottomOffset(), "pps_conf_win_bottom_offset");
   }
   Window scalingWindow = pcPPS->getScalingWindow();
-
-  WRITE_FLAG( scalingWindow.getWindowEnabledFlag(), "pps_scaling_window_explicit_signalling_flag" );
-  if( scalingWindow.getWindowEnabledFlag() )
+  WRITE_FLAG( pcPPS->getExplicitScalingWindowFlag(), "pps_scaling_window_explicit_signalling_flag");
+  if ( pcPPS->getExplicitScalingWindowFlag() )
   {
     WRITE_SVLC( scalingWindow.getWindowLeftOffset(), "pps_scaling_win_left_offset" );
     WRITE_SVLC( scalingWindow.getWindowRightOffset(), "pps_scaling_win_right_offset" );
@@ -715,10 +714,8 @@ void HLSWriter::codeGeneralHrdparameters(const GeneralHrdParams * hrd)
   WRITE_CODE(hrd->getTimeScale(), 32, "time_scale");
   WRITE_FLAG(hrd->getGeneralNalHrdParametersPresentFlag() ? 1 : 0, "general_nal_hrd_parameters_present_flag");
   WRITE_FLAG(hrd->getGeneralVclHrdParametersPresentFlag() ? 1 : 0, "general_vcl_hrd_parameters_present_flag");
-#if JVET_S0175_ASPECT6
   if( hrd->getGeneralNalHrdParametersPresentFlag() || hrd->getGeneralVclHrdParametersPresentFlag() )
   {
-#endif
     WRITE_FLAG(hrd->getGeneralSamePicTimingInAllOlsFlag() ? 1 : 0, "general_same_pic_timing_in_all_ols_flag");
     WRITE_FLAG(hrd->getGeneralDecodingUnitHrdParamsPresentFlag() ? 1 : 0, "general_decoding_unit_hrd_params_present_flag");
     if (hrd->getGeneralDecodingUnitHrdParamsPresentFlag())
@@ -732,9 +729,7 @@ void HLSWriter::codeGeneralHrdparameters(const GeneralHrdParams * hrd)
       WRITE_CODE(hrd->getCpbSizeDuScale(), 4, "cpb_size_du_scale");
     }
     WRITE_UVLC(hrd->getHrdCpbCntMinus1(), "hrd_cpb_cnt_minus1");
-#if JVET_S0175_ASPECT6
   }
-#endif
 }
 void HLSWriter::codeOlsHrdParameters(const GeneralHrdParams * generalHrd, const OlsHrdParams *olsHrd, const uint32_t firstSubLayer, const uint32_t maxNumSubLayersMinus1)
 {
@@ -752,11 +747,7 @@ void HLSWriter::codeOlsHrdParameters(const GeneralHrdParams * generalHrd, const
     {
       WRITE_UVLC(hrd->getElementDurationInTcMinus1(), "elemental_duration_in_tc_minus1");
     }
-#if JVET_S0175_ASPECT6
     else if ( (generalHrd->getGeneralNalHrdParametersPresentFlag() || generalHrd->getGeneralVclHrdParametersPresentFlag()) && generalHrd->getHrdCpbCntMinus1() == 0)
-#else
-    else if (generalHrd->getHrdCpbCntMinus1() == 0)
-#endif
     {
       WRITE_FLAG(hrd->getLowDelayHrdFlag() ? 1 : 0, "low_delay_hrd_flag");
     }
@@ -905,7 +896,7 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
   WRITE_FLAG( pcSPS->getEntropyCodingSyncEnabledFlag() ? 1 : 0, "sps_entropy_coding_sync_enabled_flag" );
   WRITE_FLAG( pcSPS->getEntryPointsPresentFlag() ? 1 : 0, "sps_entry_point_offsets_present_flag" );
   WRITE_CODE(pcSPS->getBitsForPOC()-4, 4, "sps_log2_max_pic_order_cnt_lsb_minus4");
-  
+
   WRITE_FLAG(pcSPS->getPocMsbCycleFlag() ? 1 : 0, "sps_poc_msb_cycle_flag");
   if (pcSPS->getPocMsbCycleFlag())
   {
@@ -1272,6 +1263,15 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
   {
 #if ENABLE_TRACING /*|| RExt__DECODER_DEBUG_BIT_STATISTICS*/
     static const char *syntaxStrings[]={ "sps_range_extension_flag",
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+      "sps_extension_7bits[0]",
+      "sps_extension_7bits[1]",
+      "sps_extension_7bits[2]",
+      "sps_extension_7bits[3]",
+      "sps_extension_7bits[4]",
+      "sps_extension_7bits[5]",
+      "sps_extension_7bits[6]" };
+#else
       "sps_multilayer_extension_flag",
       "sps_extension_6bits[0]",
       "sps_extension_6bits[1]",
@@ -1280,6 +1280,13 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
       "sps_extension_6bits[4]",
       "sps_extension_6bits[5]" };
 #endif
+#endif
+
+#if JVET_W0178_CONSTRAINTS_ON_REXT_TOOLS
+    if (pcSPS->getBitDepth(CHANNEL_TYPE_LUMA) <= 10)
+      CHECK((sps_extension_flags[SPS_EXT__REXT] == 1),
+            "The value of sps_range_extension_flag shall be 0 when BitDepth is less than or equal to 10.");
+#endif
 
     for(int i=0; i<NUM_SPS_EXTENSION_FLAGS; i++)
     {
@@ -1296,13 +1303,31 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
         {
           const SPSRExt &spsRangeExtension=pcSPS->getSpsRangeExtension();
 
+#if !JVET_W2005_RANGE_EXTENSION_PROFILES
           WRITE_FLAG( (spsRangeExtension.getTransformSkipRotationEnabledFlag() ? 1 : 0),      "transform_skip_rotation_enabled_flag");
           WRITE_FLAG( (spsRangeExtension.getTransformSkipContextEnabledFlag() ? 1 : 0),       "transform_skip_context_enabled_flag");
+#endif
           WRITE_FLAG( (spsRangeExtension.getExtendedPrecisionProcessingFlag() ? 1 : 0),       "extended_precision_processing_flag" );
+#if JVET_W0070_W0121_SPSRE_CLEANUP
+          if (pcSPS->getTransformSkipEnabledFlag())
+          {
+            WRITE_FLAG( (spsRangeExtension.getTSRCRicePresentFlag() ? 1 : 0),                 "sps_ts_residual_coding_rice_present_in_sh_flag");
+          }
+#else
+          WRITE_FLAG( (spsRangeExtension.getTSRCRicePresentFlag() ? 1 : 0),                   "sps_ts_residual_coding_rice_present_in_sh_flag");
+#endif
+#if !JVET_W2005_RANGE_EXTENSION_PROFILES
           WRITE_FLAG( (spsRangeExtension.getIntraSmoothingDisabledFlag() ? 1 : 0),            "intra_smoothing_disabled_flag" );
           WRITE_FLAG( (spsRangeExtension.getHighPrecisionOffsetsEnabledFlag() ? 1 : 0),       "high_precision_offsets_enabled_flag" );
+#endif
+          WRITE_FLAG( (spsRangeExtension.getRrcRiceExtensionEnableFlag() ? 1 : 0),                   "rrc_rice_extension_flag");
           WRITE_FLAG( (spsRangeExtension.getPersistentRiceAdaptationEnabledFlag() ? 1 : 0),   "persistent_rice_adaptation_enabled_flag" );
+#if JVET_W0046_RLSCP
+          WRITE_FLAG( (spsRangeExtension.getReverseLastSigCoeffEnabledFlag() ? 1 : 0),        "reverse_last_sig_coeff_enabled_flag" );
+#endif
+#if !JVET_W2005_RANGE_EXTENSION_PROFILES
           WRITE_FLAG( (spsRangeExtension.getCabacBypassAlignmentEnabledFlag() ? 1 : 0),       "cabac_bypass_alignment_enabled_flag" );
+#endif
           break;
         }
         default:
@@ -1335,7 +1360,6 @@ void HLSWriter::codeDCI(const DCI* dci)
   xWriteRbspTrailingBits();
 }
 
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
 void HLSWriter::codeOPI(const OPI *opi)
 {
 #if ENABLE_TRACING
@@ -1344,19 +1368,18 @@ void HLSWriter::codeOPI(const OPI *opi)
   WRITE_FLAG(opi->getOlsInfoPresentFlag(), "opi_ols_info_present_flag");
   WRITE_FLAG(opi->getHtidInfoPresentFlag(), "opi_htid_info_present_flag");
 
-  if (opi->getOlsInfoPresentFlag()) 
+  if (opi->getOlsInfoPresentFlag())
   {
-    WRITE_UVLC(opi->getOpiOlsIdx(), "opi_ols_idx");  
+    WRITE_UVLC(opi->getOpiOlsIdx(), "opi_ols_idx");
   }
 
-  if (opi->getHtidInfoPresentFlag()) 
+  if (opi->getHtidInfoPresentFlag())
   {
-    WRITE_CODE(opi->getOpiHtidPlus1(), 3, "opi_htid_plus1");  
+    WRITE_CODE(opi->getOpiHtidPlus1(), 3, "opi_htid_plus1");
   }
   WRITE_FLAG(0, "opi_extension_flag");
   xWriteRbspTrailingBits();
 }
-#endif
 
 void HLSWriter::codeVPS(const VPS* pcVPS)
 {
@@ -1382,7 +1405,6 @@ void HLSWriter::codeVPS(const VPS* pcVPS)
       WRITE_FLAG(pcVPS->getIndependentLayerFlag(i), "vps_independent_layer_flag");
       if (!pcVPS->getIndependentLayerFlag(i))
       {
-#if JVET_R0193
         bool presentFlag = false;
         for (int j = 0; j < i; j++)
         {
@@ -1397,18 +1419,6 @@ void HLSWriter::codeVPS(const VPS* pcVPS)
             WRITE_CODE(pcVPS->getMaxTidIlRefPicsPlus1(i, j), 3, "max_tid_il_ref_pics_plus1[ i ][ j ]");
           }
         }
-#else
-        for (int j = 0; j < i; j++)
-        {
-          WRITE_FLAG(pcVPS->getDirectRefLayerFlag(i, j), "vps_direct_dependency_flag");
-        }
-        bool presentFlag = ( pcVPS->getMaxTidIlRefPicsPlus1(i) != 7 );
-        WRITE_FLAG(presentFlag, "vps_max_tid_ref_present_flag[ i ]");
-        if (presentFlag)
-        {
-          WRITE_CODE(pcVPS->getMaxTidIlRefPicsPlus1(i), 3, "vps_max_tid_il_ref_pics_plus1[ i ]");
-        }
-#endif
       }
     }
   }
@@ -1444,11 +1454,11 @@ void HLSWriter::codeVPS(const VPS* pcVPS)
   {
     if(i > 0)
       WRITE_FLAG(pcVPS->getPtPresentFlag(i), "vps_pt_present_flag");
-    if (!pcVPS->getDefaultPtlDpbHrdMaxTidFlag()) 
+    if (!pcVPS->getDefaultPtlDpbHrdMaxTidFlag())
     {
       WRITE_CODE(pcVPS->getPtlMaxTemporalId(i), 3, "vps_ptl_max_tid");
     }
-    else 
+    else
     {
       CHECK(pcVPS->getPtlMaxTemporalId(i) != pcVPS->getMaxSubLayers() - 1, "When vps_default_ptl_dpb_hrd_max_tid_flag is equal to 1, the value of vps_ptl_max_tid[ i ] is inferred to be equal to vps_max_sublayers_minus1");
     }
@@ -1462,7 +1472,7 @@ void HLSWriter::codeVPS(const VPS* pcVPS)
   CHECK(cnt>=8, "More than '8' alignment bytes written");
   for (int i = 0; i < pcVPS->getNumPtls(); i++)
   {
-    codeProfileTierLevel(&pcVPS->getProfileTierLevel(i), pcVPS->getPtPresentFlag(i), pcVPS->getPtlMaxTemporalId(i) - 1);
+    codeProfileTierLevel(&pcVPS->getProfileTierLevel(i), pcVPS->getPtPresentFlag(i), pcVPS->getPtlMaxTemporalId(i));
   }
   for (int i = 0; i < totalNumOlss; i++)
   {
@@ -1537,7 +1547,7 @@ void HLSWriter::codeVPS(const VPS* pcVPS)
       {
         WRITE_CODE(pcVPS->getHrdMaxTid(i), 3, "vps_hrd_max_tid[i]");
       }
-      else 
+      else
       {
         CHECK(pcVPS->getHrdMaxTid(i) != pcVPS->getMaxSubLayers() - 1, "When vps_default_ptl_dpb_hrd_max_tid_flag is equal to 1, the value of vps_hrd_max_tid[ i ] is inferred to be equal to vps_max_sublayers_minus1");
       }
@@ -1572,7 +1582,7 @@ void HLSWriter::codePictureHeader( PicHeader* picHeader, bool writeRbspTrailingB
   {
     slice = picHeader->getPic()->cs->slice;
   }
-WRITE_FLAG(picHeader->getGdrOrIrapPicFlag(), "ph_gdr_or_irap_pic_flag");
+  WRITE_FLAG(picHeader->getGdrOrIrapPicFlag(), "ph_gdr_or_irap_pic_flag");
   WRITE_FLAG(picHeader->getNonReferencePictureFlag(), "ph_non_ref_pic_flag");
   if (picHeader->getGdrOrIrapPicFlag())
   {
@@ -1601,6 +1611,11 @@ WRITE_FLAG(picHeader->getGdrOrIrapPicFlag(), "ph_gdr_or_irap_pic_flag");
   {
     picHeader->setRecoveryPocCnt( -1 );
   }
+#if GDR_ENC_TRACE
+  printf("-gdr_pic_flag:%d\n", picHeader->getGdrPicFlag());
+  printf("-recovery_poc_cnt:%d\n", picHeader->getRecoveryPocCnt());
+  printf("-InGdrInterval:%d\n", picHeader->getInGdrInterval());
+#endif
   // PH extra bits are not written in the reference encoder
   // as these bits are reserved for future extensions
   // for( i = 0; i < NumExtraPhBits; i++ )
@@ -1624,9 +1639,9 @@ WRITE_FLAG(picHeader->getGdrOrIrapPicFlag(), "ph_gdr_or_irap_pic_flag");
       WRITE_FLAG(picHeader->getAlfEnabledFlag(COMPONENT_Y), "ph_alf_enabled_flag");
       if (picHeader->getAlfEnabledFlag(COMPONENT_Y))
       {
-        WRITE_CODE(picHeader->getNumAlfAps(), 3, "ph_num_alf_aps_ids_luma");
-        const std::vector<int>&   apsId = picHeader->getAlfAPSs();
-        for (int i = 0; i < picHeader->getNumAlfAps(); i++)
+        WRITE_CODE(picHeader->getNumAlfApsIdsLuma(), 3, "ph_num_alf_aps_ids_luma");
+        const std::vector<int>&   apsId = picHeader->getAlfApsIdsLuma();
+        for (int i = 0; i < picHeader->getNumAlfApsIdsLuma(); i++)
         {
           WRITE_CODE(apsId[i], 3, "ph_alf_aps_id_luma");
         }
@@ -1696,6 +1711,10 @@ WRITE_FLAG(picHeader->getGdrOrIrapPicFlag(), "ph_gdr_or_irap_pic_flag");
     picHeader->setLmcsEnabledFlag(false);
     picHeader->setLmcsChromaResidualScaleFlag(false);
   }
+#if GDR_ENC_TRACE
+  printf("-pic_lmcs_enabled_flag:%d\n", picHeader->getLmcsEnabledFlag() ? 1 : 0);
+  printf("-pic_chroma_residual_scale_flag:%d\n", picHeader->getLmcsChromaResidualScaleFlag() ? 1 : 0);
+#endif
 
   // quantization scaling lists
   if( sps->getScalingListFlag() )
@@ -1719,6 +1738,56 @@ WRITE_FLAG(picHeader->getGdrOrIrapPicFlag(), "ph_gdr_or_irap_pic_flag");
     WRITE_FLAG( picHeader->getVirtualBoundariesPresentFlag(), "ph_virtual_boundaries_present_flag" );
     if( picHeader->getVirtualBoundariesPresentFlag() )
     {
+#if GDR_ENABLED
+      if (sps->getGDREnabledFlag())
+      {
+        int n = 0;   // number of vertical virtual boundaries that are actually written below
+        for (unsigned i = 0; i < picHeader->getNumVerVirtualBoundaries(); i++)   // fixed bound: never shrink it while counting
+        {
+          if (picHeader->getVirtualBoundariesPosX(i) != pps->getPicWidthInLumaSamples())   // a boundary at the picture width is not signalled
+          {
+            n = n + 1;
+          }
+        }
+
+        WRITE_UVLC(n, "ph_num_ver_virtual_boundaries");
+
+        if (pps->getPicWidthInLumaSamples() <= 8)
+        {
+          CHECK(picHeader->getNumVerVirtualBoundaries() != 0, "PH: When picture width is less than or equal to 8, the number of vertical virtual boundaries shall be equal to 0");
+        }
+        else
+        {
+          CHECK(picHeader->getNumVerVirtualBoundaries() > 3, "PH: The number of vertical virtual boundaries shall be in the range of 0 to 3");
+        }
+
+        for (unsigned i = 0; i < picHeader->getNumVerVirtualBoundaries(); i++)
+        {
+          if (picHeader->getVirtualBoundariesPosX(i) != pps->getPicWidthInLumaSamples())
+          {
+            WRITE_UVLC((picHeader->getVirtualBoundariesPosX(i) >> 3) - 1, "ph_virtual_boundary_pos_x_minus1[i]");
+            CHECK(((picHeader->getVirtualBoundariesPosX(i) >> 3) - 1) > (((pps->getPicWidthInLumaSamples() + 7) >> 3) - 2), "The value of ph_virtual_boundary_pos_x_minus1[ i ] shall be in the range of 0 to Ceil( pps_pic_width_in_luma_samples / 8 ) - 2, inclusive.");
+          }
+        }
+      }
+      else
+      {
+        WRITE_UVLC(picHeader->getNumVerVirtualBoundaries(), "ph_num_ver_virtual_boundaries");
+        if (pps->getPicWidthInLumaSamples() <= 8)
+        {
+          CHECK(picHeader->getNumVerVirtualBoundaries() != 0, "PH: When picture width is less than or equal to 8, the number of vertical virtual boundaries shall be equal to 0");
+        }
+        else
+        {
+          CHECK(picHeader->getNumVerVirtualBoundaries() > 3, "PH: The number of vertical virtual boundaries shall be in the range of 0 to 3");
+        }
+        for (unsigned i = 0; i < picHeader->getNumVerVirtualBoundaries(); i++)
+        {
+          WRITE_UVLC((picHeader->getVirtualBoundariesPosX(i) >> 3) - 1, "ph_virtual_boundary_pos_x_minus1[i]");
+          CHECK(((picHeader->getVirtualBoundariesPosX(i) >> 3) - 1) > (((pps->getPicWidthInLumaSamples() + 7) >> 3) - 2), "The value of ph_virtual_boundary_pos_x_minus1[ i ] shall be in the range of 0 to Ceil( pps_pic_width_in_luma_samples / 8 ) - 2, inclusive.");
+        }
+      }
+#else
       WRITE_UVLC(picHeader->getNumVerVirtualBoundaries(), "ph_num_ver_virtual_boundaries");
       if (pps->getPicWidthInLumaSamples() <= 8)
       {
@@ -1733,6 +1802,7 @@ WRITE_FLAG(picHeader->getGdrOrIrapPicFlag(), "ph_gdr_or_irap_pic_flag");
         WRITE_UVLC((picHeader->getVirtualBoundariesPosX(i) >> 3) - 1, "ph_virtual_boundary_pos_x_minus1[i]");
         CHECK(((picHeader->getVirtualBoundariesPosX(i)>>3) - 1) > (((pps->getPicWidthInLumaSamples() + 7) >> 3) - 2), "The value of ph_virtual_boundary_pos_x_minus1[ i ] shall be in the range of 0 to Ceil( pps_pic_width_in_luma_samples / 8 ) - 2, inclusive.");
       }
+#endif
       WRITE_UVLC(picHeader->getNumHorVirtualBoundaries(), "ph_num_hor_virtual_boundaries");
       if (pps->getPicHeightInLumaSamples() <= 8)
       {
@@ -1898,10 +1968,6 @@ WRITE_FLAG(picHeader->getGdrOrIrapPicFlag(), "ph_gdr_or_irap_pic_flag");
     {
       WRITE_UVLC( picHeader->getCuChromaQpOffsetSubdivIntra(), "ph_cu_chroma_qp_offset_subdiv_intra_slice" );
     }
-    else
-    {
-      picHeader->setCuChromaQpOffsetSubdivIntra( 0 );
-    }
   }
 
 
@@ -1931,11 +1997,8 @@ WRITE_FLAG(picHeader->getGdrOrIrapPicFlag(), "ph_gdr_or_irap_pic_flag");
     {
       WRITE_UVLC(picHeader->getCuChromaQpOffsetSubdivInter(), "ph_cu_chroma_qp_offset_subdiv_inter_slice");
     }
-    else
-    {
-      picHeader->setCuChromaQpOffsetSubdivInter(0);
-    }
-  // temporal motion vector prediction
+
+    // temporal motion vector prediction
     if (sps->getSPSTemporalMVPEnabledFlag())
     {
       WRITE_FLAG( picHeader->getEnableTMVPFlag(), "ph_temporal_mvp_enabled_flag" );
@@ -2209,27 +2272,27 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice, PicHeader *picHeader )
 
   if (pcSlice->getSPS()->getALFEnabledFlag() && !pcSlice->getPPS()->getAlfInfoInPhFlag())
   {
-    const int alfEnabled = pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y);
+    const int alfEnabled = pcSlice->getAlfEnabledFlag(COMPONENT_Y);
     WRITE_FLAG(alfEnabled, "sh_alf_enabled_flag");
 
     if (alfEnabled)
     {
-      WRITE_CODE(pcSlice->getTileGroupNumAps(), 3, "sh_num_alf_aps_ids_luma");
-      const std::vector<int>&   apsId = pcSlice->getTileGroupApsIdLuma();
-      for (int i = 0; i < pcSlice->getTileGroupNumAps(); i++)
+      WRITE_CODE(pcSlice->getNumAlfApsIdsLuma(), 3, "sh_num_alf_aps_ids_luma");
+      const std::vector<int>&   apsId = pcSlice->getAlfApsIdsLuma();
+      for (int i = 0; i < pcSlice->getNumAlfApsIdsLuma(); i++)
       {
         WRITE_CODE(apsId[i], 3, "sh_alf_aps_id_luma[i]");
       }
 
-      const int alfChromaIdc = pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) + pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cr) * 2;
+      const int alfChromaIdc = pcSlice->getAlfEnabledFlag(COMPONENT_Cb) + pcSlice->getAlfEnabledFlag(COMPONENT_Cr) * 2;
       if (chromaEnabled)
       {
-        WRITE_CODE(pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cb), 1, "sh_alf_cb_enabled_flag");
-        WRITE_CODE(pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cr), 1, "sh_alf_cr_enabled_flag");
+        WRITE_CODE(pcSlice->getAlfEnabledFlag(COMPONENT_Cb), 1, "sh_alf_cb_enabled_flag");
+        WRITE_CODE(pcSlice->getAlfEnabledFlag(COMPONENT_Cr), 1, "sh_alf_cr_enabled_flag");
       }
       if (alfChromaIdc)
       {
-        WRITE_CODE(pcSlice->getTileGroupApsIdChroma(), 3, "sh_alf_aps_id_chroma");
+        WRITE_CODE(pcSlice->getAlfApsIdChroma(), 3, "sh_alf_aps_id_chroma");
       }
 
       if (pcSlice->getSPS()->getCCALFEnabledFlag())
@@ -2239,14 +2302,14 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice, PicHeader *picHeader )
         if (filterParam.ccAlfFilterEnabled[COMPONENT_Cb - 1])
         {
           // write CC ALF Cb APS ID
-          WRITE_CODE(pcSlice->getTileGroupCcAlfCbApsId(), 3, "sh_alf_cc_cb_aps_id");
+          WRITE_CODE(pcSlice->getCcAlfCbApsId(), 3, "sh_alf_cc_cb_aps_id");
         }
         // Cr
         WRITE_FLAG(filterParam.ccAlfFilterEnabled[COMPONENT_Cr - 1] ? 1 : 0, "sh_alf_cc_cr_enabled_flag");
         if (filterParam.ccAlfFilterEnabled[COMPONENT_Cr - 1])
         {
           // write CC ALF Cr APS ID
-          WRITE_CODE(pcSlice->getTileGroupCcAlfCrApsId(), 3, "sh_alf_cc_cr_aps_id");
+          WRITE_CODE(pcSlice->getCcAlfCrApsId(), 3, "sh_alf_cc_cr_aps_id");
         }
       }
     }
@@ -2560,7 +2623,16 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice, PicHeader *picHeader )
     WRITE_FLAG(pcSlice->getTSResidualCodingDisabledFlag() ? 1 : 0, "sh_ts_residual_coding_disabled_flag");
   }
 
-
+  if ((!pcSlice->getTSResidualCodingDisabledFlag()) && (pcSlice->getSPS()->getSpsRangeExtension().getTSRCRicePresentFlag()))
+  {
+    WRITE_CODE(pcSlice->get_tsrc_index(), 3, "sh_ts_residual_coding_rice_idx_minus1");
+  }
+#if JVET_W0046_RLSCP
+  if (pcSlice->getSPS()->getSpsRangeExtension().getReverseLastSigCoeffEnabledFlag())
+  {
+    WRITE_FLAG(pcSlice->getReverseLastSigCoeffFlag(), "sh_reverse_last_sig_coeff_flag");
+  }
+#endif // JVET_W0046_RLSCP
   if(pcSlice->getPPS()->getSliceHeaderExtensionPresentFlag())
   {
     WRITE_UVLC(0,"sh_slice_header_extension_length");
@@ -2568,7 +2640,11 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice, PicHeader *picHeader )
 
 }
 
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+void  HLSWriter::codeConstraintInfo  ( const ConstraintInfo* cinfo, const ProfileTierLevel* ptl )
+#else
 void  HLSWriter::codeConstraintInfo  ( const ConstraintInfo* cinfo )
+#endif
 {
   WRITE_FLAG(cinfo->getGciPresentFlag(), "gci_present_flag");
   if (cinfo->getGciPresentFlag())
@@ -2657,9 +2733,25 @@ void  HLSWriter::codeConstraintInfo  ( const ConstraintInfo* cinfo )
     WRITE_FLAG(cinfo->getNoLmcsConstraintFlag() ? 1 : 0, "gci_no_lmcs_constraint_flag");
     WRITE_FLAG(cinfo->getNoLadfConstraintFlag() ? 1 : 0, "gci_no_ladf_constraint_flag");
     WRITE_FLAG(cinfo->getNoVirtualBoundaryConstraintFlag() ? 1 : 0, "gci_no_virtual_boundaries_constraint_flag");
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    Profile::Name profile = ptl->getProfileIdc();   // the profile decides whether an extra constraint flag is written
+    if (profile == Profile::MAIN_12 || profile == Profile::MAIN_12_INTRA || profile == Profile::MAIN_12_STILL_PICTURE ||
+        profile == Profile::MAIN_12_444 || profile == Profile::MAIN_12_444_INTRA || profile == Profile::MAIN_12_444_STILL_PICTURE ||
+        profile == Profile::MAIN_16_444 || profile == Profile::MAIN_16_444_INTRA || profile == Profile::MAIN_16_444_STILL_PICTURE)
+    {
+      int numReservedBits = 1;   // exactly one bit follows the count: the lower-bit-rate constraint flag written below
+      WRITE_CODE(numReservedBits, 8, "gci_num_reserved_bits");
+      WRITE_FLAG(cinfo->getLowerBitRateConstraintFlag() ? 1 : 0, "general_lower_bit_rate_constraint_flag");
+    }
+    else
+    {
+      WRITE_CODE(0, 8, "gci_num_reserved_bits");   // all other profiles: no reserved bits follow
+    }
+#else
     //The value of gci_num_reserved_bits shall be equal to 0 in bitstreams conforming to this version of this Specification.
     //Other values of gci_num_reserved_bits are reserved for future use by ITU-T | ISO/IEC.
     WRITE_CODE(0, 8, "gci_num_reserved_bits");
+#endif
   }
 
   while (!isByteAligned())
@@ -2683,7 +2775,11 @@ void  HLSWriter::codeProfileTierLevel    ( const ProfileTierLevel* ptl, bool pro
 
   if(profileTierPresentFlag)
   {
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+    codeConstraintInfo(ptl->getConstraintInfo(), ptl);
+#else
     codeConstraintInfo(ptl->getConstraintInfo());
+#endif
   }
 
   for (int i = maxNumSubLayersMinus1 - 1; i >= 0; i--)
@@ -2926,7 +3022,7 @@ void HLSWriter::xCodePredWeightTable(PicHeader *picHeader, const PPS *pps, const
     if (numRef == 0)
     {
       numLxWeights         = picHeader->getNumL1Weights();
-      if (pps->getWPBiPred() == 0) 
+      if (pps->getWPBiPred() == 0)
       {
         numLxWeights = 0;
       }
@@ -3077,4 +3173,4 @@ void HLSWriter::alfFilter( const AlfParam& alfParam, const bool isChroma, const
 }
 
 
-//! \}
\ No newline at end of file
+//! \}
diff --git a/source/Lib/EncoderLib/VLCWriter.h b/source/Lib/EncoderLib/VLCWriter.h
index db61a5196338f149f2243bef447ad2d7de99fd84..ed1ff32d38b1b528d5186b26107b734139025b71 100644
--- a/source/Lib/EncoderLib/VLCWriter.h
+++ b/source/Lib/EncoderLib/VLCWriter.h
@@ -3,7 +3,7 @@
 * and contributor rights, including patent rights, and no such rights are
 * granted under this license.
 *
-* Copyright (c) 2010-2020, ITU/ISO/IEC
+* Copyright (c) 2010-2021, ITU/ISO/IEC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -141,10 +141,12 @@ public:
   void  codeDCI                 ( const DCI* dci );
   void  codePictureHeader       ( PicHeader* picHeader, bool writeRbspTrailingBits, Slice *slice = 0 );
   void  codeSliceHeader         ( Slice* pcSlice, PicHeader *picheader = 0 );
-#if JVET_S0163_ON_TARGETOLS_SUBLAYERS
   void  codeOPI                 ( const OPI* opi );
-#endif
+#if JVET_W2005_RANGE_EXTENSION_PROFILES
+  void  codeConstraintInfo      ( const ConstraintInfo* cinfo, const ProfileTierLevel* ptl );
+#else
   void  codeConstraintInfo      ( const ConstraintInfo* cinfo );
+#endif
   void  codeProfileTierLevel    ( const ProfileTierLevel* ptl, bool profileTierPresentFlag, int maxNumSubLayersMinus1 );
   void  codeOlsHrdParameters(const GeneralHrdParams * generalHrd, const OlsHrdParams *olsHrd , const uint32_t firstSubLayer, const uint32_t maxNumSubLayersMinus1);
 
diff --git a/source/Lib/EncoderLib/WeightPredAnalysis.cpp b/source/Lib/EncoderLib/WeightPredAnalysis.cpp
index 6db6870f2e6c1440ae790cf2937afe29a8544f64..cad3d6cc8b2fa03596e655c56301fa1ea782f146 100644
--- a/source/Lib/EncoderLib/WeightPredAnalysis.cpp
+++ b/source/Lib/EncoderLib/WeightPredAnalysis.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/EncoderLib/WeightPredAnalysis.h b/source/Lib/EncoderLib/WeightPredAnalysis.h
index 76c91be5f30d5966dfef874927d97c1ce8f50a9c..7c7c0d33320cca77a136af9c1107dbe4cdb76c18 100644
--- a/source/Lib/EncoderLib/WeightPredAnalysis.h
+++ b/source/Lib/EncoderLib/WeightPredAnalysis.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/Utilities/CMakeLists.txt b/source/Lib/Utilities/CMakeLists.txt
index 83b05cf5df7a2935da05e1acbf5e99af6f23fc0a..020a0afed9efb5a03aa18144cbe9de6d06cf3d54 100644
--- a/source/Lib/Utilities/CMakeLists.txt
+++ b/source/Lib/Utilities/CMakeLists.txt
@@ -28,28 +28,8 @@ if( SET_ENABLE_TRACING )
   endif()
 endif()
 
-if( OpenMP_FOUND )
-  if( SET_ENABLE_SPLIT_PARALLELISM )
-    if( ENABLE_SPLIT_PARALLELISM )
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-    endif()
-  endif()
-  if( SET_ENABLE_WPP_PARALLELISM )
-    if( ENABLE_WPP_PARALLELISM )
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=1 )
-    else()
-      target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-    endif()
-  endif()
-else()
-  target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_SPLIT_PARALLELISM=0 )
-  target_compile_definitions( ${LIB_NAME} PUBLIC ENABLE_WPP_PARALLELISM=0 )
-endif()
-
 target_include_directories( ${LIB_NAME} PUBLIC . .. )
-target_link_libraries( ${LIB_NAME} CommonLib Threads::Threads )
+target_link_libraries( ${LIB_NAME} CommonLib )
 
 # example: place header files in different folders
 source_group( "Natvis Files" FILES ${NATVIS_FILES} )
diff --git a/source/Lib/Utilities/VideoIOYuv.cpp b/source/Lib/Utilities/VideoIOYuv.cpp
index 8a30ccc5ddcf43123914dd55415a2b741294c890..166b214ace6e165da028d8ae80a68968c3a23a17 100644
--- a/source/Lib/Utilities/VideoIOYuv.cpp
+++ b/source/Lib/Utilities/VideoIOYuv.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -948,7 +948,7 @@ bool VideoIOYuv::read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSp
 
     if (processComponent)
     {
-      if (! verifyPlane( dst, stride444, width444, height444, pad_h444, pad_v444, compID, format, m_fileBitdepth[chType]) )
+      if (! verifyPlane( dst, stride444, width444, height444, pad_h444, pad_v444, compID, picOrg.chromaFormat, m_fileBitdepth[chType]) )
       {
          EXIT("Source image contains values outside the specified bit range!");
       }
diff --git a/source/Lib/Utilities/VideoIOYuv.h b/source/Lib/Utilities/VideoIOYuv.h
index bf2c470560dae8a67de69bb9c914d55799ac3103..8c5fe22e196f49674c6b1445310efc131477639c 100644
--- a/source/Lib/Utilities/VideoIOYuv.h
+++ b/source/Lib/Utilities/VideoIOYuv.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/Utilities/program_options_lite.cpp b/source/Lib/Utilities/program_options_lite.cpp
index 859c59117d4d8cbb09ff21397a9b00e514bad74f..dd64f407ffafb4c8ffccc5f1766785550cbccd31 100644
--- a/source/Lib/Utilities/program_options_lite.cpp
+++ b/source/Lib/Utilities/program_options_lite.cpp
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/Utilities/program_options_lite.h b/source/Lib/Utilities/program_options_lite.h
index 6fc3dd33789460bd2dd28733cf73851f333de439..c21d69a511249d1799bc42f2ad1430ab5f20077b 100644
--- a/source/Lib/Utilities/program_options_lite.h
+++ b/source/Lib/Utilities/program_options_lite.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/libmd5/MD5.h b/source/Lib/libmd5/MD5.h
index d41a07424199208d2c75fcdb1e634a955594ee4f..15bf392a2e04c59f437d6be34c6b46f21e242b74 100644
--- a/source/Lib/libmd5/MD5.h
+++ b/source/Lib/libmd5/MD5.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
diff --git a/source/Lib/libmd5/libmd5.h b/source/Lib/libmd5/libmd5.h
index 4554c73d5d0235a30a7188719b199af46d5a159f..f3ac137ec6ca7f90675222d257fddd6054fc0b96 100644
--- a/source/Lib/libmd5/libmd5.h
+++ b/source/Lib/libmd5/libmd5.h
@@ -3,7 +3,7 @@
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
  *
- * Copyright (c) 2010-2020, ITU/ISO/IEC
+ * Copyright (c) 2010-2021, ITU/ISO/IEC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without