diff --git a/CMakeLists.txt b/CMakeLists.txt
index 282de51154e9e96256d775b965180f65a940da2f..f052c0828bbe80df7f7cf0cded99ecc16e742924 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -70,9 +70,11 @@ if( OpenMP_FOUND )
 endif()
 
 # Enable warnings for some generators and toolsets.
-bb_enable_warnings( gcc warnings-as-errors -Wno-sign-compare )
+# bb_enable_warnings( gcc warnings-as-errors -Wno-sign-compare )
 # bb_enable_warnings( gcc -Wno-unused-variable )
 # bb_enable_warnings( gcc-4.8 warnings-as-errors -Wno-unused-variable )
+# for gcc 8.2:
+bb_enable_warnings( gcc warnings-as-errors -Wno-sign-compare -Wno-class-memaccess)
 
 if( XCODE )
   bb_enable_warnings( clang warnings-as-errors
@@ -104,6 +106,7 @@ endif()
 if( MSVC )
   add_compile_options( "/MP" )
   add_compile_options( "/EHsc" )
+  add_compile_options( "/MT" )
 endif()
 
 # set address sanitizer compiler arguments
diff --git a/cfg/encoder_intra_vtm.cfg b/cfg/encoder_intra_vtm.cfg
index f6665ccda58e9a5830521de0c992eecac99758eb..9ebe108b223591b97bdf19d0aa9fedf453d9ec95 100644
--- a/cfg/encoder_intra_vtm.cfg
+++ b/cfg/encoder_intra_vtm.cfg
@@ -106,13 +106,11 @@ EMT                          : 1
 EMTFast                      : 1
 Affine                       : 1
 HighPrecMv                   : 1
-
 SubPuMvp                     : 1
 MaxNumMergeCand              : 6
 LMChroma                     : 1      # use CCLM only
 DepQuant                     : 1
 IMV                          : 2
-
 ALF                          : 1
 
 # Fast tools
diff --git a/cfg/encoder_lowdelay_P_vtm.cfg b/cfg/encoder_lowdelay_P_vtm.cfg
index 5f588af822131c8a0c58e7658c4006f23bd51aa0..d83bed2d615bbe62a8bed6139c9daf01129f36f8 100644
--- a/cfg/encoder_lowdelay_P_vtm.cfg
+++ b/cfg/encoder_lowdelay_P_vtm.cfg
@@ -122,13 +122,11 @@ EMT                          : 1
 EMTFast                      : 1
 Affine                       : 1
 HighPrecMv                   : 1
-
 SubPuMvp                     : 1
 MaxNumMergeCand              : 6
 LMChroma                     : 1      # use CCLM only
 DepQuant                     : 1
 IMV                          : 2
-
 ALF                          : 1
 
 # Fast tools
diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg
index 281ec093f234ae30f099c1059bb7cd2e3e2851b2..3bcde02d4138a1f6e484d528c77ba67f77d138b4 100644
--- a/cfg/encoder_lowdelay_vtm.cfg
+++ b/cfg/encoder_lowdelay_vtm.cfg
@@ -122,13 +122,11 @@ EMT                          : 1
 EMTFast                      : 1
 Affine                       : 1
 HighPrecMv                   : 1
-
 SubPuMvp                     : 1
 MaxNumMergeCand              : 6
 LMChroma                     : 1      # use CCLM only
 DepQuant                     : 1
 IMV                          : 2
-
 ALF                          : 1
 
 # Fast tools
diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg
index 7e391142bb295f82824e5364bc8ffb143f424218..46d02af14ed62aa539294bc39e7b69ca46d7018a 100644
--- a/cfg/encoder_randomaccess_vtm.cfg
+++ b/cfg/encoder_randomaccess_vtm.cfg
@@ -136,13 +136,11 @@ EMT                          : 1
 EMTFast                      : 1
 Affine                       : 1
 HighPrecMv                   : 1
-
 SubPuMvp                     : 1
 MaxNumMergeCand              : 6
 LMChroma                     : 1      # use CCLM only
 DepQuant                     : 1
 IMV                          : 2
-
 ALF                          : 1
 
 # Fast tools
@@ -150,7 +148,6 @@ PBIntraFast                  : 1
 FastMrg                      : 1
 AMaxBT                       : 1
 
-
 ### DO NOT ADD ANYTHING BELOW THIS LINE ###
 ### DO NOT DELETE THE EMPTY LINE BELOW ###
 
diff --git a/doc/.gitignore b/doc/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..d58111897c8449a88a7e83a74f74c44a8ede7055
--- /dev/null
+++ b/doc/.gitignore
@@ -0,0 +1,8 @@
+# ignore these Latex files
+jctvcdoc.cls
+*.aux
+*.log
+*.lot
+*.out
+*.toc
+*~
diff --git a/doc/pyuv_format.pdf b/doc/pyuv_format.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..62c669e0bb4c52d5756b428ce0838eeb57a6d06f
Binary files /dev/null and b/doc/pyuv_format.pdf differ
diff --git a/doc/software-manual.pdf b/doc/software-manual.pdf
index 91bcc5a9f172ec570b4312133e74d3b867e71e52..0b4925cf32a94097c096cdc47707a5dbbebe3637 100644
Binary files a/doc/software-manual.pdf and b/doc/software-manual.pdf differ
diff --git a/doc/software-manual.tex b/doc/software-manual.tex
index ceb6b39c5d5b4736d3d7a96e94ee224d2d847cc2..0cd31316c6225c0c1924a52b909b8e5b26588341 100644
--- a/doc/software-manual.tex
+++ b/doc/software-manual.tex
@@ -3021,4 +3021,226 @@ If the decoder is compiled with the macro RExt__DECODER_DEBUG_BIT_STATISTICS def
 The Linux makefile will compile both the analyser and standard version when the `all' or `everything' target is used (where the latter will also build  high-bit-depth executables).
 
 
+
+\section{Block statistics extension}
+\label{sec:block-stat-extens}
+
+The block statistics extension enables straightforward visualization and statistical analysis of coding tool
+usage in encoded bitstreams. It allows the reference
+software encoder and decoder to write out statistics files in a configurable
+way. These files can in turn be loaded into a suitable YUV player and overlaid on the
+reconstructed YUV sequence, or be used for statistical analysis at a
+selectable scope (e.g. block/picture/sequence level). An example implementation
+of such a visualization is available in the open-source YUView player
+(\url{https://github.com/IENT/YUView}).
+
+
+\subsection{Usage}
+\label{sec:usage}
+
+The software has to be compiled with the macros ENABLE_TRACING and
+K0149_BLOCK_STATISTICS  defined as 1. The statistics can be written by either
+encoder or decoder.
+
+The extension adds additional trace channels to the ``dtrace'' functionality of
+the software. The following trace channels were added:
+\begin{description}
+\item[D_BLOCK_STATISTICS_ALL] All syntax elements are written, no matter whether
+  they are actually encoded or derived.
+\item[D_BLOCK_STATISTICS_CODED] Tries to write only those syntax elements that
+  have actually been encoded.
+\end{description}
+
+The following additional options are available (part of ``dtrace''), shown here
+for the decoder. See the file dtrace_next.h for more details.
+
+\begin{OptionTableNoShorthand}{Decoder options}{tab:decoder-block-statistics}
+\Option{TraceFile} &
+%\ShortOption{\None} &
+\Default{\None} &
+File name of the produced trace file.
+\\
+
+\Option{TraceRule} &
+%\ShortOption{-b} &
+\Default{\NotSet} &
+Specifies which traces should be saved, and for which POCs.
+\\
+
+\end{OptionTableNoShorthand}
+
+Concrete examples of calls for  generating a block statistics file are:
+\begin{verbatim}
+bin/DecoderAppStatic -b str/BasketballDrive_1920x1080_QP37.vvc \
+    --TraceFile="stats/BasketballDrive_1920x1080_QP37_coded.vtmbmsstats" \
+    --TraceRule="D_BLOCK_STATISTICS_CODED:poc>=0"
+
+bin/DecoderAppStatic -b str/BasketballDrive_1920x1080_QP37.vvc \
+    --TraceFile="stats/BasketballDrive_1920x1080_QP37_all.vtmbmsstats" \
+    --TraceRule="D_BLOCK_STATISTICS_ALL:poc>=0"   
+\end{verbatim}
+
+
+\subsection{Block statistics file formats}
+\label{sec:block-stat-file}
+The trace file contains a header listing all available block
+statistics. For each statistic, it gives the type and, where applicable, the scale
+(for vectors) or the value range (for integers):
+\begin{verbatim}
+# VTMBMS Block Statistics
+# Sequence size: [832x 480]
+# Block Statistic Type: PredMode; Flag; 
+# Block Statistic Type: MergeFlag; Flag; 
+# Block Statistic Type: MVL0; Vector; Scale: 4
+# Block Statistic Type: MVL1; Vector; Scale: 4
+# Block Statistic Type: IPCM; Flag; 
+# Block Statistic Type: Y_IntraMode; Integer; [0, 73]
+# Block Statistic Type: Cb_IntraMode; Integer; [0, 73]
+\end{verbatim}
+
+Two formats are available for the statistics for each block, a human readable
+format and a CSV based format. The header remains the same for both cases. 
+
+For both formats each row contains the information for one block statistic. The
+order of the data is: picture order count (POC), location of top left corner of
+the block, size of the block, name of the statistic, and value of the
+statistic. 
+The macro BLOCK_STATS_AS_CSV selects which of the two formats is written.
+The human readable format can also be easily processed by other software, for
+example YUView, using regular expressions. The CSV based format provides the
+universal interface required by spreadsheet applications.
+
+The human readable format is based on the format used for the other dtrace
+statistics. Some examples for this format are:
+\begin{verbatim}
+BlockStat: POC 16 @( 112,   0) [ 8x 8] SkipFlag=1
+BlockStat: POC 16 @( 112,   0) [ 8x 8] InterDir=1
+BlockStat: POC 16 @( 112,   0) [ 8x 8] MergeFlag=1
+BlockStat: POC 16 @( 112,   0) [ 8x 8] MergeIdx=0
+BlockStat: POC 16 @( 112,   0) [ 8x 8] MergeType=0
+BlockStat: POC 16 @( 112,   0) [ 8x 8] MVPIdxL0=255
+BlockStat: POC 16 @( 112,   0) [ 8x 8] MVPNumL0=255
+BlockStat: POC 16 @( 112,   0) [ 8x 8] RefIdxL0=0
+BlockStat: POC 16 @( 112,   0) [ 8x 8] MVDL0={   0,   0}
+BlockStat: POC 16 @( 112,   0) [ 8x 8] MVL0={ -70,  18}
+BlockStat: POC 16 @( 112,   8) [ 8x 8] PredMode=0
+BlockStat: POC 16 @( 112,   8) [ 8x 8] PartSize=0
+\end{verbatim}
+
+Some examples of the CSV based format are:
+\begin{verbatim}
+BlockStat;16; 112;   0; 8; 8;SkipFlag;1
+BlockStat;16; 112;   0; 8; 8;InterDir;1
+BlockStat;16; 112;   0; 8; 8;MergeFlag;1
+BlockStat;16; 112;   0; 8; 8;MergeIdx;0
+BlockStat;16; 112;   0; 8; 8;MergeType;0
+BlockStat;16; 112;   0; 8; 8;MVPIdxL0;255
+BlockStat;16; 112;   0; 8; 8;MVPNumL0;255
+BlockStat;16; 112;   0; 8; 8;RefIdxL0;0
+BlockStat;16; 112;   0; 8; 8;MVDL0;   0;   0
+BlockStat;16; 112;   0; 8; 8;MVL0; -70;  18
+BlockStat;16; 112;   8; 8; 8;PredMode;0
+BlockStat;16; 112;   8; 8; 8;PartSize;0
+\end{verbatim}
+
+\subsection{Visualization}
+\label{sec:visualization}
+
+The block statistics can be viewed with YUView, which is freely available under
+GPLv3: \url{https://github.com/IENT/YUView}. The latest releases and the master
+branch have the functionality required for viewing the block statistics. YUView
+assumes that the file extension of a block statistics file is
+``.vtmbmsstats''. However, if a file is not recognized, you can choose from a list
+of supported file formats.
+
+
+Statistics can be overlaid with YUV sequences. Some example snapshots are:
+
+\begin{figure}[htpb]
+  \centering
+  \includegraphics[width=0.8\linewidth]{figures/YUView}
+  \caption{YUView}
+  \label{fig:yuview}
+\end{figure}
+
+\begin{figure}[htpb]
+  \centering
+  \includegraphics[width=0.5\linewidth]{figures/raceHorsesShot2MotionVectors}
+  \caption{Motion vectors}
+  \label{fig:motion-vectors}
+\end{figure}
+
+
+\begin{figure}[htpb]
+  \centering
+  \includegraphics[width=0.5\linewidth]{figures/raceHorsesShot3SkipFlag}
+  \caption{Skip flag}
+  \label{fig:skip-flag}
+\end{figure}
+
+\subsection{Adding statistics}
+\label{sec:adding-statistics}
+
+In order to add further block statistics, do the following:
+
+
+\begin{description}
+\item[source/Lib/CommonLib/dtrace_blockstatistics.h]
+  Add your statistic to the BlockStatistic enum:	
+\begin{verbatim}
+enum class BlockStatistic {
+  // general
+  PredMode,
+  PartSize,
+  Depth,
+\end{verbatim}
+  
+Further, add your statistic to the map blockstatistic2description:
+\begin{verbatim}
+static const std::map<BlockStatistic, 
+  std::tuple<std::string, BlockStatisticType, std::string>> 
+  blockstatistic2description =
+{
+  { BlockStatistic::PredMode, 
+    std::tuple<std::string, BlockStatisticType, std::string>
+    {"PredMode", BlockStatisticType::Flag, ""}},
+  { BlockStatistic::MergeFlag,
+    std::tuple<std::string, BlockStatisticType, std::string>
+    {"MergeFlag", BlockStatisticType::Flag, ""}},
+  { BlockStatistic::MVL0,
+    std::tuple<std::string, BlockStatisticType, std::string>
+    {"MVL0", BlockStatisticType::Vector, "Scale: 4"}},
+  YOURS
+\end{verbatim}
+
+
+\item[source/Lib/CommonLib/dtrace_blockstatistics.cpp] All code for
+  writing syntax elements is kept in this file in
+  getAndStoreBlockStatistics. This function is called once for each
+  CTU, after it has been en/decoded. The following macros have been
+  defined to facilitate writing of block statistics:
+\begin{verbatim}
+DTRACE_BLOCK_SCALAR(ctx,channel,cs_cu_pu,stat_type,val)   
+DTRACE_BLOCK_SCALAR_CHROMA(ctx,channel,cs_cu_pu,stat_type,val)
+DTRACE_BLOCK_VECTOR(ctx,channel,cu_pu,stat_type,v_x,v_y)    
+DTRACE_BLOCK_AFFINETF(ctx,channel,pu,stat_type,v_x0,v_y0,v_x1,v_y1,v_x2,v_y2) 
+\end{verbatim}
+
+An example:
+\begin{verbatim}
+DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, 
+  cu, GetBlockStatisticName(BlockStatistic::PredMode), cu.predMode);
+\end{verbatim}
+
+
+\item[Block statistics for debugging] The statistics can also be used
+  to write out other data, not just syntax elements. Add your
+  statistics to dtrace_blockstatistics.h. Wherever they are used, the
+  following headers have to be included:
+\begin{verbatim}
+#include "dtrace_next.h"
+#include "dtrace_blockstatistics.h"
+\end{verbatim}
+\end{description}
+
 \end{document}
diff --git a/source/App/DecoderAnalyserApp/CMakeLists.txt b/source/App/DecoderAnalyserApp/CMakeLists.txt
index 46f0e44363824ac28b541c4e8576272936c854c6..bd4975afa64248238e18513ebcd6de594c42093c 100644
--- a/source/App/DecoderAnalyserApp/CMakeLists.txt
+++ b/source/App/DecoderAnalyserApp/CMakeLists.txt
@@ -29,7 +29,7 @@ target_compile_definitions( ${EXE_NAME} PUBLIC RExt__DECODER_DEBUG_BIT_STATISTIC
 target_compile_definitions( ${EXE_NAME} PUBLIC RExt__DECODER_DEBUG_TOOL_STATISTICS=1 )
 
 if( ENABLE_VTM )
-  target_compile_definitions( ${EXE_NAME} PUBLIC JEM_TOOLS=0 )
+  target_compile_definitions( ${EXE_NAME} PUBLIC BMS_TOOLS=0 )
 endif()
 
 if( SET_ENABLE_TRACING )
diff --git a/source/App/DecoderApp/CMakeLists.txt b/source/App/DecoderApp/CMakeLists.txt
index 8bd727935494d1eb74379616cd882ebf75986065..dec7c6e1ec22c9734e8b4cb33cae2c3862c0f8f5 100644
--- a/source/App/DecoderApp/CMakeLists.txt
+++ b/source/App/DecoderApp/CMakeLists.txt
@@ -27,7 +27,7 @@ add_executable( ${EXE_NAME} ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} ${CMAKE_CU
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 
 if( ENABLE_VTM )
-  target_compile_definitions( ${EXE_NAME} PUBLIC JEM_TOOLS=0 )
+  target_compile_definitions( ${EXE_NAME} PUBLIC BMS_TOOLS=0 )
 endif()
 
 
diff --git a/source/App/DecoderApp/DecApp.cpp b/source/App/DecoderApp/DecApp.cpp
index a7f26e83303ff5b107c9152fc424d4d427f04b93..3c2f719c0959159f7a4280c15e362513d2b0fe7e 100644
--- a/source/App/DecoderApp/DecApp.cpp
+++ b/source/App/DecoderApp/DecApp.cpp
@@ -121,6 +121,10 @@ uint32_t DecApp::decode()
      * requires the DecApp::decode() method to be called again with the same
      * nal unit. */
 #if RExt__DECODER_DEBUG_STATISTICS
+    CodingStatistics& stat = CodingStatistics::GetSingletonInstance();
+    CHECK(m_statMode < STATS__MODE_NONE || m_statMode > STATS__MODE_ALL, "Wrong coding statistics output mode");
+    stat.m_mode = m_statMode;
+
     CodingStatistics::CodingStatisticsData* backupStats = new CodingStatistics::CodingStatisticsData(CodingStatistics::GetStatistics());
 #endif
 
@@ -210,6 +214,11 @@ uint32_t DecApp::decode()
             }
         }
 
+        if (m_packedYUVMode && (m_outputBitDepth[CH_L] != 10 && m_outputBitDepth[CH_L] != 12))
+        {
+          EXIT ("Invalid output bit-depth for packed YUV output, aborting\n");
+        }
+
         m_cVideoIOYuvReconFile.open( m_reconFileName, true, m_outputBitDepth, m_outputBitDepth, bitDepths.recon ); // write mode
         openedReconFile = true;
       }
@@ -279,9 +288,9 @@ void DecApp::xCreateDecLib()
   m_cDecLib.create();
 
   // initialize decoder class
-  m_cDecLib.init( 
+  m_cDecLib.init(
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
-    m_cacheCfgFile 
+    m_cacheCfgFile
 #endif
   );
   m_cDecLib.setDecodedPictureHashSEIEnabled(m_decodedPictureHashSEIEnabled);
@@ -394,11 +403,13 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId )
           if (display)
           {
             m_cVideoIOYuvReconFile.write( pcPicTop->getRecoBuf(), pcPicBottom->getRecoBuf(),
-                                           m_outputColourSpaceConvert,
-                                           conf.getWindowLeftOffset() + defDisp.getWindowLeftOffset(),
-                                           conf.getWindowRightOffset() + defDisp.getWindowRightOffset(),
-                                           conf.getWindowTopOffset() + defDisp.getWindowTopOffset(),
-                                           conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(), NUM_CHROMA_FORMAT, isTff );
+                                          m_outputColourSpaceConvert,
+                                          false, // TODO: m_packedYUVMode,
+                                          conf.getWindowLeftOffset()   + defDisp.getWindowLeftOffset(),
+                                          conf.getWindowRightOffset()  + defDisp.getWindowRightOffset(),
+                                          conf.getWindowTopOffset()    + defDisp.getWindowTopOffset(),
+                                          conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(),
+                                          NUM_CHROMA_FORMAT, isTff );
           }
         }
 
@@ -445,6 +456,7 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId )
 
           m_cVideoIOYuvReconFile.write( pcPic->getRecoBuf(),
                                         m_outputColourSpaceConvert,
+                                        m_packedYUVMode,
                                         conf.getWindowLeftOffset()   + defDisp.getWindowLeftOffset(),
                                         conf.getWindowRightOffset()  + defDisp.getWindowRightOffset(),
                                         conf.getWindowTopOffset()    + defDisp.getWindowTopOffset(),
@@ -502,15 +514,18 @@ void DecApp::xFlushOutput( PicList* pcListPic )
         // write to file
         if ( !m_reconFileName.empty() )
         {
-          const Window &conf = pcPicTop->cs->sps->getConformanceWindow();
+          const Window &conf    = pcPicTop->cs->sps->getConformanceWindow();
           const Window  defDisp = (m_respectDefDispWindow && pcPicTop->cs->sps->getVuiParametersPresentFlag()) ? pcPicTop->cs->sps->getVuiParameters()->getDefaultDisplayWindow() : Window();
-          const bool isTff = pcPicTop->topField;
+          const bool    isTff   = pcPicTop->topField;
+
           m_cVideoIOYuvReconFile.write( pcPicTop->getRecoBuf(), pcPicBottom->getRecoBuf(),
-                                         m_outputColourSpaceConvert,
-                                         conf.getWindowLeftOffset() + defDisp.getWindowLeftOffset(),
-                                         conf.getWindowRightOffset() + defDisp.getWindowRightOffset(),
-                                         conf.getWindowTopOffset() + defDisp.getWindowTopOffset(),
-                                         conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(), NUM_CHROMA_FORMAT, isTff );
+                                        m_outputColourSpaceConvert,
+                                        false, // TODO: m_packedYUVMode,
+                                        conf.getWindowLeftOffset()   + defDisp.getWindowLeftOffset(),
+                                        conf.getWindowRightOffset()  + defDisp.getWindowRightOffset(),
+                                        conf.getWindowTopOffset()    + defDisp.getWindowTopOffset(),
+                                        conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(),
+                                        NUM_CHROMA_FORMAT, isTff );
         }
 
         // update POC of display order
@@ -560,6 +575,7 @@ void DecApp::xFlushOutput( PicList* pcListPic )
 
           m_cVideoIOYuvReconFile.write( pcPic->getRecoBuf(),
                                         m_outputColourSpaceConvert,
+                                        m_packedYUVMode,
                                         conf.getWindowLeftOffset()   + defDisp.getWindowLeftOffset(),
                                         conf.getWindowRightOffset()  + defDisp.getWindowRightOffset(),
                                         conf.getWindowTopOffset()    + defDisp.getWindowTopOffset(),
diff --git a/source/App/DecoderApp/DecAppCfg.cpp b/source/App/DecoderApp/DecAppCfg.cpp
index 6e520b49eeca61d15991ddc1552d89fa9e637276..ca55e2493a7e9f28c0d619666abed5c477cfde9f 100644
--- a/source/App/DecoderApp/DecAppCfg.cpp
+++ b/source/App/DecoderApp/DecAppCfg.cpp
@@ -96,6 +96,7 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
   ("SEIColourRemappingInfoFilename",  m_colourRemapSEIFileName,        string(""), "Colour Remapping YUV output file name. If empty, no remapping is applied (ignore SEI message)\n")
   ("OutputDecodedSEIMessagesFilename",  m_outputDecodedSEIMessagesFilename,    string(""), "When non empty, output decoded SEI messages to the indicated file. If file is '-', then output to stdout\n")
   ("ClipOutputVideoToRec709Range",      m_bClipOutputVideoToRec709Range,  false,   "If true then clip output video to the Rec. 709 Range on saving")
+  ("PYUV",                      m_packedYUVMode,                       false,      "If true then output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data. Ignored for interlaced output.")
 #if ENABLE_TRACING
   ("TraceChannelsList",         bTracingChannelsList,                        false, "List all available tracing channels" )
   ("TraceRule",                 sTracingRule,                         string( "" ), "Tracing rule (ex: \"D_CABAC:poc==8\" or \"D_REC_CB_LUMA:poc==8\")" )
@@ -103,6 +104,13 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
 #endif
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   ("CacheCfg",                  m_cacheCfgFile,                       string( "" ), "CacheCfg File" )
+#endif
+#if RExt__DECODER_DEBUG_STATISTICS
+  ("Stats",                     m_statMode,                           3,           "Control decoder debugging statistic output mode\n"
+                                                                                   "\t0: disable statistic\n"
+                                                                                   "\t1: enable bit statistic\n"
+                                                                                   "\t2: enable tool statistic\n"
+                                                                                   "\t3: enable bit and tool statistic\n")
 #endif
   ;
 
@@ -220,6 +228,8 @@ DecAppCfg::DecAppCfg()
 , m_respectDefDispWindow(0)
 , m_outputDecodedSEIMessagesFilename()
 , m_bClipOutputVideoToRec709Range(false)
+, m_packedYUVMode(false)
+, m_statMode(0)
 {
   for (uint32_t channelTypeIndex = 0; channelTypeIndex < MAX_NUM_CHANNEL_TYPE; channelTypeIndex++)
   {
diff --git a/source/App/DecoderApp/DecAppCfg.h b/source/App/DecoderApp/DecAppCfg.h
index b721849a26366a6fe5b6209036b4db922ff77b92..7b370eb83804f11b678afaa513226d2a10d512da 100644
--- a/source/App/DecoderApp/DecAppCfg.h
+++ b/source/App/DecoderApp/DecAppCfg.h
@@ -70,7 +70,9 @@ protected:
   int           m_respectDefDispWindow;               ///< Only output content inside the default display window
   std::string   m_outputDecodedSEIMessagesFilename;   ///< filename to output decoded SEI messages to. If '-', then use stdout. If empty, do not output details.
   bool          m_bClipOutputVideoToRec709Range;      ///< If true, clip the output video to the Rec 709 range on saving.
+  bool          m_packedYUVMode;                      ///< If true, output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data
   std::string   m_cacheCfgFile;                       ///< Config file of cache model
+  int           m_statMode;                           ///< Decoder debugging statistics output mode (0 - disabled, 1 - bit stat, 2 - tool stat, 3 - both)
 
 public:
   DecAppCfg();
diff --git a/source/App/DecoderApp/decmain.cpp b/source/App/DecoderApp/decmain.cpp
index 73c44ea0223c122f58fd44720751b6edefcf79d3..61db2a5e36a52e5465b5e265000d069771859d49 100644
--- a/source/App/DecoderApp/decmain.cpp
+++ b/source/App/DecoderApp/decmain.cpp
@@ -57,9 +57,9 @@ int main(int argc, char* argv[])
   // print information
   fprintf( stdout, "\n" );
 #ifdef SVNREVISION
-  fprintf( stdout, "VVCSoftware: VTM Decoder Version %s (%s@r%s) ", NEXT_SOFTWARE_VERSION, SVNRELATIVEURL, SVNREVISION /*NV_VERSION*/ );
+  fprintf( stdout, "VVCSoftware: BMS Decoder Version %s (%s@r%s) ", NEXT_SOFTWARE_VERSION, SVNRELATIVEURL, SVNREVISION /*NV_VERSION*/ );
 #else
-  fprintf( stdout, "VVCSoftware: VTM Decoder Version %s ", NEXT_SOFTWARE_VERSION /*NV_VERSION*/ );
+  fprintf( stdout, "VVCSoftware: BMS Decoder Version %s ", NEXT_SOFTWARE_VERSION /*NV_VERSION*/ );
 #endif
   fprintf( stdout, NVM_ONOS );
   fprintf( stdout, NVM_COMPILEDBY );
diff --git a/source/App/EncoderApp/CMakeLists.txt b/source/App/EncoderApp/CMakeLists.txt
index 49ba1259f7bd21fa18ce71ceec10798eb3eb1ada..d5322a6b6f7740b35c7666be2ec0847f52e50709 100644
--- a/source/App/EncoderApp/CMakeLists.txt
+++ b/source/App/EncoderApp/CMakeLists.txt
@@ -29,7 +29,7 @@ add_executable( ${EXE_NAME} ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} ${CMAKE_CU
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 
 if( ENABLE_VTM )
-  target_compile_definitions( ${EXE_NAME} PUBLIC JEM_TOOLS=0 )
+  target_compile_definitions( ${EXE_NAME} PUBLIC BMS_TOOLS=0 )
 endif()
 
 if( SET_ENABLE_TRACING )
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index b48d190874b0fa1066265e4e19e957b232ce894c..59a2e070d57c2a80b45449b634ea5991a5f11768 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -247,6 +247,9 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setFastIntraEMT                                      ( m_FastEMT & m_EMT & 1 );
   m_cEncLib.setInterEMT                                          ( ( m_EMT >> 1 ) & 1 );
   m_cEncLib.setFastInterEMT                                      ( ( m_FastEMT >> 1 ) & ( m_EMT >> 1 ) & 1 );
+#endif
+#if JVET_K0157
+  m_cEncLib.setUseCompositeRef                                   ( m_compositeRefEnabled );
 #endif
   // ADD_NEW_TOOL : (encoder app) add setting of tool enabling flags and associated parameters here
 
@@ -549,6 +552,17 @@ void EncApp::xCreateLib( std::list<PelUnitBuf*>& recBufList
 #endif
   if (!m_reconFileName.empty())
   {
+    if (m_packedYUVMode && ((m_outputBitDepth[CH_L] != 10 && m_outputBitDepth[CH_L] != 12)
+        || ((m_iSourceWidth & (1 + (m_outputBitDepth[CH_L] & 3))) != 0)))
+    {
+      EXIT ("Invalid output bit-depth or image width for packed YUV output, aborting\n");
+    }
+    if (m_packedYUVMode && (m_chromaFormatIDC != CHROMA_400) && ((m_outputBitDepth[CH_C] != 10 && m_outputBitDepth[CH_C] != 12)
+        || (((m_iSourceWidth / SPS::getWinUnitX (m_chromaFormatIDC)) & (1 + (m_outputBitDepth[CH_C] & 3))) != 0)))
+    {
+      EXIT ("Invalid chroma output bit-depth or image width for packed YUV output, aborting\n");
+    }
+
     m_cVideoIOYuvReconFile.open(m_reconFileName, true, m_outputBitDepth, m_outputBitDepth, m_internalBitDepth);  // write mode
   }
 
@@ -737,7 +751,10 @@ void EncApp::xWriteOutput( int iNumEncoded, std::list<PelUnitBuf*>& recBufList
 
       if (!m_reconFileName.empty())
       {
-        m_cVideoIOYuvReconFile.write( *pcPicYuvRecTop, *pcPicYuvRecBottom, ipCSC, m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom, NUM_CHROMA_FORMAT, m_isTopFieldFirst );
+        m_cVideoIOYuvReconFile.write( *pcPicYuvRecTop, *pcPicYuvRecBottom,
+                                      ipCSC,
+                                      false, // TODO: m_packedYUVMode,
+                                      m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom, NUM_CHROMA_FORMAT, m_isTopFieldFirst );
       }
     }
   }
@@ -749,7 +766,9 @@ void EncApp::xWriteOutput( int iNumEncoded, std::list<PelUnitBuf*>& recBufList
       if (!m_reconFileName.empty())
       {
         m_cVideoIOYuvReconFile.write( *pcPicYuvRec,
-                                      ipCSC, m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
+                                      ipCSC,
+                                      m_packedYUVMode,
+                                      m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
       }
     }
   }
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index 4070fcc13a3ca88005a324ed4cc6d256df767ed3..5149ada2620ed43d54ee65ffd6e1c3f57547ef8b 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -107,6 +107,7 @@ EncAppCfg::EncAppCfg()
 : m_inputColourSpaceConvert(IPCOLOURSPACE_UNCHANGED)
 , m_snrInternalColourSpace(false)
 , m_outputInternalColourSpace(false)
+, m_packedYUVMode(false)
 #if EXTENSION_360_VIDEO
 , m_ext360(*this)
 #endif
@@ -766,6 +767,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("FramesToBeEncoded,f",                             m_framesToBeEncoded,                                  0, "Number of frames to be encoded (default=all)")
   ("ClipInputVideoToRec709Range",                     m_bClipInputVideoToRec709Range,                   false, "If true then clip input video to the Rec. 709 Range on loading when InternalBitDepth is less than MSBExtendedBitDepth")
   ("ClipOutputVideoToRec709Range",                    m_bClipOutputVideoToRec709Range,                  false, "If true then clip output video to the Rec. 709 Range on saving when OutputBitDepth is less than InternalBitDepth")
+  ("PYUV",                                            m_packedYUVMode,                                  false, "If true then output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data. Ignored for interlaced output.")
   ("SummaryOutFilename",                              m_summaryOutFilename,                          string(), "Filename to use for producing summary output file. If empty, do not produce a file.")
   ("SummaryPicFilenameBase",                          m_summaryPicFilenameBase,                      string(), "Base filename to use for producing summary picture output files. The actual filenames used will have I.txt, P.txt and B.txt appended. If empty, do not produce a file.")
   ("SummaryVerboseness",                              m_summaryVerboseness,                                0u, "Specifies the level of the verboseness of the text output")
@@ -849,6 +851,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     "\t1:  Enable fast methods only for Intra EMT\n"
     "\t2:  Enable fast methods only for Inter EMT\n"
     "\t3:  Enable fast methods for both Intra & Inter EMT\n")
+#endif
+#if JVET_K0157
+  ("CompositeLTReference",                            m_compositeRefEnabled,                            false, "Enable Composite Long Term Reference Frame")
 #endif
   // ADD_NEW_TOOL : (encoder app) add parsing parameters here
 
@@ -912,7 +917,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   /* Quantization parameters */
 #if QP_SWITCHING_FOR_PARALLEL
   ("QP,q",                                            m_iQP,                                               30, "Qp value")
-  ("QPIncrementFrame,-qpif",                          m_qpIncrementAtSourceFrame,       OptionalValue<uint32_t>(), "If a source file frame number is specified, the internal QP will be incremented for all POCs associated with source frames >= frame number. If empty, do not increment.")
+  ("QPIncrementFrame,-qpif",                          m_qpIncrementAtSourceFrame,   OptionalValue<uint32_t>(), "If a source file frame number is specified, the internal QP will be incremented for all POCs associated with source frames >= frame number. If empty, do not increment.")
 #else
   ("QP,q",                                            m_fQP,                                             30.0, "Qp value, if value is float, QP is switched once during encoding")
 #endif
@@ -1291,6 +1296,21 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   po::ErrorReporter err;
   const list<const char*>& argv_unhandled = po::scanArgv(opts, argc, (const char**) argv, err);
 
+#if JVET_K0157
+  if (m_compositeRefEnabled) // flag bound to the "CompositeLTReference" option; evaluated right after the po::scanArgv() call above
+  { // xCheckParameter() compensates with multipleFactor = 2 wherever it compares POCs against m_iGOPSize
+    for (int i = 0; i < m_iGOPSize; i++) // NOTE(review): m_iGOPSize bounds the index before any range check visible in this hunk - confirm it cannot exceed the capacity of m_GOPList
+    {
+      m_GOPList[i].m_POC *= 2; // double the POC of GOP entry i; presumably this frees every other POC value for the composite reference pictures - TODO confirm
+      m_GOPList[i].m_deltaRPS *= 2; // scale the entry's deltaRPS by the same factor
+      for (int j = 0; j < m_GOPList[i].m_numRefPics; j++) // NOTE(review): m_numRefPics is likewise used unchecked here - confirm against the size of m_referencePics
+      {
+        m_GOPList[i].m_referencePics[j] *= 2; // double every m_referencePics value of this entry
+      }
+    }
+  }
+#endif
+
   for (list<const char*>::const_iterator it = argv_unhandled.begin(); it != argv_unhandled.end(); it++)
   {
     msg( ERROR, "Unhandled argument ignored: `%s'\n", *it);
@@ -1797,14 +1817,25 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #endif
 
 #if ENABLE_QPA
-  if( m_LargeCTU && m_bUsePerceptQPA && !m_bUseAdaptiveQP && ( m_iSourceHeight <= 1280 ) && ( m_iSourceWidth <= 2048 ) )
+  if (m_bUsePerceptQPA && !m_bUseAdaptiveQP && m_dualTree && (m_cbQpOffsetDualTree != 0 || m_crQpOffsetDualTree != 0))
+  {
+    msg( WARNING, "*************************************************************************\n" );
+    msg( WARNING, "* WARNING: chroma QPA on, ignoring nonzero dual-tree chroma QP offsets! *\n" );
+    msg( WARNING, "*************************************************************************\n" );
+  }
+
+ #if QP_SWITCHING_FOR_PARALLEL
+  if( m_LargeCTU && ( m_iQP < 38 ) && ( m_iGOPSize > 4 ) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && ( m_iSourceHeight <= 1280 ) && ( m_iSourceWidth <= 2048 ) )
+ #else
+  if( m_LargeCTU && ( ( int ) m_fQP < 38 ) && ( m_iGOPSize > 4 ) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && ( m_iSourceHeight <= 1280 ) && ( m_iSourceWidth <= 2048 ) )
+ #endif
 #else
   if( false )
 #endif
   {
-    msg( WARNING, "***************************************************************************\n" );
-    msg( WARNING, "* WARNING: QPA on with LargeCTU for incompatible size, limiting CTU size! *\n" );
-    msg( WARNING, "***************************************************************************\n" );
+    msg( WARNING, "*************************************************************************\n" );
+    msg( WARNING, "* WARNING: QPA on with large CTU for <=HD sequences, limiting CTU size! *\n" );
+    msg( WARNING, "*************************************************************************\n" );
 
     m_uiCTUSize = m_uiMaxCUWidth;
     if( ( 1u << m_quadtreeTULog2MaxSize ) > m_uiCTUSize ) m_quadtreeTULog2MaxSize--;
@@ -1924,6 +1955,9 @@ bool EncAppCfg::xCheckParameter()
 #if JVET_K1000_SIMPLIFIED_EMT
     xConfirmPara( m_EMT, "EMT only allowed with NEXT profile" );
     xConfirmPara( m_FastEMT, "EMT only allowed with NEXT profile" );
+#endif
+#if JVET_K0157
+    xConfirmPara(m_compositeRefEnabled, "Composite Reference Frame is only allowed with NEXT profile");
 #endif
     // ADD_NEW_TOOL : (parameter check) add a check for next tools here
   }
@@ -2217,6 +2251,9 @@ bool EncAppCfg::xCheckParameter()
   xConfirmPara( m_bipredSearchRange < 0 ,                                                   "Bi-prediction refinement search range must be more than 0" );
   xConfirmPara( m_minSearchWindow < 0,                                                      "Minimum motion search window size for the adaptive window ME must be greater than or equal to 0" );
   xConfirmPara( m_iMaxDeltaQP > MAX_DELTA_QP,                                               "Absolute Delta QP exceeds supported range (0 to 7)" );
+#if ENABLE_QPA
+  xConfirmPara( m_bUsePerceptQPA && m_uiDeltaQpRD > 0,                                      "Perceptual QPA cannot be used together with slice-level multiple-QP optimization" );
+#endif
 #if SHARP_LUMA_DELTA_QP
   xConfirmPara( m_lumaLevelToDeltaQPMapping.mode && m_uiDeltaQpRD > 0,                      "Luma-level-based Delta QP cannot be used together with slice level multiple-QP optimization\n" );
 #endif
@@ -2391,6 +2428,9 @@ bool EncAppCfg::xCheckParameter()
     xConfirmPara( m_intraConstraintFlag, "IntraConstraintFlag cannot be 1 for inter sequences");
   }
 
+#if JVET_K0157
+  int multipleFactor = m_compositeRefEnabled ? 2 : 1;
+#endif
   bool verifiedGOP=false;
   bool errorGOP=false;
   int checkGOP=1;
@@ -2411,7 +2451,11 @@ bool EncAppCfg::xCheckParameter()
 
   for(int i=0; i<m_iGOPSize; i++)
   {
+#if JVET_K0157
+    if (m_GOPList[i].m_POC == m_iGOPSize * multipleFactor)
+#else
     if(m_GOPList[i].m_POC==m_iGOPSize)
+#endif
     {
       xConfirmPara( m_GOPList[i].m_temporalId!=0 , "The last frame in each GOP must have temporal ID = 0 " );
     }
@@ -2445,7 +2489,11 @@ bool EncAppCfg::xCheckParameter()
   while(!verifiedGOP&&!errorGOP)
   {
     int curGOP = (checkGOP-1)%m_iGOPSize;
+#if JVET_K0157
+    int curPOC = ((checkGOP - 1) / m_iGOPSize)*m_iGOPSize * multipleFactor + m_GOPList[curGOP].m_POC;
+#else
     int curPOC = ((checkGOP-1)/m_iGOPSize)*m_iGOPSize + m_GOPList[curGOP].m_POC;
+#endif
     if(m_GOPList[curGOP].m_POC<0)
     {
       msg( WARNING, "\nError: found fewer Reference Picture Sets than GOPSize\n");
@@ -2472,7 +2520,11 @@ bool EncAppCfg::xCheckParameter()
               found=true;
               for(int k=0; k<m_iGOPSize; k++)
               {
+#if JVET_K0157
+                if (absPOC % (m_iGOPSize * multipleFactor) == m_GOPList[k].m_POC % (m_iGOPSize * multipleFactor))
+#else
                 if(absPOC%m_iGOPSize == m_GOPList[k].m_POC%m_iGOPSize)
+#endif
                 {
                   if(m_GOPList[k].m_temporalId==m_GOPList[curGOP].m_temporalId)
                   {
@@ -2524,7 +2576,11 @@ bool EncAppCfg::xCheckParameter()
         {
           //step backwards in coding order and include any extra available pictures we might find useful to replace the ones with POC < 0.
           int offGOP = (checkGOP-1+offset)%m_iGOPSize;
+#if JVET_K0157
+          int offPOC = ((checkGOP - 1 + offset) / m_iGOPSize)*(m_iGOPSize * multipleFactor) + m_GOPList[offGOP].m_POC;
+#else
           int offPOC = ((checkGOP-1+offset)/m_iGOPSize)*m_iGOPSize + m_GOPList[offGOP].m_POC;
+#endif
           if(offPOC>=0&&m_GOPList[offGOP].m_temporalId<=m_GOPList[curGOP].m_temporalId)
           {
             bool newRef=false;
@@ -2926,7 +2982,7 @@ void EncAppCfg::xPrintParameter()
   msg( DETAILS, "Real     Format                        : %dx%d %gHz\n", m_iSourceWidth - m_confWinLeft - m_confWinRight, m_iSourceHeight - m_confWinTop - m_confWinBottom, (double)m_iFrameRate / m_temporalSubsampleRatio );
   msg( DETAILS, "Internal Format                        : %dx%d %gHz\n", m_iSourceWidth, m_iSourceHeight, (double)m_iFrameRate / m_temporalSubsampleRatio );
   msg( DETAILS, "Sequence PSNR output                   : %s\n", ( m_printMSEBasedSequencePSNR ? "Linear average, MSE-based" : "Linear average only" ) );
-  msg(DETAILS, "Hexadecimal PSNR output                : %s\n", ( m_printHexPsnr ? "Enabled" : "Disabled" ) );
+  msg( DETAILS, "Hexadecimal PSNR output                : %s\n", ( m_printHexPsnr ? "Enabled" : "Disabled" ) );
   msg( DETAILS, "Sequence MSE output                    : %s\n", ( m_printSequenceMSE ? "Enabled" : "Disabled" ) );
   msg( DETAILS, "Frame MSE output                       : %s\n", ( m_printFrameMSE ? "Enabled" : "Disabled" ) );
   msg( DETAILS, "Cabac-zero-word-padding                : %s\n", ( m_cabacZeroWordPaddingEnabled ? "Enabled" : "Disabled" ) );
@@ -3164,6 +3220,9 @@ void EncAppCfg::xPrintParameter()
 #endif
 #if JVET_K1000_SIMPLIFIED_EMT
     msg( VERBOSE, "EMT: %1d(intra) %1d(inter) ", m_EMT & 1, ( m_EMT >> 1 ) & 1 );
+#endif
+#if JVET_K0157
+    msg(VERBOSE, "CompositeLTReference:%d ", m_compositeRefEnabled);
 #endif
   }
   // ADD_NEW_TOOL (add some output indicating the usage of tools)
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index 10a0554a5566dda4e2157f8e84a4f11b136eb0a1..2ec9c86e06aa867ee60c1bc34e0936f64a488404 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -115,6 +115,7 @@ protected:
   bool      m_cabacZeroWordPaddingEnabled;
   bool      m_bClipInputVideoToRec709Range;
   bool      m_bClipOutputVideoToRec709Range;
+  bool      m_packedYUVMode;                                  ///< If true, output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data
 
   // profile/level
   Profile::Name m_profile;
@@ -229,6 +230,9 @@ protected:
   int       m_FastEMT;                                        ///< XZ: Fast Methods of Enhanced Multiple Transform
 #endif
 
+#if JVET_K0157
+  bool      m_compositeRefEnabled;
+#endif
   // ADD_NEW_TOOL : (encoder app) add tool enabling flags and associated parameters here
 
   unsigned  m_uiMaxCUWidth;                                   ///< max. CU width in pixel
@@ -285,7 +289,7 @@ protected:
   int       m_maxNumOffsetsPerPic;                            ///< SAO maximun number of offset per picture
   bool      m_saoCtuBoundary;                                 ///< SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas
 #if K0238_SAO_GREEDY_MERGE_ENCODING
-  bool      m_saoGreedyMergeEnc;                              ///< SAO greedy merge encoding algorithm 
+  bool      m_saoGreedyMergeEnc;                              ///< SAO greedy merge encoding algorithm
 #endif
   // coding tools (loop filter)
   bool      m_bLoopFilterDisable;                             ///< flag for using deblocking filter
diff --git a/source/App/EncoderApp/encmain.cpp b/source/App/EncoderApp/encmain.cpp
index 7c34b5d14e793dab2ce557f07cfc6dbbb090e9f4..6d5bdf109d5ebcfee1e85c19cb14b0b4601d0597 100644
--- a/source/App/EncoderApp/encmain.cpp
+++ b/source/App/EncoderApp/encmain.cpp
@@ -87,9 +87,9 @@ int main(int argc, char* argv[])
   // print information
   fprintf( stdout, "\n" );
 #ifdef SVNREVISION
-  fprintf( stdout, "VVCSoftware: VTM Encoder Version %s (%s@r%s) ", NEXT_SOFTWARE_VERSION, SVNRELATIVEURL, SVNREVISION /*NV_VERSION*/ );
+  fprintf( stdout, "VVCSoftware: BMS Encoder Version %s (%s@r%s) ", NEXT_SOFTWARE_VERSION, SVNRELATIVEURL, SVNREVISION /*NV_VERSION*/ );
 #else
-  fprintf( stdout, "VVCSoftware: VTM Encoder Version %s ", NEXT_SOFTWARE_VERSION /*NV_VERSION*/ );
+  fprintf( stdout, "VVCSoftware: BMS Encoder Version %s ", NEXT_SOFTWARE_VERSION /*NV_VERSION*/ );
 #endif
   fprintf( stdout, NVM_ONOS );
   fprintf( stdout, NVM_COMPILEDBY );
diff --git a/source/App/SEIRemovalApp/seiremovalmain.cpp b/source/App/SEIRemovalApp/seiremovalmain.cpp
index 4a4d79fa62554f374ca7b3359896630432756987..96eb667d378cd6bca665dd9f589b8d96e2b4f37b 100644
--- a/source/App/SEIRemovalApp/seiremovalmain.cpp
+++ b/source/App/SEIRemovalApp/seiremovalmain.cpp
@@ -57,9 +57,9 @@ int main(int argc, char* argv[])
   // print information
   fprintf( stdout, "\n" );
 #ifdef SVNREVISION
-  fprintf( stdout, "VVCSoftware: VTM Decoder Version %s (%s@r%s) ", NEXT_SOFTWARE_VERSION, SVNRELATIVEURL, SVNREVISION /*NV_VERSION*/ );
+  fprintf( stdout, "VVCSoftware: BMS Decoder Version %s (%s@r%s) ", NEXT_SOFTWARE_VERSION, SVNRELATIVEURL, SVNREVISION /*NV_VERSION*/ );
 #else
-  fprintf( stdout, "VVCSoftware: VTM Decoder Version %s ", NEXT_SOFTWARE_VERSION /*NV_VERSION*/ );
+  fprintf( stdout, "VVCSoftware: BMS Decoder Version %s ", NEXT_SOFTWARE_VERSION /*NV_VERSION*/ );
 #endif
   fprintf( stdout, NVM_ONOS );
   fprintf( stdout, NVM_COMPILEDBY );
diff --git a/source/Lib/CommonAnalyserLib/CMakeLists.txt b/source/Lib/CommonAnalyserLib/CMakeLists.txt
index ff996a3391d016844131439ed8d495671ec2982c..e0cdf5fa1e65f7bf849ba5296d9a0cd897260f68 100644
--- a/source/Lib/CommonAnalyserLib/CMakeLists.txt
+++ b/source/Lib/CommonAnalyserLib/CMakeLists.txt
@@ -27,6 +27,9 @@ file( GLOB AVX2_SRC_FILES "../CommonLib/x86/avx2/*.cpp" )
 # get sse4.1 source files
 file( GLOB SSE41_SRC_FILES "../CommonLib/x86/sse41/*.cpp" )
 
+# get sse4.2 source files
+file( GLOB SSE42_SRC_FILES "../CommonLib/x86/sse42/*.cpp" )
+
 # get libmd5 source files
 file( GLOB MD5_SRC_FILES "../libmd5/*.cpp" )
 
@@ -35,7 +38,7 @@ file( GLOB MD5_INC_FILES "../libmd5/*.h" )
 
 
 # get all source files
-set( SRC_FILES ${BASE_SRC_FILES} ${X86_SRC_FILES} ${SSE41_SRC_FILES} ${AVX_SRC_FILES} ${AVX2_SRC_FILES} ${MD5_SRC_FILES} )
+set( SRC_FILES ${BASE_SRC_FILES} ${X86_SRC_FILES} ${SSE41_SRC_FILES} ${SSE42_SRC_FILES} ${AVX_SRC_FILES} ${AVX2_SRC_FILES} ${MD5_SRC_FILES} )
 
 # get all include files
 set( INC_FILES ${BASE_INC_FILES} ${X86_INC_FILES} ${MD5_INC_FILES} )
@@ -46,7 +49,7 @@ add_library( ${LIB_NAME} STATIC ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} )
 target_compile_definitions( ${LIB_NAME} PUBLIC RExt__DECODER_DEBUG_TOOL_STATISTICS=1 )
 
 if( ENABLE_VTM )
-  target_compile_definitions( ${LIB_NAME} PUBLIC JEM_TOOLS=0 )
+  target_compile_definitions( ${LIB_NAME} PUBLIC BMS_TOOLS=0 )
 endif()
 
 if( EXTENSION_360_VIDEO )
@@ -86,6 +89,7 @@ target_link_libraries( ${LIB_NAME} Threads::Threads )
 
 # set needed compile definitions
 set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE41 )
+set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE42 )
 set_property( SOURCE ${AVX_SRC_FILES}   APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX )
 set_property( SOURCE ${AVX2_SRC_FILES}  APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX2 )
 # set needed compile flags
@@ -94,6 +98,7 @@ if( MSVC )
   set_property( SOURCE ${AVX2_SRC_FILES}  APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" )
 elseif( UNIX )
   set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" )
+  set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.2" )
   set_property( SOURCE ${AVX_SRC_FILES}   APPEND PROPERTY COMPILE_FLAGS "-mavx" )
   set_property( SOURCE ${AVX2_SRC_FILES}  APPEND PROPERTY COMPILE_FLAGS "-mavx2" )
 endif()
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
index 8d5420fefb0c314ef8ead60648be042b9f1a4cad..d00fd667beaf955fb4fcfc7cb3f3a7986528419f 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
@@ -217,19 +217,25 @@ void AdaptiveLoopFilter::create( const int picWidth, const int picHeight, const
   // Laplacian based activity
   for( int i = 0; i < NUM_DIRECTIONS; i++ )
   {
-    m_laplacian[i] = new int*[m_CLASSIFICATION_BLK_SIZE + 5];
-
-    for( int y = 0; y < m_CLASSIFICATION_BLK_SIZE + 5; y++ )
+    if ( m_laplacian[i] == nullptr )
     {
-      m_laplacian[i][y] = new int[m_CLASSIFICATION_BLK_SIZE + 5];
+      m_laplacian[i] = new int*[m_CLASSIFICATION_BLK_SIZE + 5];
+
+      for( int y = 0; y < m_CLASSIFICATION_BLK_SIZE + 5; y++ )
+      {
+        m_laplacian[i][y] = new int[m_CLASSIFICATION_BLK_SIZE + 5];
+      }
     }
   }
 
   // Classification
-  m_classifier = new AlfClassifier*[picHeight];
-  for( int i = 0; i < picHeight; i++ )
+  if ( m_classifier == nullptr )
   {
-    m_classifier[i] = new AlfClassifier[picWidth];
+    m_classifier = new AlfClassifier*[picHeight];
+    for( int i = 0; i < picHeight; i++ )
+    {
+      m_classifier[i] = new AlfClassifier[picWidth];
+    }
   }
 }
 
diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h
index 565a121fc4bf2392057cc8b2295cd2c7e90886cc..1763242e3dc0af2fd30165b09045a5028c460863 100644
--- a/source/Lib/CommonLib/Buffer.h
+++ b/source/Lib/CommonLib/Buffer.h
@@ -366,7 +366,6 @@ void AreaBuf<T>::addAvg( const AreaBuf<const T> &other1, const AreaBuf<const T>
 template<>
 void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng );
 
-
 template<typename T>
 void AreaBuf<T>::linearTransform( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng )
 {
@@ -385,6 +384,7 @@ void AreaBuf<T>::toLast( const ClpRng& clpRng )
 template<>
 void AreaBuf<Pel>::toLast( const ClpRng& clpRng );
 
+
 template<typename T>
 void AreaBuf<T>::removeHighFreq( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng )
 {
@@ -579,7 +579,8 @@ struct UnitBuf
   void addAvg               ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const bool chromaOnly = false, const bool lumaOnly = false);
   void extendSingleBorderPel();
   void extendBorderPel      ( unsigned margin );
-  void removeHighFreq       ( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs);
+  void removeHighFreq       ( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs
+                            );
 
         UnitBuf<      T> subBuf (const UnitArea& subArea);
   const UnitBuf<const T> subBuf (const UnitArea& subArea) const;
@@ -648,6 +649,7 @@ void UnitBuf<T>::reconstruct(const UnitBuf<const T> &pred, const UnitBuf<const T
   }
 }
 
+
 template<typename T>
 void UnitBuf<T>::addAvg(const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const bool chromaOnly /* = false */, const bool lumaOnly /* = false */)
 {
@@ -681,7 +683,8 @@ void UnitBuf<T>::extendBorderPel( unsigned margin )
 }
 
 template<typename T>
-void UnitBuf<T>::removeHighFreq( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs)
+void UnitBuf<T>::removeHighFreq( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs
+                               )
 {
   for( unsigned i = 0; i < bufs.size(); i++ )
   {
diff --git a/source/Lib/CommonLib/CMakeLists.txt b/source/Lib/CommonLib/CMakeLists.txt
index c633fec92feffc7661ac904de1c2632a7b4e3224..7a91672c034457ed88f3bf059fed4a539d1e7a7e 100644
--- a/source/Lib/CommonLib/CMakeLists.txt
+++ b/source/Lib/CommonLib/CMakeLists.txt
@@ -24,6 +24,9 @@ file( GLOB AVX_SRC_FILES "x86/avx/*.cpp" )
 # get avx2 source files
 file( GLOB AVX2_SRC_FILES "x86/avx2/*.cpp" )
 
+# get sse4.2 source files
+file( GLOB SSE42_SRC_FILES "x86/sse42/*.cpp" )
+
 # get sse4.1 source files
 file( GLOB SSE41_SRC_FILES "x86/sse41/*.cpp" )
 
@@ -35,7 +38,7 @@ file( GLOB MD5_INC_FILES "../libmd5/*.h" )
 
 
 # get all source files
-set( SRC_FILES ${BASE_SRC_FILES} ${X86_SRC_FILES} ${SSE41_SRC_FILES} ${AVX_SRC_FILES} ${AVX2_SRC_FILES} ${MD5_SRC_FILES} )
+set( SRC_FILES ${BASE_SRC_FILES} ${X86_SRC_FILES} ${SSE41_SRC_FILES} ${SSE42_SRC_FILES} ${AVX_SRC_FILES} ${AVX2_SRC_FILES} ${MD5_SRC_FILES} )
 
 # get all include files
 set( INC_FILES ${BASE_INC_FILES} ${X86_INC_FILES} ${MD5_INC_FILES} )
@@ -45,7 +48,7 @@ set( INC_FILES ${BASE_INC_FILES} ${X86_INC_FILES} ${MD5_INC_FILES} )
 add_library( ${LIB_NAME} STATIC ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} )
 
 if( ENABLE_VTM )
-  target_compile_definitions( ${LIB_NAME} PUBLIC JEM_TOOLS=0 )
+  target_compile_definitions( ${LIB_NAME} PUBLIC BMS_TOOLS=0 )
 endif()
 
 if( EXTENSION_360_VIDEO )
@@ -85,6 +88,7 @@ target_link_libraries( ${LIB_NAME} Threads::Threads )
 
 # set needed compile definitions
 set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE41 )
+set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE42 )
 set_property( SOURCE ${AVX_SRC_FILES}   APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX )
 set_property( SOURCE ${AVX2_SRC_FILES}  APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX2 )
 # set needed compile flags
@@ -93,6 +97,7 @@ if( MSVC )
   set_property( SOURCE ${AVX2_SRC_FILES}  APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" )
 elseif( UNIX )
   set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" )
+  set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.2" )
   set_property( SOURCE ${AVX_SRC_FILES}   APPEND PROPERTY COMPILE_FLAGS "-mavx" )
   set_property( SOURCE ${AVX2_SRC_FILES}  APPEND PROPERTY COMPILE_FLAGS "-mavx2" )
 endif()
diff --git a/source/Lib/CommonLib/CodingStatistics.h b/source/Lib/CommonLib/CodingStatistics.h
index 619a896553eacc8df47fd3308ea91e1dba6a5e87..43496c0356fad85d3ef477cd0dcc62cd4dd5b41b 100644
--- a/source/Lib/CommonLib/CodingStatistics.h
+++ b/source/Lib/CommonLib/CodingStatistics.h
@@ -66,12 +66,6 @@ enum CodingStatisticsType
   STATS__CABAC_BITS__REF_FRM_IDX,
   STATS__CABAC_BITS__MVD,
   STATS__CABAC_BITS__MVD_EP,
-#if JVET_K_AFFINE
-  STATS__CABAC_BITS__AFFINE_FLAG,
-#if JVET_K0337_AFFINE_6PARA
-  STATS__CABAC_BITS__AFFINE_TYPE,
-#endif
-#endif
   STATS__CABAC_BITS__TRANSFORM_SUBDIV_FLAG,
   STATS__CABAC_BITS__QT_ROOT_CBF,
   STATS__CABAC_BITS__DELTA_QP_EP,
@@ -79,21 +73,14 @@ enum CodingStatisticsType
   STATS__CABAC_BITS__QT_CBF,
   STATS__CABAC_BITS__CROSS_COMPONENT_PREDICTION,
   STATS__CABAC_BITS__TRANSFORM_SKIP_FLAGS,
-
   STATS__CABAC_BITS__LAST_SIG_X_Y,
   STATS__CABAC_BITS__SIG_COEFF_GROUP_FLAG,
   STATS__CABAC_BITS__SIG_COEFF_MAP_FLAG,
-#if JVET_K0072
-  STATS__CABAC_BITS__PAR_FLAG,
-#endif
   STATS__CABAC_BITS__GT1_FLAG,
   STATS__CABAC_BITS__GT2_FLAG,
   STATS__CABAC_BITS__SIGN_BIT,
   STATS__CABAC_BITS__ESCAPE_BITS,
   STATS__CABAC_BITS__SAO,
-#if JVET_K0371_ALF
-  STATS__CABAC_BITS__ALF,
-#endif
   STATS__CABAC_TRM_BITS,
   STATS__CABAC_FIXED_BITS,
   STATS__CABAC_PCM_ALIGN_BITS,
@@ -104,6 +91,22 @@ enum CodingStatisticsType
   STATS__CABAC_EP_BIT_ALIGNMENT,
   STATS__CABAC_BITS__ALIGNED_SIGN_BIT,
   STATS__CABAC_BITS__ALIGNED_ESCAPE_BITS,
+  STATS__CABAC_BITS__OTHER,
+  STATS__CABAC_BITS__INVALID,
+  STATS__TOOL_TOTAL_FRAME,// This is a special case and is not included in the report.
+#if JVET_K_AFFINE
+  STATS__CABAC_BITS__AFFINE_FLAG,
+  STATS__TOOL_AFF,
+#if JVET_K0337_AFFINE_6PARA
+  STATS__CABAC_BITS__AFFINE_TYPE,
+#endif
+#endif
+#if JVET_K0072
+  STATS__CABAC_BITS__PAR_FLAG,
+#endif
+#if JVET_K0371_ALF
+  STATS__CABAC_BITS__ALF,
+#endif
 #if JVET_K0357_AMVR
   STATS__CABAC_BITS__IMV_FLAG,
 #endif
@@ -111,13 +114,23 @@ enum CodingStatisticsType
   STATS__CABAC_BITS__EMT_CU_FLAG,
   STATS__CABAC_BITS__EMT_TU_INDEX,
 #endif
-  STATS__CABAC_BITS__OTHER,
-  STATS__CABAC_BITS__INVALID,
-  STATS__TOOL_TOTAL_FRAME,// This is a special case and is not included in the report.
+#if JVET_K1000_SIMPLIFIED_EMT
+  STATS__TOOL_EMT,
+#endif
   STATS__TOOL_TOTAL,
   STATS__NUM_STATS
 };
 
+enum CodingStatisticsMode // bit flags kept in CodingStatistics::m_mode and tested with '&' in ~CodingStatistics()
+{
+  STATS__MODE_NONE  = 0, // no flag set
+  STATS__MODE_BITS  = 1, // flag checked before calling OutputBitStats()
+  STATS__MODE_TOOLS = 2, // flag checked before calling OutputToolStats()
+
+  STATS__MODE_ALL   = // both flags set; the value the CodingStatistics constructor stores in m_mode
+    STATS__MODE_BITS | STATS__MODE_TOOLS
+};
+
 static inline const char* getName(CodingStatisticsType name)
 {
   static const char *statNames[]=
@@ -326,11 +339,13 @@ public:
     friend class CodingStatistics;
   };
 
+  int m_mode;
+
 private:
 
   CodingStatisticsData data;
 
-  CodingStatistics() : data()
+  CodingStatistics() : m_mode(STATS__MODE_ALL), data()
   {
   }
 
@@ -408,11 +423,8 @@ private:
     printf( "\n" );
   }
 
-public:
-
-  ~CodingStatistics()
+  void OutputBitStats()
   {
-#if RExt__DECODER_DEBUG_BIT_STATISTICS
     const int64_t es = CODINGSTATISTICS_ENTROPYSCALE;
 
     int64_t countTotal = 0;
@@ -594,9 +606,82 @@ public:
     OutputDashedLine( "GRAND TOTAL" );
     epTotalBits += cavlcTotalBits;
     OutputLine      ( "TOTAL",                  '~', "~~GT~~", "~~GT~~", "~~GT~~", cabacTotalBits, epTotalBits );
+  }
+
+  void OutputToolStats() // prints the "Tools statistics" table to stdout: detail lines and a subtotal per entry, then a per-entry breakdown
+  {
+    printf("\n");
+    printf( " %-45s-   Width Height   Type        Count  Impacted pixels  %% Impacted pixels\n", "Tools statistics" );
+    OutputDashedLine( "" );
+
+    const uint64_t toolCount = STATS__TOOL_TOTAL - (STATS__TOOL_TOTAL_FRAME + 1); // number of enum entries strictly between STATS__TOOL_TOTAL_FRAME and STATS__TOOL_TOTAL
+    StatTool subTotalTool[toolCount]; // per-entry sum over all sub-classes; NOTE(review): relies on StatTool default-constructing to an all-zero state - confirm
+    StatTool statTotalTool[toolCount][CODING_STATS_NUM_SUBCLASSES]; // per-entry, per-sub-class values kept for the breakdown pass further down
+    uint64_t totalPixels = GetStatisticTool( STATS__TOOL_TOTAL_FRAME ).pixels; // forwarded to every OutputLine call; presumably the denominator of the "% Impacted pixels" column - TODO confirm
+    for( int i = 0; i < toolCount; i++ ) // first pass: detail lines plus one subtotal per entry
+    {
+      const int type = i + (STATS__TOOL_TOTAL_FRAME + 1); // map the local index back to its CodingStatisticsType value
+      const char *pName = getName( CodingStatisticsType( type ) );
+
+      for( uint32_t c = 0; c < CODING_STATS_NUM_SUBCLASSES; c++ )
+      {
+        StatTool &sTool   = data.statistics_tool[type][c];
+        if( sTool.count == 0 )
+        {
+          continue; // nothing recorded for this sub-class: no line, no accumulation
+        }
+
+        uint32_t wIdx = CodingStatisticsClassType::GetSubClassWidth( c );
+        uint32_t hIdx = CodingStatisticsClassType::GetSubClassHeight( c );
+        OutputLine( pName, ':', wIdx, hIdx, CodingStatisticsClassType::GetSubClassString( c ), sTool, totalPixels ); // detail line, tagged ':'
+
+        statTotalTool[i][c] += sTool;
+        subTotalTool[i] += sTool;
+      }
+
+      if (subTotalTool[i].count != 0)
+      {
+        OutputLine( pName, '~', "~~ST~~", "~~ST~~", "~~ST~~", subTotalTool[i], totalPixels ); // subtotal line, tagged '~'
+      }
+    }
+
+    for( int i = 0; i < toolCount; i++ ) // second pass: breakdown per entry
+    {
+      const int type = i + (STATS__TOOL_TOTAL_FRAME + 1);
+      const char *pName = getName( CodingStatisticsType( type ) );
+
+      if (subTotalTool[i].count != 0) // the heading is emitted once per entry with a non-zero subtotal, not once for the whole section
+        OutputDashedLine( "Break down by tool/Channel type" );
+
+      for( uint32_t c = 0; c < CODING_STATS_NUM_SUBCLASSES; c += CODING_STATS_NUM_SIZES ) // c walks the sub-classes in groups of CODING_STATS_NUM_SIZES
+      {
+        StatTool typeTotalTool;
+        for( uint32_t w = 0; w < CODING_STATS_NUM_WIDTHS; w++ )
+        {
+          for( uint32_t h = 0; h < CODING_STATS_NUM_HEIGHTS; h++ )
+            typeTotalTool += statTotalTool[i][c + h * CODING_STATS_NUM_WIDTHS + w]; // sums the group over every (w, h); assumes CODING_STATS_NUM_SIZES == CODING_STATS_NUM_WIDTHS * CODING_STATS_NUM_HEIGHTS - TODO confirm
+        }
+
+        if( typeTotalTool.count != 0 )
+        {
+          OutputLine( pName, '=', "-", "-", CodingStatisticsClassType::GetSubClassString( c ), typeTotalTool, totalPixels ); // breakdown line, tagged '='; width and height columns show "-"
+        }
+      }
+    }
+  }
+
+public:
+
+  ~CodingStatistics() // emits the collected reports on destruction, filtered by the m_mode flags
+  {
+#if RExt__DECODER_DEBUG_BIT_STATISTICS
+    if (m_mode & STATS__MODE_BITS) // bit report only when the BITS flag is set (and the macro above is non-zero)
+      OutputBitStats();
 #endif //RExt__DECODER_DEBUG_BIT_STATISTICS
 
 #ifdef RExt__DECODER_DEBUG_TOOL_STATISTICS
+    if (m_mode & STATS__MODE_TOOLS) // tool report only when the TOOLS flag is set (and the macro above is defined)
+      OutputToolStats();
 #endif //RExt__DECODER_DEBUG_TOOL_STATISTICS
   }
 
diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp
index b6475c12c99705eab6e18a262dbe20964db70fe9..55c7eebec481a2c82e238f989bd3af1d8886681c 100644
--- a/source/Lib/CommonLib/CodingStructure.cpp
+++ b/source/Lib/CommonLib/CodingStructure.cpp
@@ -557,7 +557,9 @@ cTUTraverser CodingStructure::traverseTUs( const UnitArea& unit, const ChannelTy
 
 void CodingStructure::allocateVectorsAtPicLevel()
 {
-  const int  twice = ( !pcv->ISingleTree && slice->isIntra() && pcv->chrFormat != CHROMA_400 ) ? 2 : 1;
+  const int  twice = ( 
+    !pcv->ISingleTree && slice->isIntra()
+    && pcv->chrFormat != CHROMA_400 ) ? 2 : 1;
   size_t allocSize = twice * unitScale[0].scale( area.blocks[0].size() ).area();
 
   cus.reserve( allocSize );
diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h
index 15e5ea4dfd717f85164fc88ded69d70ce83ff0cf..df7337a1cad487139cf6ab0b2dfeef57cb619028 100644
--- a/source/Lib/CommonLib/CodingStructure.h
+++ b/source/Lib/CommonLib/CodingStructure.h
@@ -58,7 +58,6 @@ enum PictureType
   PIC_ORG_RESI,
   NUM_PIC_TYPES
 };
-
 extern XUCache g_globalUnitCache;
 
 // ---------------------------------------------------------------------------
@@ -147,7 +146,6 @@ public:
   cCUTraverser    traverseCUs(const UnitArea& _unit, const ChannelType _chType) const;
   cPUTraverser    traversePUs(const UnitArea& _unit, const ChannelType _chType) const;
   cTUTraverser    traverseTUs(const UnitArea& _unit, const ChannelType _chType) const;
-
   // ---------------------------------------------------------------------------
   // encoding search utilities
   // ---------------------------------------------------------------------------
diff --git a/source/Lib/CommonLib/Common.h b/source/Lib/CommonLib/Common.h
index a10194a5dffab672a5a45d59c80531d0d813737d..ad0e5ec79adb4872eef350b34eac84e9ce427bbc 100644
--- a/source/Lib/CommonLib/Common.h
+++ b/source/Lib/CommonLib/Common.h
@@ -114,7 +114,6 @@ struct UnitScale
   Size     scale( const Size     &size ) const { return { size.width >> posx, size.height >> posy }; }
   Area     scale( const Area    &_area ) const { return Area( scale( _area.pos() ), scale( _area.size() ) ); }
 };
-
 inline size_t rsAddr(const Position &pos, const uint32_t stride, const UnitScale &unitScale )
 {
   return (size_t)(stride >> unitScale.posx) * (size_t)(pos.y >> unitScale.posy) + (size_t)(pos.x >> unitScale.posx);
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index 7c8e0c1d131dfdf15e61305bf6aa46907fcd0417..d878f43d5bd1a0489a65f8b5aafc010739d7bd89 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -331,6 +331,7 @@ static const int AFFINE_MAX_NUM_COMB =                             12; ///< max
 static const int AFFINE_MIN_BLOCK_SIZE =                            4; ///< Minimum affine MC block size
 #endif
 
+
 #if W0038_DB_OPT
 static const int MAX_ENCODER_DEBLOCKING_QUALITY_LAYERS =           8 ;
 #endif
@@ -401,7 +402,6 @@ static const unsigned C806_ALF_TEMPPRED_NUM =                      6;
 
 static const int NTAPS_LUMA               =                         8; ///< Number of taps for luma
 static const int NTAPS_CHROMA             =                         4; ///< Number of taps for chroma
-
 // ====================================================================================================================
 // Macro functions
 // ====================================================================================================================
diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp
index d7d21d2fce751fc8d80eb670e6cec8687021e779..b387c12caabc004df24bc3a9bb36e0313f6cf210 100644
--- a/source/Lib/CommonLib/Contexts.cpp
+++ b/source/Lib/CommonLib/Contexts.cpp
@@ -383,6 +383,7 @@ const CtxSet ContextSetCfg::AffineType = ContextSetCfg::addCtxSet
 #endif
 #endif
 
+
 const CtxSet ContextSetCfg::Mvd = ContextSetCfg::addCtxSet
 ({
   {  169, 198,},
@@ -804,7 +805,7 @@ void CtxStore<BinProbModel>::init( int qp, int initId )
   const std::vector<uint8_t>& initTable = ContextSetCfg::getInitTable( initId );
   CHECK( m_CtxBuffer.size() != initTable.size(),
         "Size of init table (" << initTable.size() << ") does not match size of context buffer (" << m_CtxBuffer.size() << ")." );
-  int clippedQP = std::min( std::max( 0, qp ), MAX_QP );
+  int clippedQP = Clip3( 0, MAX_QP, qp );
   for( std::size_t k = 0; k < m_CtxBuffer.size(); k++ )
   {
     m_CtxBuffer[k].init( clippedQP, initTable[k] );
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index b5e4bc9dd77d8e79b3e7d0776214029da55155a3..89899afb677fe2f02a92f5037ae8298172c68421 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -109,7 +109,6 @@ void InterPrediction::destroy()
       m_filteredBlockTmp[i][c] = nullptr;
     }
   }
-
 }
 
 void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
@@ -124,7 +123,6 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
   }
 
   m_currChromaFormat = chromaFormatIDC;
-
   if( m_acYuvPred[REF_PIC_LIST_0][COMPONENT_Y] == nullptr ) // check if first is null (in which case, nothing initialised yet)
   {
     for( uint32_t c = 0; c < MAX_NUM_COMPONENT; c++ )
@@ -148,6 +146,7 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
       }
     }
 
+
     m_iRefListIdx = -1;
     
   }
@@ -365,7 +364,9 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R
 }
 #endif
 
-void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi )
+
+void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi 
+)
 {
   const SPS &sps = *pu.cs->sps;
 
@@ -400,7 +401,6 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList&
   for( uint32_t comp = COMPONENT_Y; comp < pcYuvPred.bufs.size() && comp <= m_maxCompIDToPred; comp++ )
   {
     const ComponentID compID = ComponentID( comp );
-
 #if JVET_K_AFFINE
     if ( pu.cu->affine )
     {
@@ -526,7 +526,6 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
     refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, pu.blocks[compID].size() ) );
   }
 
-
   if( yFrac == 0 )
   {
     m_if.filterHor(compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, width, height, xFrac, rndRes, chFmt, clpRng);
@@ -537,7 +536,7 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
   }
   else
   {
-    PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]);
+      PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]);
 
     int vFilterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA;
     m_if.filterHor(compID, (Pel*) refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, width, height + vFilterSize - 1, xFrac, false,         chFmt, clpRng);
@@ -545,7 +544,6 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
     m_if.filterVer(compID, (Pel*) tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, width, height,                   yFrac, false, rndRes, chFmt, clpRng);
     JVET_J0090_SET_CACHE_ENABLE( true );
   }
-
 }
 
 #if JVET_K_AFFINE
@@ -761,7 +759,8 @@ void InterPrediction::xWeightedAverage( const PredictionUnit& pu, const CPelUnit
   }
 }
 
-void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList )
+void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList 
+)
 {
         CodingStructure &cs = *pu.cs;
   const PPS &pps            = *cs.pps;
@@ -801,7 +800,8 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBu
   return;
 }
 
-void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRefPicList )
+void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRefPicList 
+)
 {
   for( auto &pu : CU::traversePUs( cu ) )
   {
@@ -810,10 +810,12 @@ void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRef
   }
 }
 
-void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/ )
+void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/ 
+)
 {
   PelUnitBuf predBuf = pu.cs->getPredBuf( pu );
-  motionCompensation( pu, predBuf, eRefPicList );
+  motionCompensation( pu, predBuf, eRefPicList 
+  );
 }
 
 
diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h
index a895322cead55ccc2ae4e85a53d3fe50bca856ee..cbe87a5165bd8774d59e4c1d92a3971ba862a6a4 100644
--- a/source/Lib/CommonLib/InterPrediction.h
+++ b/source/Lib/CommonLib/InterPrediction.h
@@ -49,7 +49,6 @@
 
 #include "RdCost.h"
 #include "ContextModelling.h"
-
 // forward declaration
 class Mv;
 
@@ -61,7 +60,6 @@ class Mv;
 // Class definition
 // ====================================================================================================================
 
-
 class InterPrediction : public WeightPrediction
 {
 private:
@@ -84,7 +82,8 @@ protected:
   int                  m_iRefListIdx;
   
 
-  void xPredInterUni            ( const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi );
+  void xPredInterUni            ( const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi 
+  );
   void xPredInterBi             ( PredictionUnit& pu, PelUnitBuf &pcYuvPred );
   void xPredInterBlk            ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
                                  );
@@ -99,7 +98,6 @@ protected:
 #if JVET_K0346
   void xSubPuMC(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X);
 #endif
-
   void destroy();
 
 
@@ -114,9 +112,12 @@ public:
   void    init                (RdCost* pcRdCost, ChromaFormat chromaFormatIDC);
 
   // inter
-  void    motionCompensation  (PredictionUnit &pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X);
-  void    motionCompensation  (PredictionUnit &pu, const RefPicList &eRefPicList = REF_PIC_LIST_X);
-  void    motionCompensation  (CodingUnit &cu,     const RefPicList &eRefPicList = REF_PIC_LIST_X);
+  void    motionCompensation  (PredictionUnit &pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X
+  );
+  void    motionCompensation  (PredictionUnit &pu, const RefPicList &eRefPicList = REF_PIC_LIST_X
+  );
+  void    motionCompensation  (CodingUnit &cu,     const RefPicList &eRefPicList = REF_PIC_LIST_X
+  );
 
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   void    cacheAssign( CacheModel *cache );
diff --git a/source/Lib/CommonLib/LoopFilter.cpp b/source/Lib/CommonLib/LoopFilter.cpp
index 96a0dda5b7157dd6be99d3d00bb6f53af05cfcb4..926240e1e5253f7daf95a2ce8d5b62853d35682d 100644
--- a/source/Lib/CommonLib/LoopFilter.cpp
+++ b/source/Lib/CommonLib/LoopFilter.cpp
@@ -421,7 +421,6 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De
   const Slice& sliceQ = *cu.slice;
 
   const Position& cuPosLuma = cu.lumaPos();
-
   const Position& posQ  = localPos;
   const Position  posP  = ( edgeDir == EDGE_VER ) ? posQ.offset( -1, 0 ) : posQ.offset( 0, -1 );
 
diff --git a/source/Lib/CommonLib/MotionInfo.h b/source/Lib/CommonLib/MotionInfo.h
index 001b84ad427718adf2ae4404ff1c4f7c8c2cb897..d90715d127b887d1027ac638717921697392a53a 100644
--- a/source/Lib/CommonLib/MotionInfo.h
+++ b/source/Lib/CommonLib/MotionInfo.h
@@ -108,7 +108,6 @@ struct MotionInfo
 
   Mv      mv     [ NUM_REF_PIC_LIST_01 ];
   int16_t   refIdx [ NUM_REF_PIC_LIST_01 ];
-
   MotionInfo()        : isInter(  false ), interDir( 0 ), sliceIdx( 0 ), refIdx{ NOT_VALID, NOT_VALID } { }
   // ensure that MotionInfo(0) produces '\x000....' bit pattern - needed to work with AreaBuf - don't use this constructor for anything else
   MotionInfo( int i ) : isInter( i != 0 ), interDir( 0 ), sliceIdx( 0 ), refIdx{         0,         0 } { CHECKD( i != 0, "The argument for this constructor has to be '0'" ); }
@@ -143,4 +142,5 @@ struct MotionInfo
   }
 };
 
+
 #endif // __MOTIONINFO__
diff --git a/source/Lib/CommonLib/Mv.h b/source/Lib/CommonLib/Mv.h
index e74c7fce2a17e3e5aad232edc6e17c9681fe1d3c..001d68ebe7c785a78f0df789d5d7ff68262498df 100644
--- a/source/Lib/CommonLib/Mv.h
+++ b/source/Lib/CommonLib/Mv.h
@@ -265,7 +265,6 @@ public:
   }
 #endif
 };// END CLASS DEFINITION MV
-
 #if JVET_K0357_AMVR
 void roundMV( Mv& rcMv, unsigned imvShift );
 #endif
diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp
index a90cf4aaeb9371c3d10895fc60f8c6d2a9530ad3..525b8cc5fd7f134105e491908a4942e9551a86f1 100644
--- a/source/Lib/CommonLib/Picture.cpp
+++ b/source/Lib/CommonLib/Picture.cpp
@@ -728,6 +728,10 @@ Picture::Picture()
   {
     m_prevQP[i] = -1;
   }
+#if JVET_K0157
+  m_spliceIdx = NULL;
+  m_ctuNums = 0;
+#endif
 }
 
 void Picture::create(const ChromaFormat &_chromaFormat, const Size &size, const unsigned _maxCUSize, const unsigned _margin, const bool _decoder)
@@ -788,6 +792,13 @@ void Picture::destroy()
     tileMap = nullptr;
   }
 #endif
+#if JVET_K0157
+  if (m_spliceIdx)
+  {
+    delete[] m_spliceIdx;
+    m_spliceIdx = NULL;
+  }
+#endif
 }
 
 void Picture::createTempBuffers( const unsigned _maxCUSize )
@@ -903,6 +914,14 @@ void Picture::finalInit( const SPS& sps, const PPS& pps )
   tileMap = new TileMap;
   tileMap->create( sps, pps );
 #endif
+#if JVET_K0157
+  if (m_spliceIdx == NULL)
+  {
+    m_ctuNums = cs->pcv->sizeInCtus;
+    m_spliceIdx = new int[m_ctuNums];
+    memset(m_spliceIdx, 0, m_ctuNums * sizeof(int));
+  }
+#endif
 }
 
 void Picture::allocateNewSlice()
@@ -1113,3 +1132,25 @@ Pel* Picture::getOrigin( const PictureType &type, const ComponentID compID ) con
   return M_BUFS( jId, type ).getOrigin( compID );
 
 }
+
+#if JVET_K0157
+void Picture::createSpliceIdx(int nums)  // (re)allocates m_spliceIdx with nums zeroed entries and records nums in m_ctuNums
+{
+  delete[] m_spliceIdx;                // release a table allocated earlier (e.g. by finalInit) so it is not leaked; no-op when null
+  m_ctuNums = nums;
+  m_spliceIdx = new int[m_ctuNums]();  // trailing "()" value-initializes every entry to 0
+}
+
+bool Picture::getSpliceFull()
+{
+  // Tally the entries of m_spliceIdx that hold a non-zero value.
+  int numSet = 0;
+  for (int ctu = 0; ctu < m_ctuNums; ctu++)
+  {
+    numSet += (m_spliceIdx[ctu] != 0) ? 1 : 0;
+  }
+
+  // Report true once at least a quarter of the m_ctuNums entries are non-zero.
+  return !(numSet < m_ctuNums * 0.25);
+}
+#endif
diff --git a/source/Lib/CommonLib/Picture.h b/source/Lib/CommonLib/Picture.h
index f9b360b987dccf6c160fb7063b6e350e56d8d5f6..d742e85cd8a0c7277c44472ab49918d74a3006d8 100644
--- a/source/Lib/CommonLib/Picture.h
+++ b/source/Lib/CommonLib/Picture.h
@@ -225,6 +225,14 @@ struct Picture : public UnitArea
   int  getPOC()                               const { return poc; }
   void setBorderExtension( bool bFlag)              { m_bIsBorderExtended = bFlag;}
   Pel* getOrigin( const PictureType &type, const ComponentID compID ) const;
+
+#if JVET_K0157
+  int           getSpliceIdx(uint32_t idx) const { return m_spliceIdx[idx]; } // returns entry idx of m_spliceIdx; idx is not range-checked
+  void          setSpliceIdx(uint32_t idx, int poc) { m_spliceIdx[idx] = poc; } // stores poc as entry idx of m_spliceIdx; idx is not range-checked
+  void          createSpliceIdx(int nums);
+  bool          getSpliceFull();
+#endif
+
 public:
   bool m_bIsBorderExtended;
   bool referenced;
@@ -240,6 +248,11 @@ public:
   uint32_t layer;
   uint32_t depth;
 
+#if JVET_K0157
+  int* m_spliceIdx;
+  int  m_ctuNums;
+#endif
+
 #if ENABLE_SPLIT_PARALLELISM
 #if ENABLE_WPP_PARALLELISM
   PelStorage m_bufs[( PARL_SPLIT_MAX_NUM_JOBS * PARL_WPP_MAX_NUM_THREADS )][NUM_PIC_TYPES];
diff --git a/source/Lib/CommonLib/Quant.cpp b/source/Lib/CommonLib/Quant.cpp
index fddd9d2291ba5cd4d2dd7a9ecf9a17f43cb1c75c..61ebccbf0a90aa0bbc79003642a72c5ba31e8b5f 100644
--- a/source/Lib/CommonLib/Quant.cpp
+++ b/source/Lib/CommonLib/Quant.cpp
@@ -793,7 +793,8 @@ void Quant::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf
     const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
     // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
 
-    const int64_t iAdd = int64_t(tu.cs->slice->getSliceType() == I_SLICE ? 171 : 85) << int64_t(iQBits - 9);
+    const int64_t iAdd = int64_t(tu.cs->slice->getSliceType() == I_SLICE 
+      ? 171 : 85) << int64_t(iQBits - 9);
 #if HEVC_USE_SIGN_HIDING
     const int qBits8 = iQBits - 8;
 #endif
diff --git a/source/Lib/CommonLib/RdCost.h b/source/Lib/CommonLib/RdCost.h
index 766816757c3bd86b2f9cb5a27d903303ccfaff81..c33ac7457a500d00821ce02c7feded2a7b3dd1ca 100644
--- a/source/Lib/CommonLib/RdCost.h
+++ b/source/Lib/CommonLib/RdCost.h
@@ -89,7 +89,10 @@ public:
   // - 0 = no subsampling, 1 = even rows, 2 = every 4th, etc.
   int                   subShift;
 
-  DistParam() : org(), cur(), step( 1 ), bitDepth( 0 ), useMR( false ), applyWeight( false ), isBiPred( false ), wpCur( nullptr ), compID( MAX_NUM_COMPONENT ), maximumDistortionForEarlyExit( std::numeric_limits<Distortion>::max() ), subShift( 0 ) { }
+  DistParam() // default state: fixed initial values for the members listed below
+    : org(), cur(), step( 1 ), bitDepth( 0 ), useMR( false ), applyWeight( false ), isBiPred( false ), wpCur( nullptr )
+    , compID( MAX_NUM_COMPONENT ), maximumDistortionForEarlyExit( std::numeric_limits<Distortion>::max() ), subShift( 0 )
+  { }
 };
 
 /// RD cost computation class
@@ -116,7 +119,6 @@ private:
   int                     m_iCostScale;
 
   bool                    m_useQtbt;
-
 public:
   RdCost();
   virtual ~RdCost();
@@ -169,6 +171,7 @@ public:
   void           setCostScale             ( int iCostScale )           { m_iCostScale = iCostScale; }
   Distortion     getCost                  ( uint32_t b )                   { return Distortion( m_motionLambda * b ); }
 
+
 #if ENABLE_SPLIT_PARALLELISM
   void copyState( const RdCost& other );
 #endif
diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp
index dfb4ce1b5fb444f4a2cc837857543a7a5ff79aab..84c9beca97ea9ca805a2651e8f4d1b721e4a5e06 100644
--- a/source/Lib/CommonLib/Rom.cpp
+++ b/source/Lib/CommonLib/Rom.cpp
@@ -186,6 +186,7 @@ const int g_aiNonLMPosThrs[] = {  3,  1,  0 };
 #endif
 
 
+
 // initialize ROM variables
 void initROM()
 {
diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp
index 7e26fc803f31578221081ed0b1f997c3e4949196..be7267d1d8605577ba9aec499c404c631344f8d8 100644
--- a/source/Lib/CommonLib/Slice.cpp
+++ b/source/Lib/CommonLib/Slice.cpp
@@ -419,7 +419,6 @@ void Slice::setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr, bool b
       pcRefPic = xGetLongTermRefPic(rcListPic, m_pRPS->getPOC(i), m_pRPS->getCheckLTMSBPresent(i));
     }
   }
-
   // ref_pic_list_init
   Picture*  rpsCurrList0[MAX_NUM_REF+1];
   Picture*  rpsCurrList1[MAX_NUM_REF+1];
@@ -432,7 +431,7 @@ void Slice::setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr, bool b
     // - Otherwise, when the current picture contains a P or B slice, the value of NumPocTotalCurr shall not be equal to 0.
     if (getRapPicFlag())
     {
-      CHECK(numPicTotalCurr != 0, "Invalid state");
+        CHECK(numPicTotalCurr != 0, "Invalid state");
     }
 
     if (m_eSliceType == I_SLICE)
@@ -502,7 +501,6 @@ void Slice::setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr, bool b
       m_bIsUsedAsLongTerm[REF_PIC_LIST_1][rIdx] = ( cIdx >= NumPicStCurr0 + NumPicStCurr1 );
     }
   }
-
     // For generalized B
   // note: maybe not existed case (always L0 is copied to L1 if L1 is empty)
   if( bCopyL0toL1ErrorCase && isInterB() && getNumRefIdx(REF_PIC_LIST_1) == 0)
@@ -533,7 +531,7 @@ int Slice::getNumRpsCurrTempList() const
       numRpsCurrTempList++;
     }
   }
-  return numRpsCurrTempList;
+    return numRpsCurrTempList;
 }
 
 void Slice::initEqualRef()
@@ -1312,7 +1310,11 @@ int Slice::checkThatAllRefPicsAreAvailable( PicList& rcListPic, const ReferenceP
 
 /** Function for constructing an explicit Reference Picture Set out of the available pictures in a referenced Reference Picture Set
 */
-void Slice::createExplicitReferencePictureSetFromReference( PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool isRAP, int pocRandomAccess, bool bUseRecoveryPoint, const bool bEfficientFieldIRAPEnabled)
+void Slice::createExplicitReferencePictureSetFromReference(PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool isRAP, int pocRandomAccess, bool bUseRecoveryPoint, const bool bEfficientFieldIRAPEnabled
+#if JVET_K0157
+                                                         , bool isEncodeLtRef, bool isCompositeRefEnable
+#endif
+)
 {
   Picture* rpcPic;
   int i, j;
@@ -1352,7 +1354,11 @@ void Slice::createExplicitReferencePictureSetFromReference( PicList& rcListPic,
         }
         else
         {
+#if JVET_K0157
+          if (bEfficientFieldIRAPEnabled && rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC() + (isCompositeRefEnable ? 2 : 1))
+#else
           if(bEfficientFieldIRAPEnabled && rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC()+1)
+#endif
           {
             irapIsInRPS = true;
           }
@@ -1371,7 +1377,11 @@ void Slice::createExplicitReferencePictureSetFromReference( PicList& rcListPic,
     while ( iterPic != rcListPic.end())
     {
       rpcPic = *(iterPic++);
+#if JVET_K0157
+      if (rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC() + (isCompositeRefEnable ? 2 : 1))
+#else
       if(rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC()+1)
+#endif
       {
         pLocalRPS->setDeltaPOC(k, 1);
         pLocalRPS->setUsed(k, true);
@@ -1381,6 +1391,53 @@ void Slice::createExplicitReferencePictureSetFromReference( PicList& rcListPic,
       }
     }
   }
+#if JVET_K0157
+  if (isCompositeRefEnable && isEncodeLtRef) // both flags set: redo the matching with every delta POC shifted by +1
+  {
+    useNewRPS = true;
+    nrOfNegativePictures = 0; // restart both counts; they are re-accumulated by the loop below
+    nrOfPositivePictures = 0;
+    for (i = 0; i<pReferencePictureSet->getNumberOfPictures(); i++)
+    {
+      j = 0; // j is only reset and incremented in this block, never read here
+      k = 0; // NOTE(review): k restarts for every i, so matches are written from entry 0 again while the counts keep growing - confirm whether this reset belongs before the for loop
+
+      // loop through all pictures in the reference picture buffer
+      PicList::iterator iterPic = rcListPic.begin();
+      while (iterPic != rcListPic.end())
+      {
+        j++;
+        rpcPic = *(iterPic++);
+        // match on POC == current POC + 1 + delta POC of entry i, and only pictures still flagged as referenced
+        if (rpcPic->getPOC() == this->getPOC() + 1 + pReferencePictureSet->getDeltaPOC(i) && rpcPic->referenced)
+        {
+          // This picture exists as a reference picture
+          // and should be added to the explicit Reference Picture Set
+          pLocalRPS->setDeltaPOC(k, pReferencePictureSet->getDeltaPOC(i) + 1);
+          pLocalRPS->setUsed(k, pReferencePictureSet->getUsed(i) && (!isRAP));
+          if (bEfficientFieldIRAPEnabled)
+          {
+            pLocalRPS->setUsed(k, pLocalRPS->getUsed(k) && !(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) + 1 < pocRandomAccess));
+          }
+          // classify the stored entry by the sign of its (shifted) delta POC
+          if (pLocalRPS->getDeltaPOC(k) < 0)
+          {
+            nrOfNegativePictures++;
+          }
+          else
+          {
+            if (bEfficientFieldIRAPEnabled && rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC() + 2)
+            {
+              irapIsInRPS = true;
+            }
+            nrOfPositivePictures++;
+          }
+          k++;
+        }
+      }
+    }
+  }
+#endif
   pLocalRPS->setNumberOfNegativePictures(nrOfNegativePictures);
   pLocalRPS->setNumberOfPositivePictures(nrOfPositivePictures);
   pLocalRPS->setNumberOfPictures(nrOfNegativePictures+nrOfPositivePictures);
@@ -1641,6 +1698,9 @@ SPSNext::SPSNext( SPS& sps )
 #else
 #endif
   , m_MTTMode                   ( 0 )
+#if JVET_K0157
+    , m_compositeRefEnabled     ( false )
+#endif
   // ADD_NEW_TOOL : (sps extension) add tool enabling flags here (with "false" as default values)
 {
 }
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index 5164daada2b1b45d73837a0b4b009635c7863015..94a7fe402e8d135c4b8865b4c8fc7809202eb273 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -859,6 +859,9 @@ private:
   // multi type tree (QTBT + triple split)
   unsigned    m_MTTMode;
 
+#if JVET_K0157
+  bool              m_compositeRefEnabled;        //composite longterm reference
+#endif
   // ADD_NEW_TOOL : (sps extension) add tool enabling flags and associated parameters here
 
 public:
@@ -915,7 +918,6 @@ public:
   void      setUseInterEMT        ( bool b )                                        { m_InterEMT = b; }
   bool      getUseInterEMT        ()                                      const     { return m_InterEMT; }
 #endif
-
   //=====  additional parameters  =====
   // qtbt
   void      setCTUSize            ( unsigned    ctuSize )                           { m_CTUSize = ctuSize; }
@@ -959,6 +961,10 @@ public:
   unsigned  getMTTMode            ()                                      const     { return m_MTTMode; }
   void      setMTTMode            ( unsigned    mode )                              { m_MTTMode = mode; m_MTTEnabled = ( m_MTTMode != 0 ); }
 
+#if JVET_K0157
+  void      setUseCompositeRef(bool b) { m_compositeRefEnabled = b; } // stores b in m_compositeRefEnabled (composite long-term reference flag)
+  bool      getUseCompositeRef()                                      const { return m_compositeRefEnabled; } // returns the flag stored by setUseCompositeRef
+#endif
   // ADD_NEW_TOOL : (sps extension) add access functions for tool enabling flags and associated parameters here
 
 };
@@ -1764,9 +1770,13 @@ public:
   bool                        isTemporalLayerSwitchingPoint( PicList& rcListPic )                                           const;
   bool                        isStepwiseTemporalLayerSwitchingPointCandidate( PicList& rcListPic )                          const;
   int                         checkThatAllRefPicsAreAvailable( PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool printErrors, int pocRandomAccess = 0, bool bUseRecoveryPoint = false) const;
-  void                        createExplicitReferencePictureSetFromReference( PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool isRAP, int pocRandomAccess, bool bUseRecoveryPoint, const bool bEfficientFieldIRAPEnabled);
+  void                        createExplicitReferencePictureSetFromReference(PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool isRAP, int pocRandomAccess, bool bUseRecoveryPoint, const bool bEfficientFieldIRAPEnabled
+#if JVET_K0157
+                              , bool isEncodeLtRef, bool isCompositeRefEnable
+#endif
+  );
   void                        setMaxNumMergeCand(uint32_t val )                          { m_maxNumMergeCand = val;                                      }
-  uint32_t                        getMaxNumMergeCand() const                             { return m_maxNumMergeCand;                                     }
+  uint32_t                    getMaxNumMergeCand() const                             { return m_maxNumMergeCand;                                     }
 
   void                        setNoOutputPriorPicsFlag( bool val )                   { m_noOutputPriorPicsFlag = val;                                }
   bool                        getNoOutputPriorPicsFlag() const                       { return m_noOutputPriorPicsFlag;                               }
diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp
index c3d3c15ef78e2f63578faec4b4f27543bac2f696..52235829949eedd056e0b520df558d109b519393 100644
--- a/source/Lib/CommonLib/TrQuant.cpp
+++ b/source/Lib/CommonLib/TrQuant.cpp
@@ -703,6 +703,13 @@ void TrQuant::xT( const TransformUnit &tu, const ComponentID &compID, const CPel
   const unsigned ucMode          = getEmtMode ( tu, compID );
   const unsigned ucTrIdx         = getEmtTrIdx( tu, compID );
 
+  if( ucTrIdx != DCT2 )
+  {
+#if RExt__DECODER_DEBUG_TOOL_STATISTICS
+    CodingStatistics::IncrementStatisticTool( CodingStatisticsClassType{ STATS__TOOL_EMT, uint32_t( iWidth ), uint32_t( iHeight ), compID } );
+#endif
+  }
+
 #if INTRA67_3MPM
 #if HEVC_USE_4x4_DSTVII
   xTrMxN_EMT(channelBitDepth, resi.buf, resi.stride, dstCoeff.buf, iWidth, iHeight, useDST, maxLog2TrDynamicRange, ucMode, ucTrIdx
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 499837ae61f1eacb1a90213179ad92f3c819a48b..ed0b7a5e665abb0e9b58caa77c8856e41e57ec10 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -50,6 +50,9 @@
 #include <assert.h>
 #include <cassert>
 
+
+
+
 #define JVET_K1000_SIMPLIFIED_EMT                         1 // EMT with only DCT-2, DCT-8 and DST-7
 
 #define JVET_K0371_ALF                                    1
@@ -75,6 +78,9 @@
 #define REUSE_CU_RESULTS                                  1
 #endif
 
+#define JVET_K0390_RATECTRL                               1
+#define RATECTRL_FIX_FULLNBIT                             1  //fix the QP-lambda relationship in rate control if JVET-K0154 for FULL_NBIT is enabled
+
 #define JVET_K0352_MERGE_ENCOPT                           1 // encoder optimization for merge
 #define JVET_K0556_MAX_TT_SIZE_64                         1 // Maximum TT size is set to 64x64 for P/B-slice
 
@@ -82,11 +88,12 @@
 #define JVET_K0554                                        1 // when adopting, also remove the macro HM_QTBT_ONLY_QT_IMPLICIT (keep the case for value 0)
 
 #define JVET_K0346                                        1 // simplifications on ATMVP
+
 #define JVET_K0063_PDPC_SIMP                              1 // Simplified PDPC
 
 #define JVET_K0351_LESS_CONSTRAINT                        1 // Only disallow binary split with same orientation in center partition of the ternary split and release the other constraints in K0351.
 
-#define JVET_K0251_QP_EXT                                 1 // Extending the QP parameter value range for coarse quantization 
+#define JVET_K0251_QP_EXT                                 1 // Extending the QP parameter value range for coarse quantization
 
 #define JVET_K_AFFINE                                     1
 #if JVET_K_AFFINE
@@ -104,6 +111,7 @@
 
 #define JVET_K0357_AMVR                                   1 // Adaptive motion vector resolution separated from JEM_TOOLS macro
 
+
 #ifndef JVET_B0051_NON_MPM_MODE
 #define JVET_B0051_NON_MPM_MODE                         ( 1 && JEM_TOOLS )
 
@@ -157,11 +165,16 @@
 
 #ifndef ENABLE_TRACING
 #define ENABLE_TRACING                                    0 // DISABLE by default (enable only when debugging, requires 15% run-time in decoding) -- see documentation in 'doc/DTrace for NextSoftware.pdf'
-
+#if ENABLE_TRACING
+#define K0149_BLOCK_STATISTICS                            1 // enables block statistics, which can be analysed with YUView (https://github.com/IENT/YUView)
+#if K0149_BLOCK_STATISTICS
+#define BLOCK_STATS_AS_CSV                                0 // statistics will be written in a comma separated value format. this is not supported by YUView
+#endif
+#endif
 #endif // ! ENABLE_TRACING
 
 #define WCG_EXT                                           0 // part of JEM sharp Luma qp
-#define WCG_WPSNR                                         WCG_EXT 
+#define WCG_WPSNR                                         WCG_EXT
 
 #if HEVC_TOOLS
 #define HEVC_USE_INTRA_SMOOTHING_T32                      1
@@ -180,6 +193,7 @@
 #define HEVC_USE_SIGN_HIDING                              1
 #endif
 
+#define JVET_K0157                                        1
 
 #define KEEP_PRED_AND_RESI_SIGNALS                        0
 
@@ -298,7 +312,7 @@
 
 #define SHARP_LUMA_DELTA_QP                               1 ///< include non-normative LCU deltaQP and normative chromaQP change
 #define ER_CHROMA_QP_WCG_PPS                              1 ///< Chroma QP model for WCG used in Anchor 3.2
-#define ENABLE_QPA                                        0
+#define ENABLE_QPA                                        1 ///< Non-normative perceptual QP adaptation according to JVET-H0047 and JVET-K0206. Deactivated by default, activated using encoder arguments --PerceptQPA=1 --SliceChromaQPOffsetPeriodicity=1
 
 
 
@@ -1422,8 +1436,8 @@ enum AlfFilterType
 struct AlfFilterShape
 {
   AlfFilterShape( int size )
-    : filterLength( size ), 
-    numCoeff( size * size / 4 + 1 ), 
+    : filterLength( size ),
+    numCoeff( size * size / 4 + 1 ),
     filterSize( size * size / 2 + 1 )
   {
     if( size == 5 )
@@ -1466,7 +1480,7 @@ struct AlfFilterShape
                     2,
                 2,  2,  2,
             2,  2,  2,  2,  2,
-        2,  2,  2,  1,  1 
+        2,  2,  2,  1,  1
       };
 
       golombIdx = {
diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp
index 9b14f80f9b0655fa01a05983c6ecf80df72a2c5a..05eff4c40d068e0679a55b01e5c6d75815b5ed9b 100644
--- a/source/Lib/CommonLib/Unit.cpp
+++ b/source/Lib/CommonLib/Unit.cpp
@@ -274,7 +274,6 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other )
   imv               = other.imv;
   imvNumCand        = other.imvNumCand;
 #endif
-
   return *this;
 }
 
diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h
index 18694015491a8c7565477df51e8dc1607e0738a4..3e453c722c3e1d1257b64d7f532dd4dfb80b3724 100644
--- a/source/Lib/CommonLib/Unit.h
+++ b/source/Lib/CommonLib/Unit.h
@@ -313,7 +313,6 @@ struct CodingUnit : public UnitArea
 #if JVET_K1000_SIMPLIFIED_EMT
   uint8_t          emtFlag;
 #endif
-
   // needed for fast imv mode decisions
   int8_t          imvNumCand;
 
diff --git a/source/Lib/CommonLib/UnitPartitioner.cpp b/source/Lib/CommonLib/UnitPartitioner.cpp
index 89fea3e4b6d2a41c9bbf80a244d8c26393167cbf..f12c4889f911584bba1c431475bc529d4a759f2f 100644
--- a/source/Lib/CommonLib/UnitPartitioner.cpp
+++ b/source/Lib/CommonLib/UnitPartitioner.cpp
@@ -472,11 +472,11 @@ bool QTBTPartitioner::canSplit( const PartSplit split, const CodingStructure &cs
     break;
   case CU_TRIH_SPLIT:
     if( ( cs.sps->getSpsNext().getMTTMode() & 1 ) != 1 )          return false;
-    if( area.height <= 2 * minTtSize || area.height > maxTtSize ) return false;
+    if( area.height <= 2 * minTtSize || area.height > maxTtSize || area.width > maxTtSize) return false;
     break;
   case CU_TRIV_SPLIT:
     if( ( cs.sps->getSpsNext().getMTTMode() & 1 ) != 1 )          return false;
-    if( area.width <= 2 * minTtSize || area.width > maxTtSize )   return false;
+    if( area.width <= 2 * minTtSize || area.width > maxTtSize || area.height > maxTtSize)  return false;
     break;
   default:
     break;
diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp
index 9d131a11216e7456eae993006008c21eec02da1c..9086d3cf473387c333d2cd34af1b740ada730ae3 100644
--- a/source/Lib/CommonLib/UnitTools.cpp
+++ b/source/Lib/CommonLib/UnitTools.cpp
@@ -66,6 +66,30 @@ UnitArea CS::getArea( const CodingStructure &cs, const UnitArea &area, const Cha
   return isDualITree( cs ) ? area.singleChan( chType ) : area;
 }
 
+#if DMVR_JVET_LOW_LATENCY_K0217
+void CS::setRefinedMotionField(CodingStructure &cs)
+{
+  // Visit every PU of every CU in cs.cus; PUs passing the filter get their L0 mvd folded into both mvs.
+  for (CodingUnit *codingUnit : cs.cus)
+  {
+    for (auto &predUnit : CU::traversePUs(*codingUnit))
+    {
+      const bool refine = predUnit.cs->sps->getSpsNext().getUseDMVR() && predUnit.mergeFlag
+                       && predUnit.mergeType == MRG_TYPE_DEFAULT_N && !predUnit.frucMrgMode
+                       && !predUnit.cu->LICFlag && !predUnit.cu->affine
+                       && PU::isBiPredFromDifferentDir(predUnit);
+      if (!refine)
+      {
+        continue;
+      }
+      predUnit.mv[REF_PIC_LIST_0] += predUnit.mvd[REF_PIC_LIST_0];  // list 0 moves by +mvd
+      predUnit.mv[REF_PIC_LIST_1] -= predUnit.mvd[REF_PIC_LIST_0];  // list 1 moves by -mvd (same list-0 delta)
+      predUnit.mvd[REF_PIC_LIST_0].setZero();                       // delta has been applied, clear it
+      PU::spanMotionInfo(predUnit);
+    }
+  }
+}
+#endif
 // CU tools
 
 bool CU::isIntra(const CodingUnit &cu)
@@ -311,8 +335,8 @@ int PU::getIntraMPMs( const PredictionUnit &pu, unsigned* mpm, const ChannelType
 
     CHECK(2 >= numMPMs, "Invalid number of most probable modes");
 
-    const int offset = (int) NUM_LUMA_MODE - 5;
-    const int mod    = offset + 3;
+    const int offset = 61;
+    const int mod    = 64;
 
     if (leftIntraDir == aboveIntraDir)
     {
@@ -619,7 +643,6 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co
   // compute the location of the current PU
 
   int cnt = 0;
-
   const Position posLT = pu.Y().topLeft();
   const Position posRT = pu.Y().topRight();
   const Position posLB = pu.Y().bottomLeft();
@@ -639,7 +662,6 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co
 
     // get Inter Dir
     mrgCtx.interDirNeighbours[cnt] = miLeft.interDir;
-
     // get Mv from Left
     mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]);
 
@@ -678,7 +700,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co
 
       // get Inter Dir
       mrgCtx.interDirNeighbours[cnt] = miAbove.interDir;
-      // get Mv from Left
+      // get Mv from Above
       mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAbove.mv[0], miAbove.refIdx[0] );
 
       if( slice.isInterB() )
@@ -720,7 +742,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co
 
       // get Inter Dir
       mrgCtx.interDirNeighbours[cnt] = miAboveRight.interDir;
-      // get Mv from Left
+      // get Mv from Above-right
       mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAboveRight.mv[0], miAboveRight.refIdx[0] );
 
       if( slice.isInterB() )
@@ -800,7 +822,8 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co
     bool bMrgIdxMatchATMVPCan = ( mrgCandIdx == cnt );
     bool tmpLICFlag           = false;
 
-    isAvailableSubPu = cs.sps->getSpsNext().getUseATMVP() && getInterMergeSubPuMvpCand( pu, mrgCtx, tmpLICFlag, cnt );
+    isAvailableSubPu = cs.sps->getSpsNext().getUseATMVP() && getInterMergeSubPuMvpCand( pu, mrgCtx, tmpLICFlag, cnt 
+    );
 
     if( isAvailableSubPu )
     {
@@ -1078,7 +1101,6 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co
 }
 
 
-
 static int xGetDistScaleFactor(const int &iCurrPOC, const int &iCurrRefPOC, const int &iColPOC, const int &iColRefPOC)
 {
   int iDiffPocD = iColPOC - iColRefPOC;
@@ -1124,7 +1146,6 @@ bool PU::getColocatedMVP(const PredictionUnit &pu, const RefPicList &eRefPicList
   {
     return false;
   }
-
   int iColRefIdx = mi.refIdx[eColRefPicList];
 
   if (iColRefIdx < 0)
@@ -2165,7 +2186,6 @@ bool PU::isAffineMrgFlagCoded( const PredictionUnit &pu )
   }
   return getFirstAvailableAffineNeighbour( pu ) != nullptr;
 }
-
 void PU::getAffineMergeCand( const PredictionUnit &pu, MvField (*mvFieldNeighbours)[3], unsigned char &interDirNeighbours, int &numValidMergeCand )
 {
   for ( int mvNum = 0; mvNum < 3; mvNum++ )
@@ -2416,7 +2436,8 @@ void clipColBlkMv(int& mvX, int& mvY, const PredictionUnit& pu)
 }
 #endif
 
-bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, bool& LICFlag, const int count)
+bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, bool& LICFlag, const int count
+)
 {
   const Slice   &slice = *pu.cs->slice;
 #if JVET_K0346
diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h
index f7640e1135dfae435d6838b6904d25c1b722c95f..2571b290bc149bd7df1f56d38a50fd8b19babbcd 100644
--- a/source/Lib/CommonLib/UnitTools.h
+++ b/source/Lib/CommonLib/UnitTools.h
@@ -49,6 +49,9 @@ namespace CS
   uint64_t getEstBits                   ( const CodingStructure &cs );
   UnitArea getArea                    ( const CodingStructure &cs, const UnitArea &area, const ChannelType chType );
   bool   isDualITree                  ( const CodingStructure &cs );
+#if DMVR_JVET_LOW_LATENCY_K0217
+  void   setRefinedMotionField        ( CodingStructure &cs );
+#endif
 }
 
 
@@ -81,6 +84,7 @@ namespace CU
   bool hasNonTsCodedBlock             (const CodingUnit& cu);
   uint32_t getNumNonZeroCoeffNonTs        (const CodingUnit& cu);
 
+
   PUTraverser traversePUs             (      CodingUnit& cu);
   TUTraverser traverseTUs             (      CodingUnit& cu);
   cPUTraverser traversePUs            (const CodingUnit& cu);
@@ -126,6 +130,7 @@ namespace PU
 #if JVET_K0357_AMVR
   void applyImv                       (      PredictionUnit &pu, MergeCtx &mrgCtx, InterPrediction *interPred = NULL );
 #endif
+  void getAffineMergeCand             (const PredictionUnit &pu, MvField (*mvFieldNeighbours)[3], unsigned char &interDirNeighbours, int &numValidMergeCand );
 #if JVET_K_AFFINE
   bool isAffineMrgFlagCoded           (const PredictionUnit &pu );
   void getAffineMergeCand             (const PredictionUnit &pu, MvField (*mvFieldNeighbours)[3], unsigned char &interDirNeighbours, int &numValidMergeCand );
@@ -133,7 +138,8 @@ namespace PU
   void setAllAffineMv                 (      PredictionUnit &pu, Mv affLT, Mv affRT, Mv affLB, RefPicList eRefList );
 #endif
 #if JVET_K0346
-  bool getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx &mrgCtx, bool& LICFlag, const int count);
+  bool getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx &mrgCtx, bool& LICFlag, const int count
+  );
   bool getInterMergeSubPuRecurCand(const PredictionUnit &pu, MergeCtx &mrgCtx, const int count);
 #endif
   bool isBiPredFromDifferentDir       (const PredictionUnit &pu);
diff --git a/source/Lib/CommonLib/dtrace.cpp b/source/Lib/CommonLib/dtrace.cpp
index 3a6c3dae5896fc452067476e74b5f6d7664b154d..66bb00825599aa8624a8973b65a3939232c5fc56 100644
--- a/source/Lib/CommonLib/dtrace.cpp
+++ b/source/Lib/CommonLib/dtrace.cpp
@@ -320,3 +320,18 @@ void CDTrace::dtrace_repeat( int k, int i_times, const char *format, /*va_list a
   }
   return;
 }
+
+#if K0149_BLOCK_STATISTICS
+void CDTrace::dtrace_header( const char *format, /*va_list args*/... )
+{ // printf-style write of 'format' and its variadic arguments to the trace file; takes no channel argument
+  if( m_trace_file ) // nothing is written when no trace file is set
+  {
+    va_list args;
+    va_start ( args, format );
+    vfprintf ( m_trace_file, format, args );
+    fflush( m_trace_file ); // flush immediately after the write
+    va_end ( args );
+  }
+  return;
+}
+#endif
diff --git a/source/Lib/CommonLib/dtrace.h b/source/Lib/CommonLib/dtrace.h
index 87140ee6aaad7f3fd1e33fecb43195059344e443..9d622f3eed7876f759e8a7adf6081113c4bdb8a4 100644
--- a/source/Lib/CommonLib/dtrace.h
+++ b/source/Lib/CommonLib/dtrace.h
@@ -47,6 +47,11 @@
 #include <vector>
 #include <cstdarg>
 
+#if K0149_BLOCK_STATISTICS
+class CodingStructure;
+struct Position;
+#endif
+
 class CDTrace;
 
 typedef std::string CType;
@@ -118,6 +123,21 @@ public:
     template<bool bCount>
     void dtrace       ( int, const char *format, /*va_list args*/... );
     void dtrace_repeat( int, int i_times, const char *format, /*va_list args*/... );
+#if K0149_BLOCK_STATISTICS
+    void dtrace_header       ( const char *format, /*va_list args*/... );
+    // CTU
+    void dtrace_block_scalar( int k, const CodingStructure &cs, std::string stat_type, signed value );
+    // CU
+    void dtrace_block_scalar( int k, const CodingUnit &cu, std::string stat_type, signed value, bool isChroma = false );
+    void dtrace_block_vector( int k, const CodingUnit &cu, std::string stat_type, signed val_x, signed val_y );
+    // PU
+    void dtrace_block_scalar( int k, const PredictionUnit &pu, std::string stat_type, signed value, bool isChroma = false );
+    void dtrace_block_vector( int k, const PredictionUnit &pu, std::string stat_type, signed val_x, signed val_y );
+    void dtrace_block_affinetf( int k, const PredictionUnit &pu, std::string stat_type, signed val_x0, signed val_y0, signed val_x1, signed val_y1, signed val_x2, signed val_y2 );
+    // TU
+    void dtrace_block_scalar(int k, const TransformUnit &tu, std::string stat_type, signed value, bool isChroma = false );
+    void dtrace_block_vector(int k, const TransformUnit &tu, std::string stat_type, signed val_x, signed val_y);
+#endif
     bool update       ( state_type stateval );
     int  init( vstring channel_names );
     int  getLastError() { return m_error_code;  }
@@ -126,6 +146,9 @@ public:
     std::string getErrMessage();
     int64_t getChannelCounter( int channel ) { return chanRules[channel].getCounter(); }
     void    decrementChannelCounter( int channel ) { chanRules[channel].decrementCounter(); }
+#if K0149_BLOCK_STATISTICS
+    bool isChannelActive( int channel ) { return chanRules[channel].active(); }
+#endif
 };
 
 
diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.cpp b/source/Lib/CommonLib/dtrace_blockstatistics.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9e4552d8a5513f9b8822400100f79d572022effb
--- /dev/null
+++ b/source/Lib/CommonLib/dtrace_blockstatistics.cpp
@@ -0,0 +1,775 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2018, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     dtrace_blockstatistics.cpp
+ *  \brief    DTrace block statistics support for next software
+ */
+
+#include "dtrace_blockstatistics.h"
+#include "dtrace.h"
+#include "dtrace_next.h"
+#include "CommonLib/Unit.h"
+#include "CommonLib/Picture.h"
+#include "CommonLib/UnitTools.h"
+//#include "CommonLib/CodingStructure.h"
+
+#if K0149_BLOCK_STATISTICS
+std::string GetBlockStatisticName(BlockStatistic statistic)
+{ // Returns element 0 (the name string) of the blockstatistic2description entry for 'statistic'.
+  auto statisticIterator = blockstatistic2description.find(statistic);
+  // enforces that all declared statistic enum items are also part of the map
+  assert(statisticIterator != blockstatistic2description.end() && "A block statistics declared in the enum is missing in the map for statistic description.");
+  // NOTE(review): if assert is compiled out (NDEBUG), a missing entry would dereference end() below.
+  return std::get<0>(statisticIterator->second);
+}
+
+std::string GetBlockStatisticTypeString(BlockStatistic statistic)
+{ // Returns the textual name of the BlockStatisticType stored as element 1 of the map entry for 'statistic'.
+  auto statisticIterator = blockstatistic2description.find(statistic);
+  // enforces that all declared statistic enum items are also part of the map
+  assert(statisticIterator != blockstatistic2description.end() && "A block statistics declared in the enum is missing in the map for statistic description.");
+
+  BlockStatisticType statisticType = std::get<1>(statisticIterator->second);
+  switch (statisticType) {
+  case BlockStatisticType::Flag:
+    return std::string("Flag");
+    break;
+  case BlockStatisticType::Vector:
+    return std::string("Vector");
+    break;
+  case BlockStatisticType::Integer:
+    return std::string("Integer");
+    break;
+  case BlockStatisticType::AffineTFVectors:
+    return std::string("AffineTFVectors");
+    break;
+  default:
+    assert(0); // type without a name here; with asserts disabled the empty string below is returned
+    break;
+  }
+  return std::string();
+}
+
+std::string GetBlockStatisticTypeSpecificInfo(BlockStatistic statistic)
+{ // Returns element 2 (the type-specific info string) of the blockstatistic2description entry for 'statistic'.
+  auto statisticIterator = blockstatistic2description.find(statistic);
+  // enforces that all declared statistic enum items are also part of the map
+  assert(statisticIterator != blockstatistic2description.end() && "A block statistics declared in the enum is missing in the map for statistic description.");
+  // NOTE(review): if assert is compiled out (NDEBUG), a missing entry would dereference end() below.
+  return std::get<2>(statisticIterator->second);
+}
+
+void CDTrace::dtrace_block_scalar( int k, const CodingStructure &cs, std::string stat_type, signed value )
+{ // Writes one "BlockStat" record on channel k: POC, cs.area position/size (lx, ly, lwidth, lheight), stat_type and value.
+#if BLOCK_STATS_AS_CSV
+  dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, cs.area.lx(), cs.area.ly(), cs.area.lwidth(), cs.area.lheight(), stat_type.c_str(), value ); // semicolon-separated form
+#else
+  dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, cs.area.lx(), cs.area.ly(), cs.area.lwidth(), cs.area.lheight(), stat_type.c_str(), value ); // human-readable form
+#endif
+}
+
+void CDTrace::dtrace_block_scalar( int k, const CodingUnit &cu, std::string stat_type, signed value,  bool isChroma /*= false*/  )
+{ // Writes one scalar "BlockStat" record for cu on channel k; isChroma selects which block's geometry is reported.
+  const CodingStructure& cs = *cu.cs;
+#if BLOCK_STATS_AS_CSV
+  if(isChroma)
+  { // Cb block position and size, each multiplied by 2. NOTE(review): presumably assumes 4:2:0 subsampling - confirm.
+    dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, cu.Cb().x*2, cu.Cb().y*2, cu.Cb().width*2, cu.Cb().height*2, stat_type.c_str(), value );
+  }
+  else
+  { // position and size from the lx/ly/lwidth/lheight accessors
+    dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, cu.lx(), cu.ly(), cu.lwidth(), cu.lheight(), stat_type.c_str(), value );
+  }
+#else
+  if(isChroma)
+  { // same geometry as the CSV branch above, human-readable format
+    dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, cu.Cb().x*2, cu.Cb().y*2, cu.Cb().width*2, cu.Cb().height*2, stat_type.c_str(), value );
+  }
+  else
+  {
+    dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, cu.lx(), cu.ly(), cu.lwidth(), cu.lheight(), stat_type.c_str(), value );
+  }
+#endif
+}
+
+void CDTrace::dtrace_block_vector( int k, const CodingUnit &cu, std::string stat_type, signed val_x, signed val_y )
+{ // Writes one two-component "BlockStat" record (val_x, val_y) for cu on channel k, using cu.lx/ly/lwidth/lheight.
+  const CodingStructure& cs = *cu.cs;
+#if BLOCK_STATS_AS_CSV
+  dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%4d;%4d\n", cs.picture->poc, cu.lx(), cu.ly(), cu.lwidth(), cu.lheight(), stat_type.c_str(), val_x, val_y );
+#else
+  dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s={%4d,%4d}\n", cs.picture->poc, cu.lx(), cu.ly(), cu.lwidth(), cu.lheight(), stat_type.c_str(), val_x, val_y );
+#endif
+}
+
+void CDTrace::dtrace_block_scalar( int k, const PredictionUnit &pu, std::string stat_type, signed value, bool isChroma /*= false*/  )
+{ // Writes one scalar "BlockStat" record for pu on channel k; isChroma selects which block's geometry is reported.
+  const CodingStructure& cs = *pu.cs;
+#if BLOCK_STATS_AS_CSV
+  if(isChroma)
+  { // Cb block position and size, each multiplied by 2. NOTE(review): presumably assumes 4:2:0 subsampling - confirm.
+    dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, pu.Cb().x*2, pu.Cb().y*2, pu.Cb().width*2, pu.Cb().height*2, stat_type.c_str(), value );
+  }
+  else
+  { // position and size from the lx/ly/lwidth/lheight accessors
+    dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), stat_type.c_str(), value );
+  }
+#else
+  if(isChroma)
+  { // same geometry as the CSV branch above, human-readable format
+    dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, pu.Cb().x*2, pu.Cb().y*2, pu.Cb().width*2, pu.Cb().height*2, stat_type.c_str(), value );
+  }
+  else
+  {
+    dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), stat_type.c_str(), value );
+  }
+#endif
+}
+
+void CDTrace::dtrace_block_vector( int k, const PredictionUnit &pu, std::string stat_type, signed val_x, signed val_y )
+{ // Writes one two-component "BlockStat" record (val_x, val_y) for pu on channel k, using pu.lx/ly/lwidth/lheight.
+  const CodingStructure& cs = *pu.cs;
+#if BLOCK_STATS_AS_CSV
+  dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%4d;%4d\n", cs.picture->poc, pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), stat_type.c_str(), val_x, val_y );
+#else
+  dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s={%4d,%4d}\n", cs.picture->poc, pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), stat_type.c_str(), val_x, val_y );
+#endif
+}
+
+void CDTrace::dtrace_block_scalar(int k, const TransformUnit &tu, std::string stat_type, signed value, bool isChroma /*= false*/  )
+{ // Writes one scalar "BlockStat" record for tu on channel k; isChroma selects which block's geometry is reported.
+  const CodingStructure& cs = *tu.cs;
+#if BLOCK_STATS_AS_CSV
+  if(isChroma)
+  { // Cb block position and size, each multiplied by 2. NOTE(review): presumably assumes 4:2:0 subsampling - confirm.
+    dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, tu.Cb().x*2, tu.Cb().y*2, tu.Cb().width*2, tu.Cb().height*2, stat_type.c_str(), value );
+  }
+  else
+  { // position and size from the lx/ly/lwidth/lheight accessors
+    dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), stat_type.c_str(), value );
+  }
+#else
+  if(isChroma)
+  { // same geometry as the CSV branch above, human-readable format
+    dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, tu.Cb().x*2, tu.Cb().y*2, tu.Cb().width*2, tu.Cb().height*2, stat_type.c_str(), value );
+  }
+  else
+  {
+    dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), stat_type.c_str(), value );
+  }
+#endif
+}
+
+void CDTrace::dtrace_block_vector(int k, const TransformUnit &tu, std::string stat_type, signed val_x, signed val_y)
+{ // Writes one two-component "BlockStat" record (val_x, val_y) for tu on channel k, using tu.lx/ly/lwidth/lheight.
+  const CodingStructure& cs = *tu.cs;
+#if BLOCK_STATS_AS_CSV
+  dtrace<false>(k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%4d;%4d\n", cs.picture->poc, tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), stat_type.c_str(), val_x, val_y); // geometry comes from tu, as in the #else branch
+#else
+  dtrace<false>(k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s={%4d,%4d}\n", cs.picture->poc, tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), stat_type.c_str(), val_x, val_y);
+#endif
+}
+
+void CDTrace::dtrace_block_affinetf( int k, const PredictionUnit &pu, std::string stat_type, signed val_x0, signed val_y0, signed val_x1, signed val_y1, signed val_x2, signed val_y2 )
+{ // Writes one "BlockStat" record for pu on channel k carrying three (x,y) pairs, emitted in the order x0,y0,x1,y1,x2,y2.
+  const CodingStructure& cs = *pu.cs;
+#if BLOCK_STATS_AS_CSV
+  dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%4d;%4d;%4d;%4d;%4d;%4d\n",
+                 cs.picture->poc, pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), stat_type.c_str(),
+                 val_x0, val_y0, val_x1, val_y1 , val_x2, val_y2  );
+#else
+  dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s={%4d,%4d,%4d,%4d,%4d,%4d}\n",
+                 cs.picture->poc, pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), stat_type.c_str(),
+                 val_x0, val_y0, val_x1, val_y1 , val_x2, val_y2  );
+#endif
+}
+
+
+
+void writeBlockStatisticsHeader(const SPS *sps)
+{ // Writes the block-statistics header (title, picture size from sps, one line per statistic) at most once.
+  static bool has_header_been_written = false; // function-local flag, set only after a header was actually written
+  if (has_header_been_written)
+  {
+    return;
+  }
+
+  // only write header when block statistics are used
+  bool write_blockstatistics =   g_trace_ctx->isChannelActive( D_BLOCK_STATISTICS_ALL) || g_trace_ctx->isChannelActive( D_BLOCK_STATISTICS_CODED);
+  if(!write_blockstatistics)
+  {
+    return; // flag stays false, so a later call checks the channels again
+  }
+
+  DTRACE_HEADER( g_trace_ctx, "# VTMBMS Block Statistics\n");
+  // sequence info
+  DTRACE_HEADER( g_trace_ctx, "# Sequence size: [%dx %d]\n", sps->getPicWidthInLumaSamples(), sps->getPicHeightInLumaSamples());
+  // list statistics
+  for( auto i = static_cast<int>(BlockStatistic::PredMode); i < static_cast<int>(BlockStatistic::NumBlockStatistics); i++)
+  { // iterates the enum by integer value from PredMode up to (excluding) NumBlockStatistics
+    BlockStatistic statistic = BlockStatistic(i);
+    std::string statitic_name = GetBlockStatisticName(statistic);
+    std::string statitic_type = GetBlockStatisticTypeString(statistic);
+    std::string statitic_type_specific_info = GetBlockStatisticTypeSpecificInfo(statistic);
+    DTRACE_HEADER( g_trace_ctx, "# Block Statistic Type: %s; %s; %s\n", statitic_name.c_str(), statitic_type.c_str(), statitic_type_specific_info.c_str());
+  }
+
+  has_header_been_written = true;
+}
+
+void getAndStoreBlockStatistics(const CodingStructure& cs, const UnitArea& ctuArea)
+{ // Dispatches to writeAllCodedData or writeAllData for ctuArea, depending on which statistics channel is active.
+  // two different behaviors, depending on which information is needed
+  bool writeAll =   g_trace_ctx->isChannelActive( D_BLOCK_STATISTICS_ALL);
+  bool writeCoded =   g_trace_ctx->isChannelActive( D_BLOCK_STATISTICS_CODED);
+  // the two channels are mutually exclusive; nothing is written when neither is active
+  CHECK(writeAll && writeCoded, "Either used D_BLOCK_STATISTICS_ALL_DATA or D_BLOCK_STATISTICS_CODED_DATA. Not both at once!")
+
+  if (writeCoded)
+    writeAllCodedData(cs, ctuArea);    // this will write out important cu-based data, only if it is actually decoded and used
+  else if (writeAll)
+    writeAllData(cs, ctuArea);         // this will write out all inter- or intra-prediction related data
+}
+
+void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
+{ // Writes CU-, PU- and TU-level statistics of every CU in ctuArea to channel D_BLOCK_STATISTICS_ALL.
+  const int maxNumChannelType = cs.pcv->chrFormat != CHROMA_400 && CS::isDualITree( cs ) ? 2 : 1; // 2 passes only for a dual tree with chroma
+
+  for( int ch = 0; ch < maxNumChannelType; ch++ )
+  {
+    const ChannelType chType = ChannelType( ch );
+
+    for( const CodingUnit &cu : cs.traverseCUs( CS::getArea( cs, ctuArea, chType ), chType ) )
+    {
+      if( chType == CHANNEL_TYPE_LUMA )
+      {
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::PredMode), cu.predMode);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::PartSize), cu.partSize);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::Depth), cu.depth);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::QT_Depth), cu.qtDepth);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BT_Depth), cu.btDepth);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::MT_Depth), cu.mtDepth);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::ChromaQPAdj), cu.chromaQpAdj);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::QP), cu.qp);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SplitSeries), (int)cu.splitSeries);
+
+        if (cs.pps->getTransquantBypassEnabledFlag())
+        {
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag), cu.transQuantBypass);
+        }
+
+        // skip flag
+        if (!cs.slice->isIntra())
+        {
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SkipFlag), cu.skip);
+        }
+
+  #if JVET_K1000_SIMPLIFIED_EMT && HM_EMT_NSST_AS_IN_JEM
+        if (!(!((cs.sps->getSpsNext().getUseIntraEMT() && CU::isIntra(cu)) || (cs.sps->getSpsNext().getUseInterEMT() && CU::isInter(cu))) || isChroma(cu.chType)))
+        {
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::EMTFlag), cu.emtFlag);
+        }
+  #endif
+      }
+      else if( chType == CHANNEL_TYPE_CHROMA )
+      {
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::PartSize_Chroma), cu.partSize);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::Depth_Chroma), cu.depth);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::QT_Depth_Chroma), cu.qtDepth);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BT_Depth_Chroma), cu.btDepth);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::MT_Depth_Chroma), cu.mtDepth);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::ChromaQPAdj_Chroma), cu.chromaQpAdj);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::QP_Chroma), cu.qp);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SplitSeries_Chroma), (int)cu.splitSeries);
+
+        if (cs.pps->getTransquantBypassEnabledFlag())
+        {
+          DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag_Chroma), cu.transQuantBypass);
+        }
+
+  #if JVET_K1000_SIMPLIFIED_EMT && HM_EMT_NSST_AS_IN_JEM
+        if (!(!((cs.sps->getSpsNext().getUseIntraEMT() && CU::isIntra(cu)) || (cs.sps->getSpsNext().getUseInterEMT() && CU::isInter(cu))) || isChroma(cu.chType)))
+        {
+          DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::EMTFlag_Chroma), cu.emtFlag);
+        }
+  #endif
+      }
+
+
+      switch( cu.predMode )
+      {
+      case MODE_INTER:
+        {
+          for( const PredictionUnit &pu : CU::traversePUs( cu ) )
+          {
+            if (!pu.cu->skip)
+            {
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MergeFlag), pu.mergeFlag);
+            }
+            if( pu.mergeFlag )
+            {
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MergeIdx),  pu.mergeIdx);
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MergeType), pu.mergeType);
+            }
+#if JVET_K_AFFINE
+            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::AffineFlag), pu.cu->affine);
+#if JVET_K0337_AFFINE_6PARA
+            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::AffineType), pu.cu->affineType);
+#endif
+#endif
+            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::InterDir), pu.interDir);
+
+            if (pu.interDir != 2 /* PRED_L1 */)
+            {
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MVPIdxL0), pu.mvpIdx[REF_PIC_LIST_0]);
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::RefIdxL0), pu.refIdx[REF_PIC_LIST_0]);
+            }
+            if (pu.interDir != 1 /* PRED_L0 */)
+            {
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MVPIdxL1), pu.mvpIdx[REF_PIC_LIST_1]);
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::RefIdxL1), pu.refIdx[REF_PIC_LIST_1]);
+            }
+#if JVET_K_AFFINE
+            if (!pu.cu->affine)
+            {
+#endif
+              if (pu.interDir != 2 /* PRED_L1 */)
+              {
+                Mv mv = pu.mv[REF_PIC_LIST_0];
+                Mv mvd = pu.mvd[REF_PIC_LIST_0];
+#if JVET_K0346 || JVET_K_AFFINE
+                mv.setLowPrec();
+                mvd.setLowPrec();
+#endif
+                DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MVDL0), mvd.hor, mvd.ver);
+                DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MVL0), mv.hor, mv.ver);
+              }
+              if (pu.interDir != 1 /* PRED_L0 */)
+              {
+                Mv mv = pu.mv[REF_PIC_LIST_1];
+                Mv mvd = pu.mvd[REF_PIC_LIST_1];
+#if JVET_K0346 || JVET_K_AFFINE
+                mv.setLowPrec();
+                mvd.setLowPrec();
+#endif
+                DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MVDL1), mvd.hor, mvd.ver);
+                DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MVL1), mv.hor, mv.ver);
+              }
+#if JVET_K_AFFINE
+            }
+            else
+            {
+              if (pu.interDir != 2 /* PRED_L1 */)
+              {
+                Mv mv[3];
+                const CMotionBuf &mb = pu.getMotionBuf();
+                mv[0] = mb.at(0, 0).mv[REF_PIC_LIST_0];
+                mv[1] = mb.at(mb.width - 1, 0).mv[REF_PIC_LIST_0];
+                mv[2] = mb.at(0, mb.height - 1).mv[REF_PIC_LIST_0];
+#if JVET_K0346 || JVET_K_AFFINE
+                // motion vectors should use low precision or they will appear too large
+                mv[0].setLowPrec();
+                mv[1].setLowPrec();
+                mv[2].setLowPrec();
+#endif
+                DTRACE_BLOCK_AFFINETF(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::AffineMVL0), mv[0].hor, mv[0].ver, mv[1].hor, mv[1].ver, mv[2].hor, mv[2].ver);
+              }
+              if (pu.interDir != 1 /* PRED_L0 */)
+              {
+                Mv mv[3];
+                const CMotionBuf &mb = pu.getMotionBuf();
+                mv[0] = mb.at(0, 0).mv[REF_PIC_LIST_1];
+                mv[1] = mb.at(mb.width - 1, 0).mv[REF_PIC_LIST_1];
+                mv[2] = mb.at(0, mb.height - 1).mv[REF_PIC_LIST_1];
+#if JVET_K0346 || JVET_K_AFFINE
+                // motion vectors should use low precision or they will appear too large
+                mv[0].setLowPrec();
+                mv[1].setLowPrec();
+                mv[2].setLowPrec();
+#endif
+                DTRACE_BLOCK_AFFINETF(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::AffineMVL1), mv[0].hor, mv[0].ver, mv[1].hor, mv[1].ver, mv[2].hor, mv[2].ver);
+              }
+            }
+#endif        
+          }
+#if JVET_K0357_AMVR
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IMVMode), cu.imv);
+#endif
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::RootCbf), cu.rootCbf);
+        }
+        break;
+      case MODE_INTRA:
+        {
+
+          if(chType == CHANNEL_TYPE_LUMA)
+          {
+            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IPCM), cu.ipcm);
+          }
+          else if(chType == CHANNEL_TYPE_CHROMA)
+          {
+            DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IPCM_Chroma), cu.ipcm);
+          }
+
+          const uint32_t numChType = ::getNumberValidChannels( cu.chromaFormat );
+
+          for( uint32_t chType = CHANNEL_TYPE_LUMA; chType < numChType; chType++ ) // NOTE: this chType shadows the outer loop's chType
+          {
+            if( cu.blocks[chType].valid() )
+            {
+              for( const PredictionUnit &pu : CU::traversePUs( cu ) )
+              {
+                if( isLuma( ChannelType( chType ) ) )
+                {
+                  const uint32_t uiChFinalMode  = PU::getFinalIntraMode( pu, ChannelType( chType ) );
+                  DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::Luma_IntraMode), uiChFinalMode);
+                }
+                else
+                {
+                  const uint32_t uiChFinalMode  = PU::getFinalIntraMode( pu, ChannelType( chType ) );
+                  DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::Chroma_IntraMode), uiChFinalMode);
+            #if ENABLE_CHROMA_422
+                    assert(0);
+            #endif
+                  }
+              }
+            }
+          }
+        }
+        break;
+      default:
+        THROW( "Invalid prediction mode" );
+        break;
+      }
+
+      for (const TransformUnit &tu : CU::traverseTUs(cu))
+      {
+        if (tu.Y().valid())
+        {
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::Cbf_Y), tu.cbf[COMPONENT_Y]);
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::TransformSkipFlag_Y), tu.transformSkip[COMPONENT_Y]);
+        }
+        if (!(cu.chromaFormat == CHROMA_400 || (CS::isDualITree(*cu.cs) && cu.chType == CHANNEL_TYPE_LUMA)))
+        {
+          DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cb), tu.cbf[COMPONENT_Cb]);
+          DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cr), tu.cbf[COMPONENT_Cr]);
+          DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::TransformSkipFlag_Cb), tu.transformSkip[COMPONENT_Cb]);
+          DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::TransformSkipFlag_Cr), tu.transformSkip[COMPONENT_Cr]);
+        }        
+      }
+    }
+  }
+}
+
+void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea)
+{
+  // Trace on D_BLOCK_STATISTICS_CODED the CU, PU and TU statistics of ctuArea; each statistic is emitted only when its guarding condition below holds.
+  const int maxNumChannelType = cs.pcv->chrFormat != CHROMA_400 && CS::isDualITree(cs) ? 2 : 1; // second pass covers the separate chroma tree
+  for (int ch = 0; ch < maxNumChannelType; ch++)
+  {
+    const ChannelType chType = ChannelType(ch);
+    const SPS& sps = *cs.sps;
+
+    for (const CodingUnit &cu : cs.traverseCUs(CS::getArea(cs, ctuArea, chType), chType))
+    {
+      if( chType == CHANNEL_TYPE_LUMA )
+      {
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::PartSize), cu.partSize);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::Depth), cu.depth);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::QT_Depth), cu.qtDepth);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::BT_Depth), cu.btDepth);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::MT_Depth), cu.mtDepth);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::ChromaQPAdj), cu.chromaQpAdj);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::QP), cu.qp);
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::SplitSeries), (int)cu.splitSeries);
+        // transquant bypass flag
+        if (cs.pps->getTransquantBypassEnabledFlag())
+        {
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag), cu.transQuantBypass);
+        }
+        // skip flag
+        if (!cs.slice->isIntra())
+        {
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::SkipFlag), cu.skip);
+        }
+
+        // prediction mode and partitioning data
+        DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::PredMode), cu.predMode);
+        // PCM flag: 2Nx2N intra CUs only, and only if PCM is enabled and the luma width is within the SPS PCM size limits
+        if (CU::isIntra(cu) && cu.partSize == SIZE_2Nx2N)
+        {
+          if (!(!sps.getUsePCM() || cu.lumaSize().width > (1 << sps.getPCMLog2MaxSize()) || cu.lumaSize().width < (1 << sps.getPCMLog2MinSize())))
+          {
+            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::IPCM), cu.ipcm);
+          }
+        }
+      }
+      else if (chType == CHANNEL_TYPE_CHROMA )
+      {
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::PartSize_Chroma), cu.partSize);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::Depth_Chroma), cu.depth);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::QT_Depth_Chroma), cu.qtDepth);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::BT_Depth_Chroma), cu.btDepth);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::MT_Depth_Chroma), cu.mtDepth);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::ChromaQPAdj_Chroma), cu.chromaQpAdj);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::QP_Chroma), cu.qp);
+        DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::SplitSeries_Chroma), (int)cu.splitSeries);
+        // transquant bypass flag
+        if (cs.pps->getTransquantBypassEnabledFlag())
+        {
+          DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag_Chroma), cu.transQuantBypass);
+        }
+        // PCM flag: 2Nx2N intra CUs only, and only if PCM is enabled and the luma width is within the SPS PCM size limits
+        if (CU::isIntra(cu) && cu.partSize == SIZE_2Nx2N)
+        {
+          if (!(!sps.getUsePCM() || cu.lumaSize().width > (1 << sps.getPCMLog2MaxSize()) || cu.lumaSize().width < (1 << sps.getPCMLog2MinSize())))
+          {
+            DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::IPCM_Chroma), cu.ipcm);
+          }
+        }
+      }
+      // prediction data of each PU of this CU
+      for (auto &pu : CU::traversePUs(cu))
+      {
+        switch (pu.cu->predMode)
+        {
+          case MODE_INTRA:
+          {
+            if (pu.Y().valid())
+            {
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::Luma_IntraMode), PU::getFinalIntraMode(pu, ChannelType(chType)));
+            }
+            if (!(pu.chromaFormat == CHROMA_400 || (CS::isDualITree(*pu.cs) && pu.chType == CHANNEL_TYPE_LUMA)))
+            {
+              DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::Chroma_IntraMode), PU::getFinalIntraMode(pu, CHANNEL_TYPE_CHROMA));
+            }
+            break;
+          }
+          case MODE_INTER:
+          {
+            if (!pu.cu->skip)
+            {
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MergeFlag), pu.mergeFlag);
+            }
+            if (pu.mergeFlag)
+            {
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MergeType), pu.mergeType);
+  #if JVET_K_AFFINE
+              if (!(cu.cs->slice->isIntra() || !cu.cs->sps->getSpsNext().getUseAffine() || cu.partSize != SIZE_2Nx2N)
+                && !(!cu.firstPU->mergeFlag && !(cu.lumaSize().width > 8 && cu.lumaSize().height > 8))
+                && !(cu.firstPU->mergeFlag && !PU::isAffineMrgFlagCoded(*cu.firstPU)))
+              {
+                DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineFlag), pu.cu->affine);
+                if (cu.affine && !cu.firstPU->mergeFlag && cu.cs->sps->getSpsNext().getUseAffineType())
+                {
+                  DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineType), pu.cu->affineType); // NOTE(review): BlockStatistic::AffineType is declared only under JVET_K0337_AFFINE_6PARA
+                }
+              }
+  #endif
+  #if JVET_K_AFFINE
+              if (!(pu.cu->affine))
+  #endif
+              {
+                DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MergeIdx), pu.mergeIdx);
+              }
+            }
+            else
+            {
+              if (!pu.cs->slice->isInterP())
+              {
+                DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::InterDir), pu.interDir);
+              }
+  #if JVET_K_AFFINE
+              if (!(cu.cs->slice->isIntra() || !cu.cs->sps->getSpsNext().getUseAffine() || cu.partSize != SIZE_2Nx2N)
+                && !(!cu.firstPU->mergeFlag && !(cu.lumaSize().width > 8 && cu.lumaSize().height > 8))
+                && !(cu.firstPU->mergeFlag && !PU::isAffineMrgFlagCoded(*cu.firstPU)))
+              {
+                DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineFlag), pu.cu->affine);
+                if (cu.affine && !cu.firstPU->mergeFlag && cu.cs->sps->getSpsNext().getUseAffineType())
+                {
+                  DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineType), pu.cu->affineType); // NOTE(review): BlockStatistic::AffineType is declared only under JVET_K0337_AFFINE_6PARA
+                }
+              }
+  #endif
+            }
+            if (pu.interDir != 2 /* PRED_L1 */)
+            {
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MVPIdxL0), pu.mvpIdx[REF_PIC_LIST_0]);
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::RefIdxL0), pu.refIdx[REF_PIC_LIST_0]);
+            }
+            if (pu.interDir != 1 /* PRED_L0 */)
+            {
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MVPIdxL1), pu.mvpIdx[REF_PIC_LIST_1]);
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::RefIdxL1), pu.refIdx[REF_PIC_LIST_1]);
+            }
+            // motion data: MVD/MV per used list, or for affine CUs three MVs read at corners of the PU's motion buffer
+  #if JVET_K_AFFINE
+            if (!pu.cu->affine)
+            {
+  #endif
+              if (pu.interDir != 2 /* PRED_L1 */)
+              {
+                Mv mv = pu.mv[REF_PIC_LIST_0];
+                Mv mvd = pu.mvd[REF_PIC_LIST_0];
+  #if JVET_K0346 || JVET_K_AFFINE
+                mv.setLowPrec();
+                mvd.setLowPrec();
+  #endif
+                DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MVDL0), mvd.hor, mvd.ver);
+                DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MVL0), mv.hor, mv.ver);
+              }
+              if (pu.interDir != 1 /* PRED_L0 */)
+              {
+                Mv mv = pu.mv[REF_PIC_LIST_1];
+                Mv mvd = pu.mvd[REF_PIC_LIST_1];
+  #if JVET_K0346 || JVET_K_AFFINE
+                mv.setLowPrec();
+                mvd.setLowPrec();
+  #endif
+                DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MVDL1), mvd.hor, mvd.ver); // list-1 MVD belongs under MVDL1, not MVDL0
+                DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MVL1), mv.hor, mv.ver);
+              }
+  #if JVET_K_AFFINE
+            }
+            else
+            {
+              if (pu.interDir != 2 /* PRED_L1 */)
+              {
+                Mv mv[3];
+                const CMotionBuf &mb = pu.getMotionBuf();
+                mv[0] = mb.at(0, 0).mv[REF_PIC_LIST_0];
+                mv[1] = mb.at(mb.width - 1, 0).mv[REF_PIC_LIST_0];
+                mv[2] = mb.at(0, mb.height - 1).mv[REF_PIC_LIST_0];
+  #if JVET_K0346 || JVET_K_AFFINE
+                // motion vectors should use low precision or they will appear too large
+                mv[0].setLowPrec();
+                mv[1].setLowPrec();
+                mv[2].setLowPrec();
+  #endif
+                DTRACE_BLOCK_AFFINETF(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineMVL0), mv[0].hor, mv[0].ver, mv[1].hor, mv[1].ver, mv[2].hor, mv[2].ver);
+              }
+              if (pu.interDir != 1 /* PRED_L0 */)
+              {
+                Mv mv[3];
+                const CMotionBuf &mb = pu.getMotionBuf();
+                mv[0] = mb.at(0, 0).mv[REF_PIC_LIST_1];
+                mv[1] = mb.at(mb.width - 1, 0).mv[REF_PIC_LIST_1];
+                mv[2] = mb.at(0, mb.height - 1).mv[REF_PIC_LIST_1];
+  #if JVET_K0346 || JVET_K_AFFINE
+                // motion vectors should use low precision or they will appear too large
+                mv[0].setLowPrec();
+                mv[1].setLowPrec();
+                mv[2].setLowPrec();
+  #endif
+                DTRACE_BLOCK_AFFINETF(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineMVL1), mv[0].hor, mv[0].ver, mv[1].hor, mv[1].ver, mv[2].hor, mv[2].ver);
+              }
+            }
+  #endif
+  #if JVET_K0357_AMVR
+            if (cu.cs->sps->getSpsNext().getUseIMV() && CU::hasSubCUNonZeroMVd(cu))
+            {
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::IMVMode), cu.imv);
+            }
+  #endif
+            break;
+          }
+          default:
+          {
+            CHECK(1, "Invalid prediction mode");
+            break;
+          }
+        }
+      } // end pu
+      if (CU::isInter(cu))
+      {
+        const PredictionUnit &pu = *cu.firstPU;
+        if (!((cu.cs->pcv->noRQT || cu.partSize == SIZE_2Nx2N) && pu.mergeFlag))
+        {
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::RootCbf), cu.rootCbf);
+        }
+      }
+      if (cu.rootCbf || CU::isIntra(cu))
+      {
+        for (const TransformUnit &tu : CU::traverseTUs(cu))
+        {
+          if (tu.Y().valid())
+          {
+            DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::Cbf_Y), tu.cbf[COMPONENT_Y]);
+#if HM_EMT_NSST_AS_IN_JEM && JVET_K1000_SIMPLIFIED_EMT
+            if (!(!tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag(*tu.cs, tu.blocks[COMPONENT_Y]) || (isLuma(COMPONENT_Y) && tu.cu->emtFlag)))
+#else
+            if (!(!tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag(*tu.cs, tu.blocks[COMPONENT_Y])))
+#endif
+            {
+              DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::TransformSkipFlag_Y), tu.transformSkip[COMPONENT_Y]);
+            }
+          }
+          if (!(cu.chromaFormat == CHROMA_400 || (CS::isDualITree(*cu.cs) && cu.chType == CHANNEL_TYPE_LUMA)))
+          {
+            DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cb), tu.cbf[COMPONENT_Cb]);
+            DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cr), tu.cbf[COMPONENT_Cr]);
+#if HM_EMT_NSST_AS_IN_JEM && JVET_K1000_SIMPLIFIED_EMT
+            if (!(!tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag(*tu.cs, tu.blocks[COMPONENT_Cb]) || (isLuma(COMPONENT_Cb) && tu.cu->emtFlag)))
+#else
+            if (!(!tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag(*tu.cs, tu.blocks[COMPONENT_Cb])))
+#endif
+            {
+              DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::TransformSkipFlag_Cb), tu.transformSkip[COMPONENT_Cb]);
+            }
+#if HM_EMT_NSST_AS_IN_JEM && JVET_K1000_SIMPLIFIED_EMT
+            if (!(!tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag(*tu.cs, tu.blocks[COMPONENT_Cr]) || (isLuma(COMPONENT_Cr) && tu.cu->emtFlag)))
+#else
+            if (!(!tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag(*tu.cs, tu.blocks[COMPONENT_Cr])))
+#endif
+            {
+              DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::TransformSkipFlag_Cr), tu.transformSkip[COMPONENT_Cr]);
+            }
+          }
+        }
+      }
+#if JVET_K1000_SIMPLIFIED_EMT && HM_EMT_NSST_AS_IN_JEM
+      if (!(!((cs.sps->getSpsNext().getUseIntraEMT() && CU::isIntra(cu)) || (cs.sps->getSpsNext().getUseInterEMT() && CU::isInter(cu))) || isChroma(cu.chType)))
+      {
+        // The condition above already rejects chroma CUs (isChroma(cu.chType)), so only the
+        // luma EMT flag is reported. The former else-branch referenced
+        // BlockStatistic::EMTFlag_Chroma, which the BlockStatistic enum does not declare,
+        // and therefore could not compile once this section was enabled.
+        if( isLuma( ChannelType( chType ) ) )
+        {
+          DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::EMTFlag), cu.emtFlag);
+        }
+      }
+#endif
+    }
+  }
+}
+#endif
diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.h b/source/Lib/CommonLib/dtrace_blockstatistics.h
new file mode 100644
index 0000000000000000000000000000000000000000..a81b03a27f258971585def1a1fa174e0e5197393
--- /dev/null
+++ b/source/Lib/CommonLib/dtrace_blockstatistics.h
@@ -0,0 +1,208 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2018, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     dtrace_blockstatistics.h
+ *  \brief    DTrace block statistics support for next software
+ */
+
+#ifndef _DTRACE_BLOCKSTATISTICS_H_
+#define _DTRACE_BLOCKSTATISTICS_H_
+
+#include <map>
+#include "CommonLib/CommonDef.h"
+#include "CommonLib/Unit.h"
+
+#if K0149_BLOCK_STATISTICS
+#define DTRACE_HEADER(ctx,...) ctx->dtrace_header( __VA_ARGS__ ) // these wrappers forward to the block-statistics members of the trace context ctx
+#define DTRACE_BLOCK_SCALAR(ctx,channel,cs_cu_pu,stat_type,val)      ctx->dtrace_block_scalar( channel, cs_cu_pu, stat_type, val ) // one scalar value for a block
+#define DTRACE_BLOCK_SCALAR_CHROMA(ctx,channel,cs_cu_pu,stat_type,val)      ctx->dtrace_block_scalar( channel, cs_cu_pu, stat_type, val, true) // as DTRACE_BLOCK_SCALAR with an extra trailing `true`; used for chroma statistics (exact meaning is defined by dtrace_block_scalar)
+#define DTRACE_BLOCK_VECTOR(ctx,channel,cu_pu,stat_type,v_x,v_y)     ctx->dtrace_block_vector( channel, cu_pu, stat_type, v_x, v_y ) // one vector given as two components
+#define DTRACE_BLOCK_AFFINETF(ctx,channel,pu,stat_type,v_x0,v_y0,v_x1,v_y1,v_x2,v_y2)  ctx->dtrace_block_affinetf( channel, pu, stat_type, v_x0, v_y0, v_x1, v_y1, v_x2, v_y2 ) // three vectors given as six components
+
+enum class BlockStatistic { // identifiers of the block statistics; name, type and type-specific info of each are listed in blockstatistic2description
+  // general
+  PredMode,
+  PartSize,
+  Depth,
+  QT_Depth,
+  BT_Depth,
+  MT_Depth,
+  ChromaQPAdj,
+  QP,
+  SplitSeries,
+  TransQuantBypassFlag,
+#if JVET_K1000_SIMPLIFIED_EMT
+  EMTFlag,
+#endif
+  TransformSkipFlag_Y,
+  TransformSkipFlag_Cb,
+  TransformSkipFlag_Cr,
+
+  // intra
+  IPCM,
+  Luma_IntraMode,
+  Chroma_IntraMode,
+  // inter
+  SkipFlag,
+  RootCbf,
+  Cbf_Y,
+  Cbf_Cb,
+  Cbf_Cr,
+#if  JVET_K0357_AMVR
+  IMVMode,
+#endif
+  InterDir,
+  MergeFlag,
+  MergeIdx,
+  MergeType,
+  MVPIdxL0,
+  MVPIdxL1,
+  MVL0,
+  MVL1,
+  MVDL0,
+  MVDL1,
+  RefIdxL0,
+  RefIdxL1,
+#if JVET_K_AFFINE
+  AffineFlag,
+  AffineMVL0,
+  AffineMVL1,
+#if JVET_K0337_AFFINE_6PARA
+  AffineType, // only declared when JVET_K0337_AFFINE_6PARA is enabled
+#endif
+#endif
+
+// for dual tree: counterparts of the general/intra entries, reported for the chroma channel type
+  // general
+  PartSize_Chroma,
+  Depth_Chroma,
+  QT_Depth_Chroma,
+  BT_Depth_Chroma,
+  MT_Depth_Chroma,
+  ChromaQPAdj_Chroma,
+  QP_Chroma,
+  SplitSeries_Chroma,
+  TransQuantBypassFlag_Chroma,
+
+  // intra
+  IPCM_Chroma,
+
+  NumBlockStatistics, // last enumerator; not a statistic itself (it has no entry in blockstatistic2description)
+};
+
+enum class BlockStatisticType { // kind of value a statistic carries; second tuple element in blockstatistic2description
+  Flag,            // entries of this type have an empty type-specific info string
+  Vector,          // two components (x, y); the info string gives the scale, e.g. "Scale: 4"
+  Integer,         // the info string gives the value range as "[min, max]"
+  AffineTFVectors, // three vectors (six components); the info string gives the scale
+};
+
+static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType, std::string>> blockstatistic2description =
+{
+  // Statistics enum                                                                                Statistics name string         Statistic Type                              Type specific information:
+  //   Type specific information is a "[min, max]" value range for Integer, "Scale: N" for Vector and AffineTFVectors, and empty for Flag entries
+  { BlockStatistic::PredMode,               std::tuple<std::string, BlockStatisticType, std::string>{"PredMode",                    BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::MergeFlag,              std::tuple<std::string, BlockStatisticType, std::string>{"MergeFlag",                   BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::MVL0,                   std::tuple<std::string, BlockStatisticType, std::string>{"MVL0",                        BlockStatisticType::Vector,                 "Scale: 4"}},
+  { BlockStatistic::MVL1,                   std::tuple<std::string, BlockStatisticType, std::string>{"MVL1",                        BlockStatisticType::Vector,                 "Scale: 4"}},
+  { BlockStatistic::IPCM,                   std::tuple<std::string, BlockStatisticType, std::string>{"IPCM",                        BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::Luma_IntraMode,         std::tuple<std::string, BlockStatisticType, std::string>{"Luma_IntraMode",              BlockStatisticType::Integer,                "[0, " + std::to_string(NUM_INTRA_MODE) + "]"}},
+  { BlockStatistic::Chroma_IntraMode,       std::tuple<std::string, BlockStatisticType, std::string>{"Chroma_IntraMode",            BlockStatisticType::Integer,                "[0, " + std::to_string(NUM_INTRA_MODE) + "]"}},
+  { BlockStatistic::SkipFlag,               std::tuple<std::string, BlockStatisticType, std::string>{"SkipFlag",                    BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::TransformSkipFlag_Y,    std::tuple<std::string, BlockStatisticType, std::string>{"TransformSkipFlag_Y",         BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::TransformSkipFlag_Cb,   std::tuple<std::string, BlockStatisticType, std::string>{"TransformSkipFlag_Cb",        BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::TransformSkipFlag_Cr,   std::tuple<std::string, BlockStatisticType, std::string>{"TransformSkipFlag_Cr",        BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::PartSize,               std::tuple<std::string, BlockStatisticType, std::string>{"PartSize",                    BlockStatisticType::Integer,                "[0, " + std::to_string(NUMBER_OF_PART_SIZES) + "]"}},
+  { BlockStatistic::Depth,                  std::tuple<std::string, BlockStatisticType, std::string>{"Depth",                       BlockStatisticType::Integer,                "[0, 7]"}}, 
+  { BlockStatistic::QT_Depth,               std::tuple<std::string, BlockStatisticType, std::string>{"QT_Depth",                    BlockStatisticType::Integer,                "[0, 7]"}}, 
+  { BlockStatistic::BT_Depth,               std::tuple<std::string, BlockStatisticType, std::string>{"BT_Depth",                    BlockStatisticType::Integer,                "[0, 7]"}}, 
+  { BlockStatistic::MT_Depth,               std::tuple<std::string, BlockStatisticType, std::string>{"MT_Depth",                    BlockStatisticType::Integer,                "[0, 7]"}}, 
+  { BlockStatistic::ChromaQPAdj,            std::tuple<std::string, BlockStatisticType, std::string>{"ChromaQPAdj",                 BlockStatisticType::Integer,                "[-10, 10]"}}, 
+  { BlockStatistic::QP,                     std::tuple<std::string, BlockStatisticType, std::string>{"QP",                          BlockStatisticType::Integer,                "[0, 51]"}},
+  { BlockStatistic::SplitSeries,            std::tuple<std::string, BlockStatisticType, std::string>{"SplitSeries",                 BlockStatisticType::Integer,                "[0, " + std::to_string(std::numeric_limits<SplitSeries>::max()) + "]"}},
+  { BlockStatistic::RootCbf,                std::tuple<std::string, BlockStatisticType, std::string>{"RootCbf",                     BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::Cbf_Y,                  std::tuple<std::string, BlockStatisticType, std::string>{"Cbf_Y",                       BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::Cbf_Cb,                 std::tuple<std::string, BlockStatisticType, std::string>{"Cbf_Cb",                      BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::Cbf_Cr,                 std::tuple<std::string, BlockStatisticType, std::string>{"Cbf_Cr",                      BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::TransQuantBypassFlag,   std::tuple<std::string, BlockStatisticType, std::string>{"TransQuantBypassFlag",        BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::MergeIdx,               std::tuple<std::string, BlockStatisticType, std::string>{"MergeIdx",                    BlockStatisticType::Integer,                "[0, 7]"}},
+  { BlockStatistic::InterDir,               std::tuple<std::string, BlockStatisticType, std::string>{"InterDir",                    BlockStatisticType::Integer,                "[1, 3]"}},
+  { BlockStatistic::MergeType,              std::tuple<std::string, BlockStatisticType, std::string>{"MergeType",                   BlockStatisticType::Integer,                "[0, 2]"}},
+  { BlockStatistic::MVPIdxL0,               std::tuple<std::string, BlockStatisticType, std::string>{"MVPIdxL0",                    BlockStatisticType::Integer,                "[0, 1]"}}, 
+  { BlockStatistic::MVDL0,                  std::tuple<std::string, BlockStatisticType, std::string>{"MVDL0",                       BlockStatisticType::Vector,                 "Scale: 4"}},
+  { BlockStatistic::RefIdxL0,               std::tuple<std::string, BlockStatisticType, std::string>{"RefIdxL0",                    BlockStatisticType::Integer,                "[0, 4]"}}, 
+  { BlockStatistic::MVPIdxL1,               std::tuple<std::string, BlockStatisticType, std::string>{"MVPIdxL1",                    BlockStatisticType::Integer,                "[0, 1]"}}, 
+  { BlockStatistic::MVDL1,                  std::tuple<std::string, BlockStatisticType, std::string>{"MVDL1",                       BlockStatisticType::Vector,                 "Scale: 4"}},
+  { BlockStatistic::RefIdxL1,               std::tuple<std::string, BlockStatisticType, std::string>{"RefIdxL1",                    BlockStatisticType::Integer,                "[0, 4]"}}, 
+#if JVET_K0357_AMVR
+  { BlockStatistic::IMVMode,                std::tuple<std::string, BlockStatisticType, std::string>{"IMVMode",                     BlockStatisticType::Integer,                "[0, 2]"}},
+#endif
+#if JVET_K_AFFINE
+  { BlockStatistic::AffineFlag,             std::tuple<std::string, BlockStatisticType, std::string>{"AffineFlag",                  BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::AffineMVL0,             std::tuple<std::string, BlockStatisticType, std::string>{"AffineMVL0",                  BlockStatisticType::AffineTFVectors,        "Scale: 4"}},
+  { BlockStatistic::AffineMVL1,             std::tuple<std::string, BlockStatisticType, std::string>{"AffineMVL1",                  BlockStatisticType::AffineTFVectors,        "Scale: 4"}},
+#if JVET_K0337_AFFINE_6PARA
+  { BlockStatistic::AffineType,             std::tuple<std::string, BlockStatisticType, std::string>{"AffineType",                  BlockStatisticType::Flag,                   ""} },
+#endif
+#endif
+#if JVET_K1000_SIMPLIFIED_EMT
+  { BlockStatistic::EMTFlag,                std::tuple<std::string, BlockStatisticType, std::string>{"EMTFlag",                     BlockStatisticType::Flag,                   ""}},
+#endif
+
+
+  // for dual tree: chroma channel counterparts of the general and intra entries above
+  { BlockStatistic::PartSize_Chroma,               std::tuple<std::string, BlockStatisticType, std::string>{"PartSize_Chroma",                    BlockStatisticType::Integer,                "[0, " + std::to_string(NUMBER_OF_PART_SIZES) + "]"}},
+  { BlockStatistic::Depth_Chroma,                  std::tuple<std::string, BlockStatisticType, std::string>{"Depth_Chroma",                       BlockStatisticType::Integer,                "[0, 10]"}}, // todo: actual limits?
+  { BlockStatistic::QT_Depth_Chroma,               std::tuple<std::string, BlockStatisticType, std::string>{"QT_Depth_Chroma",                    BlockStatisticType::Integer,                "[0, 10]"}}, // todo: actual limits?
+  { BlockStatistic::BT_Depth_Chroma,               std::tuple<std::string, BlockStatisticType, std::string>{"BT_Depth_Chroma",                    BlockStatisticType::Integer,                "[0, 10]"}}, // todo: actual limits?
+  { BlockStatistic::MT_Depth_Chroma,               std::tuple<std::string, BlockStatisticType, std::string>{"MT_Depth_Chroma",                    BlockStatisticType::Integer,                "[0, 10]"}}, // todo: actual limits?
+  { BlockStatistic::ChromaQPAdj_Chroma,            std::tuple<std::string, BlockStatisticType, std::string>{"ChromaQPAdj_Chroma",                 BlockStatisticType::Integer,                "[-10, 10]"}}, // todo: actual limits?
+  { BlockStatistic::QP_Chroma,                     std::tuple<std::string, BlockStatisticType, std::string>{"QP_Chroma",                          BlockStatisticType::Integer,                "[0, 51]"}},
+  { BlockStatistic::SplitSeries_Chroma,            std::tuple<std::string, BlockStatisticType, std::string>{"SplitSeries_Chroma",                 BlockStatisticType::Integer,                "[0, " + std::to_string(std::numeric_limits<SplitSeries>::max()) + "]"}},
+  { BlockStatistic::TransQuantBypassFlag_Chroma,   std::tuple<std::string, BlockStatisticType, std::string>{"TransQuantBypassFlag_Chroma",        BlockStatisticType::Flag,                   ""}},
+  { BlockStatistic::IPCM_Chroma,                   std::tuple<std::string, BlockStatisticType, std::string>{"IPCM_Chroma",                        BlockStatisticType::Flag,                   ""}},
+
+};
+
+
+std::string GetBlockStatisticName(BlockStatistic statistic);             // presumably the name string stored in blockstatistic2description - confirm in the .cpp
+std::string GetBlockStatisticTypeString(BlockStatistic statistic);       // presumably a string form of the entry's BlockStatisticType - confirm in the .cpp
+std::string GetBlockStatisticTypeSpecificInfo(BlockStatistic statistic); // presumably the type-specific info string of the entry - confirm in the .cpp
+
+void writeBlockStatisticsHeader(const SPS *sps);
+void getAndStoreBlockStatistics(const CodingStructure& cs, const UnitArea& ctuArea);
+void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea);      // traces block statistics on channel D_BLOCK_STATISTICS_ALL
+void writeAllCodedData(const CodingStructure& cs, const UnitArea& ctuArea); // traces block statistics on channel D_BLOCK_STATISTICS_CODED, each under its own guarding condition
+#endif
+
+#endif // _DTRACE_BLOCKSTATISTICS_H_
diff --git a/source/Lib/CommonLib/dtrace_next.h b/source/Lib/CommonLib/dtrace_next.h
index 89f214cc9c5d7dbeda5b15b3168f266f4e4a623a..88d6249f118683fcd18bbbee5935f68b79e943b0 100644
--- a/source/Lib/CommonLib/dtrace_next.h
+++ b/source/Lib/CommonLib/dtrace_next.h
@@ -145,8 +145,12 @@ enum DTRACE_CHANNEL
   D_RDOQ_COST,
   D_TMP,
   D_CRC
+#if K0149_BLOCK_STATISTICS
+  ,
+  D_BLOCK_STATISTICS_ALL,
+  D_BLOCK_STATISTICS_CODED,
+#endif
 };
-
 #define _CNL_DEF(_s) {_s,(std::string(#_s))}
 
 inline void tracing_uninit( CDTrace *pDtrace )
@@ -245,6 +249,11 @@ inline CDTrace* tracing_init( std::string& sTracingFile, std::string& sTracingRu
     _CNL_DEF( D_RDOQ_COST ),
     _CNL_DEF( D_TMP ),
     _CNL_DEF( D_CRC )
+  #if K0149_BLOCK_STATISTICS
+    ,
+    _CNL_DEF( D_BLOCK_STATISTICS_ALL ),
+    _CNL_DEF( D_BLOCK_STATISTICS_CODED ),
+  #endif
   };
   dtrace_channels_t channels( next_channels, &next_channels[sizeof( next_channels ) / sizeof( next_channels[0] )] );
 
diff --git a/source/Lib/CommonLib/x86/InitX86.cpp b/source/Lib/CommonLib/x86/InitX86.cpp
index 47d109ea0e678a28b5e998c2b58363fea9dbdbb6..d94ede594c582b7bd6338a17f9203ff60b483451 100644
--- a/source/Lib/CommonLib/x86/InitX86.cpp
+++ b/source/Lib/CommonLib/x86/InitX86.cpp
@@ -169,5 +169,24 @@ void AdaptiveLoopFilter::initAdaptiveLoopFilterX86()
 }
 #endif
 
+#if ENABLE_SIMD_OPT_CPR
+void IbcHashMap::initIbcHashMapX86()
+{
+  // SSE42, AVX, AVX2 and AVX512 all select the SSE42 instantiation; any other value selects nothing.
+  switch (read_x86_extension_flags())
+  {
+  case SSE42:
+  case AVX:
+  case AVX2:
+  case AVX512:
+    _initIbcHashMapX86<SSE42>();
+    break;
+  default:
+    // SSE41 and every remaining value: no SIMD implementation is installed here
+    break;
+  }
+}
+#endif
+
 #endif
 
diff --git a/source/Lib/DecoderAnalyserLib/CMakeLists.txt b/source/Lib/DecoderAnalyserLib/CMakeLists.txt
index b26321590b94b8c2eaac751d7ed8dcf980bdeedc..9b0017617cd6af649ccf69129f8205355165e636 100644
--- a/source/Lib/DecoderAnalyserLib/CMakeLists.txt
+++ b/source/Lib/DecoderAnalyserLib/CMakeLists.txt
@@ -18,7 +18,7 @@ target_compile_definitions( ${LIB_NAME} PUBLIC RExt__DECODER_DEBUG_BIT_STATISTIC
 target_compile_definitions( ${LIB_NAME} PUBLIC RExt__DECODER_DEBUG_TOOL_STATISTICS=1 )
 
 if( ENABLE_VTM )
-  target_compile_definitions( ${LIB_NAME} PUBLIC JEM_TOOLS=0 )
+  target_compile_definitions( ${LIB_NAME} PUBLIC BMS_TOOLS=0 )
 endif()
 
 if( EXTENSION_360_VIDEO )
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index 16908fa548e848a3c068886d6e637c4a0fbf8c48..91eba95ce14e8ccdce8de247ef665c4f2c5c5158 100644
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -180,7 +180,7 @@ bool CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i
         ctx += leftCTUAddr > -1 ? ( ctbAlfFlag[leftCTUAddr] ? 1 : 0 ) : 0;
         ctx += aboveCTUAddr > -1 ? ( ctbAlfFlag[aboveCTUAddr] ? 1 : 0 ) : 0;
 
-        if( alfSliceParam.chromaCtbPresentFlag && compIdx )
+        if( compIdx && alfSliceParam.chromaCtbPresentFlag )
         {
           ctbAlfFlag[ctuRsAddr] = 1;
         }
@@ -703,7 +703,6 @@ bool CABACReader::split_cu_flag( CodingStructure& cs, Partitioner &partitioner )
 bool CABACReader::coding_unit( CodingUnit &cu, Partitioner &partitioner, CUCtx& cuCtx )
 {
   CodingStructure& cs = *cu.cs;
-
   // transquant bypass flag
   if( cs.pps->getTransquantBypassEnabledFlag() )
   {
@@ -802,7 +801,7 @@ void CABACReader::imv_mode( CodingUnit& cu, MergeCtx& mrgCtx )
 
   unsigned value = 0;
   unsigned ctxId = DeriveCtx::CtxIMVFlag( cu );
-  value = m_BinDecoder.decodeBin( Ctx::ImvFlag( ctxId ) );
+    value = m_BinDecoder.decodeBin( Ctx::ImvFlag( ctxId ) );
   DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", value, ctxId );
 
   if( spsNext.getImvMode() == IMV_4PEL && value )
@@ -852,7 +851,6 @@ void CABACReader::cu_pred_data( CodingUnit &cu )
     intra_chroma_pred_modes( cu );
     return;
   }
-
   MergeCtx mrgCtx;
 
   for( auto &pu : CU::traversePUs( cu ) )
@@ -863,6 +861,7 @@ void CABACReader::cu_pred_data( CodingUnit &cu )
 #if JVET_K0357_AMVR
   imv_mode   ( cu, mrgCtx );
 #endif
+
 }
 
 
diff --git a/source/Lib/DecoderLib/CMakeLists.txt b/source/Lib/DecoderLib/CMakeLists.txt
index 0ec46167283c09b10e12bbfd98ee9adabec5598a..62413e9004538890a6c4506006be8d78ea6c6959 100644
--- a/source/Lib/DecoderLib/CMakeLists.txt
+++ b/source/Lib/DecoderLib/CMakeLists.txt
@@ -17,7 +17,7 @@ add_library( ${LIB_NAME} STATIC ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} )
 target_compile_definitions( ${LIB_NAME} PUBLIC )
 
 if( ENABLE_VTM )
-  target_compile_definitions( ${LIB_NAME} PUBLIC JEM_TOOLS=0 )
+  target_compile_definitions( ${LIB_NAME} PUBLIC BMS_TOOLS=0 )
 endif()
 
 if( EXTENSION_360_VIDEO )
diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp
index 370f41f93afabe3f8a81491ee1afa5cb68241f47..4c06ed241f2e349832f4efb4ac77e8089b43ad45 100644
--- a/source/Lib/DecoderLib/DecCu.cpp
+++ b/source/Lib/DecoderLib/DecCu.cpp
@@ -48,6 +48,10 @@
 #if RExt__DECODER_DEBUG_TOOL_STATISTICS
 #include "CommonLib/CodingStatistics.h"
 #endif
+#if K0149_BLOCK_STATISTICS
+#include "CommonLib/ChromaFormat.h"
+#include "CommonLib/dtrace_blockstatistics.h"
+#endif
 
 //! \ingroup DecoderLib
 //! \{
@@ -107,6 +111,9 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea )
       DTRACE_BLOCK_REC( cs.picture->getRecoBuf( currCU ), currCU, currCU.predMode );
     }
   }
+#if K0149_BLOCK_STATISTICS
+  getAndStoreBlockStatistics(cs, ctuArea);
+#endif
 }
 
 // ====================================================================================================================
@@ -400,8 +407,13 @@ void DecCu::xDeriveCUMV( CodingUnit &cu )
     MergeCtx mrgCtx;
 
 #if RExt__DECODER_DEBUG_TOOL_STATISTICS
+#if JVET_K_AFFINE
+    if( pu.cu->affine )
+    {
+      CodingStatistics::IncrementStatisticTool( CodingStatisticsClassType{ STATS__TOOL_AFF, pu.Y().width, pu.Y().height } );
+    }
+#endif
 #endif
-
     if( pu.mergeFlag )
     {
       {
diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp
index 511967e0710bec006bc64f227b1e392b230c1f0b..590a0525cd48af9fdef09e17267eaf305c5b5e91 100644
--- a/source/Lib/DecoderLib/DecLib.cpp
+++ b/source/Lib/DecoderLib/DecLib.cpp
@@ -48,6 +48,9 @@
 #include <fcntl.h>
 #include "AnnexBread.h"
 #include "NALread.h"
+#if K0149_BLOCK_STATISTICS
+#include "CommonLib/dtrace_blockstatistics.h"
+#endif
 
 #if RExt__DECODER_DEBUG_TOOL_STATISTICS
 #include "CommonLib/CodingStatistics.h"
@@ -512,6 +515,10 @@ void DecLib::executeLoopFilters()
   // deblocking filter
   m_cLoopFilter.loopFilterPic( cs );
 
+#if DMVR_JVET_LOW_LATENCY_K0217
+  CS::setRefinedMotionField(cs);
+#endif
+
   if( cs.sps->getUseSAO() )
   {
     m_cSAO.SAOProcess( cs, cs.picture->getSAO() );
@@ -898,6 +905,15 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
   }
   m_apcSlicePilot->setIndependentSliceIdx(uiIndependentSliceIdx);
 
+#if K0149_BLOCK_STATISTICS
+  PPS *pps = m_parameterSetManager.getPPS(m_apcSlicePilot->getPPSId());
+  CHECK(pps == 0, "No PPS present");
+  SPS *sps = m_parameterSetManager.getSPS(pps->getSPSId());
+  CHECK(sps == 0, "No SPS present");
+
+  writeBlockStatisticsHeader(sps);
+#endif
+
   DTRACE_UPDATE( g_trace_ctx, std::make_pair( "poc", m_apcSlicePilot->getPOC() ) );
 
 #if HEVC_DEPENDENT_SLICES
@@ -1085,7 +1101,7 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
     pcSlice->checkCRA(pcSlice->getRPS(), m_pocCRA, m_associatedIRAPType, m_cListPic );
     // Set reference list
     pcSlice->setRefPicList( m_cListPic, true, true );
-
+	
     if (!pcSlice->isIntra())
     {
       bool bLowDelay = true;
diff --git a/source/Lib/DecoderLib/DecSlice.cpp b/source/Lib/DecoderLib/DecSlice.cpp
index 88ae6e7632318e469898361eee3c930f0bcf40ac..01c9f40106e0516cc6d78e6d95956a56125e0bb3 100644
--- a/source/Lib/DecoderLib/DecSlice.cpp
+++ b/source/Lib/DecoderLib/DecSlice.cpp
@@ -225,6 +225,7 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream )
 #endif
 
 
+
     isLastCtuOfSliceSegment = cabacReader.coding_tree_unit( cs, ctuArea, pic->m_prevQP, ctuRsAddr );
 
     m_pcCuDecoder->decompressCtu( cs, ctuArea );
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index b681d5b1b2b9a968ac33c358430caf99c6719b0e..fe994754f6a0ac68f074fcbb10995ff7e83e6690 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -827,7 +827,6 @@ void HLSyntaxReader::parseSPSNext( SPSNext& spsNext, const bool usePCM )
   }
 #endif
 #endif
-
   for( int k = 0; k < SPSNext::NumReservedFlags; k++ )
   {
     READ_FLAG( symbol,  "reserved_flag" );                          if( symbol != 0 ) EXIT("Incompatible version: SPSNext reserved flag not equal to zero (bitstream was probably created with newer software version)" );
@@ -1819,7 +1818,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
       pcSlice->setSubPuMvpSliceSubblkSizeEnable(uiCode);
       if (pcSlice->getSubPuMvpSliceSubblkSizeEnable())
       {
-        READ_CODE(3, uiCode, "slice_atmvp_subblk_size_log2");
+        READ_CODE(3, uiCode, "log2_slice_sub_pu_tmvp_size_minus2");
         pcSlice->setSubPuMvpSubblkLog2Size(uiCode + MIN_CU_LOG2);
       }
       else
diff --git a/source/Lib/EncoderLib/Analyze.h b/source/Lib/EncoderLib/Analyze.h
index 977db82526cfda2cc3ca80a2e59d0d02b1672324..2e5e4736054211ade625a795d717096a26d9026b 100644
--- a/source/Lib/EncoderLib/Analyze.h
+++ b/source/Lib/EncoderLib/Analyze.h
@@ -45,6 +45,7 @@
 #include <stdio.h>
 #include <memory.h>
 #include <assert.h>
+#include <cinttypes>
 #include "CommonLib/CommonDef.h"
 #include "CommonLib/ChromaFormat.h"
 #include "math.h"
@@ -84,9 +85,17 @@ public:
   virtual ~Analyze()  {}
   Analyze() { clear(); }
 
-  void  addResult( double psnr[MAX_NUM_COMPONENT], double bits, const double MSEyuvframe[MAX_NUM_COMPONENT])
+  void  addResult( double psnr[MAX_NUM_COMPONENT], double bits, const double MSEyuvframe[MAX_NUM_COMPONENT]
+#if JVET_K0157
+    , bool isEncodeLtRef
+#endif
+  )
   {
     m_dAddBits  += bits;
+#if JVET_K0157
+    if (isEncodeLtRef)
+      return;
+#endif
     for(uint32_t i=0; i<MAX_NUM_COMPONENT; i++)
     {
       m_dPSNRSum[i] += psnr[i];
@@ -173,11 +182,10 @@ public:
     PSNRyuv = (MSEyuv == 0) ? 999.99 : 10.0 * log10((maxval * maxval) / MSEyuv);
   }
 
-
 #if ENABLE_QPA || WCG_WPSNR
-  void    printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const BitDepths &bitDepths, const bool useWPSNR = false )
+  void    printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths, const bool useWPSNR = false )
 #else
-  void    printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const BitDepths &bitDepths )
+  void    printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths )
 #endif
   {
 #if !WCG_WPSNR
@@ -226,6 +234,11 @@ public:
 #endif
           msg( e_msg_level, "         \tTotal Frames |   "   "Bitrate     "  "Y-PSNR" );
 
+          if (printHexPsnr)
+          {
+            msg(e_msg_level, "xY-PSNR           ");
+          }
+
           if (printSequenceMSE)
           {
             msg( e_msg_level, "    Y-MSE\n" );
@@ -244,6 +257,19 @@ public:
 #endif
                  getPsnr(COMPONENT_Y) / (double)getNumPic() );
 
+          if (printHexPsnr)
+          {
+            double dPsnr;
+            uint64_t xPsnr;
+            dPsnr = getPsnr(COMPONENT_Y) / (double)getNumPic();
+
+            copy(reinterpret_cast<uint8_t *>(&dPsnr),
+              reinterpret_cast<uint8_t *>(&dPsnr) + sizeof(dPsnr),
+              reinterpret_cast<uint8_t *>(&xPsnr));
+
+            msg(e_msg_level, "   %16" PRIx64 " ", xPsnr);
+          }
+
           if (printSequenceMSE)
           {
             msg( e_msg_level, "  %8.4lf\n", m_MSEyuvframe[COMPONENT_Y] / (double)getNumPic() );
@@ -267,6 +293,11 @@ public:
 #endif
           msg( e_msg_level, "\tTotal Frames |   "   "Bitrate     "  "Y-PSNR" );
 
+          if (printHexPsnr)
+          {
+            msg(e_msg_level, "xY-PSNR           ");
+          }
+
           if (printSequenceMSE)
           {
             msg( e_msg_level, "    Y-MSE\n" );
@@ -285,6 +316,19 @@ public:
 #endif
                  getPsnr(COMPONENT_Y) / (double)getNumPic() );
 
+          if (printHexPsnr)
+          {
+            double dPsnr;
+            uint64_t xPsnr;
+            dPsnr = getPsnr(COMPONENT_Y) / (double)getNumPic();
+
+            copy(reinterpret_cast<uint8_t *>(&dPsnr),
+              reinterpret_cast<uint8_t *>(&dPsnr) + sizeof(dPsnr),
+              reinterpret_cast<uint8_t *>(&xPsnr));
+
+            msg(e_msg_level, "   %16" PRIx64 " ", xPsnr);
+          }
+
           if (printSequenceMSE)
           {
             msg( e_msg_level, "  %8.4lf\n", m_MSEyuvframe[COMPONENT_Y] / (double)getNumPic() );
@@ -313,6 +357,11 @@ public:
 #endif
             msg( e_msg_level, "         \tTotal Frames |   "   "Bitrate     "  "Y-PSNR    "  "U-PSNR    "  "V-PSNR    "  "YUV-PSNR " );
 
+            if (printHexPsnr)
+            {
+              msg(e_msg_level, "xY-PSNR           "  "xU-PSNR           "  "xV-PSNR           ");
+            }
+
             if (printSequenceMSE)
             {
               msg( e_msg_level, " Y-MSE     "  "U-MSE     "  "V-MSE    "  "YUV-MSE \n" );
@@ -340,6 +389,21 @@ public:
                    getPsnr(COMPONENT_Cr) / (double)getNumPic(),
                    PSNRyuv );
 
+            if (printHexPsnr)
+            {
+              double dPsnr[MAX_NUM_COMPONENT];
+              uint64_t xPsnr[MAX_NUM_COMPONENT];
+              for (int i = 0; i < MAX_NUM_COMPONENT; i++)
+              {
+                dPsnr[i] = getPsnr((ComponentID)i) / (double)getNumPic();
+
+                copy(reinterpret_cast<uint8_t *>(&dPsnr[i]),
+                  reinterpret_cast<uint8_t *>(&dPsnr[i]) + sizeof(dPsnr[i]),
+                  reinterpret_cast<uint8_t *>(&xPsnr[i]));
+              }
+              msg(e_msg_level, "   %16" PRIx64 "  %16" PRIx64 "  %16" PRIx64, xPsnr[COMPONENT_Y], xPsnr[COMPONENT_Cb], xPsnr[COMPONENT_Cr]);
+            }
+
             if (printSequenceMSE)
             {
               msg( e_msg_level, "  %8.4lf  "   "%8.4lf  "    "%8.4lf  "   "%8.4lf\n",
@@ -373,6 +437,11 @@ public:
             m_ext360.printHeader(e_msg_level);
 #endif
 
+            if (printHexPsnr)
+            {
+              msg(e_msg_level, "xY-PSNR           "  "xU-PSNR           "  "xV-PSNR           ");
+            }
+
             if (printSequenceMSE)
             {
               msg( e_msg_level, " Y-MSE     "  "U-MSE     "  "V-MSE    "  "YUV-MSE \n" );
@@ -400,6 +469,21 @@ public:
                    getPsnr(COMPONENT_Cr) / (double)getNumPic(),
                    PSNRyuv );
 
+            if (printHexPsnr)
+            {
+              double dPsnr[MAX_NUM_COMPONENT];
+              uint64_t xPsnr[MAX_NUM_COMPONENT];
+              for (int i = 0; i < MAX_NUM_COMPONENT; i++)
+              {
+                dPsnr[i] = getPsnr((ComponentID)i) / (double)getNumPic();
+
+                copy(reinterpret_cast<uint8_t *>(&dPsnr[i]),
+                  reinterpret_cast<uint8_t *>(&dPsnr[i]) + sizeof(dPsnr[i]),
+                  reinterpret_cast<uint8_t *>(&xPsnr[i]));
+              }
+              msg(e_msg_level, "   %16" PRIx64 "  %16" PRIx64 "  %16" PRIx64 , xPsnr[COMPONENT_Y], xPsnr[COMPONENT_Cb], xPsnr[COMPONENT_Cr]);
+            }
+
 #if EXTENSION_360_VIDEO
             m_ext360.printPSNRs(getNumPic(), e_msg_level);
 #endif
@@ -427,7 +511,7 @@ public:
   }
 
 
-  void    printSummary(const ChromaFormat chFmt, const bool printSequenceMSE, const BitDepths &bitDepths, const std::string &sFilename)
+  void    printSummary(const ChromaFormat chFmt, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths, const std::string &sFilename)
   {
     FILE* pFile = fopen (sFilename.c_str(), "at");
 
diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp
index d98bd514611797f6bbf6811b58f27a64d245655f..10059954eda284343993f235a96be6cbe46b8b3a 100644
--- a/source/Lib/EncoderLib/CABACWriter.cpp
+++ b/source/Lib/EncoderLib/CABACWriter.cpp
@@ -642,7 +642,6 @@ void CABACWriter::split_cu_mode_mt(const PartSplit split, const CodingStructure&
 void CABACWriter::coding_unit( const CodingUnit& cu, Partitioner& partitioner, CUCtx& cuCtx )
 {
   CodingStructure& cs = *cu.cs;
-
   // transquant bypass flag
   if( cs.pps->getTransquantBypassEnabledFlag() )
   {
@@ -753,6 +752,7 @@ void CABACWriter::cu_pred_data( const CodingUnit& cu )
 #if JVET_K0357_AMVR
   imv_mode   ( cu );
 #endif
+
 }
 
 
@@ -1302,7 +1302,7 @@ void CABACWriter::imv_mode( const CodingUnit& cu )
   }
 
   unsigned ctxId = DeriveCtx::CtxIMVFlag( cu );
-  m_BinEncoder.encodeBin( ( cu.imv > 0 ), Ctx::ImvFlag( ctxId ) );
+    m_BinEncoder.encodeBin( ( cu.imv > 0 ), Ctx::ImvFlag( ctxId ) );
   DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", (cu.imv > 0), ctxId );
 
   if( spsNext.getImvMode() == IMV_4PEL && cu.imv > 0 )
diff --git a/source/Lib/EncoderLib/CMakeLists.txt b/source/Lib/EncoderLib/CMakeLists.txt
index 9c30ad89e76226aa20bbc92b7f6c7ff73058855e..9e75e9fb100aea05e051b09172210d93e3b66676 100644
--- a/source/Lib/EncoderLib/CMakeLists.txt
+++ b/source/Lib/EncoderLib/CMakeLists.txt
@@ -17,7 +17,7 @@ add_library( ${LIB_NAME} STATIC ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} )
 target_compile_definitions( ${LIB_NAME} PUBLIC )
 
 if( ENABLE_VTM )
-  target_compile_definitions( ${LIB_NAME} PUBLIC JEM_TOOLS=0 )
+  target_compile_definitions( ${LIB_NAME} PUBLIC BMS_TOOLS=0 )
 endif()
 
 if( EXTENSION_360_VIDEO )
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index 015b7cc95794a95eb66267544956d842c630b8b8..6996a87228967306b9a46d9dc3d4142bcba838a5 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -212,6 +212,9 @@ protected:
 
 #if ENABLE_WPP_PARALLELISM
   bool      m_AltDQPCoding;
+#endif
+#if JVET_K0157
+  bool      m_compositeRefEnabled;        //composite reference
 #endif
   // ADD_NEW_TOOL : (encoder lib) add tool enabling flags and associated parameters here
 
@@ -567,8 +570,8 @@ public:
   bool      getPrintMSEBasedSequencePSNR    ()         const { return m_printMSEBasedSequencePSNR;  }
   void      setPrintMSEBasedSequencePSNR    (bool value)     { m_printMSEBasedSequencePSNR = value; }
 
-  bool getPrintHexPsnr() const { return m_printHexPsnr; }
-  void setPrintHexPsnr(bool value) { m_printHexPsnr = value; }
+  bool      getPrintHexPsnr                 ()         const { return m_printHexPsnr;               }  // read accessor for the m_printHexPsnr flag
+  void      setPrintHexPsnr                 (bool value)     { m_printHexPsnr = value;              }  // stores value in the m_printHexPsnr flag
 
   bool      getPrintFrameMSE                ()         const { return m_printFrameMSE;              }
   void      setPrintFrameMSE                (bool value)     { m_printFrameMSE = value;             }
@@ -668,6 +671,11 @@ public:
 
 
 
+#if JVET_K0157
+  void      setUseCompositeRef              (bool b)         { m_compositeRefEnabled = b; }  // stores b in m_compositeRefEnabled (only built when JVET_K0157 is set)
+  bool      getUseCompositeRef              ()         const { return m_compositeRefEnabled; }  // returns m_compositeRefEnabled (only built when JVET_K0157 is set)
+#endif
+
   // ADD_NEW_TOOL : (encoder lib) add access functions here
 
   void      setMaxCUWidth                   ( uint32_t  u )      { m_maxCUWidth  = u; }
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index dadf0bf0252c3eef545f147d9cff7032afbf4d77..4dd1259285ddefe1ad264ddd7cb4d51eedd7031f 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -302,7 +302,6 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign
   // init the partitioning manager
   Partitioner *partitioner = PartitionerFactory::get( *cs.slice );
   partitioner->initCtu( area, CH_L, *cs.slice );
-
   // init current context pointer
   m_CurrCtx = m_CtxBuffer.data();
 
@@ -322,7 +321,9 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign
   const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1 && KEEP_PRED_AND_RESI_SIGNALS;
   cs.useSubStructure( *bestCS, partitioner->chType, CS::getArea( *bestCS, area, partitioner->chType ), copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals );
 
-  if( !cs.pcv->ISingleTree && cs.slice->isIntra() && cs.pcv->chrFormat != CHROMA_400 )
+  if( !cs.pcv->ISingleTree && 
+    cs.slice->isIntra() 
+    && cs.pcv->chrFormat != CHROMA_400 )
   {
     m_CABACEstimator->getCtx() = m_CurrCtx->start;
 
@@ -340,6 +341,12 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign
     cs.useSubStructure( *bestCS, partitioner->chType, CS::getArea( *bestCS, area, partitioner->chType ), copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals );
   }
 
+#if JVET_K0390_RATECTRL
+  if (m_pcEncCfg->getUseRateCtrl())
+  {
+    (m_pcRateCtrl->getRCPic()->getLCU(ctuRsAddr)).m_actualMSE = (double)bestCS->dist / (double)m_pcRateCtrl->getRCPic()->getLCU(ctuRsAddr).m_numberOfPixel;
+  }
+#endif
   // reset context states and uninit context pointer
   m_CABACEstimator->getCtx() = m_CurrCtx->start;
   m_CurrCtx                  = 0;
@@ -555,7 +562,6 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
   }
 #endif
 
-
   m_modeCtrl->initCULevel( partitioner, *tempCS );
 
   m_CurrCtx->start = m_CABACEstimator->getCtx();
@@ -704,9 +710,9 @@ void EncCu::updateLambda( Slice* slice, double dQP )
 #endif
 #endif
   double qp_temp = (double) dQP + bitdepth_luma_qp_scale - SHIFT_QP;
-  
+
   double dQPFactor = m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_QPFactor;
-  
+
   if( slice->getSliceType() == I_SLICE )
   {
     if( m_pcEncCfg->getIntraQpFactor() >= 0.0 /*&& m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_sliceType != I_SLICE*/ )
@@ -757,7 +763,7 @@ void EncCu::updateLambda( Slice* slice, double dQP )
   dLambda *= lambdaModifier;
 
   int qpBDoffset = slice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA);
-  int iQP = max( -qpBDoffset, min( MAX_QP, (int) floor( dQP + 0.5 ) ) );
+  int iQP = Clip3(-qpBDoffset, MAX_QP, (int)floor(dQP + 0.5));
   m_pcSliceEncoder->setUpLambda(slice, dLambda, iQP);
 
 #else
@@ -1249,7 +1255,8 @@ void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestC
       m_CABACEstimator->cu_transquant_bypass_flag( cu );
     }
 
-    if( !cu.cs->slice->isIntra() )
+    if( !cu.cs->slice->isIntra() 
+      )
     {
       m_CABACEstimator->cu_skip_flag ( cu );
     }
@@ -1326,7 +1333,7 @@ void EncCu::xCheckIntraPCM(CodingStructure *&tempCS, CodingStructure *&bestCS, P
   cu.ipcm             = true;
 
   tempCS->addPU(tempCS->area, partitioner.chType);
-  
+
   tempCS->addTU( tempCS->area, partitioner.chType );
 
   m_pcIntraSearch->IPCMSearch(*tempCS, partitioner);
@@ -1340,7 +1347,8 @@ void EncCu::xCheckIntraPCM(CodingStructure *&tempCS, CodingStructure *&bestCS, P
     m_CABACEstimator->cu_transquant_bypass_flag( cu );
   }
 
-  if( !cu.cs->slice->isIntra() )
+  if( !cu.cs->slice->isIntra() 
+    )
   {
     m_CABACEstimator->cu_skip_flag ( cu );
   }
@@ -1505,6 +1513,10 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
   static_vector<unsigned, MRG_MAX_NUM_CANDS>  RdModeList;
   bool                                        mrgTempBufSet    = false;
 
+#if DMVR_JVET_LOW_LATENCY_K0217
+  Mv                                          refinedMvdL0[MRG_MAX_NUM_CANDS];
+#endif
+
   for( unsigned i = 0; i < MRG_MAX_NUM_CANDS; i++ )
   {
     RdModeList.push_back( i );
@@ -1553,7 +1565,6 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
       m_pcRdCost->setDistParam (distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth (CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);
 
       const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height) );
-
       for( uint32_t uiMergeCand = 0; uiMergeCand < mergeCtx.numValidMergeCand; uiMergeCand++ )
       {
         acMergeBuffer[uiMergeCand] = m_acMergeBuffer[uiMergeCand].getBuf( localUnitArea );
@@ -1570,6 +1581,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
         {
           mergeCtx.mvFieldNeighbours[2*uiMergeCand].mv   = pu.mv[0];
           mergeCtx.mvFieldNeighbours[2*uiMergeCand+1].mv = pu.mv[1];
+#if DMVR_JVET_LOW_LATENCY_K0217
+          refinedMvdL0[uiMergeCand] = pu.mvd[0];
+#endif
         }
 
 #if DISTORTION_TYPE_BUGFIX
@@ -1585,10 +1599,8 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
         double cost     = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass;
 
         updateCandList( uiMergeCand, cost, RdModeList, candCostList, uiNumMrgSATDCand );
-
         CHECK( std::min( uiMergeCand + 1, uiNumMrgSATDCand ) != RdModeList.size(), "" );
       }
-
       // Try to limit number of candidates using SATD-costs
       for( uint32_t i = 1; i < uiNumMrgSATDCand; i++ )
       {
@@ -1617,7 +1629,6 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
     for( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ )
     {
       uint32_t uiMergeCand = RdModeList[uiMrgHADIdx];
-
       if( ( (uiNoResidualPass != 0) && candHasNoResidual[uiMergeCand] )
        || ( (uiNoResidualPass == 0) && bestIsSkip ) )
       {
@@ -1647,6 +1658,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
 
       if( mrgTempBufSet )
       {
+#if DMVR_JVET_LOW_LATENCY_K0217
+        pu.mvd[0] = refinedMvdL0[uiMergeCand];
+#endif
         tempCS->getPredBuf().copyFrom( acMergeBuffer[ uiMergeCand ]);
       }
       else
@@ -1655,11 +1669,15 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
         
       }
 
+      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass
 #if JVET_K0357_AMVR
-      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, true, ( ( uiNoResidualPass == 0 ) ? &candHasNoResidual[uiMergeCand] : NULL ) );
-#else
-      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, ( ( uiNoResidualPass == 0 ) ? &candHasNoResidual[uiMergeCand] : NULL ) );
+        , NULL
 #endif
+#if JVET_K1000_SIMPLIFIED_EMT
+        , 1
+#endif
+        , uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL );
+
       if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip )
       {
         bestIsSkip = bestCS->getCU( partitioner.chType )->rootCbf == 0;
@@ -1744,7 +1762,6 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
 
   cu.firstPU->mergeFlag = true;
   cu.firstPU->mergeIdx  = 0;
-
   PU::getAffineMergeCand( *cu.firstPU, affineMvField, interDirNeighbours, numValidMergeCand );
   if( numValidMergeCand == -1 )
   {
@@ -1758,30 +1775,38 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
   PU::spanMotionInfo( *cu.firstPU );
 
   m_pcInterSearch->motionCompensation( cu );
+
+  xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0
 #if JVET_K0357_AMVR
-  xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, NULL, true, &hasNoResidual);
-#else
-  xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, &hasNoResidual);
+    , NULL
 #endif
+#if JVET_K1000_SIMPLIFIED_EMT
+    , 1
+#endif
+    , &hasNoResidual);
 
   if( ! (encTestMode.lossless || hasNoResidual) )
   {
     tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
     tempCS->copyStructure( *bestCS, partitioner.chType );
     tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() );
+
+    xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 1
 #if JVET_K0357_AMVR
-    xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 1, NULL, true, &hasNoResidual);
-#else
-    xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 1, &hasNoResidual);
+      , NULL
+#endif
+#if JVET_K1000_SIMPLIFIED_EMT
+      , 1
 #endif
+      , &hasNoResidual);
   }
 }
 #endif
-
 void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
 {
   tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
 
+
   CodingUnit &cu      = tempCS->addCU( tempCS->area, partitioner.chType );
 
   partitioner.setCUData( cu );
@@ -1798,16 +1823,24 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC
   cu.qp               = encTestMode.qp;
   CU::addPUs( cu );
 
+
   m_pcInterSearch->predInterSearch( cu, partitioner );
 
 #if JVET_K0357_AMVR
   const unsigned wIdx = gp_sizeIdxInfo->idxFrom( tempCS->area.lwidth () );
 #endif
+
+
+  xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0
 #if JVET_K0357_AMVR
-  xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, (m_pImvTempCS ? m_pImvTempCS[wIdx][encTestMode.partSize] : NULL));
-#else
-  xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, NULL);
+    , m_pImvTempCS ? m_pImvTempCS[wIdx][encTestMode.partSize] : NULL
+#endif
+#if JVET_K1000_SIMPLIFIED_EMT
+    , 1
 #endif
+    , 0
+  );
+
 }
 
 
@@ -1843,6 +1876,7 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be
     }
   }
 
+
   CodingUnit &cu = ( pcCUInfo2Reuse != nullptr ) ? *tempCS->getCU( partitioner.chType ) : tempCS->addCU( tempCS->area, partitioner.chType );
 
   if( pcCUInfo2Reuse == nullptr )
@@ -1877,11 +1911,13 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be
   cu.emtFlag  = false;
 #endif
 
+  
   if( pcCUInfo2Reuse != nullptr )
   {
     // reuse the motion info from pcCUInfo2Reuse
     CU::resetMVDandMV2Int( cu, m_pcInterSearch );
 
+
     if( !CU::hasSubCUNonZeroMVd( cu ) )
     {
       m_modeCtrl->useModeResult( encTestModeBase, tempCS, partitioner );
@@ -1894,26 +1930,41 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be
   }
   else
   {
+
     m_pcInterSearch->predInterSearch( cu, partitioner );
+
   }
 
+
   if( !CU::hasSubCUNonZeroMVd( cu ) )
   {
     m_modeCtrl->useModeResult( encTestModeBase, tempCS, partitioner );
     return false;
   }
-  xEncodeInterResidual(tempCS, bestCS, partitioner, encTestModeBase, 0, NULL);
 
+  xEncodeInterResidual( tempCS, bestCS, partitioner, encTestModeBase, 0
+#if JVET_K0357_AMVR
+    , NULL
+#endif
+#if JVET_K1000_SIMPLIFIED_EMT
+    , true
+#endif
+    , 0
+  );
 
   return true;
 }
 #endif
 
+void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass
 #if JVET_K0357_AMVR
-void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass, CodingStructure* imvCS, int emtMode, bool* bestHasNonResi )
-#else
-void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass, bool* bestHasNonResi )
+  , CodingStructure* imvCS
+#endif
+#if JVET_K1000_SIMPLIFIED_EMT
+  , int emtMode
 #endif
+  , bool* bestHasNonResi
+  )
 {
   if( residualPass == 1 && encTestMode.lossless )
   {
@@ -1922,19 +1973,72 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be
 
   CodingUnit*            cu        = tempCS->getCU( partitioner.chType );
   double   bestCostInternal        = MAX_DOUBLE;
+#if JVET_K1000_SIMPLIFIED_EMT
+  double           bestCost        = bestCS->cost;
+  const SPS&            sps        = *tempCS->sps;
+  const int      maxSizeEMT        = tempCS->pcv->noRQT ? EMT_INTER_MAX_CU_WITH_QTBT : EMT_INTER_MAX_CU;
+  bool              swapped        = false; // avoid unwanted data copy
+  bool             reloadCU        = false;
+  const bool considerEmtSecondPass = emtMode && sps.getSpsNext().getUseInterEMT() && partitioner.currArea().lwidth() <= maxSizeEMT && partitioner.currArea().lheight() <= maxSizeEMT;
+
+  int minEMTMode = 0;
+  int maxEMTMode = (considerEmtSecondPass?1:0);
 
+  if( emtMode == 2 )
+  {
+    minEMTMode = maxEMTMode = (cu->emtFlag?1:0);
+  }
 
+  for( int curEmtMode = minEMTMode; curEmtMode <= maxEMTMode; curEmtMode++ )
+#endif
   {
+#if JVET_K1000_SIMPLIFIED_EMT
+    if( reloadCU )
+    {
+      if( bestCost == bestCS->cost ) //The first EMT pass didn't become the bestCS, so we clear the TUs generated
+      {
+        tempCS->clearTUs();
+      }
+      else if( false == swapped )
+      {
+        tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+        tempCS->copyStructure( *bestCS, partitioner.chType );
+        tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() );
+        bestCost = bestCS->cost;
+        cu       = tempCS->getCU( partitioner.chType );
+        swapped = true;
+      }
+      else
+      {
+        tempCS->clearTUs();
+        bestCost = bestCS->cost;
+        cu       = tempCS->getCU( partitioner.chType );
+      }
+
+      // reset the distortion, bit count and cost of tempCS before the next EMT pass
+      tempCS->dist     = 0;
+      tempCS->fracBits = 0;
+      tempCS->cost     = MAX_DOUBLE;
+    }
+
+    reloadCU    = true; // enable cu reloading
+#endif
     cu->skip    = false;
+#if JVET_K1000_SIMPLIFIED_EMT
+    cu->emtFlag = curEmtMode;
+#endif
 
     const bool skipResidual = residualPass == 1;
     m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual );
 
-
     xEncodeDontSplit( *tempCS, partitioner );
 
     xCheckDQP( *tempCS, partitioner );
 
+
+#if JVET_K1000_SIMPLIFIED_EMT
+    double emtFirstPassCost = tempCS->cost;
+#endif
 #if JVET_K0357_AMVR
     if( imvCS && (tempCS->cost < imvCS->cost) )
     {
@@ -1944,7 +2048,6 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be
       }
       imvCS->copyStructure( *tempCS, partitioner.chType );
     }
-
 #endif
     if( NULL != bestHasNonResi && (bestCostInternal > tempCS->cost) )
     {
@@ -1959,8 +2062,22 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be
 #endif
     xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
 
+#if JVET_K1000_SIMPLIFIED_EMT
+    // after the first (non-EMT) pass, decide whether the EMT pass should be skipped
+    if( !curEmtMode && maxEMTMode )
+    {
+      const double thresholdToSkipEmtSecondPass = 1.1; // Skip checking EMT transforms
+      const bool bCond1 = !cu->firstTU->cbf[COMPONENT_Y];
 
-  }//end emt loop 
+      const bool bCond3 = emtFirstPassCost > ( bestCost * thresholdToSkipEmtSecondPass );
+
+      if( m_pcEncCfg->getFastInterEMT() && (bCond1 || bCond3 ) ) 
+      {
+        maxEMTMode = 0; // do not test EMT
+      }
+    }
+#endif
+  } //end emt loop
 }
 
 
diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h
index 560973c32561f8b35ad41df9b8726f119029e78a..101a62b582b961f3a1fc883fcfbdf683ef8117a7 100644
--- a/source/Lib/EncoderLib/EncCu.h
+++ b/source/Lib/EncoderLib/EncCu.h
@@ -121,7 +121,6 @@ private:
   unsigned int          m_prevPOC;
   bool                  m_clearSubMergeStatic;
 #endif
-
 #if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
   EncLib*               m_pcEncLib;
 #endif
@@ -201,13 +200,16 @@ protected:
 
   void xCheckRDCostMerge2Nx2N ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode );
 
+  void xEncodeInterResidual   ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass = 0
 #if JVET_K0357_AMVR
-  void xEncodeInterResidual   ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass = 0, CodingStructure* imvCS = NULL, int emtMode = 1, bool* bestHasNonResi = NULL );
-#else
-  void xEncodeInterResidual   ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass, bool* bestHasNonResi );
+    , CodingStructure* imvCS = NULL
+#endif
+#if JVET_K1000_SIMPLIFIED_EMT
+    , int emtMode = 1
 #endif
+    , bool* bestHasNonResi = NULL
+  );
 #if REUSE_CU_RESULTS
-
   void xReuseCachedResult     ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &Partitioner );
 #endif
 };
diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp
index bebe67399e6c85b36ec66fede8929284a1212f73..8c133e5668b778383b3b41ededb430b7960ca2fb 100644
--- a/source/Lib/EncoderLib/EncGOP.cpp
+++ b/source/Lib/EncoderLib/EncGOP.cpp
@@ -110,6 +110,15 @@ EncGOP::EncGOP()
 #endif
 
   m_bInitAMaxBT         = true;
+#if JVET_K0157
+  m_bgPOC = -1;
+  m_picBg = NULL;
+  m_picOrig = NULL;
+  m_isEncodedLTRef = false;
+  m_isUseLTRef = false;
+  m_isPrepareLTRef = true;
+  m_lastLTRefPoc = 0;
+#endif
 }
 
 EncGOP::~EncGOP()
@@ -139,6 +148,20 @@ void  EncGOP::destroy()
     m_pcDeblockingTempPicYuv = NULL;
   }
 #endif
+#if JVET_K0157
+  if (m_picBg)
+  {
+    m_picBg->destroy();
+    delete m_picBg;
+    m_picBg = NULL;
+  }
+  if (m_picOrig)
+  {
+    m_picOrig->destroy();
+    delete m_picOrig;
+    m_picOrig = NULL;
+  }
+#endif
 }
 
 void EncGOP::init ( EncLib* pcEncLib )
@@ -159,7 +182,7 @@ void EncGOP::init ( EncLib* pcEncLib )
   m_totalCoded         = 0;
 
   m_AUWriterIf = pcEncLib->getAUWriterIf();
-  
+
 #if WCG_EXT
   pcEncLib->getRdCost()->initLumaLevelToWeightTable();
 #endif
@@ -1286,7 +1309,11 @@ void trySkipOrDecodePicture( bool& decPic, bool& encPic, const EncCfg& cfg, Pict
 // ====================================================================================================================
 void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
                           std::list<PelUnitBuf*>& rcListPicYuvRecOut,
-                          bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE )
+                          bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE
+#if JVET_K0157
+                        , bool isEncodeLtRef
+#endif
+)
 {
   // TODO: Split this function up.
 
@@ -1296,7 +1323,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
   pcBitstreamRedirect = new OutputBitstream;
   AccessUnit::iterator  itLocationToPushSliceHeaderNALU; // used to store location where NALU containing slice header is to be inserted
 
-  xInitGOP( iPOCLast, iNumPicRcvd, isField );
+  xInitGOP(iPOCLast, iNumPicRcvd, isField
+#if JVET_K0157
+         , isEncodeLtRef
+#endif
+  );
 
   m_iNumPicCoded = 0;
   SEIMessages leadingSeiMessages;
@@ -1335,11 +1366,18 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     /////////////////////////////////////////////////////////////////////////////////////////////////// Initial to start encoding
     int iTimeOffset;
     int pocCurr;
+#if JVET_K0157
+    int multipleFactor = m_pcCfg->getUseCompositeRef() ? 2 : 1;
+#endif
 
     if(iPOCLast == 0) //case first frame or first top field
     {
       pocCurr=0;
+#if JVET_K0157
+      iTimeOffset = multipleFactor;
+#else
       iTimeOffset = 1;
+#endif
     }
     else if(iPOCLast == 1 && isField) //case first bottom field, just like the first frame, the poc computation is not right anymore, we set the right value
     {
@@ -1348,11 +1386,24 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     }
     else
     {
+#if JVET_K0157
+      pocCurr = iPOCLast - iNumPicRcvd * multipleFactor + m_pcCfg->getGOPEntry(iGOPid).m_POC - ((isField && m_iGopSize>1) ? 1 : 0);
+#else
       pocCurr = iPOCLast - iNumPicRcvd + m_pcCfg->getGOPEntry(iGOPid).m_POC - ((isField && m_iGopSize>1) ? 1:0);
+#endif
       iTimeOffset = m_pcCfg->getGOPEntry(iGOPid).m_POC;
     }
 
+#if JVET_K0157
+    if (m_pcCfg->getUseCompositeRef() && isEncodeLtRef)
+    {
+      pocCurr++;
+      iTimeOffset--;
+    }
+    if (pocCurr / multipleFactor >= m_pcCfg->getFramesToBeEncoded())
+#else
     if(pocCurr>=m_pcCfg->getFramesToBeEncoded())
+#endif
     {
       if (m_pcCfg->getEfficientFieldIRAPEnabled())
       {
@@ -1399,7 +1450,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     pcPic->allocateNewSlice();
     m_pcSliceEncoder->setSliceSegmentIdx(0);
 
-    m_pcSliceEncoder->initEncSlice ( pcPic, iPOCLast, pocCurr, iGOPid, pcSlice, isField );
+    m_pcSliceEncoder->initEncSlice(pcPic, iPOCLast, pocCurr, iGOPid, pcSlice, isField
+#if JVET_K0157
+      , isEncodeLtRef
+#endif
+    );
 
     DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "poc", pocCurr ) ) );
     DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "final", 0 ) ) );
@@ -1425,7 +1480,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     {
       pcSlice->setSliceType(I_SLICE);
     }
-
     // Set the nal unit type
     pcSlice->setNalUnitType(getNalUnitType(pocCurr, m_iLastIDR, isField));
     if(pcSlice->getTemporalLayerNonReferenceFlag())
@@ -1463,7 +1517,34 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     }
 
     pcSlice->decodingRefreshMarking(m_pocCRA, m_bRefreshPending, rcListPic, m_pcCfg->getEfficientFieldIRAPEnabled());
+#if JVET_K0157
+    if (m_pcCfg->getUseCompositeRef() && isEncodeLtRef)
+    {
+      setUseLTRef(true);
+      setPrepareLTRef(false);
+      setNewestBgPOC(pocCurr);
+      setLastLTRefPoc(pocCurr);
+    }
+    else if (pcPic->cs->sps->getSpsNext().getUseCompositeRef() && getLastLTRefPoc() >= 0 && getEncodedLTRef()==false && !getPicBg()->getSpliceFull() && (pocCurr - getLastLTRefPoc()) > (m_pcCfg->getFrameRate() * 2))
+    {
+      setUseLTRef(false);
+      setPrepareLTRef(false);
+      setEncodedLTRef(true);
+      setNewestBgPOC(-1);
+      setLastLTRefPoc(-1);
+    }
+
+    if (pcPic->cs->sps->getSpsNext().getUseCompositeRef() && m_picBg->getSpliceFull() && getUseLTRef())
+    {
+      m_pcEncLib->selectReferencePictureSet(pcSlice, pocCurr, iGOPid, m_bgPOC);
+    }
+    else
+    {
+      m_pcEncLib->selectReferencePictureSet(pcSlice, pocCurr, iGOPid, -1);
+    }
+#else
     m_pcEncLib->selectReferencePictureSet(pcSlice, pocCurr, iGOPid);
+#endif
     if (!m_pcCfg->getEfficientFieldIRAPEnabled())
     {
       if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP
@@ -1484,7 +1565,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       || (m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pcSlice->getAssociatedIRAPType() >= NAL_UNIT_CODED_SLICE_BLA_W_LP && pcSlice->getAssociatedIRAPType() <= NAL_UNIT_CODED_SLICE_CRA && pcSlice->getAssociatedIRAPPOC() == pcSlice->getPOC()+1)
       )
     {
-      pcSlice->createExplicitReferencePictureSetFromReference(rcListPic, pcSlice->getRPS(), pcSlice->isIRAP(), m_iLastRecoveryPicPOC, m_pcCfg->getDecodingRefreshType() == 3, m_pcCfg->getEfficientFieldIRAPEnabled());
+      pcSlice->createExplicitReferencePictureSetFromReference(rcListPic, pcSlice->getRPS(), pcSlice->isIRAP(), m_iLastRecoveryPicPOC, m_pcCfg->getDecodingRefreshType() == 3, m_pcCfg->getEfficientFieldIRAPEnabled()
+#if JVET_K0157
+                                                            , isEncodeLtRef, m_pcCfg->getUseCompositeRef()
+#endif
+      );
     }
 
     pcSlice->applyReferencePictureSet(rcListPic, pcSlice->getRPS());
@@ -1552,19 +1637,42 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         }
       }
     }
+#if JVET_K0157
+    if (pcSlice->getRPSidx() == -1)
+      arrangeLongtermPicturesInRPS(pcSlice, rcListPic);
+#else
     arrangeLongtermPicturesInRPS(pcSlice, rcListPic);
+#endif
     RefPicListModification* refPicListModification = pcSlice->getRefPicListModification();
     refPicListModification->setRefPicListModificationFlagL0(0);
     refPicListModification->setRefPicListModificationFlagL1(0);
+
+#if JVET_K0157
+    if (m_pcCfg->getUseCompositeRef() && getUseLTRef() && (pocCurr > getLastLTRefPoc())) // composite reference active: allow one extra active reference per list
+    {
+      pcSlice->setNumRefIdx(REF_PIC_LIST_0, std::min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive + 1, pcSlice->getRPS()->getNumberOfPictures())); // still capped by the RPS size
+      pcSlice->setNumRefIdx(REF_PIC_LIST_1, std::min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive + 1, pcSlice->getRPS()->getNumberOfPictures()));
+    }
+    else // regular case: configured number of active references, capped by the RPS size
+    {
+      pcSlice->setNumRefIdx(REF_PIC_LIST_0, std::min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive, pcSlice->getRPS()->getNumberOfPictures()));
+      pcSlice->setNumRefIdx(REF_PIC_LIST_1, std::min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive, pcSlice->getRPS()->getNumberOfPictures()));
+    }
+    if (pcPic->cs->sps->getSpsNext().getUseCompositeRef() && getPrepareLTRef()) {
+      arrangeCompositeReference(pcSlice, rcListPic, pocCurr);
+    }
+#else
     pcSlice->setNumRefIdx(REF_PIC_LIST_0,min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive,pcSlice->getRPS()->getNumberOfPictures()));
     pcSlice->setNumRefIdx(REF_PIC_LIST_1,min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive,pcSlice->getRPS()->getNumberOfPictures()));
+#endif
 
     //  Set reference list
     pcSlice->setRefPicList ( rcListPic );
 
     if( m_pcCfg->getUseAMaxBT() )
     {
-      if( !pcSlice->isIntra() )
+      if( !pcSlice->isIntra() 
+        )
       {
         int refLayer = pcSlice->getDepth();
         if( refLayer > 9 ) refLayer = 9; // Max layer is 10
@@ -1614,7 +1722,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     {
       pcSlice->setSliceType ( P_SLICE );
     }
-
     xUpdateRasInit( pcSlice );
 
     // Do decoding refresh marking if any
@@ -1767,12 +1874,12 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 #if V0078_ADAPTIVE_LOWER_BOUND
         if (estimatedCpbFullness - estimatedBits < m_pcRateCtrl->getRCPic()->getLowerBound())
         {
-          estimatedBits = max(200, estimatedCpbFullness - m_pcRateCtrl->getRCPic()->getLowerBound());
+          estimatedBits = std::max(200, estimatedCpbFullness - m_pcRateCtrl->getRCPic()->getLowerBound());
         }
 #else
         if (estimatedCpbFullness - estimatedBits < (int)(m_pcRateCtrl->getCpbSize()*0.1f))
         {
-          estimatedBits = max(200, estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.1f));
+          estimatedBits = std::max(200, estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.1f));
         }
 #endif
 
@@ -1905,7 +2012,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     trySkipOrDecodePicture( decPic, encPic, *m_pcCfg, pcPic );
 
     pcPic->cs->slice = pcSlice; // please keep this
-    if (pcSlice->getPPS()->getSliceChromaQpFlag() && CS::isDualITree(*pcSlice->getPic()->cs))
+#if ENABLE_QPA
+    if (pcSlice->getPPS()->getSliceChromaQpFlag() && CS::isDualITree (*pcSlice->getPic()->cs) && !m_pcCfg->getUsePerceptQPA() && (m_pcCfg->getSliceChromaOffsetQpPeriodicity() == 0))
+#else
+    if (pcSlice->getPPS()->getSliceChromaQpFlag() && CS::isDualITree (*pcSlice->getPic()->cs))
+#endif
     {
       // overwrite chroma qp offset for dual tree
       pcSlice->setSliceChromaQpDelta(COMPONENT_Cb, m_pcCfg->getChromaCbQpOffsetDualTree());
@@ -2010,6 +2121,10 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
       m_pcLoopFilter->loopFilterPic( cs );
 
+#if DMVR_JVET_LOW_LATENCY_K0217 
+      CS::setRefinedMotionField(cs);
+#endif
+
       DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "final", 1 ) ) );
 
       if( pcSlice->getSPS()->getUseSAO() )
@@ -2043,7 +2158,12 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         }
       }
 #endif
-
+#if JVET_K0157
+      if (pcPic->cs->sps->getSpsNext().getUseCompositeRef() && getPrepareLTRef())
+      {
+        updateCompositeReference(pcSlice, rcListPic, pocCurr);
+      }
+#endif
     }
     else // skip enc picture
     {
@@ -2059,7 +2179,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     {
       for( const CodingUnit *cu : pcPic->cs->cus )
       {
-        if( !pcSlice->isIntra() )
+        if( !pcSlice->isIntra() 
+          )
         {
           m_uiBlkSize[pcSlice->getDepth()] += cu->Y().area();
           m_uiNumBlk [pcSlice->getDepth()]++;
@@ -2236,7 +2357,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       m_pcCfg->setEncodedFlag(iGOPid, true);
 
       double PSNR_Y;
-      xCalculateAddPSNRs( isField, isTff, iGOPid, pcPic, accessUnit, rcListPic, encTime, snr_conversion, printFrameMSE, &PSNR_Y );
+      xCalculateAddPSNRs(isField, isTff, iGOPid, pcPic, accessUnit, rcListPic, encTime, snr_conversion, printFrameMSE, &PSNR_Y
+#if JVET_K0157
+                       , isEncodeLtRef
+#endif
+      );
 
       // Only produce the Green Metadata SEI message with the last picture.
       if( m_pcCfg->getSEIGreenMetadataInfoSEIEnable() && pcSlice->getPOC() == ( m_pcCfg->getFramesToBeEncoded() - 1 )  )
@@ -2300,7 +2425,10 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     pcPic->reconstructed = true;
     m_bFirst = false;
     m_iNumPicCoded++;
-    m_totalCoded ++;
+#if JVET_K0157
+    if (!(pcPic->cs->sps->getSpsNext().getUseCompositeRef() && isEncodeLtRef))
+#endif
+      m_totalCoded ++;
     /* logging: insert a newline at end of picture period */
 
     if (m_pcCfg->getEfficientFieldIRAPEnabled())
@@ -2319,7 +2447,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
 }
 
-void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const BitDepths &bitDepths)
+void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths)
 {
 #if ENABLE_QPA
   const bool    useWPSNR = m_pcEncLib->getUseWPSNR();
@@ -2345,25 +2473,25 @@ void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool
     m_gcAnalyzeWPSNR.setFrmRate(m_pcCfg->getFrameRate()*rateMultiplier / (double)m_pcCfg->getTemporalSubsampleRatio());
   }
 #endif
-  
+
   const ChromaFormat chFmt = m_pcCfg->getChromaFormatIdc();
 
   //-- all
   msg( INFO, "\n" );
   msg( DETAILS,"\nSUMMARY --------------------------------------------------------\n" );
 #if ENABLE_QPA
-  m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths, useWPSNR);
+  m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths, useWPSNR);
 #else
-  m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths);
+  m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths);
 #endif
   msg( DETAILS,"\n\nI Slices--------------------------------------------------------\n" );
-  m_gcAnalyzeI.printOut('i', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths);
+  m_gcAnalyzeI.printOut('i', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths);
 
   msg( DETAILS,"\n\nP Slices--------------------------------------------------------\n" );
-  m_gcAnalyzeP.printOut('p', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths);
+  m_gcAnalyzeP.printOut('p', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths);
 
   msg( DETAILS,"\n\nB Slices--------------------------------------------------------\n" );
-  m_gcAnalyzeB.printOut('b', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths);
+  m_gcAnalyzeB.printOut('b', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths);
   
 #if WCG_WPSNR
   if (useLumaWPSNR)
@@ -2374,14 +2502,14 @@ void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool
 #endif
   if (!m_pcCfg->getSummaryOutFilename().empty())
   {
-    m_gcAnalyzeAll.printSummary(chFmt, printSequenceMSE, bitDepths, m_pcCfg->getSummaryOutFilename());
+    m_gcAnalyzeAll.printSummary(chFmt, printSequenceMSE, printHexPsnr, bitDepths, m_pcCfg->getSummaryOutFilename());
   }
 
   if (!m_pcCfg->getSummaryPicFilenameBase().empty())
   {
-    m_gcAnalyzeI.printSummary(chFmt, printSequenceMSE, bitDepths, m_pcCfg->getSummaryPicFilenameBase()+"I.txt");
-    m_gcAnalyzeP.printSummary(chFmt, printSequenceMSE, bitDepths, m_pcCfg->getSummaryPicFilenameBase()+"P.txt");
-    m_gcAnalyzeB.printSummary(chFmt, printSequenceMSE, bitDepths, m_pcCfg->getSummaryPicFilenameBase()+"B.txt");
+    m_gcAnalyzeI.printSummary(chFmt, printSequenceMSE, printHexPsnr, bitDepths, m_pcCfg->getSummaryPicFilenameBase()+"I.txt");
+    m_gcAnalyzeP.printSummary(chFmt, printSequenceMSE, printHexPsnr, bitDepths, m_pcCfg->getSummaryPicFilenameBase()+"P.txt");
+    m_gcAnalyzeB.printSummary(chFmt, printSequenceMSE, printHexPsnr, bitDepths, m_pcCfg->getSummaryPicFilenameBase()+"B.txt");
   }
 
 #if WCG_WPSNR
@@ -2399,13 +2527,13 @@ void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool
 
     msg( DETAILS,"\n\nSUMMARY INTERLACED ---------------------------------------------\n" );
 #if ENABLE_QPA
-    m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths, useWPSNR);
+    m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths, useWPSNR);
 #else
-    m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths);
+    m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths);
 #endif
     if (!m_pcCfg->getSummaryOutFilename().empty())
     {
-      m_gcAnalyzeAll_in.printSummary(chFmt, printSequenceMSE, bitDepths, m_pcCfg->getSummaryOutFilename());
+      m_gcAnalyzeAll_in.printSummary(chFmt, printSequenceMSE, printHexPsnr, bitDepths, m_pcCfg->getSummaryOutFilename());
 #if WCG_WPSNR
       if (useLumaWPSNR)
       {
@@ -2448,13 +2576,19 @@ uint64_t EncGOP::preLoopFilterPicAndCalcDist( Picture* pcPic )
 // ====================================================================================================================
 // Protected member functions
 // ====================================================================================================================
-
-
-void EncGOP::xInitGOP( int iPOCLast, int iNumPicRcvd, bool isField )
+void EncGOP::xInitGOP( int iPOCLast, int iNumPicRcvd, bool isField
+#if JVET_K0157
+  , bool isEncodeLtRef
+#endif
+)
 {
   CHECK(!( iNumPicRcvd > 0 ), "Unspecified error");
   //  Exception for the first frames
+#if JVET_K0157
+  if ((isField && (iPOCLast == 0 || iPOCLast == 1)) || (!isField && (iPOCLast == 0)) || isEncodeLtRef)
+#else
   if ( ( isField && (iPOCLast == 0 || iPOCLast == 1) ) || (!isField  && (iPOCLast == 0))  )
+#endif
   {
     m_iGopSize    = 1;
   }
@@ -2485,7 +2619,12 @@ void EncGOP::xGetBuffer( PicList&                  rcListPic,
     iTimeOffset--;
   }
 
+#if JVET_K0157
+  int multipleFactor = m_pcCfg->getUseCompositeRef() ? 2 : 1;
+  for (i = 0; i < (iNumPicRcvd * multipleFactor - iTimeOffset + 1); i += multipleFactor)
+#else
   for ( i = 0; i < (iNumPicRcvd - iTimeOffset + 1); i++ )
+#endif
   {
     iterPicYuvRec--;
   }
@@ -2512,15 +2651,18 @@ void EncGOP::xGetBuffer( PicList&                  rcListPic,
 #if ENABLE_QPA
 
 #ifndef BETA
- #define BETA (2.0 / 3.0)  // value between 0 and 1; use 0.0 for traditional PSNR
+  #define BETA 0.5 // value between 0.0 and 1; use 0.0 to obtain traditional PSNR
 #endif
 #define GLOBAL_AVERAGING 1 // "global" averaging of a_k across a set instead of one picture
 #if FRAME_WEIGHTING
 static const uint32_t DQP[16] = { 4, 12, 11, 12,  9, 12, 11, 12,  6, 12, 11, 12,  9, 12, 11, 12 };
 #endif
 
-static inline double calcWeightedSquaredError(const CPelBuf& org,    const CPelBuf& rec,     double &sumAct,
-                                              const uint32_t imageWidth, const uint32_t imageHeight, const uint32_t offsetX,  const uint32_t offsetY, int blockWidth, int blockHeight)
+static inline double calcWeightedSquaredError(const CPelBuf& org,        const CPelBuf& rec,
+                                              double &sumAct,            const uint32_t bitDepth,
+                                              const uint32_t imageWidth, const uint32_t imageHeight,
+                                              const uint32_t offsetX,    const uint32_t offsetY,
+                                              int blockWidth,            int blockHeight)
 {
   const int    O = org.stride;
   const int    R = rec.stride;
@@ -2534,8 +2676,8 @@ static inline double calcWeightedSquaredError(const CPelBuf& org,    const CPelB
 
   const int hAct = offsetY + (uint32_t)blockHeight < imageHeight ? blockHeight : blockHeight - 1;
   const int wAct = offsetX + (uint32_t)blockWidth  < imageWidth  ? blockWidth  : blockWidth  - 1;
-  uint64_t ssErr   = 0; // sum of squared diffs
-  uint64_t saAct   = 0; // sum of abs. activity
+  uint64_t ssErr = 0; // sum of squared diffs
+  uint64_t saAct = 0; // sum of abs. activity
   double msAct;
   int x, y;
 
@@ -2544,7 +2686,7 @@ static inline double calcWeightedSquaredError(const CPelBuf& org,    const CPelB
   {
     for (x = 0; x < blockWidth; x++)
     {
-      register  int64_t iDiff = (int64_t)o[y*O + x] - (int64_t)r[y*R + x];
+      const     int64_t iDiff = (int64_t)o[y*O + x] - (int64_t)r[y*R + x];
       ssErr += uint64_t(iDiff * iDiff);
     }
   }
@@ -2554,13 +2696,18 @@ static inline double calcWeightedSquaredError(const CPelBuf& org,    const CPelB
   {
     for (x = xAct; x < wAct; x++)
     {
-      saAct += uint64_t(abs(4 * (int64_t)o[y*O + x] - (int64_t)o[y*O + x-1] - (int64_t)o[y*O + x+1] - (int64_t)o[(y-1)*O + x] - (int64_t)o[(y+1)*O + x]));
+      const int f = 12 * (int)o[y*O + x] - 2 * ((int)o[y*O + x-1] + (int)o[y*O + x+1] + (int)o[(y-1)*O + x] + (int)o[(y+1)*O + x])
+                       - (int)o[(y-1)*O + x-1] - (int)o[(y-1)*O + x+1] - (int)o[(y+1)*O + x-1] - (int)o[(y+1)*O + x+1];
+      saAct += abs(f);
     }
   }
 
   // calculate weight (mean squared activity)
   msAct = (double)saAct / (double(wAct - xAct) * double(hAct - yAct));
-  if (msAct < 8.0) msAct = 8.0;
+
+  // lower limit, accounts for high-pass gain
+  if (msAct < double(1 << (bitDepth - 4))) msAct = double(1 << (bitDepth - 4));
+
   msAct *= msAct; // because ssErr is squared
 
   sumAct += msAct; // includes high-pass gain
@@ -2572,9 +2719,9 @@ static inline double calcWeightedSquaredError(const CPelBuf& org,    const CPelB
 
 uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift
 #if ENABLE_QPA
-                                  , const uint32_t chromaShift /*= 0*/
+                                    , const uint32_t chromaShift /*= 0*/
 #endif
-                                   )
+                                      )
 {
   uint64_t uiTotalDiff;
   const  Pel*  pSrc0 = pic0.bufAt(0, 0);
@@ -2591,7 +2738,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1,
     {
       const uint32_t   W = pic0.width;  // image width
       const uint32_t   H = pic0.height; // image height
-      const double R = double(W * H) / (1920.0 * 1080.0);
+      const double     R = double(W * H) / (1920.0 * 1080.0);
       const uint32_t   B = Clip3<uint32_t>(0, 128 >> chromaShift, 4 * uint32_t(16.0 * sqrt(R) + 0.5)); // WPSNR block size in integer multiple of 4 (for SIMD, = 64 at full-HD)
 
       uint32_t x, y;
@@ -2603,7 +2750,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1,
         {
           for (x = 0; x < W; x++)
           {
-            register int64_t iDiff = (int64_t)pSrc0[x] - (int64_t)pSrc1[x];
+            const           int64_t iDiff = (int64_t)pSrc0[x] - (int64_t)pSrc1[x];
             uiTotalDiff += uint64_t(iDiff * iDiff);
           }
           pSrc0 += pic0.stride;
@@ -2620,7 +2767,11 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1,
       {
         for (x = 0; x < W; x += B)
         {
-          wmse += calcWeightedSquaredError(pic1, pic0, sumAct, W, H, x, y, B, B);
+          wmse += calcWeightedSquaredError(pic1,   pic0,
+                                           sumAct, BD,
+                                           W,      H,
+                                           x,      y,
+                                           B,      B);
 #if !GLOBAL_AVERAGING
           numAct += 1.0;
 #endif
@@ -2629,11 +2780,17 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1,
 
       // integer weighted distortion
 #if GLOBAL_AVERAGING
-      sumAct = 1.5 * double(1 << BD);
-      if ((W << chromaShift) > 2048 && (H << chromaShift) > 1280)   // UHD luma
+      sumAct = 32.0 * double(1 << BD);
+
+      if ((W << chromaShift) > 2048 && (H << chromaShift) > 1280) // for UHD/4K
+      {
+        sumAct *= 0.5;
+      }
+      else if ((W << chromaShift) <= 1024 || (H << chromaShift) <= 640) // 480p
       {
-        sumAct /= 1.5;
+        sumAct *= 2.0;
       }
+
       return (wmse <= 0.0) ? 0 : uint64_t(wmse * pow(sumAct, BETA) + 0.5);
 #else
       return (wmse <= 0.0 || numAct <= 0.0) ? 0 : uint64_t(wmse * pow(sumAct / numAct, BETA) + 0.5);
@@ -2670,7 +2827,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1,
   return uiTotalDiff;
 }
 #if WCG_WPSNR
-double EncGOP::xFindDistortionPlaneWPSNR(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift, const CPelBuf& picLuma0, 
+double EncGOP::xFindDistortionPlaneWPSNR(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift, const CPelBuf& picLuma0,
   ComponentID compID, const ChromaFormat chfmt    )
 {
   const bool    useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled();
@@ -2723,9 +2880,17 @@ double EncGOP::xFindDistortionPlaneWPSNR(const CPelBuf& pic0, const CPelBuf& pic
 }
 #endif
 
-void EncGOP::xCalculateAddPSNRs( const bool isField, const bool isFieldTopFieldFirst, const int iGOPid, Picture* pcPic, const AccessUnit&accessUnit, PicList &rcListPic, const int64_t dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y )
+void EncGOP::xCalculateAddPSNRs( const bool isField, const bool isFieldTopFieldFirst, const int iGOPid, Picture* pcPic, const AccessUnit&accessUnit, PicList &rcListPic, const int64_t dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y
+#if JVET_K0157
+                               , bool isEncodeLtRef
+#endif
+)
 {
-  xCalculateAddPSNR( pcPic, pcPic->getRecoBuf(), accessUnit, (double) dEncTime, snr_conversion, printFrameMSE, PSNR_Y );
+  xCalculateAddPSNR(pcPic, pcPic->getRecoBuf(), accessUnit, (double)dEncTime, snr_conversion, printFrameMSE, PSNR_Y
+#if JVET_K0157
+                  , isEncodeLtRef
+#endif
+  );
 
   //In case of field coding, compute the interlaced PSNR for both fields
   if(isField)
@@ -2778,19 +2943,31 @@ void EncGOP::xCalculateAddPSNRs( const bool isField, const bool isFieldTopFieldF
       }
       Picture* correspondingFieldPic = *(iterPic);
 
-      if( (pcPic->topField && isFieldTopFieldFirst) || (!pcPic->topField && !isFieldTopFieldFirst))
+      if ((pcPic->topField && isFieldTopFieldFirst) || (!pcPic->topField && !isFieldTopFieldFirst))
       {
-        xCalculateInterlacedAddPSNR(pcPic, correspondingFieldPic, pcPic->getRecoBuf(), correspondingFieldPic->getRecoBuf(), snr_conversion, printFrameMSE, PSNR_Y );
+        xCalculateInterlacedAddPSNR(pcPic, correspondingFieldPic, pcPic->getRecoBuf(), correspondingFieldPic->getRecoBuf(), snr_conversion, printFrameMSE, PSNR_Y
+#if JVET_K0157
+          , isEncodeLtRef
+#endif
+        );
       }
       else
       {
-        xCalculateInterlacedAddPSNR(correspondingFieldPic, pcPic, correspondingFieldPic->getRecoBuf(), pcPic->getRecoBuf(), snr_conversion, printFrameMSE, PSNR_Y );
+        xCalculateInterlacedAddPSNR(correspondingFieldPic, pcPic, correspondingFieldPic->getRecoBuf(), pcPic->getRecoBuf(), snr_conversion, printFrameMSE, PSNR_Y
+#if JVET_K0157
+          , isEncodeLtRef
+#endif
+        );
       }
     }
   }
 }
 
-void EncGOP::xCalculateAddPSNR( Picture* pcPic, PelUnitBuf cPicD, const AccessUnit& accessUnit, double dEncTime, const InputColourSpaceConversion conversion, const bool printFrameMSE, double* PSNR_Y )
+void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUnit& accessUnit, double dEncTime, const InputColourSpaceConversion conversion, const bool printFrameMSE, double* PSNR_Y
+#if JVET_K0157
+                              , bool isEncodeLtRef
+#endif
+)
 {
   const SPS&         sps = *pcPic->cs->sps;
   const CPelUnitBuf& pic = cPicD;
@@ -2860,7 +3037,7 @@ void EncGOP::xCalculateAddPSNR( Picture* pcPic, PelUnitBuf cPicD, const AccessUn
 #else
     const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, 0);
 #if WCG_WPSNR
-  const double uiSSDtempWeighted = xFindDistortionPlaneWPSNR(recPB, orgPB, 0, org.get(COMPONENT_Y), compID, format);
+    const double uiSSDtempWeighted = xFindDistortionPlaneWPSNR(recPB, orgPB, 0, org.get(COMPONENT_Y), compID, format);
 #endif
     const uint32_t maxval = 255 << (bitDepth - 8);
 #endif
@@ -2919,13 +3096,21 @@ void EncGOP::xCalculateAddPSNR( Picture* pcPic, PelUnitBuf cPicD, const AccessUn
   m_vRVM_RP.push_back( uibits );
 
   //===== add PSNR =====
-  m_gcAnalyzeAll.addResult (dPSNR, (double)uibits, MSEyuvframe);
+  m_gcAnalyzeAll.addResult(dPSNR, (double)uibits, MSEyuvframe
+#if JVET_K0157
+    , isEncodeLtRef
+#endif
+  );
 #if EXTENSION_360_VIDEO
   m_ext360.addResult(m_gcAnalyzeAll);
 #endif
   if (pcSlice->isIntra())
   {
-    m_gcAnalyzeI.addResult (dPSNR, (double)uibits, MSEyuvframe);
+    m_gcAnalyzeI.addResult(dPSNR, (double)uibits, MSEyuvframe
+#if JVET_K0157
+      , isEncodeLtRef
+#endif
+    );
     *PSNR_Y = dPSNR[COMPONENT_Y];
 #if EXTENSION_360_VIDEO
     m_ext360.addResult(m_gcAnalyzeI);
@@ -2933,7 +3118,11 @@ void EncGOP::xCalculateAddPSNR( Picture* pcPic, PelUnitBuf cPicD, const AccessUn
   }
   if (pcSlice->isInterP())
   {
-    m_gcAnalyzeP.addResult (dPSNR, (double)uibits, MSEyuvframe);
+    m_gcAnalyzeP.addResult(dPSNR, (double)uibits, MSEyuvframe
+#if JVET_K0157
+      , isEncodeLtRef
+#endif
+    );
     *PSNR_Y = dPSNR[COMPONENT_Y];
 #if EXTENSION_360_VIDEO
     m_ext360.addResult(m_gcAnalyzeP);
@@ -2941,7 +3130,11 @@ void EncGOP::xCalculateAddPSNR( Picture* pcPic, PelUnitBuf cPicD, const AccessUn
   }
   if (pcSlice->isInterB())
   {
-    m_gcAnalyzeB.addResult (dPSNR, (double)uibits, MSEyuvframe);
+    m_gcAnalyzeB.addResult(dPSNR, (double)uibits, MSEyuvframe
+#if JVET_K0157
+      , isEncodeLtRef
+#endif
+    );
     *PSNR_Y = dPSNR[COMPONENT_Y];
 #if EXTENSION_360_VIDEO
     m_ext360.addResult(m_gcAnalyzeB);
@@ -3024,7 +3217,11 @@ void EncGOP::xCalculateAddPSNR( Picture* pcPic, PelUnitBuf cPicD, const AccessUn
 
 void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* pcPicOrgSecondField,
                                           PelUnitBuf cPicRecFirstField, PelUnitBuf cPicRecSecondField,
-                                          const InputColourSpaceConversion conversion, const bool printFrameMSE, double* PSNR_Y )
+                                          const InputColourSpaceConversion conversion, const bool printFrameMSE, double* PSNR_Y
+#if JVET_K0157
+                                        , bool isEncodeLtRef
+#endif
+)
 {
   const SPS &sps = *pcPicOrgFirstField->cs->sps;
   const ChromaFormat format = sps.getChromaFormatIdc();
@@ -3100,7 +3297,11 @@ void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture*
   uint32_t uibits = 0; // the number of bits for the pair is not calculated here - instead the overall total is used elsewhere.
 
   //===== add PSNR =====
-  m_gcAnalyzeAll_in.addResult (dPSNR, (double)uibits, MSEyuvframe);
+  m_gcAnalyzeAll_in.addResult (dPSNR, (double)uibits, MSEyuvframe
+#if JVET_K0157
+    , isEncodeLtRef
+#endif
+  );
 
   *PSNR_Y = dPSNR[COMPONENT_Y];
 
@@ -3130,13 +3331,21 @@ NalUnitType EncGOP::getNalUnitType(int pocCurr, int lastIDR, bool isField)
     return NAL_UNIT_CODED_SLICE_IDR_W_RADL;
   }
 
-  if(m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pocCurr == 1)
+#if JVET_K0157
+  if (m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pocCurr == (m_pcCfg->getUseCompositeRef() ? 2: 1))
+#else
+  if (m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pocCurr == 1)
+#endif
   {
     // to avoid the picture becoming an IRAP
     return NAL_UNIT_CODED_SLICE_TRAIL_R;
   }
 
-  if(m_pcCfg->getDecodingRefreshType() != 3 && (pocCurr - isField) % m_pcCfg->getIntraPeriod() == 0)
+#if JVET_K0157
+  if (m_pcCfg->getDecodingRefreshType() != 3 && (pocCurr - isField) % (m_pcCfg->getIntraPeriod() * (m_pcCfg->getUseCompositeRef() ? 2 : 1)) == 0)
+#else
+  if (m_pcCfg->getDecodingRefreshType() != 3 && (pocCurr - isField) % m_pcCfg->getIntraPeriod() == 0)
+#endif
   {
     if (m_pcCfg->getDecodingRefreshType() == 1)
     {
@@ -3355,6 +3564,223 @@ void EncGOP::arrangeLongtermPicturesInRPS(Slice *pcSlice, PicList& rcListPic)
   }
 }
 
+#if JVET_K0157
+// Selects the CTUs of the picture with POC pocCurr that are to be spliced into the composite
+// (background) reference. A CTU is a candidate when its original samples differ little from
+// the stored background original (m_picOrig) and its splice index is still 0; at most half of
+// the CTUs, those with the lowest cost, get their splice index set to pocCurr (4:2:0 chroma).
+void EncGOP::arrangeCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr)
+{
+  Picture* curPic = NULL;
+  PicList::iterator  iterPic = rcListPic.begin();
+  const PreCalcValues *pcv = pcSlice->getPPS()->pcv;
+  m_bgPOC = pocCurr + 1;
+  if (m_picBg->getSpliceFull() || pcSlice->getSliceType() == I_SLICE)
+  {
+    return; // nothing to select, so skip the picture search below
+  }
+  while (iterPic != rcListPic.end())
+  {
+    curPic = *(iterPic++);
+    if (curPic->getPOC() == pocCurr)
+    {
+      break;
+    }
+  }
+
+  int width = pcv->lumaWidth;
+  int height = pcv->lumaHeight;
+  int stride = curPic->getOrigBuf().get(COMPONENT_Y).stride;
+  int cStride = curPic->getOrigBuf().get(COMPONENT_Cb).stride;
+  Pel* curLumaAddr = curPic->getOrigBuf().get(COMPONENT_Y).buf;
+  Pel* curCbAddr = curPic->getOrigBuf().get(COMPONENT_Cb).buf;
+  Pel* curCrAddr = curPic->getOrigBuf().get(COMPONENT_Cr).buf;
+  Pel* bgOrgLumaAddr = m_picOrig->getOrigBuf().get(COMPONENT_Y).buf;
+  Pel* bgOrgCbAddr = m_picOrig->getOrigBuf().get(COMPONENT_Cb).buf;
+  Pel* bgOrgCrAddr = m_picOrig->getOrigBuf().get(COMPONENT_Cr).buf;
+  int cuMaxWidth = pcv->maxCUWidth;
+  int cuMaxHeight = pcv->maxCUHeight;
+  // candidate list: at most half of the CTUs of the picture, but at least one entry
+  int maxReplace = (pcv->sizeInCtus) / 2;
+  maxReplace = maxReplace < 1 ? 1 : maxReplace;
+  struct CostStr
+  {
+    double cost;
+    int ctuIdx;
+  };
+  CostStr* minCtuCost = new CostStr[maxReplace];
+  for (int i = 0; i < maxReplace; i++)
+  {
+    minCtuCost[i].cost = 1e10;
+    minCtuCost[i].ctuIdx = -1;
+  }
+  int bitIncrementY = pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8;
+  int bitIncrementUV = pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_CHROMA) - 8;
+  for (int y = 0; y < height; y += cuMaxHeight)
+  {
+    for (int x = 0; x < width; x += cuMaxWidth)
+    {
+      double lcuDist = 0.0;
+      double lcuDistCb = 0.0;
+      double lcuDistCr = 0.0;
+      int    realPixelCnt = 0;
+      int largeDist = 0;
+
+      for (int tmpy = 0; tmpy < cuMaxHeight; tmpy++)
+      {
+        if (y + tmpy >= height)
+        {
+          break;
+        }
+        for (int tmpx = 0; tmpx < cuMaxWidth; tmpx++)
+        {
+          if (x + tmpx >= width)
+          {
+            break;
+          }
+
+          realPixelCnt++;
+          const int lumaDiff = abs(curLumaAddr[(y + tmpy)*stride + x + tmpx] - bgOrgLumaAddr[(y + tmpy)*stride + x + tmpx]);
+          lcuDist += lumaDiff;
+          if (lumaDiff > (20 << bitIncrementY))
+          {
+            largeDist++;
+          }
+
+          // one chroma sample per 2x2 luma samples
+          if (tmpy % 2 == 0 && tmpx % 2 == 0)
+          {
+            lcuDistCb += abs(curCbAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2] - bgOrgCbAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2]);
+            lcuDistCr += abs(curCrAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2] - bgOrgCrAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2]);
+          }
+        }
+      }
+
+      // accept the CTU as candidate only if all distortion measures are below their thresholds and its splice index is 0
+      int yInLCU = y / cuMaxHeight;
+      int xInLCU = x / cuMaxWidth;
+      int iLCUIdx = yInLCU * pcv->widthInCtus + xInLCU;
+      if (largeDist / (double)realPixelCnt < 0.01 && lcuDist / realPixelCnt < (3.5 * (1 << bitIncrementY)) && lcuDistCb / realPixelCnt < (0.5 * (1 << bitIncrementUV)) && lcuDistCr / realPixelCnt < (0.5 * (1 << bitIncrementUV)) && m_picBg->getSpliceIdx(iLCUIdx) == 0)
+      {
+        const double lcuCost = lcuDist / realPixelCnt + lcuDistCb / realPixelCnt + lcuDistCr / realPixelCnt;
+        //obtain the maxReplace smallest cost
+        //1) move the largest cost of the maxReplace candidates to the last position (one bubble pass)
+        for (int i = 0; i < maxReplace - 1; i++)
+        {
+          if (minCtuCost[i].cost > minCtuCost[i + 1].cost)
+          {
+            swap(minCtuCost[i].cost, minCtuCost[i + 1].cost);
+            swap(minCtuCost[i].ctuIdx, minCtuCost[i + 1].ctuIdx);
+          }
+        }
+        // 2) compare the current cost with the largest cost
+        if (lcuCost < minCtuCost[maxReplace - 1].cost)
+        {
+          minCtuCost[maxReplace - 1].cost = lcuCost;
+          minCtuCost[maxReplace - 1].ctuIdx = iLCUIdx;
+        }
+      }
+    }
+  }
+
+  // record pocCurr as splice index of every selected CTU (unused list entries keep ctuIdx == -1)
+  for (int i = 0; i < maxReplace; i++)
+  {
+    if (minCtuCost[i].ctuIdx != -1)
+    {
+      m_picBg->setSpliceIdx(minCtuCost[i].ctuIdx, pocCurr);
+    }
+  }
+  delete[]minCtuCost;
+}
+
+// Updates the composite (background) reference from the picture with POC pocCurr:
+// an I slice replaces the whole background, any other slice only copies the CTUs whose
+// splice index equals pocCurr. Chroma is addressed at half resolution in x and y (4:2:0).
+void EncGOP::updateCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr)
+{
+  Picture* curPic = NULL;
+  const PreCalcValues *pcv = pcSlice->getPPS()->pcv;
+  PicList::iterator  iterPic = rcListPic.begin();
+  while (iterPic != rcListPic.end())
+  {
+    curPic = *(iterPic++);
+    if (curPic->getPOC() == pocCurr)
+    {
+      break;
+    }
+  }
+  // curPic is still NULL when the list is empty, so test the pointer before dereferencing it
+  assert(curPic != NULL && curPic->getPOC() == pocCurr);
+
+  int width = pcv->lumaWidth;
+  int height = pcv->lumaHeight;
+  int stride = curPic->getRecoBuf().get(COMPONENT_Y).stride;
+  int cStride = curPic->getRecoBuf().get(COMPONENT_Cb).stride;
+
+  Pel* bgLumaAddr = m_picBg->getRecoBuf().get(COMPONENT_Y).buf;
+  Pel* bgCbAddr = m_picBg->getRecoBuf().get(COMPONENT_Cb).buf;
+  Pel* bgCrAddr = m_picBg->getRecoBuf().get(COMPONENT_Cr).buf;
+  Pel* curLumaAddr = curPic->getRecoBuf().get(COMPONENT_Y).buf;
+  Pel* curCbAddr = curPic->getRecoBuf().get(COMPONENT_Cb).buf;
+  Pel* curCrAddr = curPic->getRecoBuf().get(COMPONENT_Cr).buf;
+
+  int maxCuWidth = pcv->maxCUWidth;
+  int maxCuHeight = pcv->maxCUHeight;
+
+  // Update background reference
+  if (pcSlice->getSliceType() == I_SLICE)//(pocCurr == 0)
+  {
+    curPic->extendPicBorder();
+    curPic->setBorderExtension(true);
+
+    m_picBg->getRecoBuf().copyFrom(curPic->getRecoBuf());
+    m_picOrig->getOrigBuf().copyFrom(curPic->getOrigBuf());
+  }
+  else
+  {
+    // copy the reconstruction of every CTU whose splice index equals pocCurr into the background
+    for (int y = 0; y < height; y += maxCuHeight)
+    {
+      for (int x = 0; x < width; x += maxCuWidth)
+      {
+        if (m_picBg->getSpliceIdx((y / maxCuHeight)*pcv->widthInCtus + x / maxCuWidth) == pocCurr)
+        {
+          for (int tmpy = 0; tmpy < maxCuHeight; tmpy++)
+          {
+            if (y + tmpy >= height)
+            {
+              break;
+            }
+            for (int tmpx = 0; tmpx < maxCuWidth; tmpx++)
+            {
+              if (x + tmpx >= width)
+              {
+                break;
+              }
+              bgLumaAddr[(y + tmpy)*stride + x + tmpx] = curLumaAddr[(y + tmpy)*stride + x + tmpx];
+              if (tmpy % 2 == 0 && tmpx % 2 == 0)
+              {
+                bgCbAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2] = curCbAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2];
+                bgCrAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2] = curCrAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2];
+              }
+            }
+          }
+        }
+      }
+    }
+    // reset the extension flag before re-extending the border of the modified background
+    m_picBg->setBorderExtension(false);
+    m_picBg->extendPicBorder();
+    m_picBg->setBorderExtension(true);
+
+    curPic->extendPicBorder();
+    curPic->setBorderExtension(true);
+    m_picOrig->getOrigBuf().copyFrom(curPic->getOrigBuf());
+  }
+}
+#endif
+
 void EncGOP::applyDeblockingFilterMetric( Picture* pcPic, uint32_t uiNumSlices )
 {
   PelBuf cPelBuf = pcPic->getRecoBuf().get( COMPONENT_Y );
diff --git a/source/Lib/EncoderLib/EncGOP.h b/source/Lib/EncoderLib/EncGOP.h
index e7aa27d50d8fcaf15fb0e4de1b90d76ad165c62b..84c60e73bbc0a541bb385ddaa2ffd822fe42b803 100644
--- a/source/Lib/EncoderLib/EncGOP.h
+++ b/source/Lib/EncoderLib/EncGOP.h
@@ -131,6 +131,15 @@ private:
 
   SEIWriter               m_seiWriter;
 
+#if JVET_K0157
+  Picture *               m_picBg;            ///< composite (background) reference picture; its reco buffer is written by updateCompositeReference()
+  Picture *               m_picOrig;          ///< picture whose orig buffer holds the original samples compared against in arrangeCompositeReference()
+  int                     m_bgPOC;            ///< set to pocCurr + 1 by arrangeCompositeReference(), or explicitly via setNewestBgPOC()
+  bool                    m_isEncodedLTRef;   ///< set to true by EncLib::encode() after the composite reference picture has been compressed
+  bool                    m_isPrepareLTRef;
+  bool                    m_isUseLTRef;
+  int                     m_lastLTRefPoc;
+#endif
   //--Adaptive Loop filter
   EncSampleAdaptiveOffset*  m_pcSAO;
 #if JVET_K0371_ALF
@@ -173,21 +182,45 @@ public:
 
   void  init        ( EncLib* pcEncLib );
   void  compressGOP ( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, std::list<PelUnitBuf*>& rcListPicYuvRec,
-                      bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE );
+                      bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE
+#if JVET_K0157
+                    , bool isEncodeLtRef
+#endif
+  );
   void  xAttachSliceDataToNalUnit (OutputNALUnit& rNalu, OutputBitstream* pcBitstreamRedirect);
 
 
   int   getGOPSize()          { return  m_iGopSize;  }
 
   PicList*   getListPic()      { return m_pcListPic; }
+#if JVET_K0157
+  void      setPicBg(Picture* tmpPicBg) { m_picBg = tmpPicBg; }             // composite (background) reference picture
+  Picture*  getPicBg() const { return m_picBg; }
+  void      setPicOrig(Picture* tmpPicOrig) { m_picOrig = tmpPicOrig; }     // picture holding the background original samples
+  Picture*  getPicOrig() const { return m_picOrig; }
+  void      setNewestBgPOC(int poc) { m_bgPOC = poc; }
+  int       getNewestBgPOC() const { return m_bgPOC; }
+  void      setEncodedLTRef(bool isEncodedLTRef) { m_isEncodedLTRef = isEncodedLTRef; }
+  bool      getEncodedLTRef() const { return m_isEncodedLTRef; }
+  void      setUseLTRef(bool isUseLTRef) { m_isUseLTRef = isUseLTRef; }
+  bool      getUseLTRef() const { return m_isUseLTRef; }
+  void      setPrepareLTRef(bool isPrepareLTRef) { m_isPrepareLTRef = isPrepareLTRef; }
+  bool      getPrepareLTRef() const { return m_isPrepareLTRef; }
+  void      setLastLTRefPoc(int iLastLTRefPoc) { m_lastLTRefPoc = iLastLTRefPoc; }
+  int       getLastLTRefPoc() const { return m_lastLTRefPoc; }
 
-  void  printOutSummary      ( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const BitDepths &bitDepths );
+#endif
+  void  printOutSummary      ( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths );
 #if W0038_DB_OPT
   uint64_t  preLoopFilterPicAndCalcDist( Picture* pcPic );
 #endif
   EncSlice*  getSliceEncoder()   { return m_pcSliceEncoder; }
   NalUnitType getNalUnitType( int pocCurr, int lastIdr, bool isField );
   void arrangeLongtermPicturesInRPS(Slice *, PicList& );
+#if JVET_K0157
+  void arrangeCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr);
+  void updateCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr);
+#endif
 
 #if EXTENSION_360_VIDEO
   Analyze& getAnalyzeAllData() { return m_gcAnalyzeAll; }
@@ -201,15 +234,31 @@ protected:
 
 protected:
 
-  void  xInitGOP          ( int iPOCLast, int iNumPicRcvd, bool isField );
+  void  xInitGOP          ( int iPOCLast, int iNumPicRcvd, bool isField
+#if JVET_K0157
+    , bool isEncodeLtRef
+#endif
+  );
   void  xGetBuffer        ( PicList& rcListPic, std::list<PelUnitBuf*>& rcListPicYuvRecOut,
                             int iNumPicRcvd, int iTimeOffset, Picture*& rpcPic, int pocCurr, bool isField );
 
-  void  xCalculateAddPSNRs         ( const bool isField, const bool isFieldTopFieldFirst, const int iGOPid, Picture* pcPic, const AccessUnit&accessUnit, PicList &rcListPic, int64_t dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y );
-  void  xCalculateAddPSNR          ( Picture* pcPic, PelUnitBuf cPicD, const AccessUnit&, double dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y );
+  void  xCalculateAddPSNRs(const bool isField, const bool isFieldTopFieldFirst, const int iGOPid, Picture* pcPic, const AccessUnit&accessUnit, PicList &rcListPic, int64_t dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y
+#if JVET_K0157
+    , bool isEncodeLtRef
+#endif
+  );
+  void  xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUnit&, double dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y
+#if JVET_K0157
+    , bool isEncodeLtRef
+#endif
+  );
   void  xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* pcPicOrgSecondField,
                                      PelUnitBuf cPicRecFirstField, PelUnitBuf cPicRecSecondField,
-                                     const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y );
+                                     const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y
+#if JVET_K0157
+                                    , bool isEncodeLtRef
+#endif
+  );
 
   uint64_t xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift
 #if ENABLE_QPA
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index 9612ec3650d013e0083c3b68f591cdfba97b2047..8429fde15c8b81878a62b94ae85474446d06e14b 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -87,7 +87,9 @@ void EncLib::create ()
 
 
 
-
+#if JVET_K0157
+  m_iPOCLast = m_compositeRefEnabled ? -2 : -1;
+#endif
   // create processing unit classes
   m_cGOPEncoder.        create( );
   m_cSliceEncoder.      create( getSourceWidth(), getSourceHeight(), m_chromaFormatIDC, m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth );
@@ -140,8 +142,13 @@ void EncLib::create ()
 
   if ( m_RCEnableRateControl )
   {
+#if RATECTRL_FIX_FULLNBIT
+    m_cRateCtrl.init(m_framesToBeEncoded, m_RCTargetBitrate, (int)((double)m_iFrameRate / m_temporalSubsampleRatio + 0.5), m_iGOPSize, m_iSourceWidth, m_iSourceHeight,
+      m_maxCUWidth, m_maxCUHeight, getBitDepth(CHANNEL_TYPE_LUMA), m_RCKeepHierarchicalBit, m_RCUseLCUSeparateModel, m_GOPList);
+#else
     m_cRateCtrl.init( m_framesToBeEncoded, m_RCTargetBitrate, (int)( (double)m_iFrameRate/m_temporalSubsampleRatio + 0.5), m_iGOPSize, m_iSourceWidth, m_iSourceHeight,
                       m_maxCUWidth, m_maxCUHeight,m_RCKeepHierarchicalBit, m_RCUseLCUSeparateModel, m_GOPList );
+#endif
   }
 
 }
@@ -219,6 +226,12 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
   omp_set_nested( true );
 #endif
 
+#if JVET_K0157
+  if (sps0.getSpsNext().getUseCompositeRef()) 
+  {
+    sps0.setLongTermRefsPresent(true);
+  }
+#endif
 
 #if U0132_TARGET_BITS_SATURATION
   if (m_RCCpbSaturationEnabled)
@@ -248,6 +261,14 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
     xInitPPS(pps1, sps0);
   }
 #endif
+#if JVET_K0157
+  if (sps0.getSpsNext().getUseCompositeRef())
+  {
+    PPS &pps2 = *(m_ppsMap.allocatePS(2));
+    xInitPPS(pps2, sps0);
+    xInitPPSforLT(pps2);
+  }
+#endif
 
   // initialize processing unit classes
   m_cGOPEncoder.  init( this );
@@ -356,6 +377,22 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf )
 #if ENABLE_WPP_PARALLELISM
   m_entropyCodingSyncContextStateVec.resize( pps0.pcv->heightInCtus );
 #endif
+#if JVET_K0157
+  if (sps0.getSpsNext().getUseCompositeRef()) 
+  {
+    Picture *picBg = new Picture;
+    picBg->create(sps0.getChromaFormatIdc(), Size(sps0.getPicWidthInLumaSamples(), sps0.getPicHeightInLumaSamples()), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false);
+    picBg->getRecoBuf().fill(0);
+    picBg->finalInit(sps0, pps0);
+    picBg->allocateNewSlice();
+    picBg->createSpliceIdx(pps0.pcv->sizeInCtus);
+    m_cGOPEncoder.setPicBg(picBg);
+    Picture *picOrig = new Picture;
+    picOrig->create(sps0.getChromaFormatIdc(), Size(sps0.getPicWidthInLumaSamples(), sps0.getPicHeightInLumaSamples()), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false);
+    picOrig->getOrigBuf().fill(0);
+    m_cGOPEncoder.setPicOrig(picOrig);
+  }
+#endif
 }
 
 #if HEVC_USE_SCALING_LISTS
@@ -441,6 +478,15 @@ void EncLib::xInitScalingLists(SPS &sps, PPS &pps)
 }
 #endif
 
+#if JVET_K0157
+void EncLib::xInitPPSforLT(PPS& pps) // extra settings for the PPS (id 2) that EncLib::init() allocates when the composite reference is enabled
+{
+  pps.setOutputFlagPresentFlag(true);               // turn on the output-flag-present flag of this PPS
+  pps.setDeblockingFilterControlPresentFlag(true);  // deblocking control is present in this PPS ...
+  pps.setPPSDeblockingFilterDisabledFlag(true);     // ... and the PPS-level deblocking-disabled flag is set
+}
+#endif
+
 // ====================================================================================================================
 // Public member functions
 // ====================================================================================================================
@@ -483,6 +529,38 @@ void EncLib::deletePicBuffer()
 void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut,
                      int& iNumEncoded )
 {
+#if JVET_K0157
+  if (m_compositeRefEnabled && m_cGOPEncoder.getPicBg()->getSpliceFull() && m_iPOCLast >= 10 && m_iNumPicRcvd == 0 && m_cGOPEncoder.getEncodedLTRef() == false)
+  {
+    Picture* picCurr = NULL;
+    xGetNewPicBuffer(rcListPicYuvRecOut, picCurr, 2);
+    const PPS *pps = m_ppsMap.getPS(2);
+    const SPS *sps = m_spsMap.getPS(pps->getSPSId());
+
+    picCurr->M_BUFS(0, PIC_ORIGINAL).copyFrom(m_cGOPEncoder.getPicBg()->getRecoBuf());
+    picCurr->finalInit(*sps, *pps);
+    picCurr->poc = m_iPOCLast - 1;
+    m_iPOCLast -= 2;
+    if (getUseAdaptiveQP())
+    {
+      AQpPreanalyzer::preanalyze(picCurr);
+    }
+    if (m_RCEnableRateControl)
+    {
+      m_cRateCtrl.initRCGOP(m_iNumPicRcvd);
+    }
+    m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut,
+      false, false, snrCSC, m_printFrameMSE, true);
+    m_cGOPEncoder.setEncodedLTRef(true);
+    if (m_RCEnableRateControl)
+    {
+      m_cRateCtrl.destroyRCGOP();
+    }
+
+    iNumEncoded = 0;
+    m_iNumPicRcvd = 0;
+  }
+#endif
   //PROF_ACCUM_AND_START_NEW_SET( getProfilerPic(), P_GOP_LEVEL );
   if (pcPicYuvOrg != NULL)
   {
@@ -493,7 +571,11 @@ void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYuvTru
     int ppsID=-1; // Use default PPS ID
     if (getWCGChromaQPControl().isEnabled())
     {
+#if JVET_K0157
+      ppsID = getdQPs()[m_iPOCLast / (m_compositeRefEnabled ? 2 : 1) + 1];
+#else
       ppsID=getdQPs()[ m_iPOCLast+1 ];
+#endif
       ppsID+=(getSwitchPOC() != -1 && (m_iPOCLast+1 >= getSwitchPOC())?1:0);
     }
     xGetNewPicBuffer( rcListPicYuvRecOut,
@@ -533,8 +615,12 @@ void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYuvTru
   }
 
   // compress GOP
-  m_cGOPEncoder.compressGOP( m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut,
-                             false, false, snrCSC, m_printFrameMSE );
+  m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut,
+                            false, false, snrCSC, m_printFrameMSE
+#if JVET_K0157
+    , false
+#endif
+  );
 
   if ( m_RCEnableRateControl )
   {
@@ -622,8 +708,11 @@ void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* pcPicYuvTr
     if ( m_iNumPicRcvd && ((flush&&fieldNum==1) || (m_iPOCLast/2)==0 || m_iNumPicRcvd==m_iGOPSize ) )
     {
       // compress GOP
-      m_cGOPEncoder.compressGOP( m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut,
-                                 true, isTff, snrCSC, m_printFrameMSE );
+      m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, true, isTff, snrCSC, m_printFrameMSE
+#if JVET_K0157
+                              , false
+#endif
+      );
 
       iNumEncoded += m_iNumPicRcvd;
       m_uiNumAllPicCoded += m_iNumPicRcvd;
@@ -711,8 +800,11 @@ void EncLib::xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Pict
   rpcPic->reconstructed = false;
   rpcPic->referenced = true;
 
-
+#if JVET_K0157
+  m_iPOCLast += (m_compositeRefEnabled ? 2 : 1);
+#else
   m_iPOCLast++;
+#endif
   m_iNumPicRcvd++;
 }
 
@@ -821,6 +913,9 @@ void EncLib::xInitSPS(SPS &sps)
   sps.getSpsNext().setUseIntraEMT           ( m_IntraEMT );
   sps.getSpsNext().setUseInterEMT           ( m_InterEMT );
 #endif
+#if JVET_K0157
+  sps.getSpsNext().setUseCompositeRef       ( m_compositeRefEnabled );
+#endif
 
   // ADD_NEW_TOOL : (encoder lib) set tool enabling flags and associated parameters here
 
@@ -861,7 +956,7 @@ void EncLib::xInitSPS(SPS &sps)
   sps.setMaxTLayers( m_maxTempLayer );
   sps.setTemporalIdNestingFlag( ( m_maxTempLayer == 1 ) ? true : false );
 
-  for (int i = 0; i < min(sps.getMaxTLayers(),(uint32_t) MAX_TLAYER); i++ )
+  for (int i = 0; i < std::min(sps.getMaxTLayers(), (uint32_t) MAX_TLAYER); i++ )
   {
     sps.setMaxDecPicBuffering(m_maxDecPicBuffering[i], i);
     sps.setNumReorderPics(m_numReorderPics[i], i);
@@ -1151,7 +1246,7 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
   if (getUsePerceptQPA() && !bUseDQP)
   {
     CHECK( m_iMaxCuDQPDepth != 0, "max. delta-QP depth must be zero!" );
-    bUseDQP = true;
+    bUseDQP = (getBaseQP() < 38) && (getSourceWidth() > 512 || getSourceHeight() > 320);
   }
 #endif
 
@@ -1245,9 +1340,15 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
       }
     }
   }
+ #if ENABLE_QPA
+  if ((getUsePerceptQPA() || getSliceChromaOffsetQpPeriodicity() > 0) && (getChromaFormatIdc() != CHROMA_400))
+  {
+    bChromaDeltaQPEnabled = true;
+  }
+ #endif
   pps.setSliceChromaQpFlag(bChromaDeltaQPEnabled);
 #endif
-  if (!pps.getSliceChromaQpFlag() && sps.getSpsNext().getUseDualITree())
+  if (!pps.getSliceChromaQpFlag() && sps.getSpsNext().getUseDualITree() && (getChromaFormatIdc() != CHROMA_400))
   {
     pps.setSliceChromaQpFlag(m_chromaCbQpOffsetDualTree != 0 || m_chromaCrQpOffsetDualTree != 0);
   }
@@ -1323,7 +1424,7 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
     }
   }
   CHECK(!(bestPos <= 15), "Unspecified error");
-  pps.setNumRefIdxL0DefaultActive(bestPos);
+    pps.setNumRefIdxL0DefaultActive(bestPos);
   pps.setNumRefIdxL1DefaultActive(bestPos);
   pps.setTransquantBypassEnabledFlag(getTransquantBypassEnabledFlag());
   pps.setUseTransformSkip( m_useTransformSkip );
@@ -1495,8 +1596,20 @@ void EncLib::xInitRPS(SPS &sps, bool isFieldCoding)
    // This is a function that
    // determines what Reference Picture Set to use
    // for a specific slice (with POC = POCCurr)
-void EncLib::selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid )
+void EncLib::selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid
+#if JVET_K0157
+                                      , int ltPoc
+#endif
+)
 {
+#if JVET_K0157
+  bool isEncodeLtRef = (POCCurr == ltPoc);
+  if (m_compositeRefEnabled && isEncodeLtRef)
+  {
+    POCCurr++;
+  }
+  int rIdx = GOPid;
+#endif
   slice->setRPSidx(GOPid);
 
   for(int extraNum=m_iGOPSize; extraNum<m_extraRPSs+m_iGOPSize; extraNum++)
@@ -1511,6 +1624,9 @@ void EncLib::selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid )
       if(POCIndex == m_GOPList[extraNum].m_POC)
       {
         slice->setRPSidx(extraNum);
+#if JVET_K0157
+        rIdx = extraNum;
+#endif
       }
     }
     else
@@ -1518,6 +1634,9 @@ void EncLib::selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid )
       if(POCCurr==m_GOPList[extraNum].m_POC)
       {
         slice->setRPSidx(extraNum);
+#if JVET_K0157
+        rIdx = extraNum;
+#endif
       }
     }
   }
@@ -1525,9 +1644,77 @@ void EncLib::selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid )
   if(POCCurr == 1 && slice->getPic()->fieldPic)
   {
     slice->setRPSidx(m_iGOPSize+m_extraRPSs);
+#if JVET_K0157
+    rIdx = m_iGOPSize + m_extraRPSs;
+#endif
   }
 
+#if JVET_K0157
+  ReferencePictureSet *rps = const_cast<ReferencePictureSet *>(slice->getSPS()->getRPSList()->getReferencePictureSet(slice->getRPSidx()));
+  if (m_compositeRefEnabled && ltPoc != -1 && !isEncodeLtRef)
+  {
+    if (ltPoc != -1 && rps->getNumberOfLongtermPictures() != 1 && !isEncodeLtRef)
+    {
+      int idx = rps->getNumberOfPictures();
+      int maxPicOrderCntLSB = 1 << slice->getSPS()->getBitsForPOC();
+      int ltPocLsb = ltPoc % maxPicOrderCntLSB;
+
+      rps->setNumberOfPictures(rps->getNumberOfPictures() + 1);
+      rps->setNumberOfLongtermPictures(1);
+      rps->setPOC(idx, ltPoc);
+      rps->setPocLSBLT(idx, ltPocLsb);
+      rps->setDeltaPOC(idx, -POCCurr + ltPoc);
+      rps->setUsed(idx, true);
+    }
+  }
+  else if (m_compositeRefEnabled && isEncodeLtRef)
+  {
+    ReferencePictureSet* localRPS = slice->getLocalRPS();
+    (*localRPS) = ReferencePictureSet();
+    int refPics = rps->getNumberOfPictures();
+    localRPS->setNumberOfPictures(rps->getNumberOfPictures());
+    for (int i = 0; i < refPics; i++)
+    {
+      localRPS->setDeltaPOC(i, rps->getDeltaPOC(i) + 1);
+      localRPS->setUsed(i, rps->getUsed(i));
+    }
+    localRPS->setNumberOfNegativePictures(rps->getNumberOfNegativePictures());
+    localRPS->setNumberOfPositivePictures(rps->getNumberOfPositivePictures());
+    localRPS->setInterRPSPrediction(true);
+    int deltaRPS = 1;
+    int newIdc = 0;
+    for (int i = 0; i < refPics; i++)
+    {
+      int deltaPOC = ((i != refPics) ? rps->getDeltaPOC(i) : 0);  // check if the reference abs POC is >= 0
+      int refIdc = 0;
+      for (int j = 0; j < localRPS->getNumberOfPictures(); j++) // loop through the  pictures in the new RPS
+      {
+        if ((deltaPOC + deltaRPS) == localRPS->getDeltaPOC(j))
+        {
+          if (localRPS->getUsed(j))
+          {
+            refIdc = 1;
+          }
+          else
+          {
+            refIdc = 2;
+          }
+        }
+      }
+      localRPS->setRefIdc(i, refIdc);
+      newIdc++;
+    }
+    localRPS->setNumRefIdc(newIdc + 1);
+    localRPS->setRefIdc(newIdc, 0);
+    localRPS->setDeltaRPS(deltaRPS);
+    localRPS->setDeltaRIdxMinus1(slice->getSPS()->getRPSList()->getNumberOfReferencePictureSets() - 1 - rIdx);
+    slice->setRPS(localRPS);
+    slice->setRPSidx(-1);
+    return;
+  }
+#else
   const ReferencePictureSet *rps = (slice->getSPS()->getRPSList()->getReferencePictureSet(slice->getRPSidx()));
+#endif
   slice->setRPS(rps);
 }
 
@@ -1678,7 +1865,11 @@ int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const
     const int* pdQPs = getdQPs();
     if ( pdQPs )
     {
+#if JVET_K0157
+      qp += pdQPs[pSlice->getPOC() / (m_compositeRefEnabled ? 2 : 1)];
+#else
       qp += pdQPs[ pSlice->getPOC() ];
+#endif
     }
 #endif
 
diff --git a/source/Lib/EncoderLib/EncLib.h b/source/Lib/EncoderLib/EncLib.h
index 7fbdd7aec8d55f30710f93ea2eac8991fefd86de..74d78d10a3909ce88fdf86126d6744ef29404a82 100644
--- a/source/Lib/EncoderLib/EncLib.h
+++ b/source/Lib/EncoderLib/EncLib.h
@@ -149,6 +149,9 @@ protected:
   void  xInitPPS          (PPS &pps, const SPS &sps); ///< initialize PPS from encoder options
 #if HEVC_USE_SCALING_LISTS
   void  xInitScalingLists (SPS &sps, PPS &pps);   ///< initialize scaling lists
+#endif
+#if JVET_K0157
+  void  xInitPPSforLT(PPS& pps);
 #endif
   void  xInitHrdParameters(SPS &sps);                 ///< initialize HRD parameters
 
@@ -209,7 +212,12 @@ public:
 #endif
   RateCtrl*               getRateCtrl           ()              { return  &m_cRateCtrl;            }
 
-  void selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid );
+
+  void selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid
+#if JVET_K0157
+    , int ltPoc
+#endif
+  );
   int getReferencePictureSetIdxForSOP(int POCCurr, int GOPid );
 
   bool                   PPSNeedsWriting(int ppsId);
@@ -240,7 +248,7 @@ public:
                int& iNumEncoded, bool isTff );
 
 
-  void printSummary(bool isField) { m_cGOPEncoder.printOutSummary (m_uiNumAllPicCoded, isField, m_printMSEBasedSequencePSNR, m_printSequenceMSE, m_spsMap.getFirstPS()->getBitDepths()); }
+  void printSummary(bool isField) { m_cGOPEncoder.printOutSummary (m_uiNumAllPicCoded, isField, m_printMSEBasedSequencePSNR, m_printSequenceMSE, m_printHexPsnr, m_spsMap.getFirstPS()->getBitDepths()); }
 
 };
 
diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp
index 6823598fa38ad57b5b996b6f8d056ef24430b368..dd8cd9bd160d0811c5c7abd42b5a20ee9e1d2c15 100644
--- a/source/Lib/EncoderLib/EncModeCtrl.cpp
+++ b/source/Lib/EncoderLib/EncModeCtrl.cpp
@@ -743,7 +743,8 @@ bool BestEncInfoCache::isValid( const CodingStructure& cs, const Partitioner& pa
 
   BestEncodingInfo& encInfo = *m_bestEncInfo[idx1][idx2][idx3][idx4];
 
-  if( cs.picture->poc != encInfo.poc || CS::getArea( cs, cs.area, partitioner.chType ) != encInfo.cu || !isTheSameNbHood( encInfo.cu, partitioner ) )
+  if( cs.picture->poc != encInfo.poc || CS::getArea( cs, cs.area, partitioner.chType ) != encInfo.cu || !isTheSameNbHood( encInfo.cu, partitioner ) 
+    )
   {
     return false;
   }
@@ -942,7 +943,15 @@ void EncModeCtrlMTnoRQT::initCTUEncoding( const Slice &slice )
 
   if( m_pcEncCfg->getUseE0023FastEnc() )
   {
-    m_skipThreshold = ( ( slice.getMinPictureDistance() <= PICTURE_DISTANCE_TH ) ? FAST_SKIP_DEPTH : SKIP_DEPTH );
+#if JVET_K0157
+    if (m_pcEncCfg->getUseCompositeRef())
+      m_skipThreshold = ( ( slice.getMinPictureDistance() <= PICTURE_DISTANCE_TH * 2 ) ? FAST_SKIP_DEPTH : SKIP_DEPTH );
+    else
+      m_skipThreshold = ((slice.getMinPictureDistance() <= PICTURE_DISTANCE_TH) ? FAST_SKIP_DEPTH : SKIP_DEPTH);
+
+#else
+    m_skipThreshold = ((slice.getMinPictureDistance() <= PICTURE_DISTANCE_TH) ? FAST_SKIP_DEPTH : SKIP_DEPTH);
+#endif
   }
   else
   {
@@ -1057,7 +1066,6 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
 #endif
 
   xGetMinMaxQP( minQP, maxQP, cs, partitioner, baseQP, *cs.sps, *cs.pps, true );
-
   // Add coding modes here
   // NOTE: Working back to front, as a stack, which is more efficient with the container
   // NOTE: First added modes will be processed at the end.
@@ -1425,11 +1433,11 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
     {
       return false;
     }
-
     if( lastTestMode().type != ETM_INTRA && cuECtx.bestCS && cuECtx.bestCU && interHadActive( cuECtx ) )
     {
       // Get SATD threshold from best Inter-CU
-      if( !cs.slice->isIntra() && m_pcEncCfg->getUsePbIntraFast() )
+      if( !cs.slice->isIntra() && m_pcEncCfg->getUsePbIntraFast() 
+        )
       {
         CodingUnit* bestCU = cuECtx.bestCU;
         if( bestCU && CU::isInter( *bestCU ) )
diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h
index d8caa932771b8cb34f4c63b0f5780a15889ae3c8..12fd159c5f840d80c6ceae961b95d7b776c32409 100644
--- a/source/Lib/EncoderLib/EncModeCtrl.h
+++ b/source/Lib/EncoderLib/EncModeCtrl.h
@@ -385,6 +385,8 @@ struct CodedCUInfo
 
   bool validMv[NUM_REF_PIC_LIST_01][MAX_STORED_CU_INFO_REFS];
   Mv   saveMv [NUM_REF_PIC_LIST_01][MAX_STORED_CU_INFO_REFS];
+
+
 #if ENABLE_SPLIT_PARALLELISM
 
   uint64_t
@@ -431,6 +433,7 @@ public:
 
   bool getMv  ( const UnitArea& area, const RefPicList refPicList, const int iRefIdx,       Mv& rMv ) const;
   void setMv  ( const UnitArea& area, const RefPicList refPicList, const int iRefIdx, const Mv& rMv );
+
 };
 
 #if REUSE_CU_RESULTS
diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp
index 4c7541aa94352b937ea0772e01e0585067e5f29c..a11f93c01dad8f484cb893a85e4ec2a3ec201cf2 100644
--- a/source/Lib/EncoderLib/EncSlice.cpp
+++ b/source/Lib/EncoderLib/EncSlice.cpp
@@ -40,6 +40,9 @@
 #include "EncLib.h"
 #include "CommonLib/UnitTools.h"
 #include "CommonLib/Picture.h"
+#if K0149_BLOCK_STATISTICS
+#include "CommonLib/dtrace_blockstatistics.h"
+#endif
 
 #if ENABLE_WPP_PARALLELISM
 #include <mutex>
@@ -57,6 +60,9 @@ extern recursive_mutex g_cache_mutex;
 
 EncSlice::EncSlice()
  : m_encCABACTableIdx(I_SLICE)
+#if ENABLE_QPA
+ , m_adaptedLumaQP(-1)
+#endif
 {
 }
 
@@ -140,7 +146,120 @@ EncSlice::setUpLambda( Slice* slice, const double dLambda, int iQP)
   slice->setLambdas( dLambdas );
 }
 
+#if ENABLE_QPA
+
+static inline int apprI3Log2 (const double d) // 3*log2(d) rounded to the nearest integer; -128 when d < 1.5e-13
+{
+  return !(d < 1.5e-13) ? int (floor (0.5 + (3.0 * log (d)) / log (2.0))) : -128;
+}
+
+// Mean absolute response of a zero-sum 3x3 high-pass kernel (center 12, edges -2, corners -1) over the interior
+// of the iWidth x iHeight block at pSrc. The result is written to hpEner and is never below 2^(uBitDepth - 4).
+static void filterAndCalculateAverageEnergies (const Pel* pSrc, const int  iSrcStride,
+                                               double &hpEner,  const int  iHeight,    const int iWidth,
+                                               const uint32_t uBitDepth /* luma bit-depth (4-16) */)
+{
+  uint64_t saAct = 0;
+
+  // skip first row as there may be a black border frame
+  pSrc += iSrcStride;
+  for (int y = 1; y < iHeight - 1; y++) // center rows only, the last row is skipped as well
+  {
+    for (int x = 1; x < iWidth - 1; x++) // center columns only, first and last column are skipped like the rows
+    {
+      const int f = 12 * (int)pSrc[x  ] - 2 * ((int)pSrc[x-1] + (int)pSrc[x+1] + (int)pSrc[x  -iSrcStride] + (int)pSrc[x  +iSrcStride])
+                       - (int)pSrc[x-1-iSrcStride] - (int)pSrc[x+1-iSrcStride] - (int)pSrc[x-1+iSrcStride] - (int)pSrc[x+1+iSrcStride];
+      saAct += abs (f);
+    }
+    pSrc += iSrcStride;
+  }
+  // average over the (iWidth - 2) * (iHeight - 2) filtered positions
+
+  hpEner = double(saAct) / double((iWidth - 2) * (iHeight - 2));
+
+  // lower limit, compensate for highpass amplification; the comparison is negated so that the NaN
+  // resulting from 0/0 (block without interior, i.e. width or height equal to 2) is replaced as well
+  const double hpEnerMin = double(1 << (uBitDepth - 4));
+  if (!(hpEner >= hpEnerMin)) hpEner = hpEnerMin;
+}
+
+#ifndef GLOBAL_AVERAGING
+  #define GLOBAL_AVERAGING 1 // "global" averaging of a_k across a set instead of one picture
+#endif
+
+#if GLOBAL_AVERAGING
+// Reference energy (square-root of a_pic) selected from the picture dimensions and the given bit depth.
+static double getAveragePictureEnergy (const CPelBuf picOrig, const uint32_t uBitDepth)
+{
+  const double defaultEnergy  = 5.65625 * double(1 << (uBitDepth >> 1)); // sizes between the two classes below
+  const bool   isUhdPicture   = (picOrig.width > 2048) && (picOrig.height > 1280);
+  const bool   isSmallPicture = (picOrig.width <= 1024) || (picOrig.height <= 640);
+
+  if (isUhdPicture) // for UHD/4K
+  {
+    return defaultEnergy * (4.0 / 5.65625);
+  }
+
+  // 480p-class pictures use the larger value, every other size keeps the default
+  return isSmallPicture ? defaultEnergy * (8.0 / 5.65625) : defaultEnergy;
+}
+#endif
+
+// Measures the high-pass energy of every component of the original picture, derives the two slice chroma QP offsets
+// from it and hands them to pcEncCfg. Returns the adapted luma QP, or -1 if no chroma component was processed.
+static int applyQPAdaptationChroma (Picture* const pcPic, Slice* const pcSlice, EncCfg* const pcEncCfg, const int sliceQP)
+{
+  double hpEner[MAX_NUM_COMPONENT] = {0.0, 0.0, 0.0};
+  int    optSliceChromaQpOffset[2] = {0, 0};
+  int    savedLumaQP               = -1; // stays -1 until the first chroma component has been handled
+
+  for (uint32_t comp = 0; comp < getNumberValidComponents (pcPic->chromaFormat); comp++)
+  {
+    const ComponentID compID = (ComponentID)comp;
+    const CPelBuf    picOrig = pcPic->getOrigBuf (pcPic->block (compID));
+    // chroma components are analysed with a bit depth reduced by one
+    filterAndCalculateAverageEnergies (picOrig.buf, picOrig.stride, hpEner[comp], picOrig.height, picOrig.width,
+                                       pcSlice->getSPS()->getBitDepth (toChannelType (compID)) - (isChroma (compID) ? 1 : 0));
+    if (isChroma (compID))
+    {
+      const int  adaptChromaQPOffset = 2.0 * hpEner[comp] <= hpEner[0] ? 0 : apprI3Log2 (2.0 * hpEner[comp] / hpEner[0]);
+   #if GLOBAL_AVERAGING
+      int       averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEner[0] / getAveragePictureEnergy (pcPic->getOrigBuf().Y(), pcSlice->getSPS()->getBitDepth (CH_L))));
+   #else
+      int       averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP); // mean slice QP
+   #endif
+   #if SHARP_LUMA_DELTA_QP
+      // change mean picture QP index based on picture's average luma value (Sharp)
+      if (pcEncCfg->getLumaLevelToDeltaQPMapping().mode == LUMALVL_TO_DQP_NUM_MODES)
+      {
+        const CPelBuf picLuma = pcPic->getOrigBuf().Y();
+        uint64_t uAvgLuma = 0;
+        for (SizeType y = 0; y < picLuma.height; y++)
+        {
+          for (SizeType x = 0; x < picLuma.width; x++)
+          {
+            uAvgLuma += (uint64_t)picLuma.at (x, y);
+          }
+        }
+        uAvgLuma = (uAvgLuma + (picLuma.area() >> 1)) / picLuma.area();
+        // a larger mean luma lowers the QP: the change is 1 - ((3 * mean^2) >> (2 * bitDepth - 1))
+        averageAdaptedLumaQP = Clip3 (0, MAX_QP, averageAdaptedLumaQP + 1 - int((3 * uAvgLuma * uAvgLuma) >> uint64_t (2 * pcSlice->getSPS()->getBitDepth (CH_L) - 1)));
+      }
+   #endif
+      const int lumaChromaMappingDQP = averageAdaptedLumaQP - getScaledChromaQP (averageAdaptedLumaQP, pcEncCfg->getChromaFormatIdc());
+      // the energy-based part of the offset is limited to at most 3 on top of the luma-to-chroma mapping difference
+      optSliceChromaQpOffset[comp-1] = std::min (3 + lumaChromaMappingDQP, adaptChromaQPOffset + lumaChromaMappingDQP);
 
+      if (savedLumaQP < 0) savedLumaQP = averageAdaptedLumaQP; // save it for later
+    }
+  }
+
+  pcEncCfg->setSliceChromaOffsetQpIntraOrPeriodic (pcEncCfg->getSliceChromaOffsetQpPeriodicity(), optSliceChromaQpOffset);
+
+  return savedLumaQP;
+}
+
+#endif // ENABLE_QPA
 
 /**
  - non-referenced frame marking
@@ -156,8 +275,11 @@ EncSlice::setUpLambda( Slice* slice, const double dLambda, int iQP)
  \param rpcSlice      slice header class
  \param isField       true for field coding
  */
-
-void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCurr, const int iGOPid, Slice*& rpcSlice, const bool isField )
+void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr, const int iGOPid, Slice*& rpcSlice, const bool isField
+#if JVET_K0157
+  , bool isEncodeLtRef
+#endif
+)
 {
   double dQP;
   double dLambda;
@@ -166,7 +288,19 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur
   rpcSlice->setSliceBits(0);
   rpcSlice->setPic( pcPic );
   rpcSlice->initSlice();
+#if JVET_K0157
+  int multipleFactor = pcPic->cs->sps->getSpsNext().getUseCompositeRef() ? 2 : 1;
+  if (pcPic->cs->sps->getSpsNext().getUseCompositeRef() && isEncodeLtRef)
+  {
+    rpcSlice->setPicOutputFlag(false);
+  }
+  else
+  {
+    rpcSlice->setPicOutputFlag(true);
+  }
+#else
   rpcSlice->setPicOutputFlag( true );
+#endif
   rpcSlice->setPOC( pocCurr );
 #if JVET_K0072
   rpcSlice->setDepQuantEnabledFlag( m_pcCfg->getDepQuantEnabledFlag() );
@@ -190,7 +324,11 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur
     }
     else
     {
+#if JVET_K0157
+      poc = poc % (m_pcCfg->getGOPSize() * multipleFactor);
+#else
       poc = poc % m_pcCfg->getGOPSize();
+#endif
     }
 
     if ( poc == 0 )
@@ -199,11 +337,19 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur
     }
     else
     {
+#if JVET_K0157
+      int step = m_pcCfg->getGOPSize() * multipleFactor;
+#else
       int step = m_pcCfg->getGOPSize();
+#endif
       depth    = 0;
       for( int i=step>>1; i>=1; i>>=1 )
       {
+#if JVET_K0157
+        for (int j = i; j<(m_pcCfg->getGOPSize() * multipleFactor); j += step)
+#else
         for ( int j=i; j<m_pcCfg->getGOPSize(); j+=step )
+#endif
         {
           if ( j == poc )
           {
@@ -233,11 +379,19 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur
   {
     if(m_pcCfg->getDecodingRefreshType() == 3)
     {
+#if JVET_K0157
+      eSliceType = (pocLast == 0 || pocCurr % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType;
+#else
       eSliceType = (pocLast == 0 || pocCurr % m_pcCfg->getIntraPeriod() == 0             || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType;
+#endif
     }
     else
     {
+#if JVET_K0157
+      eSliceType = (pocLast == 0 || (pocCurr - (isField ? 1 : 0)) % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType;
+#else
       eSliceType = (pocLast == 0 || (pocCurr - (isField ? 1 : 0)) % m_pcCfg->getIntraPeriod() == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType;
+#endif
     }
   }
 
@@ -399,7 +553,7 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur
     dLambda *= lambdaModifier;
 #endif
 
-    iQP = max( -rpcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), min( MAX_QP, (int) floor( dQP + 0.5 ) ) );
+    iQP = Clip3( -rpcSlice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) );
 #endif
 
     m_vdRdPicLambda[iDQpIdx] = dLambda;
@@ -418,11 +572,20 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur
 #endif
 
 #if W0038_CQP_ADJ
+ #if ENABLE_QPA
+  m_adaptedLumaQP = -1;
+
+  if ((m_pcCfg->getUsePerceptQPA() || m_pcCfg->getSliceChromaOffsetQpPeriodicity() > 0) && !m_pcCfg->getUseRateCtrl() && rpcSlice->getPPS()->getSliceChromaQpFlag() &&
+      (rpcSlice->isIntra() || (m_pcCfg->getSliceChromaOffsetQpPeriodicity() > 0 && (rpcSlice->getPOC() % m_pcCfg->getSliceChromaOffsetQpPeriodicity()) == 0)))
+  {
+    m_adaptedLumaQP = applyQPAdaptationChroma (pcPic, rpcSlice, m_pcCfg, iQP);
+  }
+ #endif
   if(rpcSlice->getPPS()->getSliceChromaQpFlag())
   {
-    const bool bUseIntraOrPeriodicOffset = rpcSlice->getSliceType()==I_SLICE || (m_pcCfg->getSliceChromaOffsetQpPeriodicity()!=0 && (rpcSlice->getPOC()%m_pcCfg->getSliceChromaOffsetQpPeriodicity())==0);
-    int cbQP = bUseIntraOrPeriodicOffset? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(false) : m_pcCfg->getGOPEntry(iGOPid).m_CbQPoffset;
-    int crQP = bUseIntraOrPeriodicOffset? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(true)  : m_pcCfg->getGOPEntry(iGOPid).m_CrQPoffset;
+    const bool bUseIntraOrPeriodicOffset = rpcSlice->isIntra() || (m_pcCfg->getSliceChromaOffsetQpPeriodicity() > 0 && (rpcSlice->getPOC() % m_pcCfg->getSliceChromaOffsetQpPeriodicity()) == 0);
+    int cbQP = bUseIntraOrPeriodicOffset ? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(false) : m_pcCfg->getGOPEntry(iGOPid).m_CbQPoffset;
+    int crQP = bUseIntraOrPeriodicOffset ? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(true)  : m_pcCfg->getGOPEntry(iGOPid).m_CrQPoffset;
 
     cbQP = Clip3( -12, 12, cbQP + rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb) ) - rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb);
     crQP = Clip3( -12, 12, crQP + rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr) ) - rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr);
@@ -453,7 +616,7 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur
 #endif
 
   setUpLambda(rpcSlice, dLambda, iQP);
-  
+
 #if WCG_EXT
   // cost = Distortion + Lambda*R,
   // when QP is adjusted by luma, distortion is changed, so we have to adjust lambda to match the distortion, then the cost function becomes
@@ -471,11 +634,19 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur
     {
       if(m_pcCfg->getDecodingRefreshType() == 3)
       {
+#if JVET_K0157
+        eSliceType = (pocLast == 0 || (pocCurr) % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType;
+#else
         eSliceType = (pocLast == 0 || (pocCurr)                     % m_pcCfg->getIntraPeriod() == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType;
+#endif
       }
       else
       {
+#if JVET_K0157
+        eSliceType = (pocLast == 0 || (pocCurr - (isField ? 1 : 0)) % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType;
+#else
         eSliceType = (pocLast == 0 || (pocCurr - (isField ? 1 : 0)) % m_pcCfg->getIntraPeriod() == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType;
+#endif
       }
     }
 
@@ -485,7 +656,7 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur
   if (m_pcCfg->getUseRecalculateQPAccordingToLambda())
   {
     dQP = xGetQPValueAccordingToLambda( dLambda );
-    iQP = max( -rpcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), min( MAX_QP, (int) floor( dQP + 0.5 ) ) );
+    iQP = Clip3( -rpcSlice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) );
   }
 
   rpcSlice->setSliceQp           ( iQP );
@@ -653,7 +824,7 @@ double EncSlice::calculateLambda( const Slice*     slice,
   dLambda *= lambdaModifier;
 #endif
 
-  iQP = max( -slice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), min( MAX_QP, (int) floor( dQP + 0.5 ) ) );
+  iQP = Clip3( -slice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) );
 
 #if JVET_K0072
   if( m_pcCfg->getDepQuantEnabledFlag() )
@@ -677,68 +848,9 @@ void EncSlice::resetQP( Picture* pic, int sliceQP, double lambda )
 }
 
 #if ENABLE_QPA
-static inline int apprI2Log2 (const double d)
-{
-  return d < 6.0e-20 ? -128 : int(floor(2.0 * log(d) / log(2.0) + 0.5));
-}
-
-#ifndef HLM_L1_NORM
-  #define HLM_L1_NORM
-#endif
-
-static int filterAndCalculateAverageEnergies (const Pel* pSrc,     const int  iSrcStride,
-                                              double &hpEner,      const int  iHeight,    const int iWidth,
-                                              const int  iPOC = 0)
-{
-  int iHpValue;
-  uint32_t uHpERow, uHpEner = 0;
-
-  // skip first row as there may be a black border frame
-  pSrc += iSrcStride;
-  // center rows
-  for (int y = 1; y < iHeight - 1; y++)
-  {
-    uHpERow = 0;
-    // skip column as there may be a black border frame
-
-    for (int x = 1; x < iWidth - 1; x++) // and columns
-    {
-      iHpValue = 4 * (int)pSrc[x] - (int)pSrc[x-1] - (int)pSrc[x+1] - (int)pSrc[x-iSrcStride] - (int)pSrc[x+iSrcStride];
-#ifdef HLM_L1_NORM
-      uHpERow += abs (iHpValue);
-#else
-      uHpERow += iHpValue * iHpValue;
-#endif
-    }
-    // skip column as there may be a black border frame
-#ifdef HLM_L1_NORM
-    uHpEner += uHpERow;
-#else
-    uHpEner += (uHpERow + 64) >> 7; // avoids overflows
-#endif
-    pSrc += iSrcStride;
-  }
-  // skip last row as there may be a black border frame
-
-  hpEner = double(uHpEner) / double((iWidth - 2) * (iHeight - 2));
-#ifdef HLM_L1_NORM
-  hpEner *= hpEner;
-#endif
-  // lower limit, compensate for highpass amplification
-  if (hpEner < 64.0) hpEner = 64.0;
-
-  if (iPOC  <= 0) return 0;
-  return 1; // OK
-}
-
-#ifdef HLM_L1_NORM
-  #undef HLM_L1_NORM
-#endif
-
-#if ENABLE_QPA
-static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice,    const PreCalcValues& pcv,
+static bool applyQPAdaptation (Picture* const pcPic,     Slice* const pcSlice,        const PreCalcValues& pcv,
                                const uint32_t startAddr, const uint32_t boundingAddr, const bool useSharpLumaDQP,
-                               const int gopSize,    const double hpEnerAvg,  const double hpEnerMax)
+                               const double hpEnerAvg,   const double hpEnerMax,      const bool useFrameWiseQPA, const int previouslyAdaptedLumaQP = -1)
 {
   const int  iBitDepth   = pcSlice->getSPS()->getBitDepth (CHANNEL_TYPE_LUMA);
   const int  iQPIndex    = pcSlice->getSliceQp(); // initial QP index for current slice, used in following loops
@@ -746,20 +858,28 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice,    co
   const TileMap& tileMap = *pcPic->tileMap;
 #endif
   bool   sliceQPModified = false;
-  double hpEnerPic = 1.0 / (1.5 * double(1 << iBitDepth)); // speedup: multiply instead of divide in loops below
+#if GLOBAL_AVERAGING
+  const double hpEnerPic = 1.0 / getAveragePictureEnergy (pcPic->getOrigBuf().Y(), iBitDepth); // inverse, speed
+#else
+  const double hpEnerPic = 1.0 / hpEnerAvg; // speedup: multiply instead of divide in loop below; 1.0 for tuning
+#endif
 
-  if (pcv.lumaWidth > 2048 && pcv.lumaHeight > 1280) // for UHD/4K
+  if (useFrameWiseQPA || (iQPIndex >= MAX_QP))
   {
-    hpEnerPic *= 1.5;
-  }
+    int iQPFixed;
 
-  if ((pcPic->getPOC() & 1) && (iQPIndex >= MAX_QP))
-  {
-    int iQPFixed = Clip3 (0, MAX_QP, iQPIndex + ((apprI2Log2 (hpEnerAvg * hpEnerPic) + apprI2Log2 (hpEnerMax * hpEnerPic) + 1) >> 1)); // adapted slice QP = (mean(QP) + max(QP)) / 2
+    if (useFrameWiseQPA)
+    {
+      iQPFixed = (previouslyAdaptedLumaQP < 0) ? Clip3 (0, MAX_QP, iQPIndex + apprI3Log2 (hpEnerAvg * hpEnerPic)) : previouslyAdaptedLumaQP; // average-activity slice QP
+    }
+    else
+    {
+      iQPFixed = Clip3 (0, MAX_QP, iQPIndex + ((apprI3Log2 (hpEnerAvg * hpEnerPic) + apprI3Log2 (hpEnerMax * hpEnerPic) + 1) >> 1)); // adapted slice QP = (mean(QP) + max(QP)) / 2
+    }
 #if SHARP_LUMA_DELTA_QP
 
     // change new fixed QP based on average CTU luma value (Sharp)
-    if (useSharpLumaDQP)
+    if (useSharpLumaDQP && (iQPIndex < MAX_QP) && (previouslyAdaptedLumaQP < 0))
     {
       uint64_t uAvgLuma = 0;
 
@@ -779,9 +899,9 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice,    co
     }
 #endif
 
-    if (iQPFixed < iQPIndex) iQPFixed = iQPIndex;
+    if (iQPIndex >= MAX_QP) iQPFixed = MAX_QP;
     else
-    if (iQPFixed > iQPIndex)
+    if (iQPFixed != iQPIndex)
     {
       const double* oldLambdas = pcSlice->getLambdas();
       const double  corrFactor = pow (2.0, double(iQPFixed - iQPIndex) / 3.0);
@@ -816,12 +936,12 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice,    co
       const uint32_t ctuRsAddr = ctuTsAddr;
 #endif
 
-      int iQPAdapt = Clip3 (0, MAX_QP, iQPIndex + apprI2Log2 (pcPic->m_uEnerHpCtu[ctuRsAddr] * hpEnerPic));
+      int iQPAdapt = Clip3 (0, MAX_QP, iQPIndex + apprI3Log2 (pcPic->m_uEnerHpCtu[ctuRsAddr] * hpEnerPic));
 
 #if SHARP_LUMA_DELTA_QP
-      if ((pcv.widthInCtus > 1) && (gopSize > 1)) // try to enforce CTU SNR greater than zero dB
+      if (pcv.widthInCtus > 1) // try to enforce CTU SNR greater than zero dB
 #else
-      if ((!pcSlice->isIntra()) && (gopSize > 1)) // try to enforce CTU SNR greater than zero dB
+      if (!pcSlice->isIntra()) // try to enforce CTU SNR greater than zero dB
 #endif
       {
         const Pel      dcOffset   = pcPic->m_iOffsetCtu[ctuRsAddr];
@@ -832,11 +952,11 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice,    co
         {
           const uint64_t uAvgLuma   = (uint64_t)dcOffset;
 
-          iQPAdapt = max (0, iQPAdapt + 1 - int((3 * uAvgLuma * uAvgLuma) >> uint64_t(2 * iBitDepth - 1)));
+          iQPAdapt = std::max (0, iQPAdapt + 1 - int((3 * uAvgLuma * uAvgLuma) >> uint64_t(2 * iBitDepth - 1)));
         }
 
 #endif
-        const uint32_t     uRefScale  = g_invQuantScales[iQPAdapt % 6] << ((iQPAdapt / 6) + iBitDepth - (pcSlice->isIntra() ? 4 : 3));
+        const uint32_t uRefScale  = g_invQuantScales[iQPAdapt % 6] << ((iQPAdapt / 6) + iBitDepth - 4);
         const CompArea subArea    = clipArea (CompArea (COMPONENT_Y, pcPic->chromaFormat, Area ((ctuRsAddr % pcv.widthInCtus) * pcv.maxCUWidth, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUHeight, pcv.maxCUWidth, pcv.maxCUHeight)), pcPic->Y());
         const Pel*     pSrc       = pcPic->getOrigBuf (subArea).buf;
         const SizeType iSrcStride = pcPic->getOrigBuf (subArea).stride;
@@ -866,10 +986,10 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice,    co
         // reduce QP index if CTU would be fully quantized to zero
         if (uAbsDCless < uRefScale)
         {
-          const int limit  = min (0, ((iQPIndex + 4) >> 3) - 6);
-          const int redVal = max (limit, apprI2Log2 ((double)uAbsDCless / (double)uRefScale));
+          const int limit  = std::min (0, ((iQPIndex + 4) >> 3) - 6);
+          const int redVal = std::max (limit, apprI3Log2 ((double)uAbsDCless / (double)uRefScale));
 
-          iQPAdapt = max (0, iQPAdapt + redVal);
+          iQPAdapt = std::max (0, iQPAdapt + redVal);
         }
 #if SHARP_LUMA_DELTA_QP
 
@@ -879,7 +999,7 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice,    co
 
       pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iQPAdapt; // adapted QPs
 
-      if ((pcv.widthInCtus > 1) && (gopSize > 1)) // try to reduce local bitrate peaks via minimum smoothing
+      if (pcv.widthInCtus > 1) // try to reduce local bitrate peaks via minimum smoothing of the adapted QPs
       {
         iQPAdapt = ctuRsAddr % pcv.widthInCtus; // horizontal offset
         if (iQPAdapt == 0)
@@ -888,11 +1008,11 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice,    co
         }
         else // iQPAdapt >= 1
         {
-          iQPAdapt = (iQPAdapt > 1) ? min (pcPic->m_iOffsetCtu[ctuRsAddr - 2], pcPic->m_iOffsetCtu[ctuRsAddr]) : pcPic->m_iOffsetCtu[ctuRsAddr];
+          iQPAdapt = (iQPAdapt > 1) ? std::min (pcPic->m_iOffsetCtu[ctuRsAddr - 2], pcPic->m_iOffsetCtu[ctuRsAddr]) : pcPic->m_iOffsetCtu[ctuRsAddr];
         }
         if (ctuRsAddr > pcv.widthInCtus)
         {
-          iQPAdapt = min (iQPAdapt, (int)pcPic->m_iOffsetCtu[ctuRsAddr - 1 - pcv.widthInCtus]); // min(L, T)
+          iQPAdapt = std::min (iQPAdapt, (int)pcPic->m_iOffsetCtu[ctuRsAddr - 1 - pcv.widthInCtus]);
         }
         if ((ctuRsAddr > 0) && (pcPic->m_iOffsetCtu[ctuRsAddr - 1] < (Pel)iQPAdapt))
         {
@@ -900,7 +1020,7 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice,    co
         }
         if ((ctuTsAddr == boundingAddr - 1) && (ctuRsAddr > pcv.widthInCtus)) // last CTU in the given slice
         {
-          iQPAdapt = min (pcPic->m_iOffsetCtu[ctuRsAddr - 1], pcPic->m_iOffsetCtu[ctuRsAddr - pcv.widthInCtus]);
+          iQPAdapt = std::min (pcPic->m_iOffsetCtu[ctuRsAddr - 1], pcPic->m_iOffsetCtu[ctuRsAddr - pcv.widthInCtus]);
           if (pcPic->m_iOffsetCtu[ctuRsAddr] < (Pel)iQPAdapt)
           {
             pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iQPAdapt;
@@ -914,8 +1034,6 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice,    co
 }
 #endif // ENABLE_QPA
 
-#endif // ENABLE_QPA || ENABLE_PRIVATE
-
 // ====================================================================================================================
 // Public member functions
 // ====================================================================================================================
@@ -1132,7 +1250,8 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
 #if JVET_K0346
   if (pcSlice->getSPS()->getSpsNext().getUseSubPuMvp())
   {
-    if (!pcSlice->isIntra())
+    if (!pcSlice->isIntra()
+       )
     {
       if (pcSlice->getPOC() > m_pcCuEncoder->getPrevPOC() && m_pcCuEncoder->getClearSubMergeStatic())
       {
@@ -1176,7 +1295,15 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
     else
     {
       m_pcCuEncoder->setPrevPOC(pcSlice->getPOC());
-      m_pcCuEncoder->setClearSubMergeStatic(true);
+      if (m_pcCfg->getGOPSize() != m_pcCfg->getIntraPeriod())
+      {
+        m_pcCuEncoder->setClearSubMergeStatic(true);
+      }
+      else
+      {
+        m_pcCuEncoder->clearSubMergeStatics();
+        m_pcCuEncoder->setClearSubMergeStatic(false);
+      }
     }
   }
 #endif
@@ -1269,22 +1396,18 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
   }
 
 #if ENABLE_QPA
- #if ENABLE_QPA
   double hpEnerMax     = 1.0;
   double hpEnerPic     = 0.0;
- #endif
   int    iSrcOffset;
 
- #if ENABLE_QPA
-  if (m_pcCfg->getUsePerceptQPA() && pcSlice->getPPS()->getUseDQP() && !m_pcCfg->getUseRateCtrl())
- #endif
+  if (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl())
   {
     for (uint32_t ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr++)
     {
  #if HEVC_TILES_WPP
-      const uint32_t     ctuRsAddr  = tileMap.getCtuTsToRsAddrMap (ctuTsAddr);
+      const uint32_t ctuRsAddr  = tileMap.getCtuTsToRsAddrMap (ctuTsAddr);
  #else
-      const uint32_t     ctuRsAddr  = ctuTsAddr;
+      const uint32_t ctuRsAddr  = ctuTsAddr;
  #endif
       const Position pos ((ctuRsAddr % widthInCtus) * pcv.maxCUWidth, (ctuRsAddr / widthInCtus) * pcv.maxCUHeight);
       const CompArea subArea    = clipArea (CompArea (COMPONENT_Y, pcPic->chromaFormat, Area (pos.x, pos.y, pcv.maxCUWidth, pcv.maxCUHeight)), pcPic->Y());
@@ -1315,24 +1438,23 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
       iSrcOffset = (iSrcOffset + (x >> 1)) / x; // slow division
 
       filterAndCalculateAverageEnergies (pcPic->getOrigBuf (fltArea).buf, iSrcStride,
-                                         hpEner, iFltHeight, iFltWidth, pcPic->getPOC());
+                                         hpEner, iFltHeight, iFltWidth,
+                                         pcSlice->getSPS()->getBitDepth (CHANNEL_TYPE_LUMA));
 
- #if ENABLE_QPA
       if (hpEner > hpEnerMax) hpEnerMax = hpEner;
       hpEnerPic += hpEner;
       pcPic->m_uEnerHpCtu[ctuRsAddr] = hpEner;
       pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iSrcOffset;
- #endif
     } // end iteration over all CTUs in current slice
 
   }
 
- #if ENABLE_QPA
-  if (m_pcCfg->getUsePerceptQPA() && pcSlice->getPPS()->getUseDQP() && !m_pcCfg->getUseRateCtrl() && (boundingCtuTsAddr > startCtuTsAddr))
+  if (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl() && (boundingCtuTsAddr > startCtuTsAddr))
   {
     const double hpEnerAvg = hpEnerPic / double(boundingCtuTsAddr - startCtuTsAddr);
 
-    if (applyQPAdaptation (pcPic, pcSlice, pcv, startCtuTsAddr, boundingCtuTsAddr, m_pcCfg->getLumaLevelToDeltaQPMapping().mode == LUMALVL_TO_DQP_NUM_MODES, m_pcCfg->getGOPSize(), hpEnerAvg, hpEnerMax))
+    if (applyQPAdaptation (pcPic, pcSlice, pcv, startCtuTsAddr, boundingCtuTsAddr, m_pcCfg->getLumaLevelToDeltaQPMapping().mode == LUMALVL_TO_DQP_NUM_MODES,
+                           hpEnerAvg, hpEnerMax, (m_pcCfg->getBaseQP() >= 38) || (m_pcCfg->getSourceWidth() <= 512 && m_pcCfg->getSourceHeight() <= 320), m_adaptedLumaQP))
     {
       m_CABACEstimator->initCtxModels (*pcSlice);
   #if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
@@ -1356,9 +1478,7 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
       }
     }
   }
- #endif // ENABLE_QPA
-
-#endif // ENABLE_QPA || ENABLE_PRIVATE
+#endif // ENABLE_QPA
 
   cs.pcv      = pcSlice->getPPS()->pcv;
   cs.fracBits = 0;
@@ -1387,6 +1507,11 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c
     }
   }
   else
+#endif
+#if K0149_BLOCK_STATISTICS
+  const SPS *sps = pcSlice->getSPS();
+  CHECK(sps == 0, "No SPS present");
+  writeBlockStatisticsHeader(sps);
 #endif
   encodeCtus( pcPic, bCompressEntireSlice, bFastDeltaQP, startCtuTsAddr, boundingCtuTsAddr, m_pcLib );
 
@@ -1568,12 +1693,17 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
 #endif
 
 
+
 #if ENABLE_WPP_PARALLELISM
     pEncLib->getCuEncoder( dataId )->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP );
 #else
     m_pcCuEncoder->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP );
 #endif
 
+#if K0149_BLOCK_STATISTICS
+    getAndStoreBlockStatistics(cs, ctuArea);
+#endif
+
     pCABACWriter->resetBits();
     pCABACWriter->coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr, true );
     const int numberOfWrittenBits = int( pCABACWriter->getEstFracBits() >> SCALE_BITS );
@@ -1629,11 +1759,13 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
 
 #if !ENABLE_WPP_PARALLELISM
     int actualBits = int(cs.fracBits >> SCALE_BITS);
+    actualBits    -= (int)m_uiPicTotalBits;
 #endif
     if ( pCfg->getUseRateCtrl() )
     {
 #if ENABLE_WPP_PARALLELISM
       int actualBits      = int( cs.fracBits >> SCALE_BITS );
+      actualBits         -= (int)m_uiPicTotalBits;
 #endif
       int actualQP        = g_RCInvalidQPValue;
       double actualLambda = pRdCost->getLambda();
@@ -1957,7 +2089,7 @@ void EncSlice::calculateBoundingCtuTsAddrForSlice(uint32_t &startCtuTSAddrSlice,
   {
     // Adjust for wavefronts (no tiles).
     // WPP: if a slice does not start at the beginning of a CTB row, it must end within the same CTB row
-    boundingCtuTSAddrSlice = min(boundingCtuTSAddrSlice, startCtuTSAddrSlice - (startCtuTSAddrSlice % pcPic->cs->pcv->widthInCtus) + (pcPic->cs->pcv->widthInCtus));
+    boundingCtuTSAddrSlice = std::min(boundingCtuTSAddrSlice, startCtuTSAddrSlice - (startCtuTSAddrSlice % pcPic->cs->pcv->widthInCtus) + (pcPic->cs->pcv->widthInCtus));
   }
 #endif
 }
@@ -2011,7 +2143,7 @@ void EncSlice::xDetermineStartAndBoundingCtuTsAddr  ( uint32_t& startCtuTsAddr,
   pcSlice->setSliceSegmentCurStartCtuTsAddr(startCtuTsAddrSliceSegment);
 
   // Make a joint decision based on reconstruction and dependent slice bounds
-  startCtuTsAddr    = max(startCtuTsAddrSlice   , startCtuTsAddrSliceSegment   );
+  startCtuTsAddr    = std::max(startCtuTsAddrSlice, startCtuTsAddrSliceSegment);
   boundingCtuTsAddr = boundingCtuTsAddrSliceSegment;
 #else
   startCtuTsAddr = startCtuTsAddrSlice;
diff --git a/source/Lib/EncoderLib/EncSlice.h b/source/Lib/EncoderLib/EncSlice.h
index 2c6eabb26d25d5362aba4080de659a3827d7a679..836bc4d7caccc05776643776ac7e4dbc2792c25b 100644
--- a/source/Lib/EncoderLib/EncSlice.h
+++ b/source/Lib/EncoderLib/EncSlice.h
@@ -106,7 +106,7 @@ public:
   int getGopId()        const { return m_gopID; }
   double  calculateLambda( const Slice* slice, const int GOPid, const int depth, const double refQP, const double dQP, int &iQP );
   void    setUpLambda( Slice* slice, const double dLambda, int iQP );
-  
+
 private:
 #endif
 #if HEVC_TILES_WPP
@@ -117,6 +117,10 @@ private:
 
 
 public:
+#if ENABLE_QPA
+  int                     m_adaptedLumaQP;
+
+#endif
   EncSlice();
   virtual ~EncSlice();
 
@@ -126,7 +130,12 @@ public:
 
   /// preparation of slice encoding (reference marking, QP and lambda)
   void    initEncSlice        ( Picture*  pcPic, const int pocLast, const int pocCurr,
-                                const int iGOPid,   Slice*& rpcSlice, const bool isField );
+                                const int iGOPid, Slice*& rpcSlice, const bool isField
+#if JVET_K0157
+    , bool isEncodeLtRef
+#endif
+  );
+
   void    resetQP             ( Picture* pic, int sliceQP, double lambda );
 
   // compress and encode slice
@@ -150,7 +159,6 @@ public:
   void    setSliceSegmentIdx  (uint32_t i)              { m_uiSliceSegmentIdx = i;          }
 
   SliceType getEncCABACTableIdx() const             { return m_encCABACTableIdx;        }
-
 private:
   double  xGetQPValueAccordingToLambda ( double lambda );
 };
diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp
index 1b790b62a1f9f528b4a2c4a1bb97d17a70d9a5a9..11e5085dd69ffa6556f82e62c0f9cd5a82522737 100644
--- a/source/Lib/EncoderLib/InterSearch.cpp
+++ b/source/Lib/EncoderLib/InterSearch.cpp
@@ -749,9 +749,11 @@ void InterSearch::xMergeEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, int
       uiMergeIdx = uiMergeCand;
     }
   }
+
 }
 
 
+
 //! search of the best candidate for inter prediction
 void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 {
@@ -796,6 +798,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
   int          bestBiPMvpL1    = 0;
   Distortion   biPDistTemp     = std::numeric_limits<Distortion>::max();
 
+
   MergeCtx     mergeCtx;
 
   // Loop over Prediction Units
@@ -874,7 +877,6 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
       for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ )
       {
         RefPicList  eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
-
         for ( int iRefIdxTemp = 0; iRefIdxTemp < cs.slice->getNumRefIdx(eRefPicList); iRefIdxTemp++ )
         {
           uiBitsTemp = uiMbBits[iRefList];
@@ -1093,7 +1095,6 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
 
           iRefStart = 0;
           iRefEnd   = cs.slice->getNumRefIdx(eRefPicList)-1;
-
           for ( int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++ )
           {
             uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList];
@@ -1187,6 +1188,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
     uiBits [1] = bitsValidList1;
     uiCost [1] = costValidList1;
 
+
 #if JVET_K_AFFINE
       uiLastModeTemp = uiLastMode;
 #endif
@@ -1232,6 +1234,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
         uiMEBits = uiBits[1];
       }
 
+
     if ( cu.partSize != SIZE_2Nx2N )
     {
       uint32_t uiMRGIndex    = 0;
@@ -1308,9 +1311,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
       int refIdx4Para[2] = { -1, -1 };
 
 #if JVET_K0220_ENC_CTRL
-      xPredAffineInterSearch( pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para );
+      xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para);
 #else
-      xPredAffineInterSearch( pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, bFastSkipBi, acMvAffine4Para, refIdx4Para );
+      xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, bFastSkipBi, acMvAffine4Para, refIdx4Para);
 #endif
       if ( cu.slice->getSPS()->getSpsNext().getUseAffineType() )
       {
@@ -1347,9 +1350,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
           Distortion uiAffine6Cost = std::numeric_limits<Distortion>::max();
           cu.affineType = AFFINEMODEL_6PARAM;
 #if JVET_K0220_ENC_CTRL
-          xPredAffineInterSearch( pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para );
+          xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para);
 #else
-          xPredAffineInterSearch( pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, bFastSkipBi, acMvAffine4Para, refIdx4Para );
+          xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, bFastSkipBi, acMvAffine4Para, refIdx4Para);
 #endif
 
           // reset to 4 parameter affine inter mode
@@ -1414,6 +1417,8 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
       }
     }
 #endif
+
+
     m_maxCompIDToPred = MAX_NUM_COMPONENT;
 
     {
@@ -1662,6 +1667,7 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf&
 
 void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eRefPicList, Mv& rcMvPred, int iRefIdxPred, Mv& rcMv, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, bool bBi)
 {
+
   Mv cMvHalf, cMvQter;
 
   CHECK(eRefPicList >= MAX_NUM_REF_LIST_ADAPT_SR || iRefIdxPred>=int(MAX_IDX_ADAPT_SR), "Invalid reference picture list");
@@ -1678,8 +1684,8 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
     // NOTE: Other buf contains predicted signal from another direction
     PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative(*pu.cu, pu ));
     origBufTmp.copyFrom(origBuf);
-    origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs() );
-
+    origBufTmp.removeHighFreq( otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs()
+                              );
     pBuf = &origBufTmp;
 
     fWeight = 0.5;
@@ -1701,6 +1707,17 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
 #if JVET_K0357_AMVR
   cStruct.imvShift      = pu.cu->imv << 1;
 #endif
+#if JVET_K0157
+  // JVET-K0157 search flags, re-initialised on every call; they are read later by
+  // xSetSearchRange() (inCtuSearch) and xPatternSearchFracDIF() (zeroMV).
+  cStruct.zeroMV      = false;
+  // Request an in-CTU search only when the SPS-next composite-reference flag is set
+  // and the selected reference picture is marked longTerm; getRefPic() is evaluated
+  // only in that case thanks to short-circuiting.
+  cStruct.inCtuSearch = pu.cs->sps->getSpsNext().getUseCompositeRef()
+                     && pu.cs->slice->getRefPic(eRefPicList, iRefIdxPred)->longTerm;
+#endif
+
   auto blkCache = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl );
 
   bool bQTBTMV  = false;
@@ -1730,7 +1747,11 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
   {
     if( !bQTBTMV )
     {
-      xSetSearchRange( pu, ( bBi ? rcMv : rcMvPred ), iSrchRng, cStruct.searchRange );
+      xSetSearchRange(pu, (bBi ? rcMv : rcMvPred), iSrchRng, cStruct.searchRange
+#if JVET_K0157
+        , cStruct
+#endif
+      );
     }
     cStruct.subShiftMode = m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ? 2 : 0;
     xPatternSearch( cStruct, rcMv, ruiCost);
@@ -1799,7 +1820,11 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref
 void InterSearch::xSetSearchRange ( const PredictionUnit& pu,
                                     const Mv& cMvPred,
                                     const int iSrchRng,
-                                    SearchRange& sr )
+                                    SearchRange& sr
+#if JVET_K0157
+                                  , IntTZSearchStruct& cStruct
+#endif
+)
 {
 #if JVET_K0346 || JVET_K_AFFINE
   const int iMvShift = cMvPred.highPrec ? 4 : 2;
@@ -1827,6 +1852,35 @@ void InterSearch::xSetSearchRange ( const PredictionUnit& pu,
   sr.top    = mvTL.ver;
   sr.right  = mvBR.hor;
   sr.bottom = mvBR.ver;
+
+#if JVET_K0157
+  // Composite reference: when an in-CTU search was requested, clamp the window
+  // against CTU-relative bounds (a margin of 4 is applied on every side).
+  if (pu.cs->sps->getSpsNext().getUseCompositeRef() && cStruct.inCtuSearch)
+  {
+    const PreCalcValues *pcv = pu.cs->pcv;
+    const Position posTL = pu.Y().topLeft();
+    const Position posRB = pu.Y().bottomRight();
+    // PU corners masked to CTU-relative coordinates; the top-left one is moved by (-4, -4).
+    const Position posRBinCTU(posRB.x & pcv->maxCUWidthMask, posRB.y & pcv->maxCUHeightMask);
+    const Position posLTinCTU = Position(posTL.x & pcv->maxCUWidthMask, posTL.y & pcv->maxCUHeightMask).offset(-4, -4);
+    const auto maxRight  = (int)pcv->maxCUWidth  - 4 - posRBinCTU.x;
+    const auto maxBottom = (int)pcv->maxCUHeight - 4 - posRBinCTU.y;
+    if (sr.left   < -posLTinCTU.x) { sr.left   = -posLTinCTU.x; }
+    if (sr.top    < -posLTinCTU.y) { sr.top    = -posLTinCTU.y; }
+    if (sr.right  > maxRight)      { sr.right  = maxRight;      }
+    if (sr.bottom > maxBottom)     { sr.bottom = maxBottom;     }
+    // A PU whose masked corner sits on the CTU boundary (0 for top/left, the mask
+    // value for bottom/right) gets an empty window, and zeroMV is raised.
+    const bool onTopLeftEdge     = (posLTinCTU.x == -4) || (posLTinCTU.y == -4);
+    const bool onBottomRightEdge = (posRBinCTU.x == pcv->maxCUWidthMask) || (posRBinCTU.y == pcv->maxCUHeightMask);
+    if (onTopLeftEdge || onBottomRightEdge)
+    {
+      sr.left = sr.right = sr.bottom = sr.top = 0;
+      cStruct.zeroMV = true;
+    }
+  }
+#endif
 }
 
 
@@ -1998,7 +2052,11 @@ void InterSearch::xTZSearch( const PredictionUnit& pu,
     // set search range
     Mv currBestMv(cStruct.iBestX, cStruct.iBestY );
     currBestMv <<= 2;
-    xSetSearchRange( pu, currBestMv, m_iSearchRange>>(bFastSettings?1:0), sr );
+    xSetSearchRange(pu, currBestMv, m_iSearchRange >> (bFastSettings ? 1 : 0), sr
+#if JVET_K0157
+      , cStruct
+#endif
+    );
   }
 
   // start search
@@ -2258,7 +2316,11 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu,
     // set search range
     Mv currBestMv(cStruct.iBestX, cStruct.iBestY );
     currBestMv <<= 2;
-    xSetSearchRange( pu, currBestMv, m_iSearchRange, sr );
+    xSetSearchRange( pu, currBestMv, m_iSearchRange, sr
+#if JVET_K0157
+      , cStruct
+#endif
+    );
   }
 
   // Initial search
@@ -2457,7 +2519,11 @@ void InterSearch::xPatternSearchFracDIF(
 
 
 #if JVET_K0357_AMVR
+#if JVET_K0157
+  if (cStruct.imvShift || (pu.cs->sps->getSpsNext().getUseCompositeRef() && cStruct.zeroMV))
+#else
   if( cStruct.imvShift )
+#endif
   {
     m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY + iOffset, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !bIsLosslessCoded );
     ruiCost = m_cDistParam.distFunc( m_cDistParam );
@@ -2581,11 +2647,11 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   pu.cu->affine = true;
   pu.mergeFlag = false;
 
+
   // Uni-directional prediction
   for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ )
   {
     RefPicList  eRefPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
-
     for ( int iRefIdxTemp = 0; iRefIdxTemp < slice.getNumRefIdx(eRefPicList); iRefIdxTemp++ )
     {
       // Get RefIdx bits
@@ -2715,7 +2781,6 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       {
         xAffineMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
       }
-
       // Set best AMVP Index
       xCopyAffineAMVPInfo( affiAMVPInfoTemp[eRefPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp] );
       xCheckBestAffineMVP( pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp );
@@ -2866,7 +2931,6 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
 
       iRefStart = 0;
       iRefEnd   = slice.getNumRefIdx(eRefPicList) - 1;
-
       for ( int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++ )
       {
 #if JVET_K0185_AFFINE_6PARA_ENC // reuse refidx of 4-para
@@ -2955,6 +3019,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   uiBits[1]  = bitsValidList1;
   uiCost[1]  = costValidList1;
 
+
   // Affine ME result set
   if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) // Bi
   {
@@ -3184,7 +3249,7 @@ void InterSearch::xCheckBestAffineMVP( PredictionUnit &pu, AffineAMVPInfo &affin
       if ( iVerIdx != 0 )
       {
 #if JVET_K0185_AFFINE_6PARA_ENC
-        secondPred = (iVerIdx == 1 ? affineAMVPInfo.mvCandRT[iMVPIdx] : affineAMVPInfo.mvCandLT[iMVPIdx]) + (acMv[0] - affineAMVPInfo.mvCandLT[iMVPIdx]);
+        secondPred = (iVerIdx == 1 ? affineAMVPInfo.mvCandRT[iMVPIdx] : affineAMVPInfo.mvCandLB[iMVPIdx]) + (acMv[0] - affineAMVPInfo.mvCandLT[iMVPIdx]);
 #else
         secondPred = affineAMVPInfo.mvCandRT[iMVPIdx] + (acMv[0] - affineAMVPInfo.mvCandLT[iMVPIdx]);
 #endif
@@ -3228,6 +3293,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
                                            Distortion&     ruiCost,
                                            bool            bBi )
 {
+
   const int width  = pu.Y().width;
   const int height = pu.Y().height;
 
@@ -3245,7 +3311,8 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
     // NOTE: Other buf contains predicted signal from another direction
     PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) );
     origBufTmp.copyFrom(origBuf);
-    origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs());
+    origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs()
+                             );
     pBuf = &origBufTmp;
 
     fWeight = 0.5;
@@ -4184,7 +4251,8 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti
 #endif
 }
 
-void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist /*= NULL*/)
+void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist /*= NULL*/
+)
 {
   const UnitArea& currArea = partitioner.currArea();
   const SPS &sps           = *cs.sps;
@@ -4253,10 +4321,8 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
     saveCS.picture = cs.picture;
     saveCS.area.repositionTo(currArea);
     saveCS.clearTUs();
-
     TransformUnit &bestTU = saveCS.addTU( currArea, partitioner.chType );
 
-
     for( uint32_t c = 0; c < numTBlocks; c++ )
     {
       const ComponentID compID    = ComponentID(c);
@@ -4541,7 +4607,6 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
     for (uint32_t ch = 0; ch < numValidComp; ch++)
     {
       const ComponentID compID = ComponentID(ch);
-
       if (tu.blocks[compID].valid())
       {
         if( cs.pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isChroma(compID) && uiAbsSum[COMPONENT_Y] )
@@ -4588,7 +4653,8 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 
     do
     {
-      xEstimateInterResidualQT(*csSplit, partitioner, bCheckFull ? nullptr : puiZeroDist);
+      xEstimateInterResidualQT(*csSplit, partitioner, bCheckFull ? nullptr : puiZeroDist
+      );
 
       csSplit->cost = m_pcRdCost->calcRdCost( csSplit->fracBits, csSplit->dist );
 #if JVET_K1000_SIMPLIFIED_EMT
@@ -4656,6 +4722,9 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
       {
         cs.useSubStructure( *csSplit, partitioner.chType, currArea, false, false, false, true );
         cs.cost = csSplit->cost;
+#if JVET_K1000_SIMPLIFIED_EMT
+        isSplit = true;
+#endif
       }
     }
 
@@ -4680,7 +4749,8 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 #endif
 }
 
-void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &partitioner, const bool &skipResidual)
+void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &partitioner, const bool &skipResidual
+)
 {
   CodingUnit &cu = *cs.getCU( partitioner.chType );
 
@@ -4701,13 +4771,11 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
 
     // add an empty TU
     cs.addTU(cs.area, partitioner.chType);
-
     Distortion distortion = 0;
 
     for (int comp = 0; comp < numValidComponents; comp++)
     {
       const ComponentID compID = ComponentID(comp);
-
       CPelBuf reco = cs.getRecoBuf (compID);
       CPelBuf org  = cs.getOrgBuf  (compID);
 #if WCG_EXT
@@ -4747,7 +4815,6 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
   //  Residual coding.
   cs.getResiBuf().copyFrom (cs.getOrgBuf());
   cs.getResiBuf().subtract (cs.getPredBuf());
-
   Distortion zeroDistortion = 0;
 
   const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() );
@@ -4755,7 +4822,6 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
   cs.getOrgResiBuf().copyFrom(cs.getResiBuf());
 
   xEstimateInterResidualQT(cs, partitioner, &zeroDistortion);
-
   TransformUnit &firstTU = *cs.getTU( partitioner.chType );
 
   cu.rootCbf = false;
@@ -4777,7 +4843,7 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
   const int  numValidTBlocks   = ::getNumberValidTBlocks( *cs.pcv );
   for (uint32_t i = 0; i < numValidTBlocks; i++)
   {
-    cu.rootCbf |= TU::getCbf( firstTU, ComponentID( i ) );
+    cu.rootCbf |= TU::getCbfAtDepth(firstTU, ComponentID(i), 0);
   }
 
   // -------------------------------------------------------
@@ -4819,7 +4885,6 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
   for (int comp = 0; comp < numValidComponents; comp++)
   {
     const ComponentID compID = ComponentID(comp);
-
     CPelBuf reco = cs.getRecoBuf (compID);
     CPelBuf org  = cs.getOrgBuf  (compID);
 
@@ -4874,7 +4939,6 @@ uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner &
     {
       m_CABACEstimator->cu_transquant_bypass_flag( cu );
     }
-
     m_CABACEstimator->cu_skip_flag( cu );
     m_CABACEstimator->pred_mode   ( cu );
     m_CABACEstimator->cu_pred_data( cu );
@@ -4888,4 +4952,3 @@ uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner &
   return fracBits;
 }
 
-
diff --git a/source/Lib/EncoderLib/InterSearch.h b/source/Lib/EncoderLib/InterSearch.h
index 6cf72e338edbd4629b594809270d17edb8db4a16..cbdaaf21a92f2d7af4927affbf219613b26833ff 100644
--- a/source/Lib/EncoderLib/InterSearch.h
+++ b/source/Lib/EncoderLib/InterSearch.h
@@ -53,7 +53,6 @@
 #if JVET_K0367_AFFINE_FIX_POINT
 #include "CommonLib/AffineGradientSearch.h"
 #endif
-
 //! \ingroup EncoderLib
 //! \{
 
@@ -64,7 +63,6 @@
 static const uint32_t MAX_NUM_REF_LIST_ADAPT_SR = 2;
 static const uint32_t MAX_IDX_ADAPT_SR          = 33;
 static const uint32_t NUM_MV_PREDICTORS         = 3;
-
 class EncModeCtrl;
 
 /// encoder search class
@@ -95,7 +93,6 @@ private:
 
   ClpRng          m_lumaClpRng;
 
-
 protected:
   // interface to option
   EncCfg*         m_pcEncCfg;
@@ -124,7 +121,6 @@ protected:
 
   bool            m_isInitialized;
 
-
 public:
   InterSearch();
   virtual ~InterSearch();
@@ -145,7 +141,6 @@ public:
   void destroy                      ();
 
   void setTempBuffers               (CodingStructure ****pSlitCS, CodingStructure ****pFullCS, CodingStructure **pSaveCS );
-
 #if ENABLE_SPLIT_PARALLELISM
   void copyState                    ( const InterSearch& other );
 #endif
@@ -178,6 +173,10 @@ protected:
     int         subShiftMode;
 #if JVET_K0357_AMVR
     unsigned    imvShift;
+#endif
+#if JVET_K0157
+    bool        inCtuSearch;
+    bool        zeroMV;
 #endif
   } IntTZSearchStruct;
 
@@ -199,7 +198,6 @@ public:
   /// set ME search range
   void setAdaptiveSearchRange       ( int iDir, int iRefIdx, int iSearchRange) { CHECK(iDir >= MAX_NUM_REF_LIST_ADAPT_SR || iRefIdx>=int(MAX_IDX_ADAPT_SR), "Invalid index"); m_aaiAdaptSR[iDir][iRefIdx] = iSearchRange; }
 
-
 protected:
 
   // -------------------------------------------------------------------------------------------------------------------
@@ -291,6 +289,9 @@ protected:
                                     const Mv&             cMvPred,
                                     const int             iSrchRng,
                                     SearchRange&          sr
+#if JVET_K0157
+                                  , IntTZSearchStruct &  cStruct
+#endif
                                   );
 
   void xPatternSearchFast         ( const PredictionUnit& pu,
@@ -368,7 +369,10 @@ protected:
 
   void xCopyAffineAMVPInfo        ( AffineAMVPInfo& src, AffineAMVPInfo& dst );
   void xCheckBestAffineMVP        ( PredictionUnit &pu, AffineAMVPInfo &affineAMVPInfo, RefPicList eRefPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost );
+
 #endif
+
+
   void xExtDIFUpSamplingH         ( CPelBuf* pcPattern );
   void xExtDIFUpSamplingQ         ( CPelBuf* pcPatternKey, Mv halfPelRef );
 
@@ -378,12 +382,13 @@ protected:
 
   void  setWpScalingDistParam     ( int iRefIdx, RefPicList eRefPicListCur, Slice *slice );
 
-
 public:
 
-  void encodeResAndCalcRdInterCU  (CodingStructure &cs, Partitioner &partitioner, const bool &skipResidual);
+  void encodeResAndCalcRdInterCU  (CodingStructure &cs, Partitioner &partitioner, const bool &skipResidual
+  );
   void xEncodeInterResidualQT     (CodingStructure &cs, Partitioner &partitioner, const ComponentID &compID);
-  void xEstimateInterResidualQT   (CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist = NULL);
+  void xEstimateInterResidualQT   (CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist = NULL
+  );
   uint64_t xGetSymbolFracBitsInter  (CodingStructure &cs, Partitioner &partitioner);
 
 };// END CLASS DEFINITION EncSearch
diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp
index b0696fe49b51d4034356e9f7f0a18fc0c46bff17..eb8519e79dbe930bd7cc2d13f507cce82b8e21ce 100644
--- a/source/Lib/EncoderLib/IntraSearch.cpp
+++ b/source/Lib/EncoderLib/IntraSearch.cpp
@@ -915,7 +915,8 @@ void IntraSearch::xEncIntraHeader(CodingStructure &cs, Partitioner &partitioner,
     // CU header
     if( isFirst )
     {
-      if( !cs.slice->isIntra() )
+      if( !cs.slice->isIntra() 
+        )
       {
         if( cs.pps->getTransquantBypassEnabledFlag() )
         {
diff --git a/source/Lib/EncoderLib/RateCtrl.cpp b/source/Lib/EncoderLib/RateCtrl.cpp
index 6c265b9324123e14fa98c0d1d16761cd5100e890..4b4fffb03c458a4b6c7ba89e41b63c7c3cf1b066 100644
--- a/source/Lib/EncoderLib/RateCtrl.cpp
+++ b/source/Lib/EncoderLib/RateCtrl.cpp
@@ -39,6 +39,10 @@
 
 #include <cmath>
 
+#if JVET_K0390_RATECTRL
+#define LAMBDA_PREC                                           1000000   // scale factor: estimated lambdas are rounded to multiples of 1/LAMBDA_PREC
+#endif
+
 using namespace std;
 
 //sequence level
@@ -66,6 +70,9 @@ EncRCSeq::EncRCSeq()
   m_useLCUSeparateModel = false;
   m_adaptiveBit         = 0;
   m_lastLambda          = 0.0;
+#if RATECTRL_FIX_FULLNBIT
+  m_bitDepth          = 0;
+#endif
 }
 
 EncRCSeq::~EncRCSeq()
@@ -138,6 +145,9 @@ void EncRCSeq::create( int totalFrames, int targetBitrate, int frameRate, int GO
   {
     m_picPara[i].m_alpha = 0.0;
     m_picPara[i].m_beta  = 0.0;
+#if JVET_K0390_RATECTRL
+    m_picPara[i].m_validPix = -1;
+#endif
   }
 
   if ( m_useLCUSeparateModel )
@@ -150,6 +160,9 @@ void EncRCSeq::create( int totalFrames, int targetBitrate, int frameRate, int GO
       {
         m_LCUPara[i][j].m_alpha = 0.0;
         m_LCUPara[i][j].m_beta  = 0.0;
+#if JVET_K0390_RATECTRL
+        m_LCUPara[i][j].m_validPix = -1;
+#endif
       }
     }
   }
@@ -217,13 +230,47 @@ void EncRCSeq::initPicPara( TRCParameter* picPara )
     {
       if (i>0)
       {
+#if RATECTRL_FIX_FULLNBIT
+#if DISTORTION_LAMBDA_BUGFIX
+        int bitdepth_luma_scale =
+          2
+          * (m_bitDepth - 8
+            - DISTORTION_PRECISION_ADJUSTMENT(m_bitDepth));
+#else
+#if FULL_NBIT
+        int bitdepth_luma_scale = 2 * (m_bitDepth - 8);
+#else
+        int    bitdepth_luma_scale = 0;
+#endif
+#endif
+        m_picPara[i].m_alpha = 3.2003 * pow(2.0, bitdepth_luma_scale);
+        m_picPara[i].m_beta = -1.367;
+#else
         m_picPara[i].m_alpha = 3.2003;
         m_picPara[i].m_beta  = -1.367;
+#endif
       }
       else
       {
+#if RATECTRL_FIX_FULLNBIT
+#if DISTORTION_LAMBDA_BUGFIX
+        int bitdepth_luma_scale =
+          2
+          * (m_bitDepth - 8
+            - DISTORTION_PRECISION_ADJUSTMENT(m_bitDepth));
+#else
+#if FULL_NBIT
+        int bitdepth_luma_scale = 2 * (m_bitDepth - 8);
+#else
+        int    bitdepth_luma_scale = 0;
+#endif
+#endif
+        m_picPara[i].m_alpha = pow(2.0, bitdepth_luma_scale) * ALPHA;
+        m_picPara[i].m_beta = BETA2;
+#else
         m_picPara[i].m_alpha = ALPHA;
         m_picPara[i].m_beta  = BETA2;
+#endif
       }
     }
   }
@@ -276,7 +323,11 @@ void EncRCSeq::setAllBitRatio( double basicLambda, double* equaCoeffA, double* e
   int* bitsRatio = new int[m_GOPSize];
   for ( int i=0; i<m_GOPSize; i++ )
   {
+#if JVET_K0390_RATECTRL
+    bitsRatio[i] = (int)(equaCoeffA[i] * pow(basicLambda, equaCoeffB[i]) * (double)getPicPara(getGOPID2Level(i)).m_validPix);
+#else
     bitsRatio[i] = (int)( equaCoeffA[i] * pow( basicLambda, equaCoeffB[i] ) * m_numberOfPixel );
+#endif
   }
   initBitsRatio( bitsRatio );
   delete[] bitsRatio;
@@ -353,9 +404,75 @@ void EncRCGOP::create( EncRCSeq* encRCSeq, int numPic )
         lambdaRatio[7] = 12.3;
       }
     }
+#if JVET_K0390_RATECTRL
+    else if (encRCSeq->getAdaptiveBits() == 3)  // for GOP size = 16, random access case
+    {
+      {
+#if RATECTRL_FIX_FULLNBIT
+#if DISTORTION_LAMBDA_BUGFIX
+        int bitdepth_luma_scale =
+          2
+          * (encRCSeq->getbitDepth() - 8
+            - DISTORTION_PRECISION_ADJUSTMENT(encRCSeq->getbitDepth()));
+#else
+#if FULL_NBIT
+        int bitdepth_luma_scale = 2 * (encRCSeq->getbitDepth() - 8);
+#else
+        int    bitdepth_luma_scale = 0;
+#endif
+#endif
+
+        double hierarQp = 4.2005 * log(encRCSeq->getLastLambda() / pow(2.0, bitdepth_luma_scale)) + 13.7122;  //  the qp of POC16
+        double qpLev2 = (hierarQp + 0.0) + 0.2016    * (hierarQp + 0.0) - 4.8848;
+        double qpLev3 = (hierarQp + 3.0) + 0.22286 * (hierarQp + 3.0) - 5.7476;
+        double qpLev4 = (hierarQp + 4.0) + 0.2333    * (hierarQp + 4.0) - 5.9;
+        double qpLev5 = (hierarQp + 5.0) + 0.3            * (hierarQp + 5.0) - 7.1444;
+
+        double lambdaLev1 = exp((hierarQp - 13.7122) / 4.2005) *pow(2.0, bitdepth_luma_scale);
+        double lambdaLev2 = exp((qpLev2 - 13.7122) / 4.2005) * pow(2.0, bitdepth_luma_scale);
+        double lambdaLev3 = exp((qpLev3 - 13.7122) / 4.2005) * pow(2.0, bitdepth_luma_scale);
+        double lambdaLev4 = exp((qpLev4 - 13.7122) / 4.2005) * pow(2.0, bitdepth_luma_scale);
+        double lambdaLev5 = exp((qpLev5 - 13.7122) / 4.2005) * pow(2.0, bitdepth_luma_scale);
+#else
+        double hierarQp = 4.2005 * log(encRCSeq->getLastLambda()) + 13.7122;  //  the qp of POC16
+        double qpLev2 = (hierarQp + 0.0) + 0.2016    * (hierarQp + 0.0) - 4.8848;
+        double qpLev3 = (hierarQp + 3.0) + 0.22286 * (hierarQp + 3.0) - 5.7476;
+        double qpLev4 = (hierarQp + 4.0) + 0.2333    * (hierarQp + 4.0) - 5.9;
+        double qpLev5 = (hierarQp + 5.0) + 0.3            * (hierarQp + 5.0) - 7.1444;
+
+        double lambdaLev1 = exp((hierarQp - 13.7122) / 4.2005);
+        double lambdaLev2 = exp((qpLev2   - 13.7122) / 4.2005);
+        double lambdaLev3 = exp((qpLev3   - 13.7122) / 4.2005);
+        double lambdaLev4 = exp((qpLev4   - 13.7122) / 4.2005);
+        double lambdaLev5 = exp((qpLev5   - 13.7122) / 4.2005);
+#endif
+
+        lambdaRatio[0] = 1.0;
+        lambdaRatio[1] = lambdaLev2 / lambdaLev1;
+        lambdaRatio[2] = lambdaLev3 / lambdaLev1;
+        lambdaRatio[3] = lambdaLev4 / lambdaLev1;
+        lambdaRatio[4] = lambdaLev5 / lambdaLev1;
+        lambdaRatio[5] = lambdaLev5 / lambdaLev1;
+        lambdaRatio[6] = lambdaLev4 / lambdaLev1;
+        lambdaRatio[7] = lambdaLev5 / lambdaLev1;
+        lambdaRatio[8] = lambdaLev5 / lambdaLev1;
+        lambdaRatio[9] = lambdaLev3 / lambdaLev1;
+        lambdaRatio[10] = lambdaLev4 / lambdaLev1;
+        lambdaRatio[11] = lambdaLev5 / lambdaLev1;
+        lambdaRatio[12] = lambdaLev5 / lambdaLev1;
+        lambdaRatio[13] = lambdaLev4 / lambdaLev1;
+        lambdaRatio[14] = lambdaLev5 / lambdaLev1;
+        lambdaRatio[15] = lambdaLev5 / lambdaLev1;
+      }
+    }
+#endif
 
     xCalEquaCoeff( encRCSeq, lambdaRatio, equaCoeffA, equaCoeffB, encRCSeq->getGOPSize() );
+#if JVET_K0390_RATECTRL
+    basicLambda = xSolveEqua(encRCSeq, targetBpp, equaCoeffA, equaCoeffB, encRCSeq->getGOPSize());
+#else
     basicLambda = xSolveEqua( targetBpp, equaCoeffA, equaCoeffB, encRCSeq->getGOPSize() );
+#endif
     encRCSeq->setAllBitRatio( basicLambda, equaCoeffA, equaCoeffB );
 
     delete []lambdaRatio;
@@ -396,7 +513,11 @@ void EncRCGOP::xCalEquaCoeff( EncRCSeq* encRCSeq, double* lambdaRatio, double* e
   }
 }
 
+#if JVET_K0390_RATECTRL
+double EncRCGOP::xSolveEqua(EncRCSeq* encRCSeq, double targetBpp, double* equaCoeffA, double* equaCoeffB, int GOPSize)
+#else
 double EncRCGOP::xSolveEqua( double targetBpp, double* equaCoeffA, double* equaCoeffB, int GOPSize )
+#endif
 {
   double solution = 100.0;
   double minNumber = 0.1;
@@ -406,7 +527,13 @@ double EncRCGOP::xSolveEqua( double targetBpp, double* equaCoeffA, double* equaC
     double fx = 0.0;
     for ( int j=0; j<GOPSize; j++ )
     {
+#if JVET_K0390_RATECTRL
+      double tmpBpp = equaCoeffA[j] * pow(solution, equaCoeffB[j]);
+      double actualBpp = tmpBpp * (double)encRCSeq->getPicPara(encRCSeq->getGOPID2Level(j)).m_validPix / (double)encRCSeq->getNumPixel();
+      fx += actualBpp;
+#else
       fx += equaCoeffA[j] * pow( solution, equaCoeffB[j] );
+#endif
     }
 
     if ( fabs( fx - targetBpp ) < 0.000001 )
@@ -484,6 +611,10 @@ EncRCPic::EncRCPic()
   m_picActualBits       = 0;
   m_picQP               = 0;
   m_picLambda           = 0.0;
+#if JVET_K0390_RATECTRL
+  m_picMSE              = 0.0;
+  m_validPixelsInPic    = 0;
+#endif
 }
 
 EncRCPic::~EncRCPic()
@@ -641,6 +772,10 @@ void EncRCPic::create( EncRCSeq* encRCSeq, EncRCGOP* encRCGOP, int frameLevel, l
     {
       LCUIdx = j*picWidthInLCU + i;
       m_LCUs[LCUIdx].m_actualBits = 0;
+#if JVET_K0390_RATECTRL
+      m_LCUs[LCUIdx].m_actualSSE  = 0.0;
+      m_LCUs[LCUIdx].m_actualMSE  = 0.0;
+#endif
       m_LCUs[LCUIdx].m_QP         = 0;
       m_LCUs[LCUIdx].m_lambda     = 0.0;
       m_LCUs[LCUIdx].m_targetBits = 0;
@@ -654,6 +789,10 @@ void EncRCPic::create( EncRCSeq* encRCSeq, EncRCGOP* encRCGOP, int frameLevel, l
   m_picActualBits       = 0;
   m_picQP               = 0;
   m_picLambda           = 0.0;
+#if JVET_K0390_RATECTRL
+  m_validPixelsInPic    = 0;
+  m_picMSE              = 0.0;
+#endif
 }
 
 void EncRCPic::destroy()
@@ -673,6 +812,19 @@ double EncRCPic::estimatePicLambda( list<EncRCPic*>& listPreviousPictures, Slice
   double alpha         = m_encRCSeq->getPicPara( m_frameLevel ).m_alpha;
   double beta          = m_encRCSeq->getPicPara( m_frameLevel ).m_beta;
   double bpp       = (double)m_targetBits/(double)m_numberOfPixel;
+
+#if JVET_K0390_RATECTRL
+  // Prefer the valid-pixel count stored for this frame level as the bpp denominator
+  // when previous pictures exist and the count is positive; else keep the bpp above.
+  int lastPicValPix = listPreviousPictures.empty()
+                    ? 0
+                    : m_encRCSeq->getPicPara(m_frameLevel).m_validPix;
+  if (lastPicValPix > 0)
+  {
+    bpp = (double)m_targetBits / (double)lastPicValPix;
+  }
+#endif
+
   double estLambda;
   if (eSliceType == I_SLICE)
   {
@@ -727,6 +879,10 @@ double EncRCPic::estimatePicLambda( list<EncRCPic*>& listPreviousPictures, Slice
     estLambda = 0.1;
   }
 
+#if JVET_K0390_RATECTRL
+  // Round estLambda to a multiple of 1/LAMBDA_PREC to avoid platform-dependent results; pow() may return slightly different values on different platforms.
+  estLambda = double(int64_t(estLambda * (double)LAMBDA_PREC + 0.5)) / (double)LAMBDA_PREC;
+#endif
   m_estPicLambda = estLambda;
 
   double totalWeight = 0.0;
@@ -764,7 +920,24 @@ double EncRCPic::estimatePicLambda( list<EncRCPic*>& listPreviousPictures, Slice
 
 int EncRCPic::estimatePicQP( double lambda, list<EncRCPic*>& listPreviousPictures )
 {
+#if RATECTRL_FIX_FULLNBIT
+#if DISTORTION_LAMBDA_BUGFIX
+  int bitdepth_luma_scale =   // 2 * (bit depth - 8 - distortion precision adjustment)
+    2
+    * (m_encRCSeq->getbitDepth() - 8
+      - DISTORTION_PRECISION_ADJUSTMENT(m_encRCSeq->getbitDepth()));
+#else // !DISTORTION_LAMBDA_BUGFIX
+#if FULL_NBIT
+  int bitdepth_luma_scale = 2 * (m_encRCSeq->getbitDepth() - 8);
+#else // !FULL_NBIT
+  int    bitdepth_luma_scale = 0;
+#endif // FULL_NBIT
+#endif // DISTORTION_LAMBDA_BUGFIX
+
+  int QP = int(4.2005 * log(lambda / pow(2.0, bitdepth_luma_scale)) + 13.7122 + 0.5);  // lambda is divided by 2^bitdepth_luma_scale before the log-based lambda-to-QP mapping
+#else // !RATECTRL_FIX_FULLNBIT
   int QP = int( 4.2005 * log( lambda ) + 13.7122 + 0.5 );
+#endif // RATECTRL_FIX_FULLNBIT
 
   int lastLevelQP = g_RCInvalidQPValue;
   int lastPicQP   = g_RCInvalidQPValue;
@@ -895,13 +1068,34 @@ double EncRCPic::getLCUEstLambda( double bpp )
     estLambda = 0.1;
   }
 
+#if JVET_K0390_RATECTRL
+  // Round lambda to a multiple of 1/LAMBDA_PREC so that every platform ends up with the same value; pow() may return slightly different results from one platform to another.
+  estLambda = double(int64_t(estLambda * (double)LAMBDA_PREC + 0.5)) / (double)LAMBDA_PREC;
+#endif
   return estLambda;
 }
 
 int EncRCPic::getLCUEstQP( double lambda, int clipPicQP )
 {
   int LCUIdx = getLCUCoded();
+#if RATECTRL_FIX_FULLNBIT
+#if DISTORTION_LAMBDA_BUGFIX
+  int bitdepth_luma_scale =   // 2 * (bit depth - 8 - distortion precision adjustment)
+    2
+    * (m_encRCSeq->getbitDepth() - 8
+      - DISTORTION_PRECISION_ADJUSTMENT(m_encRCSeq->getbitDepth()));
+#else // !DISTORTION_LAMBDA_BUGFIX
+#if FULL_NBIT
+  int bitdepth_luma_scale = 2 * (m_encRCSeq->getbitDepth() - 8);
+#else // !FULL_NBIT
+  int    bitdepth_luma_scale = 0;
+#endif // FULL_NBIT
+#endif // DISTORTION_LAMBDA_BUGFIX
+
+  int estQP = int(4.2005 * log(lambda / pow(2.0, bitdepth_luma_scale)) + 13.7122 + 0.5);  // lambda is divided by 2^bitdepth_luma_scale before the log-based lambda-to-QP mapping
+#else // !RATECTRL_FIX_FULLNBIT
   int estQP = int( 4.2005 * log( lambda ) + 13.7122 + 0.5 );
+#endif // RATECTRL_FIX_FULLNBIT
 
   //for Lambda clip, LCU level clip
   int clipNeighbourQP = g_RCInvalidQPValue;
@@ -929,6 +1123,9 @@ void EncRCPic::updateAfterCTU( int LCUIdx, int bits, int QP, double lambda, bool
   m_LCUs[LCUIdx].m_actualBits = bits;
   m_LCUs[LCUIdx].m_QP         = QP;
   m_LCUs[LCUIdx].m_lambda     = lambda;
+#if JVET_K0390_RATECTRL
+  m_LCUs[LCUIdx].m_actualSSE  = m_LCUs[LCUIdx].m_actualMSE * m_LCUs[LCUIdx].m_numberOfPixel;
+#endif
 
   m_LCULeft--;
   m_bitsLeft   -= bits;
@@ -964,7 +1161,34 @@ void EncRCPic::updateAfterCTU( int LCUIdx, int bits, int QP, double lambda, bool
     TRCParameter rcPara;
     rcPara.m_alpha = alpha;
     rcPara.m_beta  = beta;
+#if JVET_K0390_RATECTRL
+    // An invalid QP under adaptive-bits mode 1 marks this CTU as having no valid pixels.
+    const bool ctuInvalid = (QP == g_RCInvalidQPValue) && (m_encRCSeq->getAdaptiveBits() == 1);
+    rcPara.m_validPix     = ctuInvalid ? 0 : LCUTotalPixels;
+
+    // Refit alpha/beta from the measured distortion. A CTU with MSE == 0 is skipped:
+    // bpp * lambda / MSE would be Inf or NaN and would be handed to setLCUPara(), so in
+    // that case rcPara keeps the alpha/beta assigned above.
+    const double MSE = m_LCUs[LCUIdx].m_actualMSE;
+    if (MSE > 0.0)
+    {
+      const double updatedK = bpp * inputLambda / MSE;
+      const double updatedC = MSE / pow(bpp, -updatedK);
+      rcPara.m_alpha = updatedC * updatedK;
+      rcPara.m_beta  = -updatedK - 1.0;
+
+      // With bpp <= 0 or K <= 0.0001 the fitted alpha is clamped into its allowed range.
+      if (!(bpp > 0 && updatedK > 0.0001))
+      {
+        rcPara.m_alpha = Clip3(0.0001, g_RCAlphaMaxValue, rcPara.m_alpha);
+      }
+    }
+
+    // Store the parameters for this frame level and CTU index.
+    m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara);
+#else
     m_encRCSeq->setLCUPara( m_frameLevel, LCUIdx, rcPara );
+#endif
 
     return;
   }
@@ -981,7 +1205,34 @@ void EncRCPic::updateAfterCTU( int LCUIdx, int bits, int QP, double lambda, bool
   TRCParameter rcPara;
   rcPara.m_alpha = alpha;
   rcPara.m_beta  = beta;
+#if JVET_K0390_RATECTRL
+  // An invalid QP under adaptive-bits mode 1 marks this CTU as having no valid pixels.
+  const bool ctuInvalid = (QP == g_RCInvalidQPValue) && (m_encRCSeq->getAdaptiveBits() == 1);
+  rcPara.m_validPix     = ctuInvalid ? 0 : LCUTotalPixels;
+
+  // Refit alpha/beta from the measured distortion. A CTU with MSE == 0 is skipped:
+  // bpp * lambda / MSE would be Inf or NaN and would be handed to setLCUPara(), so in
+  // that case rcPara keeps the alpha/beta assigned above.
+  const double MSE = m_LCUs[LCUIdx].m_actualMSE;
+  if (MSE > 0.0)
+  {
+    const double updatedK = bpp * inputLambda / MSE;
+    const double updatedC = MSE / pow(bpp, -updatedK);
+    rcPara.m_alpha = updatedC * updatedK;
+    rcPara.m_beta  = -updatedK - 1.0;
+
+    // With bpp <= 0 or K <= 0.0001 the fitted alpha is clamped into its allowed range.
+    if (!(bpp > 0 && updatedK > 0.0001))
+    {
+      rcPara.m_alpha = Clip3(0.0001, g_RCAlphaMaxValue, rcPara.m_alpha);
+    }
+  }
+
+  // Store the parameters for this frame level and CTU index.
+  m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara);
+#else
   m_encRCSeq->setLCUPara( m_frameLevel, LCUIdx, rcPara );
+#endif
 
 }
 
@@ -1017,16 +1268,41 @@ double EncRCPic::calAverageLambda()
   double totalLambdas = 0.0;
   int numTotalLCUs = 0;
 
+#if JVET_K0390_RATECTRL
+  double totalSSE = 0.0;
+  int totalPixels = 0;
+#endif
   int i;
   for ( i=0; i<m_numberOfLCU; i++ )
   {
     if ( m_LCUs[i].m_lambda > 0.01 )
     {
+#if JVET_K0390_RATECTRL
+      // When adaptive-bits mode 1 is active, only CTUs with a positive stored QP are
+      // counted; in every other mode each CTU that reaches this point is counted.
+      const bool ctuCounts = (m_LCUs[i].m_QP > 0) || (m_encRCSeq->getAdaptiveBits() != 1);
+      if (ctuCounts)
+      {
+        const int ctuPixels = m_LCUs[i].m_numberOfPixel;
+        // lambda average, accumulated in the log domain
+        totalLambdas += log(m_LCUs[i].m_lambda);
+        numTotalLCUs++;
+
+        // valid-pixel count and SSE/pixel totals that feed the picture MSE
+        m_validPixelsInPic += ctuPixels;
+        totalSSE           += m_LCUs[i].m_actualSSE;
+        totalPixels        += ctuPixels;
+      }
+#else
       totalLambdas += log( m_LCUs[i].m_lambda );
       numTotalLCUs++;
+#endif
     }
   }
 
+#if JVET_K0390_RATECTRL
+  setPicMSE(totalPixels > 0 ? totalSSE / (double)totalPixels : 1.0); // picture MSE = accumulated SSE per counted pixel; the 1.0 fallback has no meaning of its own and only keeps later divisions by the MSE away from 0
+#endif
   double avgLambda;
   if( numTotalLCUs == 0 )
   {
@@ -1065,7 +1341,11 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do
   {
     // update parameters
     double picActualBits = ( double )m_picActualBits;
+#if JVET_K0390_RATECTRL
+    double picActualBpp = picActualBits / (double)m_validPixelsInPic;
+#else
     double picActualBpp  = picActualBits/(double)m_numberOfPixel;
+#endif
     double calLambda     = alpha * pow( picActualBpp, beta );
     double inputLambda   = m_picLambda;
 
@@ -1080,7 +1360,26 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do
       TRCParameter rcPara;
       rcPara.m_alpha = alpha;
       rcPara.m_beta  = beta;
+#if JVET_K0390_RATECTRL
+      rcPara.m_validPix = m_validPixelsInPic;
+      // Refit alpha/beta from the picture MSE for frame levels above 0 only. A zero MSE
+      // would make bpp * lambda / MSE Inf or NaN, so the values set above are kept then.
+      const double avgMSE = getPicMSE();
+      if (m_frameLevel > 0 && avgMSE > 0.0)
+      {
+        const double updatedK = picActualBpp * averageLambda / avgMSE;
+        const double updatedC = avgMSE / pow(picActualBpp, -updatedK);
+        rcPara.m_alpha = updatedC * updatedK;
+        rcPara.m_beta  = -updatedK - 1.0;
+      }
+
+      if (m_validPixelsInPic > 0)  // without valid pixels the stored parameters of this level stay untouched
+      {
+        m_encRCSeq->setPicPara(m_frameLevel, rcPara);
+      }
+#else
       m_encRCSeq->setPicPara( m_frameLevel, rcPara );
+#endif
 
       return;
     }
@@ -1099,8 +1398,27 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do
   TRCParameter rcPara;
   rcPara.m_alpha = alpha;
   rcPara.m_beta  = beta;
+#if JVET_K0390_RATECTRL
+  // Refit alpha/beta from the picture MSE for frame levels above 0 only. A zero MSE
+  // would make bpp * lambda / MSE Inf or NaN, so the values set above are kept then.
+  const double avgMSE = getPicMSE();
+  if (m_frameLevel > 0 && avgMSE > 0.0)
+  {
+    const double picActualBpp = (double)m_picActualBits / (double)m_validPixelsInPic;
+    const double updatedK     = picActualBpp * averageLambda / avgMSE;
+    const double updatedC     = avgMSE / pow(picActualBpp, -updatedK);
+    rcPara.m_alpha = updatedC * updatedK;
+    rcPara.m_beta  = -updatedK - 1.0;
+  }
+  rcPara.m_validPix = m_validPixelsInPic;
 
+  if (m_validPixelsInPic > 0)  // without valid pixels the stored parameters of this level stay untouched
+  {
+    m_encRCSeq->setPicPara(m_frameLevel, rcPara);
+  }
+#else
   m_encRCSeq->setPicPara( m_frameLevel, rcPara );
+#endif
 
   if ( m_frameLevel == 1 )
   {
@@ -1188,12 +1506,38 @@ double EncRCPic::getLCUEstLambdaAndQP(double bpp, int clipPicQP, int *estQP)
     minQP = max(clipNeighbourQP - 1, minQP);
   }
 
+#if RATECTRL_FIX_FULLNBIT
+#if DISTORTION_LAMBDA_BUGFIX
+  int bitdepth_luma_scale =   // 2 * (bit depth - 8 - distortion precision adjustment)
+    2
+    * (m_encRCSeq->getbitDepth() - 8
+      - DISTORTION_PRECISION_ADJUSTMENT(m_encRCSeq->getbitDepth()));
+#else // !DISTORTION_LAMBDA_BUGFIX
+#if FULL_NBIT
+  int bitdepth_luma_scale = 2 * (m_encRCSeq->getbitDepth() - 8);
+#else // !FULL_NBIT
+  int    bitdepth_luma_scale = 0;
+#endif // FULL_NBIT
+#endif // DISTORTION_LAMBDA_BUGFIX
+
+  double maxLambda = exp(((double)(maxQP + 0.49) - 13.7122) / 4.2005) * pow(2.0, bitdepth_luma_scale);  // lambda bound for maxQP, multiplied by 2^bitdepth_luma_scale
+  double minLambda = exp(((double)(minQP - 0.49) - 13.7122) / 4.2005) * pow(2.0, bitdepth_luma_scale);  // lambda bound for minQP, multiplied by 2^bitdepth_luma_scale
+#else // !RATECTRL_FIX_FULLNBIT
   double maxLambda=exp(((double)(maxQP+0.49)-13.7122)/4.2005);
   double minLambda=exp(((double)(minQP-0.49)-13.7122)/4.2005);
+#endif // RATECTRL_FIX_FULLNBIT
 
   estLambda = Clip3(minLambda, maxLambda, estLambda);
 
+#if JVET_K0390_RATECTRL
+  // Round lambda to a multiple of 1/LAMBDA_PREC so that every platform ends up with the same value; pow() may return slightly different results from one platform to another.
+  estLambda = double(int64_t(estLambda * (double)LAMBDA_PREC + 0.5)) / (double)LAMBDA_PREC;
+#endif
+#if RATECTRL_FIX_FULLNBIT
+  *estQP = int(4.2005 * log(estLambda / pow(2.0, bitdepth_luma_scale)) + 13.7122 + 0.5);
+#else
   *estQP = int( 4.2005 * log(estLambda) + 13.7122 + 0.5 );
+#endif
   *estQP = Clip3(minQP, maxQP, *estQP);
 
   return estLambda;
@@ -1231,7 +1575,11 @@ void RateCtrl::destroy()
   }
 }
 
+#if RATECTRL_FIX_FULLNBIT
+void RateCtrl::init(int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int bitDepth, int keepHierBits, bool useLCUSeparateModel, GOPEntry  GOPList[MAX_GOP])
+#else
 void RateCtrl::init( int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int keepHierBits, bool useLCUSeparateModel, GOPEntry  GOPList[MAX_GOP] )
+#endif
 {
   destroy();
 
@@ -1251,7 +1599,11 @@ void RateCtrl::init( int totalFrames, int targetBitrate, int frameRate, int GOPS
   {
     numberOfLevel = int( log((double)GOPSize)/log(2.0) + 0.5 ) + 1;
   }
+#if JVET_K0390_RATECTRL
+  if (!isLowdelay && (GOPSize == 16 || GOPSize == 8))
+#else
   if ( !isLowdelay && GOPSize == 8 )
+#endif
   {
     numberOfLevel = int( log((double)GOPSize)/log(2.0) + 0.5 ) + 1;
   }
@@ -1361,6 +1713,92 @@ void RateCtrl::init( int totalFrames, int targetBitrate, int frameRate, int GOPS
         adaptiveBit = 2;
       }
     }
+#if JVET_K0390_RATECTRL
+    else if (GOPSize == 16 && !isLowdelay)
+    {
+      if (bpp > 0.2)
+      {
+        bitsRatio[0] = 10;
+        bitsRatio[1] = 8;
+        bitsRatio[2] = 4;
+        bitsRatio[3] = 2;
+        bitsRatio[4] = 1;
+        bitsRatio[5] = 1;
+        bitsRatio[6] = 2;
+        bitsRatio[7] = 1;
+        bitsRatio[8] = 1;
+        bitsRatio[9] = 4;
+        bitsRatio[10] = 2;
+        bitsRatio[11] = 1;
+        bitsRatio[12] = 1;
+        bitsRatio[13] = 2;
+        bitsRatio[14] = 1;
+        bitsRatio[15] = 1;
+      }
+      else if (bpp > 0.1)
+      {
+        bitsRatio[0] = 15;
+        bitsRatio[1] = 9;
+        bitsRatio[2] = 4;
+        bitsRatio[3] = 2;
+        bitsRatio[4] = 1;
+        bitsRatio[5] = 1;
+        bitsRatio[6] = 2;
+        bitsRatio[7] = 1;
+        bitsRatio[8] = 1;
+        bitsRatio[9] = 4;
+        bitsRatio[10] = 2;
+        bitsRatio[11] = 1;
+        bitsRatio[12] = 1;
+        bitsRatio[13] = 2;
+        bitsRatio[14] = 1;
+        bitsRatio[15] = 1;
+      }
+      else if (bpp > 0.05)
+      {
+        bitsRatio[0] = 40;
+        bitsRatio[1] = 17;
+        bitsRatio[2] = 7;
+        bitsRatio[3] = 2;
+        bitsRatio[4] = 1;
+        bitsRatio[5] = 1;
+        bitsRatio[6] = 2;
+        bitsRatio[7] = 1;
+        bitsRatio[8] = 1;
+        bitsRatio[9] = 7;
+        bitsRatio[10] = 2;
+        bitsRatio[11] = 1;
+        bitsRatio[12] = 1;
+        bitsRatio[13] = 2;
+        bitsRatio[14] = 1;
+        bitsRatio[15] = 1;
+      }
+      else
+      {
+        bitsRatio[0] = 40;
+        bitsRatio[1] = 15;
+        bitsRatio[2] = 6;
+        bitsRatio[3] = 3;
+        bitsRatio[4] = 1;
+        bitsRatio[5] = 1;
+        bitsRatio[6] = 3;
+        bitsRatio[7] = 1;
+        bitsRatio[8] = 1;
+        bitsRatio[9] = 6;
+        bitsRatio[10] = 3;
+        bitsRatio[11] = 1;
+        bitsRatio[12] = 1;
+        bitsRatio[13] = 3;
+        bitsRatio[14] = 1;
+        bitsRatio[15] = 1;
+      }
+
+      if (keepHierBits == 2)
+      {
+        adaptiveBit = 3;
+      }
+    }
+#endif
     else
     {
       msg( WARNING, "\n hierarchical bit allocation is not support for the specified coding structure currently.\n" );
@@ -1397,6 +1835,27 @@ void RateCtrl::init( int totalFrames, int targetBitrate, int frameRate, int GOPS
       GOPID2Level[6] = 4;
       GOPID2Level[7] = 4;
     }
+#if JVET_K0390_RATECTRL
+    else if (GOPSize == 16 && !isLowdelay)
+    {
+      GOPID2Level[0] = 1;
+      GOPID2Level[1] = 2;
+      GOPID2Level[2] = 3;
+      GOPID2Level[3] = 4;
+      GOPID2Level[4] = 5;
+      GOPID2Level[5] = 5;
+      GOPID2Level[6] = 4;
+      GOPID2Level[7] = 5;
+      GOPID2Level[8] = 5;
+      GOPID2Level[9] = 3;
+      GOPID2Level[10] = 4;
+      GOPID2Level[11] = 5;
+      GOPID2Level[12] = 5;
+      GOPID2Level[13] = 4;
+      GOPID2Level[14] = 5;
+      GOPID2Level[15] = 5;
+    }
+#endif
   }
 
   if ( !isLowdelay && GOPSize == 8 )
@@ -1410,11 +1869,35 @@ void RateCtrl::init( int totalFrames, int targetBitrate, int frameRate, int GOPS
     GOPID2Level[6] = 4;
     GOPID2Level[7] = 4;
   }
+#if JVET_K0390_RATECTRL
+  else if (GOPSize == 16 && !isLowdelay)
+  {
+    GOPID2Level[0] = 1;
+    GOPID2Level[1] = 2;
+    GOPID2Level[2] = 3;
+    GOPID2Level[3] = 4;
+    GOPID2Level[4] = 5;
+    GOPID2Level[5] = 5;
+    GOPID2Level[6] = 4;
+    GOPID2Level[7] = 5;
+    GOPID2Level[8] = 5;
+    GOPID2Level[9] = 3;
+    GOPID2Level[10] = 4;
+    GOPID2Level[11] = 5;
+    GOPID2Level[12] = 5;
+    GOPID2Level[13] = 4;
+    GOPID2Level[14] = 5;
+    GOPID2Level[15] = 5;
+  }
+#endif
 
   m_encRCSeq = new EncRCSeq;
   m_encRCSeq->create( totalFrames, targetBitrate, frameRate, GOPSize, picWidth, picHeight, LCUWidth, LCUHeight, numberOfLevel, useLCUSeparateModel, adaptiveBit );
   m_encRCSeq->initBitsRatio( bitsRatio );
   m_encRCSeq->initGOPID2Level( GOPID2Level );
+#if RATECTRL_FIX_FULLNBIT
+  m_encRCSeq->setBitDepth(bitDepth);
+#endif
   m_encRCSeq->initPicPara();
   if ( useLCUSeparateModel )
   {
diff --git a/source/Lib/EncoderLib/RateCtrl.h b/source/Lib/EncoderLib/RateCtrl.h
index 8a6a58c13ebb19ed10b3dfa5449a795772327535..52b44fca853c41ba99692f051005668743e3d5f4 100644
--- a/source/Lib/EncoderLib/RateCtrl.h
+++ b/source/Lib/EncoderLib/RateCtrl.h
@@ -84,12 +84,19 @@ struct TRCLCU
   int m_numberOfPixel;
   double m_costIntra;
   int m_targetBitsLeft;
+#if JVET_K0390_RATECTRL
+  double m_actualSSE;   // set by EncRCPic::updateAfterCTU() to m_actualMSE * m_numberOfPixel
+  double m_actualMSE;   // distortion (MSE) of this CTU, consumed by EncRCPic::updateAfterCTU()
+#endif
 };
 
 struct TRCParameter
 {
   double m_alpha;
   double m_beta;
+#if JVET_K0390_RATECTRL
+  int    m_validPix;   // valid-pixel count stored with the model; 0 for a CTU coded with an invalid QP under adaptive-bits mode 1; used as bpp divisor by EncRCPic::estimatePicLambda() when positive
+#endif
 };
 
 class EncRCSeq
@@ -147,6 +154,10 @@ public:
   int    getAdaptiveBits()              { return m_adaptiveBit;  }
   double getLastLambda()                { return m_lastLambda;   }
   void   setLastLambda( double lamdba ) { m_lastLambda = lamdba; }
+#if RATECTRL_FIX_FULLNBIT
+  void setBitDepth(int bitDepth) { m_bitDepth = bitDepth; }   // called from RateCtrl::init() with its bitDepth argument
+  int getbitDepth() { return m_bitDepth; }                     // bit depth used to derive bitdepth_luma_scale in the lambda/QP conversions
+#endif
 
 private:
   int m_totalFrames;
@@ -177,6 +188,9 @@ private:
 
   int m_adaptiveBit;
   double m_lastLambda;
+#if RATECTRL_FIX_FULLNBIT
+  int m_bitDepth;
+#endif
 };
 
 class EncRCGOP
@@ -193,7 +207,11 @@ public:
 private:
   int  xEstGOPTargetBits( EncRCSeq* encRCSeq, int GOPSize );
   void   xCalEquaCoeff( EncRCSeq* encRCSeq, double* lambdaRatio, double* equaCoeffA, double* equaCoeffB, int GOPSize );
+#if JVET_K0390_RATECTRL
+  double xSolveEqua(EncRCSeq* encRCSeq, double targetBpp, double* equaCoeffA, double* equaCoeffB, int GOPSize);
+#else
   double xSolveEqua( double targetBpp, double* equaCoeffA, double* equaCoeffB, int GOPSize );
+#endif
 
 public:
   EncRCSeq* getEncRCSeq()        { return m_encRCSeq; }
@@ -282,6 +300,10 @@ public:
   void setPicEstQP( int QP )                              { m_estPicQP = QP; }
   double getPicEstLambda()                                { return m_estPicLambda; }
   void setPicEstLambda( double lambda )                   { m_picLambda = lambda; }
+#if JVET_K0390_RATECTRL
+  double getPicMSE()                                      { return m_picMSE; }
+  void  setPicMSE(double avgMSE)                           { m_picMSE = avgMSE; }
+#endif
 
 private:
   EncRCSeq* m_encRCSeq;
@@ -309,6 +331,10 @@ private:
   int m_picActualBits;          // the whole picture, including header
   int m_picQP;                  // in integer form
   double m_picLambda;
+#if JVET_K0390_RATECTRL
+  double m_picMSE;              // picture-level MSE, stored through setPicMSE()
+  int m_validPixelsInPic;       // pixels of the CTUs counted in calAverageLambda()
+#endif
 };
 
 class RateCtrl
@@ -318,7 +344,11 @@ public:
   ~RateCtrl();
 
 public:
+#if RATECTRL_FIX_FULLNBIT
+  void init(int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int bitDepth, int keepHierBits, bool useLCUSeparateModel, GOPEntry GOPList[MAX_GOP]);
+#else
   void init( int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int keepHierBits, bool useLCUSeparateModel, GOPEntry GOPList[MAX_GOP] );
+#endif
   void destroy();
   void initRCPic( int frameLevel );
   void initRCGOP( int numberOfPictures );
diff --git a/source/Lib/Utilities/CMakeLists.txt b/source/Lib/Utilities/CMakeLists.txt
index b8976132ec48f5a9b5d7f8ad0e256ec43888bb54..0b0464411f660fffc5e6a3ab431d0c0331724388 100644
--- a/source/Lib/Utilities/CMakeLists.txt
+++ b/source/Lib/Utilities/CMakeLists.txt
@@ -17,7 +17,7 @@ add_library( ${LIB_NAME} STATIC ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} )
 target_compile_definitions( ${LIB_NAME} PUBLIC )
 
 if( ENABLE_VTM )
-  target_compile_definitions( ${LIB_NAME} PUBLIC JEM_TOOLS=0 )
+  target_compile_definitions( ${LIB_NAME} PUBLIC BMS_TOOLS=0 )
 endif()
 
 if( EXTENSION_360_VIDEO )
diff --git a/source/Lib/Utilities/VideoIOYuv.cpp b/source/Lib/Utilities/VideoIOYuv.cpp
index 9494596cc472e587a13d9e6c64835454ef427d1a..5f26d44d5aee4afee42ca848573ef2a0233c3900 100644
--- a/source/Lib/Utilities/VideoIOYuv.cpp
+++ b/source/Lib/Utilities/VideoIOYuv.cpp
@@ -413,24 +413,25 @@ static bool readPlane(Pel* dst,
  * @param fileBitDepth component bit depth in file
  * @return true for success, false in case of error
  */
-static bool writePlane(ostream& fd, const Pel* src, bool is16bit,
+static bool writePlane(ostream& fd, const Pel* src,
+                       const bool is16bit,
                        const uint32_t stride_src,
                        uint32_t width444, uint32_t height444,
                        const ComponentID compID,
                        const ChromaFormat srcFormat,
                        const ChromaFormat fileFormat,
-                       const int fileBitDepth)
+                       const uint32_t fileBitDepth,
+                       const uint32_t packedYUVOutputMode = 0)
 {
   const uint32_t csx_file =getComponentScaleX(compID, fileFormat);
   const uint32_t csy_file =getComponentScaleY(compID, fileFormat);
   const uint32_t csx_src  =getComponentScaleX(compID, srcFormat);
   const uint32_t csy_src  =getComponentScaleY(compID, srcFormat);
 
-/*  const uint32_t stride_src      = stride444>>csx_src;*/
-
-  const uint32_t stride_file      = (width444 * (is16bit ? 2 : 1)) >> csx_file;
-  const uint32_t width_file       = width444 >>csx_file;
-  const uint32_t height_file      = height444>>csy_file;
+  const uint32_t width_file  = width444  >> csx_file;
+  const uint32_t height_file = height444 >> csy_file;
+  const bool     writePYUV   = (packedYUVOutputMode > 0) && (fileBitDepth == 10 || fileBitDepth == 12) && ((width_file & (1 + (fileBitDepth & 3))) == 0);  // packed output needs a 10- or 12-bit file and a plane width that is a multiple of 4 (10 bit: mask 3) or of 2 (12 bit: mask 1)
+  const uint32_t stride_file = writePYUV ? (width444 * fileBitDepth) >> (csx_file + 3) : (width444 * (is16bit ? 2 : 1)) >> csx_file;  // bytes per file line: width_file * fileBitDepth / 8 when packed, otherwise 1 or 2 bytes per sample
 
   std::vector<uint8_t> bufVec(stride_file);
   uint8_t *buf=&(bufVec[0]);
@@ -438,15 +439,106 @@ static bool writePlane(ostream& fd, const Pel* src, bool is16bit,
   const Pel *pSrcBuf         = src;
   const int srcbuf_stride    = stride_src;
 
+  if (writePYUV)
+  {
+    const uint32_t mask_y_file = (1 << csy_file) - 1;
+    const uint32_t mask_y_src  = (1 << csy_src ) - 1;
+    const uint32_t widthS_file = width_file >> (fileBitDepth == 12 ? 1 : 2);
+
+    for (uint32_t y444 = 0; y444 < height444; y444++)
+    {
+      if ((y444 & mask_y_file) == 0)  // write a new line to file
+      {
+        if (csx_file < csx_src)
+        {
+          // eg file is 444, source is 422.
+          const uint32_t sx = csx_src - csx_file;
+
+          if (fileBitDepth == 10)  // write 4 values into 5 bytes
+          {
+            for (uint32_t x = 0; x < widthS_file; x++)
+            {
+              const uint32_t src0 = pSrcBuf[(4*x  ) >> sx];
+              const uint32_t src1 = pSrcBuf[(4*x+1) >> sx];
+              const uint32_t src2 = pSrcBuf[(4*x+2) >> sx];
+              const uint32_t src3 = pSrcBuf[(4*x+3) >> sx];
+
+              buf[5*x  ] = ((src0     ) & 0xff); // src0:76543210
+              buf[5*x+1] = ((src1 << 2) & 0xfc) + ((src0 >> 8) & 0x03);
+              buf[5*x+2] = ((src2 << 4) & 0xf0) + ((src1 >> 6) & 0x0f);
+              buf[5*x+3] = ((src3 << 6) & 0xc0) + ((src2 >> 4) & 0x3f);
+              buf[5*x+4] = ((src3 >> 2) & 0xff); // src3:98765432
+            }
+          }
+          else if (fileBitDepth == 12) //...2 values into 3 bytes
+          {
+            for (uint32_t x = 0; x < widthS_file; x++)
+            {
+              const uint32_t src0 = pSrcBuf[(2*x  ) >> sx];
+              const uint32_t src1 = pSrcBuf[(2*x+1) >> sx];
+
+              buf[3*x  ] = ((src0     ) & 0xff); // src0:76543210
+              buf[3*x+1] = ((src1 << 4) & 0xf0) + ((src0 >> 8) & 0x0f);
+              buf[3*x+2] = ((src1 >> 4) & 0xff); // src1:BA987654
+            }
+          }
+        }
+        else
+        {
+          // eg file is 422, source is 444.
+          const uint32_t sx = csx_file - csx_src;
+
+          if (fileBitDepth == 10)  // write 4 values into 5 bytes
+          {
+            for (uint32_t x = 0; x < widthS_file; x++)
+            {
+              const uint32_t src0 = pSrcBuf[(4*x  ) << sx];
+              const uint32_t src1 = pSrcBuf[(4*x+1) << sx];
+              const uint32_t src2 = pSrcBuf[(4*x+2) << sx];
+              const uint32_t src3 = pSrcBuf[(4*x+3) << sx];
+
+              buf[5*x  ] = ((src0     ) & 0xff); // src0:76543210
+              buf[5*x+1] = ((src1 << 2) & 0xfc) + ((src0 >> 8) & 0x03);
+              buf[5*x+2] = ((src2 << 4) & 0xf0) + ((src1 >> 6) & 0x0f);
+              buf[5*x+3] = ((src3 << 6) & 0xc0) + ((src2 >> 4) & 0x3f);
+              buf[5*x+4] = ((src3 >> 2) & 0xff); // src3:98765432
+            }
+          }
+          else if (fileBitDepth == 12) //...2 values into 3 bytes
+          {
+            for (uint32_t x = 0; x < widthS_file; x++)
+            {
+              const uint32_t src0 = pSrcBuf[(2*x  ) << sx];
+              const uint32_t src1 = pSrcBuf[(2*x+1) << sx];
+
+              buf[3*x  ] = ((src0     ) & 0xff); // src0:76543210
+              buf[3*x+1] = ((src1 << 4) & 0xf0) + ((src0 >> 8) & 0x0f);
+              buf[3*x+2] = ((src1 >> 4) & 0xff); // src1:BA987654
+            }
+          }
+        }
 
+        fd.write (reinterpret_cast<const char*>(buf), stride_file);
+        if (fd.eof() || fd.fail())
+        {
+          return false;
+        }
+      }
 
+      if ((y444 & mask_y_src) == 0)
+      {
+        pSrcBuf += srcbuf_stride;
+      }
+    }
+  }
+  else // !writePYUV
   if (compID!=COMPONENT_Y && (fileFormat==CHROMA_400 || srcFormat==CHROMA_400))
   {
     if (fileFormat!=CHROMA_400)
     {
-      const uint32_t value = 1u << (fileBitDepth - 1);
+      const uint32_t value = 1 << (fileBitDepth - 1);
 
-      for(uint32_t y=0; y< height_file; y++)
+      for (uint32_t y = 0; y < height_file; y++)
       {
         if (!is16bit)
         {
@@ -461,7 +553,7 @@ static bool writePlane(ostream& fd, const Pel* src, bool is16bit,
           uint16_t val(value);
           for (uint32_t x = 0; x < width_file; x++)
           {
-            buf[2*x+0]= (val>>0) & 0xff;
+            buf[2*x  ]= (val>>0) & 0xff;
             buf[2*x+1]= (val>>8) & 0xff;
           }
         }
@@ -541,35 +633,41 @@ static bool writePlane(ostream& fd, const Pel* src, bool is16bit,
   return true;
 }
 
-static bool writeField(ostream& fd, const Pel* top, const Pel* bottom, bool is16bit,
+static bool writeField(ostream& fd, const Pel* top, const Pel* bottom,
+                       const bool is16bit,
                        const uint32_t stride_src,
                        uint32_t width444, uint32_t height444,
                        const ComponentID compID,
                        const ChromaFormat srcFormat,
                        const ChromaFormat fileFormat,
-                       const uint32_t fileBitDepth, const bool isTff)
+                       const uint32_t fileBitDepth, const bool isTff,
+                       const uint32_t packedYUVOutputMode = 0)
 {
   const uint32_t csx_file =getComponentScaleX(compID, fileFormat);
   const uint32_t csy_file =getComponentScaleY(compID, fileFormat);
   const uint32_t csx_src  =getComponentScaleX(compID, srcFormat);
   const uint32_t csy_src  =getComponentScaleY(compID, srcFormat);
 
-  /*const uint32_t stride_src      = stride444>>csx_src;*/
-
-  const uint32_t stride_file      = (width444 * (is16bit ? 2 : 1)) >> csx_file;
-  const uint32_t width_file       = width444 >>csx_file;
-  const uint32_t height_file      = height444>>csy_file;
+  const uint32_t width_file  = width444  >> csx_file;
+  const uint32_t height_file = height444 >> csy_file;
+  const bool     writePYUV   = (packedYUVOutputMode > 0) && (fileBitDepth == 10 || fileBitDepth == 12) && ((width_file & (1 + (fileBitDepth & 3))) == 0);  // packed output needs a 10- or 12-bit file and a plane width that is a multiple of 4 (10 bit: mask 3) or of 2 (12 bit: mask 1)
+  const uint32_t stride_file = writePYUV ? (width444 * fileBitDepth) >> (csx_file + 3) : (width444 * (is16bit ? 2 : 1)) >> csx_file;  // bytes per file line: width_file * fileBitDepth / 8 when packed, otherwise 1 or 2 bytes per sample
 
   std::vector<uint8_t> bufVec(stride_file * 2);
   uint8_t *buf=&(bufVec[0]);
 
+  if (writePYUV)
+  {
+    // TODO: packed output is not implemented for field writing yet; this branch writes no data
+  }
+  else // !writePYUV
   if (compID!=COMPONENT_Y && (fileFormat==CHROMA_400 || srcFormat==CHROMA_400))
   {
     if (fileFormat!=CHROMA_400)
     {
-      const uint32_t value=1<<(fileBitDepth-1);
+      const uint32_t value = 1 << (fileBitDepth - 1);
 
-      for(uint32_t y=0; y< height_file; y++)
+      for (uint32_t y = 0; y < height_file; y++)
       {
         for (uint32_t field = 0; field < 2; field++)
         {
@@ -588,7 +686,7 @@ static bool writeField(ostream& fd, const Pel* top, const Pel* bottom, bool is16
             uint16_t val(value);
             for (uint32_t x = 0; x < width_file; x++)
             {
-              fieldBuffer[2*x+0]= (val>>0) & 0xff;
+              fieldBuffer[2*x  ]= (val>>0) & 0xff;
               fieldBuffer[2*x+1]= (val>>8) & 0xff;
             }
           }
@@ -753,7 +851,7 @@ bool VideoIOYuv::read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSp
       scalePlane( picOrg.get(compID), m_bitdepthShift[chType], minval, maxval);
     }
   }
-  
+
 #if EXTENSION_360_VIDEO
   if (pic.chromaFormat != NUM_CHROMA_FORMAT)
     ColourSpaceConvert(picOrg, pic, ipcsc, true);
@@ -777,7 +875,9 @@ bool VideoIOYuv::read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSp
  * @return true for success, false in case of error
  */
 bool VideoIOYuv::write( const CPelUnitBuf& pic,
-                        const InputColourSpaceConversion ipCSC, int confLeft, int confRight, int confTop, int confBottom, ChromaFormat format, const bool bClipToRec709 )
+                        const InputColourSpaceConversion ipCSC,
+                        const bool bPackedYUVOutputMode,
+                        int confLeft, int confRight, int confTop, int confBottom, ChromaFormat format, const bool bClipToRec709 )
 {
   PelStorage interm;
 
@@ -844,12 +944,13 @@ bool VideoIOYuv::write( const CPelUnitBuf& pic,
   {
     const ComponentID compID      = ComponentID(comp);
     const ChannelType ch          = toChannelType(compID);
-    const uint32_t        csx         = ::getComponentScaleX(compID, format);
-    const uint32_t        csy         = ::getComponentScaleY(compID, format);
+    const uint32_t    csx         = ::getComponentScaleX(compID, format);
+    const uint32_t    csy         = ::getComponentScaleY(compID, format);
     const CPelBuf     area        = picO.get(compID);
     const int         planeOffset = (confLeft >> csx) + (confTop >> csy) * area.stride;
     if (!writePlane (m_cHandle, area.bufAt (0, 0) + planeOffset, is16bit, area.stride,
-                     width444, height444, compID, picO.chromaFormat, format, m_fileBitdepth[ch]))
+                     width444, height444, compID, picO.chromaFormat, format, m_fileBitdepth[ch],
+                     bPackedYUVOutputMode ? 1 : 0))
     {
       retval = false;
     }
@@ -858,7 +959,10 @@ bool VideoIOYuv::write( const CPelUnitBuf& pic,
   return retval;
 }
 
-bool VideoIOYuv::write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBottom, const InputColourSpaceConversion ipCSC, int confLeft, int confRight, int confTop, int confBottom, ChromaFormat format, const bool isTff, const bool bClipToRec709 )
+bool VideoIOYuv::write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBottom,
+                        const InputColourSpaceConversion ipCSC,
+                        const bool bPackedYUVOutputMode,
+                        int confLeft, int confRight, int confTop, int confBottom, ChromaFormat format, const bool isTff, const bool bClipToRec709 )
 {
   PelStorage intermTop;
   PelStorage intermBottom;
@@ -934,8 +1038,8 @@ bool VideoIOYuv::write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBottom,
     const CPelBuf     areaTop    = picTopO.   get( compID );
     const CPelBuf     areaBottom = picBottomO.get( compID );
     const CPelBuf     areaTopY   = picTopO.Y();
-    const uint32_t        width444   = areaTopY.width  - (confLeft + confRight);
-    const uint32_t        height444  = areaTopY.height - (confTop + confBottom);
+    const uint32_t    width444   = areaTopY.width  - (confLeft + confRight);
+    const uint32_t    height444  = areaTopY.height - (confTop + confBottom);
 
     CHECK(areaTop.width  == areaBottom.width , "Incompatible formats");
     CHECK(areaTop.height == areaBottom.height, "Incompatible formats");
@@ -950,12 +1054,13 @@ bool VideoIOYuv::write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBottom,
     const uint32_t csy = ::getComponentScaleY(compID, dstChrFormat );
     const int planeOffset  = (confLeft>>csx) + ( confTop>>csy) * areaTop.stride; //offset is for entire frame - round up for top field and down for bottom field
 
-    if (! writeField(m_cHandle,
+    if (!writeField (m_cHandle,
                      (areaTop.   bufAt(0,0) + planeOffset),
                      (areaBottom.bufAt(0,0) + planeOffset),
                      is16bit,
                      areaTop.stride,
-                     width444, height444, compID, dstChrFormat, format, m_fileBitdepth[ch], isTff))
+                     width444, height444, compID, dstChrFormat, format, m_fileBitdepth[ch], isTff,
+                     bPackedYUVOutputMode ? 1 : 0))
     {
       retval=false;
     }
diff --git a/source/Lib/Utilities/VideoIOYuv.h b/source/Lib/Utilities/VideoIOYuv.h
index 044d274c9ea2d02d26df263a4e2d8dca80c620c9..bf72925bde43753d9de8028bdd1b6586f4003e66 100644
--- a/source/Lib/Utilities/VideoIOYuv.h
+++ b/source/Lib/Utilities/VideoIOYuv.h
@@ -78,10 +78,16 @@ public:
 
   // If fileFormat=NUM_CHROMA_FORMAT, use the format defined by pPicYuv
   bool  write( const CPelUnitBuf& pic,
-               const InputColourSpaceConversion ipCSC, int confLeft = 0, int confRight = 0, int confTop = 0, int confBottom = 0, ChromaFormat format = NUM_CHROMA_FORMAT, const bool bClipToRec709 = false ); ///< write one YUV frame with padding parameter
+               const InputColourSpaceConversion ipCSC,
+               const bool bPackedYUVOutputMode,
+               int confLeft = 0, int confRight = 0, int confTop = 0, int confBottom = 0, ChromaFormat format = NUM_CHROMA_FORMAT, const bool bClipToRec709 = false ); ///< write one YUV frame with padding parameter
 
   // If fileFormat=NUM_CHROMA_FORMAT, use the format defined by pPicYuvTop and pPicYuvBottom
-  bool  write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBot, const InputColourSpaceConversion ipCSC, int confLeft=0, int confRight=0, int confTop=0, int confBottom=0, ChromaFormat fileFormat=NUM_CHROMA_FORMAT, const bool isTff=false, const bool bClipToRec709=false);
+  bool  write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBot,
+               const InputColourSpaceConversion ipCSC,
+               const bool bPackedYUVOutputMode,
+               int confLeft = 0, int confRight = 0, int confTop = 0, int confBottom = 0, ChromaFormat format = NUM_CHROMA_FORMAT, const bool isTff = false, const bool bClipToRec709 = false );
+
   static void ColourSpaceConvert(const CPelUnitBuf &src, PelUnitBuf &dest, const InputColourSpaceConversion conversion, bool bIsForwards);
 
   bool  isEof ();                                           ///< check for end-of-file