diff --git a/doc/software-manual.tex b/doc/software-manual.tex
index b98ab091cd0955c1333b1086d09cddee49ef2c64..70f66e6a8a846ba9aa1aec933a73e8e72ebf1ea0 100644
--- a/doc/software-manual.tex
+++ b/doc/software-manual.tex
@@ -1968,9 +1968,7 @@ Enables or disables the Intra Sub-Partitions coding mode.
 \Option{ISPFast} &
 %\ShortOption{\None} &
 \Default{false} &
-Enables or disables reduced testing of non-DCT-II transforms if ISP is likely to become the best mode for a given CU.
-\par
-This option has no effect if either ISP or MTS are disabled.
+Enables or disables fast encoder methods for ISP.
 \\
 
 \Option{JointCbCr} &
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 10ef069fe19f734a6e24fd73f91294a76af8a5f4..4d2d7d0097b3f9198366c167aed491f1279823bf 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -559,7 +559,9 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setPPSDepQuantEnabledIdc                             ( m_PPSDepQuantEnabledIdc );
   m_cEncLib.setPPSRefPicListSPSIdc0                              ( m_PPSRefPicListSPSIdc0 );
   m_cEncLib.setPPSRefPicListSPSIdc1                              ( m_PPSRefPicListSPSIdc1 );
+#if !JVET_P0206_TMVP_flags
   m_cEncLib.setPPSTemporalMVPEnabledIdc                          ( m_PPSTemporalMVPEnabledIdc );
+#endif
   m_cEncLib.setPPSMvdL1ZeroIdc                                   ( m_PPSMvdL1ZeroIdc );
   m_cEncLib.setPPSCollocatedFromL0Idc                            ( m_PPSCollocatedFromL0Idc );
   m_cEncLib.setPPSSixMinusMaxNumMergeCandPlus1                   ( m_PPSSixMinusMaxNumMergeCandPlus1 );
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index 99f747a44378c7a576be08a249909210f37472d2..e80890500c26ee326ec201be3ebfcb7dc53f43b5 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -3265,7 +3265,9 @@ bool EncAppCfg::xCheckParameter()
     m_PPSDepQuantEnabledIdc = 0;
     m_PPSRefPicListSPSIdc0 = 0;
     m_PPSRefPicListSPSIdc1 = 0;
+#if !JVET_P0206_TMVP_flags
     m_PPSTemporalMVPEnabledIdc = 0;
+#endif
     m_PPSMvdL1ZeroIdc = 0;
     m_PPSCollocatedFromL0Idc = 0;
     m_PPSSixMinusMaxNumMergeCandPlus1 = 0;
@@ -3277,7 +3279,9 @@ bool EncAppCfg::xCheckParameter()
     m_PPSDepQuantEnabledIdc = (m_depQuantEnabledFlag ? 1 : 0) + 1;
     m_PPSRefPicListSPSIdc0 = 0;
     m_PPSRefPicListSPSIdc1 = 0;
+#if !JVET_P0206_TMVP_flags
     m_PPSTemporalMVPEnabledIdc = 0;
+#endif
     m_PPSMvdL1ZeroIdc = 0;
     m_PPSCollocatedFromL0Idc = 0;
     m_PPSSixMinusMaxNumMergeCandPlus1 = 6 - m_maxNumMergeCand + 1;
@@ -3289,7 +3293,9 @@ bool EncAppCfg::xCheckParameter()
     m_PPSDepQuantEnabledIdc = (m_depQuantEnabledFlag ? 1 : 0) + 1;
     m_PPSRefPicListSPSIdc0 = 2;
     m_PPSRefPicListSPSIdc1 = 2;
-    m_PPSTemporalMVPEnabledIdc = m_TMVPModeId == 2 ? 0: ( int(m_TMVPModeId == 1 ? 1: 0) + 1);
+#if !JVET_P0206_TMVP_flags
+    m_PPSTemporalMVPEnabledIdc = m_TMVPModeId == 2 ? 0: ( int(m_TMVPModeId == 1 ? 1: 0) + 1); 
+#endif
     m_PPSMvdL1ZeroIdc = 2;
     m_PPSCollocatedFromL0Idc = 1;
     m_PPSSixMinusMaxNumMergeCandPlus1 = 6 - m_maxNumMergeCand + 1;
@@ -3301,7 +3307,9 @@ bool EncAppCfg::xCheckParameter()
     m_PPSDepQuantEnabledIdc = (m_depQuantEnabledFlag ? 1 : 0) + 1;
     m_PPSRefPicListSPSIdc0 = 2;
     m_PPSRefPicListSPSIdc1 = 2;
+#if !JVET_P0206_TMVP_flags
     m_PPSTemporalMVPEnabledIdc = m_TMVPModeId == 2 ? 0: ( int(m_TMVPModeId == 1 ? 1: 0) + 1);
+#endif
     m_PPSMvdL1ZeroIdc = 0;
     m_PPSCollocatedFromL0Idc = 0;
     m_PPSSixMinusMaxNumMergeCandPlus1 = 6 - m_maxNumMergeCand + 1;
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index 23803e0a433231707707d42259d4051af6a5ce21..10b3380b7aa6ff9a03b8097cc7559790a51129b6 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -545,7 +545,9 @@ protected:
   int       m_PPSDepQuantEnabledIdc;
   int       m_PPSRefPicListSPSIdc0;
   int       m_PPSRefPicListSPSIdc1;
+#if !JVET_P0206_TMVP_flags
   int       m_PPSTemporalMVPEnabledIdc;
+#endif
   int       m_PPSMvdL1ZeroIdc;
   int       m_PPSCollocatedFromL0Idc;
   uint32_t  m_PPSSixMinusMaxNumMergeCandPlus1;
diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp
index 73023d91e6187e24248f624e03d18979d1ac6180..0803b217cdd4cbdc877952b4202c845fdf0a297f 100644
--- a/source/Lib/CommonLib/Buffer.cpp
+++ b/source/Lib/CommonLib/Buffer.cpp
@@ -42,7 +42,11 @@
 #include "Buffer.h"
 #include "InterpolationFilter.h"
 
+#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
+void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng)
+#else
 void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, int shiftNum, Pel offset, const ClpRng& clpRng)
+#endif
 {
   int idx = 0;
 #if !JVET_P0057_BDOF_PROF_HARMONIZATION 
@@ -63,10 +67,16 @@ void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int w
 #endif
 #if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
       dI = Clip3(-dILimit, dILimit - 1, dI);
-#endif
-
+      dst[w] = src[w] + dI;
+      if (!bi)
+      {
+        dst[w] = (dst[w] + offset) >> shiftNum;
+        dst[w] = ClipPel(dst[w], clpRng);
+      }
+#else
       dI = (src[w] + dI + offset) >> shiftNum;
       dst[w] = (Pel)ClipPel(dI, clpRng);
+#endif
 
       idx++;
     }
@@ -77,6 +87,7 @@ void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int w
   }
 }
 
+#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
 template<bool l1PROFEnabled = true>
 void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, int srcStride, int width, int height, const Pel* gradX0, const Pel* gradY0, const Pel* gradX1, const Pel* gradY1, int gradStride, const int* dMvX0, const int* dMvY0, const int* dMvX1, const int* dMvY1, int dMvStride, const int8_t w0, const ClpRng& clpRng)
 {
@@ -142,6 +153,7 @@ void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1,
     src1 += srcStride;
   }
 }
+#endif
 
 template< typename T >
 void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, int rshift, int offset, const ClpRng& clpRng )
@@ -433,8 +445,10 @@ PelBufferOps::PelBufferOps()
 
   profGradFilter = gradFilterCore <false>;
   applyPROF      = applyPROFCore;
+#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
   applyBiPROF[1] = applyBiPROFCore;
   applyBiPROF[0] = applyBiPROFCore <false>;
+#endif
   roundIntVector = nullptr;
 }
 
diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h
index 81be539d873628c94751f740b50ad68860fb9f47..4f09f9def0548aab8a811176ef04317409730494 100644
--- a/source/Lib/CommonLib/Buffer.h
+++ b/source/Lib/CommonLib/Buffer.h
@@ -81,8 +81,12 @@ struct PelBufferOps
   void ( *removeHighFreq4)        ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height);
 #endif
   void (*profGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth);
+#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
+  void (*applyPROF)      (Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng);
+#else
   void (*applyPROF)      (Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, int shiftNum, Pel offset, const ClpRng& clpRng);
   void (*applyBiPROF[2]) (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, int srcStride, int width, int height, const Pel* gradX0, const Pel* gradY0, const Pel* gradX1, const Pel* gradY1, int gradStride, const int* dMvX0, const int* dMvY0, const int* dMvX1, const int* dMvY1, int dMvStride, const int8_t gbiWeightL0, const ClpRng& clpRng);
+#endif
   void (*roundIntVector) (int* v, int size, unsigned int nShift, const int dmvLimit);
 };
 
diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h
index 150796090b4fc175cbfa33351287b8fa75118be4..575b2aab4bde637496fb734b73e849ce80036b47 100644
--- a/source/Lib/CommonLib/ContextModelling.h
+++ b/source/Lib/CommonLib/ContextModelling.h
@@ -448,7 +448,10 @@ public:
                          {
                            violatesLfnstConstrained[CHANNEL_TYPE_LUMA  ] = false;
                            violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false;
-                           lfnstLastScanPos = false;
+                           lfnstLastScanPos                              = false;
+#if JVET_P1026_MTS_SIGNALLING
+                           violatesMtsCoeffConstraint                    = false;
+#endif
                          }
   CUCtx(int _qp)       : isDQPCoded(false), isChromaQpAdjCoded(false),
                          qgStart(false),
@@ -456,7 +459,10 @@ public:
                          {
                            violatesLfnstConstrained[CHANNEL_TYPE_LUMA  ] = false;
                            violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false;
-                           lfnstLastScanPos = false;
+                           lfnstLastScanPos                              = false;
+#if JVET_P1026_MTS_SIGNALLING
+                           violatesMtsCoeffConstraint                    = false;
+#endif
                          }
   ~CUCtx() {}
 public:
@@ -466,6 +472,9 @@ public:
   bool      lfnstLastScanPos;
   int8_t    qp;                   // used as a previous(last) QP and for QP prediction
   bool      violatesLfnstConstrained[MAX_NUM_CHANNEL_TYPE];
+#if JVET_P1026_MTS_SIGNALLING
+  bool      violatesMtsCoeffConstraint;
+#endif
 };
 
 class MergeCtx
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index 6acfedc03e93fe26989016a68a11e8c55e3e5a2f..6537c4f89c8896183159b2405baff48de129e3a2 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -872,19 +872,28 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
   enablePROF &= !m_encOnly || pu.cu->slice->getCheckLDC() || iDMvHorX > profThres || iDMvHorY > profThres || iDMvVerX > profThres || iDMvVerY > profThres || iDMvHorX < -profThres || iDMvHorY < -profThres || iDMvVerX < -profThres || iDMvVerY < -profThres;
   enablePROF &= pu.cs->pps->getPicWidthInLumaSamples() == refPic->getPicWidthInLumaSamples() && pu.cs->pps->getPicHeightInLumaSamples() == refPic->getPicHeightInLumaSamples();
 
+#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
   if (compID == COMPONENT_Y)
   {
     m_applyPROF[m_iRefListIdx] = enablePROF;
   }
+#endif
 
   bool isLast = enablePROF ? false : !bi;
 
+#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
+  const int cuExtW = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_W * 2;
+  const int cuExtH = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_H * 2;
+
+  PelBuf gradXExt(m_gradBuf[0], cuExtW, cuExtH);
+  PelBuf gradYExt(m_gradBuf[1], cuExtW, cuExtH);
+#else
   const int cuExtW = pu.blocks[compID].width + PROF_BORDER_EXT_W * 2;
   const int cuExtH = pu.blocks[compID].height + PROF_BORDER_EXT_H * 2;
 
   PelBuf gradXExt(m_gradBuf[m_iRefListIdx][0], cuExtW, cuExtH);
   PelBuf gradYExt(m_gradBuf[m_iRefListIdx][1], cuExtW, cuExtH);
-
+#endif
   const int MAX_FILTER_SIZE = std::max<int>(NTAPS_LUMA, NTAPS_CHROMA);
   const int dstExtW = ((blockWidth + PROF_BORDER_EXT_W * 2 + 7) >> 3) << 3;
   const int dstExtH = blockHeight + PROF_BORDER_EXT_H * 2;
@@ -898,7 +907,11 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
   int *dMvScaleHor = m_dMvBuf[m_iRefListIdx];
   int *dMvScaleVer = m_dMvBuf[m_iRefListIdx] + 16;
 
+#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
+  if (enablePROF)
+#else
   if (enablePROF && !bi)
+#endif
   {
     int* dMvH = dMvScaleHor;
     int* dMvV = dMvScaleVer;
@@ -1130,8 +1143,13 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
           dstPel[blockWidth] = leftShift_round(refPel[blockWidth], shift) - (Pel)IF_INTERNAL_OFFS;
         }
 
+#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
+        PelBuf gradXBuf = gradXExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2);
+        PelBuf gradYBuf = gradYExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2);
+#else
         PelBuf gradXBuf = gradXExt.subBuf(w, h, blockWidth + 2, blockHeight + 2);
         PelBuf gradYBuf = gradYExt.subBuf(w, h, blockWidth + 2, blockHeight + 2);
+#endif
         g_pelBufOP.profGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, gradXBuf.stride, gradXBuf.buf, gradYBuf.buf, clpRng.bd);
 
         const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
@@ -1142,6 +1160,9 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
 
         Pel * dstY = dstBuf.bufAt(w, h);
 
+#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
+        g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, bi, shiftNum, offset, clpRng);
+#else
         if (!bi)
         {
           g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, shiftNum, offset, clpRng);
@@ -1152,6 +1173,7 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
           PelBuf destBuf(dstY, dstBuf.stride, Size(blockWidth, blockHeight));
           destBuf.copyFrom(srcExtBuf);
         }
+#endif
       }
       }
     }
@@ -1310,6 +1332,7 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB
 
   if( iRefIdx0 >= 0 && iRefIdx1 >= 0 )
   {
+#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
     if (pu.cu->affine && (m_applyPROF[0] || m_applyPROF[1]))
     {
       xApplyBiPROF(pu, pcYuvSrc0.bufs[COMPONENT_Y], pcYuvSrc1.bufs[COMPONENT_Y], pcYuvDst.bufs[COMPONENT_Y], clpRngs.comp[COMPONENT_Y]);
@@ -1317,6 +1340,7 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB
       CHECK(yuvDstTmp, "yuvDstTmp is disallowed with PROF");
       return;
     }
+#endif
     if( pu.cu->GBiIdx != GBI_DEFAULT && (yuvDstTmp || !pu.mhIntraFlag) )
     {
       CHECK(bioApplied, "GBi is disallowed with BIO");
@@ -1399,6 +1423,7 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB
   }
 }
 
+#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
 void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYuvSrc0, const CPelBuf& pcYuvSrc1, PelBuf& pcYuvDst, const ClpRng& clpRng)
 {
   int blockWidth = AFFINE_MIN_BLOCK_SIZE;
@@ -1542,6 +1567,7 @@ void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYu
   else
     g_pelBufOP.applyBiPROF[0](dstY, pcYuvDst.stride, srcY1, srcY0, pcYuvSrc0.stride, width, height, gX1, gY1, gX0, gY0, gradXExt0.stride, dMvX1, dMvY1, dMvX0, dMvY0, blockWidth, getGbiWeight(pu.cu->GBiIdx, REF_PIC_LIST_1), clpRng);
 }
+#endif
 
 void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList
   , const bool luma, const bool chroma
@@ -1787,11 +1813,12 @@ void InterPrediction::xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicLis
     width = pcPad.bufs[compID].width;
     height = pcPad.bufs[compID].height;
     offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1);
-    int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
+    int mvshiftTempHor = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
+    int mvshiftTempVer = mvShift + getComponentScaleY((ComponentID)compID, pu.chromaFormat);
     width += (filtersize - 1);
     height += (filtersize - 1);
-    cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp),
-      -(((filtersize >> 1) - 1) << mvshiftTemp));
+    cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTempHor),
+      -(((filtersize >> 1) - 1) << mvshiftTempVer));
     bool wrapRef = false;
     if( pu.cs->sps->getWrapAroundEnabledFlag() )
     {
@@ -1804,7 +1831,7 @@ void InterPrediction::xPrefetch(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicLis
     /* Pre-fetch similar to HEVC*/
     {
       CPelBuf refBuf;
-      Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp);
+      Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTempHor, cMv.getVer() >> mvshiftTempVer);
       refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size()), wrapRef);
       PelBuf &dstBuf = pcPad.bufs[compID];
       g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + offset, dstBuf.stride, width, height);
@@ -1822,7 +1849,8 @@ void InterPrediction::xPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList ref
     width = pcPad.bufs[compID].width;
     height = pcPad.bufs[compID].height;
     offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1);
-    padsize = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, pu.chromaFormat);
+    /* Use the vertical chroma scale so that 4:2:2 content gets the larger pad size. */
+    padsize = (DMVR_NUM_ITERATION) >> getComponentScaleY((ComponentID)compID, pu.chromaFormat);
     width += (filtersize - 1);
     height += (filtersize - 1);
     /*padding on all side of size DMVR_PAD_LENGTH*/
@@ -1982,7 +2010,8 @@ void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYu
       if (blockMoved || (compID == 0))
       {
         pcPadstride = pcPadTemp.bufs[compID].stride;
-        int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
+        int mvshiftTempHor = mvShift + getComponentScaleX((ComponentID)compID, pu.chromaFormat);
+        int mvshiftTempVer = mvShift + getComponentScaleY((ComponentID)compID, pu.chromaFormat);
         int leftPixelExtra;
         if (compID == COMPONENT_Y)
         {
@@ -1993,10 +2022,10 @@ void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYu
           leftPixelExtra = (NTAPS_CHROMA >> 1) - 1;
         }
         PelBuf &srcBuf = pcPadTemp.bufs[compID];
-        deltaIntMvX = (cMv.getHor() >> mvshiftTemp) -
-          (startMv.getHor() >> mvshiftTemp);
-        deltaIntMvY = (cMv.getVer() >> mvshiftTemp) -
-          (startMv.getVer() >> mvshiftTemp);
+        deltaIntMvX = (cMv.getHor() >> mvshiftTempHor) -
+          (startMv.getHor() >> mvshiftTempHor);
+        deltaIntMvY = (cMv.getVer() >> mvshiftTempVer) -
+          (startMv.getVer() >> mvshiftTempVer);
 
         CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");
 
diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h
index 85f008395f12a20cba23344c30c09e9eab01da5b..452c2c84429605471f267fd0930e599a0170c6f8 100644
--- a/source/Lib/CommonLib/InterPrediction.h
+++ b/source/Lib/CommonLib/InterPrediction.h
@@ -102,9 +102,15 @@ protected:
                              Mv(-2, 2), Mv(-1, 2), Mv(0, 2), Mv(1, 2), Mv(2, 2) };
   uint64_t m_SADsArray[((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)];
 
+#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
+  Pel                  m_gradBuf[2][(AFFINE_MIN_BLOCK_SIZE + 2) * (AFFINE_MIN_BLOCK_SIZE + 2)];
+#else
   Pel                  m_gradBuf[2][2][(MAX_CU_SIZE + 2) * (MAX_CU_SIZE + 2)];
+#endif
   int                  m_dMvBuf[2][16 * 2];
+#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
   bool                 m_applyPROF[2];
+#endif
   bool                 m_skipPROF;
   bool                 m_encOnly;
   bool                 m_isBi;
@@ -141,7 +147,9 @@ protected:
   void xCalcBIOPar              (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth);
   void xCalcBlkGradient         (int sx, int sy, int    *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize);
   void xWeightedAverage         ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, PelUnitBuf* yuvDstTmp = NULL );
+#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
   void xApplyBiPROF             (const PredictionUnit& pu, const CPelBuf& pcYuvSrc0, const CPelBuf& pcYuvSrc1, PelBuf& pcYuvDst, const ClpRng& clpRng);
+#endif
   void xPredAffineBlk           ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const std::pair<int, int> scalingRatio = SCALE_1X );
 
   void xWeightedTriangleBlk     ( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 );
diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp
index 56ba627b164ad57d15ab32d1b0c7160b88f3f863..01295fa4e31ca594aec6b52a1800ac349a7e5c75 100644
--- a/source/Lib/CommonLib/IntraPrediction.cpp
+++ b/source/Lib/CommonLib/IntraPrediction.cpp
@@ -67,6 +67,7 @@ const uint8_t IntraPrediction::m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS] =
   0   // 128xn
 };
 
+#if !JVET_P0599_INTRA_SMOOTHING_INTERP_FILT
 const TFilterCoeff g_intraGaussFilter[32][4] = {
   { 16, 32, 16, 0 },
   { 15, 29, 17, 3 },
@@ -101,6 +102,7 @@ const TFilterCoeff g_intraGaussFilter[32][4] = {
   { 3, 17, 29, 15 },
   { 3, 17, 29, 15 }
 };
+#endif //!JVET_P0599_INTRA_SMOOTHING_INTERP_FILT
 
 // ====================================================================================================================
 // Constructor / destructor / initialize
@@ -556,7 +558,7 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch
     // Extend main reference to right using replication
     const int log2Ratio = floorLog2(width) - floorLog2(height);
     const int s         = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio);
-    const int maxIndex  = (multiRefIdx << s) + 2;
+    const int maxIndex  = (multiRefIdx << s) + 1;
     const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength;
     const Pel val       = refMain[refLength + multiRefIdx];
     for (int z = 1; z <= maxIndex; z++)
@@ -618,8 +620,13 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch
         {
           const bool useCubicFilter = !m_ipaParam.interpolationFlag;
 
+#if JVET_P0599_INTRA_SMOOTHING_INTERP_FILT
+          const TFilterCoeff        intraSmoothingFilter[4] = {TFilterCoeff(16 - (deltaFract >> 1)), TFilterCoeff(32 - (deltaFract >> 1)), TFilterCoeff(16 + (deltaFract >> 1)), TFilterCoeff(deltaFract >> 1)};
+          const TFilterCoeff* const f                       = (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(deltaFract) : intraSmoothingFilter;
+#else //!JVET_P0599_INTRA_SMOOTHING_INTERP_FILT
           const TFilterCoeff *const f =
             (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(deltaFract) : g_intraGaussFilter[deltaFract];
+#endif //JVET_P0599_INTRA_SMOOTHING_INTERP_FILT
 
           for (int x = 0; x < width; x++)
           {
@@ -1665,7 +1672,7 @@ void IntraPrediction::xGetLMParameters(const PredictionUnit &pu, const Component
 
   const int baseUnitSize = 1 << MIN_CU_LOG2;
   const int unitWidth    = baseUnitSize >> getComponentScaleX(chromaArea.compID, nChromaFormat);
-  const int unitHeight   = baseUnitSize >> getComponentScaleX(chromaArea.compID, nChromaFormat);
+  const int unitHeight   = baseUnitSize >> getComponentScaleY(chromaArea.compID, nChromaFormat);
 
   const int tuWidthInUnits  = tuWidth / unitWidth;
   const int tuHeightInUnits = tuHeight / unitHeight;
diff --git a/source/Lib/CommonLib/LoopFilter.cpp b/source/Lib/CommonLib/LoopFilter.cpp
index c7b4d9fd2e6c5b084efdfb96327abd1824ac776a..2200bfd2b91494dce95bdc2c09e4ca4a78d9ca01 100644
--- a/source/Lib/CommonLib/LoopFilter.cpp
+++ b/source/Lib/CommonLib/LoopFilter.cpp
@@ -689,15 +689,23 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De
   const CodingUnit& cuQ = cu;
   const CodingUnit& cuP = *cu.cs->getCU( posP, cu.chType );
 
+#if !JVET_P0571_FIX_BS_BDPCM_CHROMA
   if( ( MODE_INTRA == cuP.predMode && cuP.bdpcmMode ) && ( MODE_INTRA == cuQ.predMode && cuQ.bdpcmMode ) )
   {
     return 0;
   }
+#endif
 
   //-- Set BS for Intra MB : BS = 4 or 3
   if( ( MODE_INTRA == cuP.predMode ) || ( MODE_INTRA == cuQ.predMode ) )
   {
+#if JVET_P0571_FIX_BS_BDPCM_CHROMA
+    int bsY = (MODE_INTRA == cuP.predMode && cuP.bdpcmMode) && (MODE_INTRA == cuQ.predMode && cuQ.bdpcmMode) ? 0 : 2;
+    int bsC = 2;
+    return (BsSet(bsY, COMPONENT_Y) + BsSet(bsC, COMPONENT_Cb) + BsSet(bsC, COMPONENT_Cr));
+#else
     return (BsSet(2, COMPONENT_Y) + BsSet(2, COMPONENT_Cb) + BsSet(2, COMPONENT_Cr));
+#endif
   }
 
   const TransformUnit& tuQ = *cuQ.cs->getTU(posQ, cuQ.chType);
diff --git a/source/Lib/CommonLib/Quant.cpp b/source/Lib/CommonLib/Quant.cpp
index 78f49d973be2c2b117314b6a2be0d5bd41ed233c..480fffc30715a321d32764e544c3e592d157a671 100644
--- a/source/Lib/CommonLib/Quant.cpp
+++ b/source/Lib/CommonLib/Quant.cpp
@@ -405,7 +405,11 @@ void Quant::dequant(const TransformUnit &tu,
   const int QP_per = cQP.per(isTransformSkip);
   const int QP_rem = cQP.rem(isTransformSkip);
 
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+  const int  rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : iTransformShift) + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
+#else
   const int  rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
+#endif
 
   if(enableScalingLists)
   {
@@ -977,8 +981,11 @@ void Quant::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf
       iTransformShift = std::max<int>(0, iTransformShift);
     }
 
-
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+    const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + (useTransformSkip ? 0 : iTransformShift);
+#else
     const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + iTransformShift;
+#endif
     // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
 
     const int64_t iAdd = int64_t(tu.cs->slice->isIRAP() ? 171 : 85) << int64_t(iQBits - 9);
@@ -1109,8 +1116,11 @@ void Quant::transformSkipQuantOneSample(TransformUnit &tu, const ComponentID &co
   * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
   * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
   */
-
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+  const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + (useTransformSkip ? 0 : iTransformShift);
+#else
   const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + iTransformShift;
+#endif
   // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
   const int iAdd = int64_t(bUseHalfRoundingPoint ? 256 : (tu.cs->slice->isIRAP() ? 171 : 85)) << int64_t(iQBits - 9);
   TCoeff transformedCoefficient;
@@ -1162,7 +1172,16 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp
 
   CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
 
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+#if JVET_P0058_CHROMA_TS
+  const bool isTransformSkip = (tu.mtsIdx[compID] == MTS_SKIP);
+#else
+  const bool isTransformSkip = (tu.mtsIdx == MTS_SKIP && isLuma(compID));
+#endif
+  const int rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : iTransformShift) + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
+#else
   const int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
+#endif
 
   const TCoeff transformMinimum = -(1 << maxLog2TrDynamicRange);
   const TCoeff transformMaximum =  (1 << maxLog2TrDynamicRange) - 1;
@@ -1228,7 +1247,9 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp
   }
 
   // Inverse transform-skip
-
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+  reconSample = Pel(dequantisedSample);
+#else
   if (iTransformShift >= 0)
   {
     const TCoeff offset = iTransformShift == 0 ? 0 : (1 << (iTransformShift - 1));
@@ -1239,6 +1260,7 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp
     const int iTrShiftNeg = -iTransformShift;
     reconSample = Pel(dequantisedSample << iTrShiftNeg);
   }
+#endif
 }
 
 #if ADAPTIVE_COLOR_TRANSFORM
diff --git a/source/Lib/CommonLib/QuantRDOQ.cpp b/source/Lib/CommonLib/QuantRDOQ.cpp
index ff150c610b2aa4ba2c3a4b61c00dc9701a9f51c1..7cc8a7891be50e4e028284e1d9800a43b7b82ea2 100644
--- a/source/Lib/CommonLib/QuantRDOQ.cpp
+++ b/source/Lib/CommonLib/QuantRDOQ.cpp
@@ -372,10 +372,17 @@ void QuantRDOQ::setScalingList(ScalingList *scalingList, const int maxLog2TrDyna
 }
 
 
-
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip=false)
+#else
 double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth)
+#endif
 {
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+  const int iTransformShift = bTransformSkip ? 0 : getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange);
+#else
   const int iTransformShift = getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange);
+#endif
   double    dErrScale = (double)(1 << SCALE_BITS);                                // Compensate for scaling of bitcount in Lagrange cost function
   double    dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0);
   dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift));                     // Compensate for scaling through forward transform
@@ -1241,9 +1248,17 @@ void QuantRDOQ::xRateDistOptQuantTS( TransformUnit &tu, const ComponentID &compI
 #else
   const bool   isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID);
 #endif
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip ? 0 : transformShift) + (needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
+#else
   const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + transformShift + ( needsSqrt2Scale ? -1 : 0 );  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
+#endif
   const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem(isTransformSkip)];
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+  const double errorScale              = xGetErrScaleCoeff( TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);
+#else
   const double errorScale              = xGetErrScaleCoeff( TU::needsSqrt2Scale( tu, compID ), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth );
+#endif
 
   const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1;
 
@@ -1481,12 +1496,23 @@ void QuantRDOQ::forwardRDPCM( TransformUnit &tu, const ComponentID &compID, cons
 #else
   const bool   isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID);
 #endif
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip? 0 : transformShift) + ( needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
+#else
   const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + transformShift + ( needsSqrt2Scale ? -1 : 0 );  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
+#endif
   const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+  const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);
+#else
   const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth);
-
+#endif
   TrQuantParams trQuantParams;
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+  trQuantParams.rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : transformShift) + qp.per(isTransformSkip)));
+#else
   trQuantParams.rightShift = (IQUANT_SHIFT - (transformShift + qp.per(isTransformSkip)));
+#endif
   trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];
 
   const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;
diff --git a/source/Lib/CommonLib/QuantRDOQ.h b/source/Lib/CommonLib/QuantRDOQ.h
index d733613435cae903f4aa274e784eb2f4abd4e756..301e5a0a6bc7748b0cf456dd55cd2547bd2998c5 100644
--- a/source/Lib/CommonLib/QuantRDOQ.h
+++ b/source/Lib/CommonLib/QuantRDOQ.h
@@ -69,7 +69,11 @@ public:
 
 private:
   double* xGetErrScaleCoeffSL            ( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp ) { return m_errScale[sizeX][sizeY][list][qp]; };  //!< get Error Scale Coefficent
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+  double  xGetErrScaleCoeff              ( const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip);  //!< get Error Scale Coefficient; the transform shift is taken as 0 when bTransformSkip is true
+#else
   double  xGetErrScaleCoeff              ( const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth);
+#endif
   double& xGetErrScaleCoeffNoScalingList ( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp ) { return m_errScaleNoScalingList[sizeX][sizeY][list][qp]; };  //!< get Error Scale Coefficent
   void    xInitScalingList               ( const QuantRDOQ* other );
   void    xDestroyScalingList            ();
diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp
index ffd2e912747c15878df5633d44b9fe763a4d190c..0b607527abf8862924a2b6eefb7dddbe8c5d2f8d 100644
--- a/source/Lib/CommonLib/Slice.cpp
+++ b/source/Lib/CommonLib/Slice.cpp
@@ -1816,7 +1816,9 @@ PPS::PPS()
 , m_PPSDepQuantEnabledIdc            (0)
 , m_PPSRefPicListSPSIdc0             (0)
 , m_PPSRefPicListSPSIdc1             (0)
+#if !JVET_P0206_TMVP_flags
 , m_PPSTemporalMVPEnabledIdc         (0)
+#endif
 , m_PPSMvdL1ZeroIdc                  (0)
 , m_PPSCollocatedFromL0Idc           (0)
 , m_PPSSixMinusMaxNumMergeCandPlus1  (0)
@@ -2506,6 +2508,10 @@ void Slice::scaleRefPicList( Picture *scaledRefPic[ ], APS** apss, APS* lmcsAps,
   const SPS* sps = getSPS();
   const PPS* pps = getPPS();
 
+#if JVET_P0206_TMVP_flags
+  bool refPicIsSameRes = false;
+#endif
+   
   // this is needed for IBC
   m_pcPic->unscaledPic = m_pcPic;
 
@@ -2532,6 +2538,13 @@ void Slice::scaleRefPicList( Picture *scaledRefPic[ ], APS** apss, APS* lmcsAps,
       CU::getRprScaling( sps, pps, m_apcRefPicList[refList][rIdx], xScale, yScale );
       m_scalingRatio[refList][rIdx] = std::pair<int, int>( xScale, yScale );
 
+#if JVET_P0206_TMVP_flags
+      if( m_scalingRatio[refList][rIdx] == SCALE_1X )
+      {
+        refPicIsSameRes = true;
+      }
+#endif
+
       if( m_scalingRatio[refList][rIdx] == SCALE_1X || isDecoder )
       {
         m_scaledRefPicList[refList][rIdx] = m_apcRefPicList[refList][rIdx];
@@ -2617,6 +2630,14 @@ void Slice::scaleRefPicList( Picture *scaledRefPic[ ], APS** apss, APS* lmcsAps,
       m_apcRefPicList[refList][rIdx]->unscaledPic = m_savedRefPicList[refList][rIdx];
     }
   }
+  
+#if JVET_P0206_TMVP_flags
+  //Make sure that TMVP is disabled when there are no reference pictures with the same resolution
+  if(!refPicIsSameRes)
+  {
+    CHECK(m_enableTMVPFlag != 0, "TMVP cannot be enabled in slices that have no reference pictures with the same resolution")
+  }
+#endif
 }
 
 void Slice::freeScaledRefPicList( Picture *scaledRefPic[] )
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index 122023d00681c72d68484701f4a39016c619f714..513b31aecb316dc29644ca3e32b09613bf9c670e 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -1246,7 +1246,9 @@ private:
   int               m_PPSDepQuantEnabledIdc;
   int               m_PPSRefPicListSPSIdc0;
   int               m_PPSRefPicListSPSIdc1;
+#if !JVET_P0206_TMVP_flags
   int               m_PPSTemporalMVPEnabledIdc;
+#endif
   int               m_PPSMvdL1ZeroIdc;
   int               m_PPSCollocatedFromL0Idc;
   uint32_t          m_PPSSixMinusMaxNumMergeCandPlus1;
@@ -1446,8 +1448,10 @@ public:
   void                    setPPSRefPicListSPSIdc0(int u)                                  { m_PPSRefPicListSPSIdc0 = u;                   }
   int                     getPPSRefPicListSPSIdc1() const                                 { return m_PPSRefPicListSPSIdc1;                }
   void                    setPPSRefPicListSPSIdc1(int u)                                  { m_PPSRefPicListSPSIdc1 = u;                   }
+#if !JVET_P0206_TMVP_flags
   int                     getPPSTemporalMVPEnabledIdc() const                             { return m_PPSTemporalMVPEnabledIdc;            }
   void                    setPPSTemporalMVPEnabledIdc(int u)                              { m_PPSTemporalMVPEnabledIdc = u;               }
+#endif
   int                     getPPSMvdL1ZeroIdc() const                                      { return m_PPSMvdL1ZeroIdc;                     }
   void                    setPPSMvdL1ZeroIdc(int u)                                       { m_PPSMvdL1ZeroIdc = u;                        }
   int                     getPPSCollocatedFromL0Idc() const                               { return m_PPSCollocatedFromL0Idc;              }
diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp
index 51ba7016fdafa50ffca741cd3bb9e74922721e1c..c5524917d20379aacbf1adb84ada39280b12fb32 100644
--- a/source/Lib/CommonLib/TrQuant.cpp
+++ b/source/Lib/CommonLib/TrQuant.cpp
@@ -940,6 +940,16 @@ void TrQuant::xITransformSkip(const CCoeffBuf     &pCoeff,
   const CompArea &area      = tu.blocks[compID];
   const int width           = area.width;
   const int height          = area.height;
+
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+  for (uint32_t y = 0; y < height; y++)
+  {
+      for (uint32_t x = 0; x < width; x++)
+      {
+          pResidual.at(x, y) = Pel(pCoeff.at(x, y));
+      }
+  }
+#else
   const int maxLog2TrDynamicRange = tu.cs->sps->getMaxLog2TrDynamicRange(toChannelType(compID));
   const int channelBitDepth = tu.cs->sps->getBitDepth(toChannelType(compID));
 
@@ -981,6 +991,7 @@ void TrQuant::xITransformSkip(const CCoeffBuf     &pCoeff,
       }
     }
   }
+#endif
 }
 
 void TrQuant::xQuant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx)
@@ -1051,6 +1062,18 @@ void TrQuant::transformNxN( TransformUnit& tu, const ComponentID& compID, const
     {
       scaleSAD=1.0/1.414213562; // compensate for not scaling transform skip coefficients by 1/sqrt(2)
     }
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+#if JVET_P0058_CHROMA_TS
+    if (tu.mtsIdx[compID] == MTS_SKIP)
+#else
+    if (isLuma(compID) && tu.mtsIdx == MTS_SKIP)
+#endif
+    {
+        int trShift = getTransformShift(tu.cu->slice->getSPS()->getBitDepth(toChannelType(compID)), rect.size(), tu.cu->slice->getSPS()->getMaxLog2TrDynamicRange(toChannelType(compID)));
+        scaleSAD *= pow(2, trShift); // weight the transform-skip candidate's SAD by 2^trShift; NOTE(review): presumably compensates for TS coefficients no longer carrying the transform shift - confirm
+    }
+#endif
+
     trCosts.push_back( TrCost( int(sumAbs*scaleSAD), pos++ ) );
     it++;
   }
@@ -1306,11 +1329,24 @@ void TrQuant::rdpcmNxN(TransformUnit &tu, const ComponentID &compID, const QpPar
 
 void TrQuant::xTransformSkip(const TransformUnit &tu, const ComponentID &compID, const CPelBuf &resi, TCoeff* psCoeff)
 {
-  const SPS &sps            = *tu.cs->sps;
-  const CompArea &rect      = tu.blocks[compID];
-  const uint32_t width          = rect.width;
-  const uint32_t height         = rect.height;
-  const ChannelType chType  = toChannelType(compID);
+#if JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE
+  const CompArea &rect = tu.blocks[compID];
+  const uint32_t width = rect.width;
+  const uint32_t height = rect.height;
+
+  for (uint32_t y = 0, coefficientIndex = 0; y < height; y++)
+  {
+      for (uint32_t x = 0; x < width; x++, coefficientIndex++)
+      {
+          psCoeff[ coefficientIndex ] = TCoeff(resi.at(x, y));
+      }
+  }
+#else
+    const SPS &sps = *tu.cs->sps;
+    const CompArea &rect = tu.blocks[compID];
+    const uint32_t width = rect.width;
+    const uint32_t height = rect.height;
+    const ChannelType chType = toChannelType(compID);
   const int channelBitDepth = sps.getBitDepth(chType);
   const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(chType);
   int iTransformShift       = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
@@ -1348,6 +1384,7 @@ void TrQuant::xTransformSkip(const TransformUnit &tu, const ComponentID &compID,
       }
     }
   }
+#endif
 }
 
 //! \}
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 4451e6203c1493288d6466fe71b7183c7667d987..6589a2e84d87da337c5e307e3da8f1965e432bab 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -49,6 +49,15 @@
 #include <cstring>
 #include <assert.h>
 #include <cassert>
+
+#define JVET_P0206_TMVP_flags                             1 // JVET-P0206: Signalling TMVP usage (remove pps TMVP idc and constraint when RPR is used)
+
+#define JVET_P0599_INTRA_SMOOTHING_INTERP_FILT            1 // JVET-P0599: Cleanup of interpolation filtering for intra prediction
+
+#define JVET_P1026_MTS_SIGNALLING                         1 // JVET-P1026: CU level MTS signalling
+
+#define JVET_P0571_FIX_BS_BDPCM_CHROMA                    1 // JVET-P0571: align boundary strength for Chroma BDPCM
+
 #define JVET_P0983_REMOVE_SPS_SBT_MAX_SIZE_FLAG           1 // JVET-P0983/JVET-P0391: Remove sps_sbt_max_size_64_flag
 
 #define JVET_P0530_TPM_WEIGHT_ALIGN                       1 // JVET-P0530: align chroma weights with luma weights for TPM blending
@@ -63,7 +72,8 @@
 #define DELTA_QP_FOR_Co                                  -3
 #endif
 
+#define JVET_P1000_REMOVE_TRANFORMSHIFT_IN_TS_MODE        1 // JVET-P1000: Remove Transformshift in TS mode
 #define JVET_P0298_DISABLE_LEVELMAPPING_IN_BYPASS         1 // JVET-P0298: Disable level mapping in bypass mode
 #define JVET_P0347_MAX_MTT_DEPTH_CONSTRAINT               1 // JVET-P0347: Max MTT Depth constraint
 
 #define JVET_P0325_CHANGE_MERGE_CANDIDATE_ORDER           1 // JVET-P0325: reorder the spatial merge candidates
diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp
index 5c1d2f782ba4e3258d0a99e555b064cef1a15d7f..b696d5e253c449036972ae25d6a4f7f9601e7762 100644
--- a/source/Lib/CommonLib/Unit.cpp
+++ b/source/Lib/CommonLib/Unit.cpp
@@ -865,10 +865,15 @@ int TransformUnit::getTbAreaAfterCoefZeroOut(ComponentID compID) const
   int tbArea = blocks[compID].width * blocks[compID].height;
   int tbZeroOutWidth = blocks[compID].width;
   int tbZeroOutHeight = blocks[compID].height;
+
+#if JVET_P1026_MTS_SIGNALLING
+  if ( cs->sps->getUseMTS() && cu->sbtInfo != 0 && blocks[compID].width <= 32 && blocks[compID].height <= 32 && !cu->transQuantBypass && compID == COMPONENT_Y )
+#else
 #if JVET_P0058_CHROMA_TS
   if ((mtsIdx[compID] > MTS_SKIP || (cs->sps->getUseMTS() && cu->sbtInfo != 0 && blocks[compID].width <= 32 && blocks[compID].height <= 32)) && !cu->transQuantBypass && compID == COMPONENT_Y)
 #else
   if ((mtsIdx > MTS_SKIP || (cs->sps->getUseMTS() && cu->sbtInfo != 0 && blocks[compID].width <= 32 && blocks[compID].height <= 32)) && !cu->transQuantBypass && compID == COMPONENT_Y)
+#endif
 #endif
   {
     tbZeroOutWidth = (blocks[compID].width == 32) ? 16 : tbZeroOutWidth;
diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp
index dd49aed52a395140fc5d90784cec7693ebfb7cc8..95141fe5bfba2f4255b414d800c0391218af6fe4 100644
--- a/source/Lib/CommonLib/UnitTools.cpp
+++ b/source/Lib/CommonLib/UnitTools.cpp
@@ -3765,6 +3765,25 @@ bool CU::bdpcmAllowed( const CodingUnit& cu, const ComponentID compID )
 
   return bdpcmAllowed;
 }
+
+#if JVET_P1026_MTS_SIGNALLING
+bool CU::isMTSAllowed(const CodingUnit &cu, const ComponentID compID) // CU-level check: true when an MTS index may be signalled for this CU/component
+{
+  SizeType tsMaxSize = 1 << cu.cs->pps->getLog2MaxTransformSkipBlockSize(); // 2^Log2MaxTransformSkipBlockSize, read from the PPS
+  const int maxSize  = CU::isIntra( cu ) ? MTS_INTRA_MAX_CU_SIZE : MTS_INTER_MAX_CU_SIZE; // size cap differs for intra and non-intra CUs
+  const int cuWidth  = cu.blocks[0].lumaSize().width;
+  const int cuHeight = cu.blocks[0].lumaSize().height;
+  bool mtsAllowed    = cu.chType == CHANNEL_TYPE_LUMA && compID == COMPONENT_Y; // luma only
+
+  mtsAllowed &= CU::isIntra( cu ) ? cu.cs->sps->getUseIntraMTS() : cu.cs->sps->getUseInterMTS() && CU::isInter( cu ); // parses as: intra ? intraMTS : (interMTS && isInter)
+  mtsAllowed &= cuWidth <= maxSize && cuHeight <= maxSize; // both luma dimensions must be within the cap
+  mtsAllowed &= !cu.ispMode; // rejected when ispMode is non-zero
+  mtsAllowed &= !cu.sbtInfo; // rejected when sbtInfo is non-zero
+  mtsAllowed &= !(cu.bdpcmMode && cuWidth <= tsMaxSize && cuHeight <= tsMaxSize); // rejected for a BDPCM CU that fits within the transform-skip size limit
+  return mtsAllowed;
+}
+#endif
+
 // TU tools
 
 bool TU::isNonTransformedResidualRotated(const TransformUnit &tu, const ComponentID &compID)
@@ -3822,6 +3841,7 @@ bool TU::isTSAllowed(const TransformUnit &tu, const ComponentID compID)
   return tsAllowed;
 }
 
+#if !JVET_P1026_MTS_SIGNALLING
 bool TU::isMTSAllowed(const TransformUnit &tu, const ComponentID compID)
 {
   bool   mtsAllowed = compID == COMPONENT_Y;
@@ -3835,6 +3855,7 @@ bool TU::isMTSAllowed(const TransformUnit &tu, const ComponentID compID)
   mtsAllowed &= !( tu.cu->bdpcmMode && tu.lwidth() <= transformSkipMaxSize && tu.lheight() <= transformSkipMaxSize);
   return mtsAllowed;
 }
+#endif
 
 int TU::getICTMode( const TransformUnit& tu, int jointCbCr )
 {
diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h
index a1c766ff382a35bb7b2a6ef16892523d363d5a5d..497679860c42c1483a8f7307c98f2ac1d29d9d3c 100644
--- a/source/Lib/CommonLib/UnitTools.h
+++ b/source/Lib/CommonLib/UnitTools.h
@@ -90,6 +90,9 @@ namespace CU
   void  setGbiIdx                     (CodingUnit& cu, uint8_t uh);
   uint8_t deriveGbiIdx                (uint8_t gbiLO, uint8_t gbiL1);
   bool bdpcmAllowed                   (const CodingUnit& cu, const ComponentID compID);
+#if JVET_P1026_MTS_SIGNALLING
+  bool isMTSAllowed                   (const CodingUnit& cu, const ComponentID compID);
+#endif
 
 
   bool      divideTuInRows            ( const CodingUnit &cu );
@@ -199,7 +202,9 @@ namespace TU
   bool getCbfAtDepth                  (const TransformUnit &tu, const ComponentID &compID, const unsigned &depth);
   void setCbfAtDepth                  (      TransformUnit &tu, const ComponentID &compID, const unsigned &depth, const bool &cbf);
   bool isTSAllowed                    (const TransformUnit &tu, const ComponentID  compID);
+#if !JVET_P1026_MTS_SIGNALLING
   bool isMTSAllowed                   (const TransformUnit &tu, const ComponentID  compID);
+#endif
   bool hasCrossCompPredInfo           (const TransformUnit &tu, const ComponentID &compID);
 
 
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
index b91ef72f2fada8d561d6b9582587ba08e4048d3c..ae44540199073b88010b619e5d22261730164e0e 100644
--- a/source/Lib/CommonLib/x86/BufferX86.h
+++ b/source/Lib/CommonLib/x86/BufferX86.h
@@ -249,7 +249,8 @@ void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1St
       a   = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i *) (src0 + x)),
                              _mm_loadl_epi64((const __m128i *) (src1 + x)));
 #if JVET_P0091_REMOVE_BDOF_OFFSET_SHIFT
-      sum = _mm_add_epi32(sum, _mm_set1_epi32(2 * offset));
+      sum = _mm_add_epi32(sum, _mm_madd_epi16(a, _mm_set1_epi16(1)));
+      sum = _mm_add_epi32(sum, _mm_set1_epi32(offset));
       sum = _mm_sra_epi32(sum, _mm_cvtsi32_si128(shift));
 #else
       sum = _mm_add_epi32(sum, _mm_madd_epi16(a, _mm_set1_epi16(2)));
@@ -350,10 +351,45 @@ void calcBIOSums_SSE(const Pel* srcY0Tmp, const Pel* srcY1Tmp, Pel* gradX0, Pel*
 }
 
 template< X86_VEXT vext >
+#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
+void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng)
+#else
 void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, int shiftNum, Pel offset, const ClpRng& clpRng)
+#endif
 {
   CHECKD((width & 3), "block width error!");
 
+#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
+  const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13);
+
+#if USE_AVX2 // same test as the loop/body guards below, so the declarations always match the code path that uses them
+  __m256i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_dI0, mm_src;
+#if !JVET_P0057_BDOF_PROF_HARMONIZATION
+  __m256i mm_dIoffset = _mm256_set1_epi32(1);
+#endif
+  __m256i mm_offset = _mm256_set1_epi16(offset);
+  __m256i vibdimin = _mm256_set1_epi16(clpRng.min);
+  __m256i vibdimax = _mm256_set1_epi16(clpRng.max);
+  __m256i mm_dimin = _mm256_set1_epi32(-dILimit);
+  __m256i mm_dimax = _mm256_set1_epi32(dILimit - 1);
+#else
+  __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_dI0;
+#if !JVET_P0057_BDOF_PROF_HARMONIZATION
+  __m128i mm_dIoffset = _mm_set1_epi32(1);
+#endif
+  __m128i mm_offset = _mm_set1_epi16(offset);
+  __m128i vibdimin = _mm_set1_epi16(clpRng.min);
+  __m128i vibdimax = _mm_set1_epi16(clpRng.max);
+  __m128i mm_dimin = _mm_set1_epi32(-dILimit);
+  __m128i mm_dimax = _mm_set1_epi32(dILimit - 1);
+#endif
+
+#if USE_AVX2
+  for (int h = 0; h < height; h += 4)
+#else
+  for (int h = 0; h < height; h += 2)
+#endif
+#else
   __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_src;
 #if !JVET_P0057_BDOF_PROF_HARMONIZATION
   __m128i mm_dIoffset = _mm_set1_epi32(1);
@@ -363,13 +399,8 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride,
   __m128i vibdimax  = _mm_set1_epi32(clpRng.max);
   __m128i vzero     = _mm_setzero_si128();
 
-#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING 
-  const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13);
-  __m128i vdImin = _mm_set1_epi32(-dILimit);
-  __m128i vdImax = _mm_set1_epi32(dILimit - 1);
-#endif
-
   for (int h = 0; h < height; h++)
+#endif
   {
     const int* vX = dMvX;
     const int* vY = dMvY;
@@ -380,6 +411,100 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride,
 
     for (int w = 0; w < width; w += 4)
     {
+#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
+#if USE_AVX2
+      const int *vX0 = vX, *vY0 = vY;
+      const Pel *gX0 = gX, *gY0 = gY;
+
+      // first two rows
+      mm_dmvx = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vX0)), _mm_loadu_si128((const __m128i *)(vX0 + dMvStride)), 1);
+      mm_dmvy = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vY0)), _mm_loadu_si128((const __m128i *)(vY0 + dMvStride)), 1);
+      mm_gradx = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX0))),
+        _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX0 + gradStride))), 1);
+      mm_grady = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0))),
+        _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY0 + gradStride))), 1);
+      mm_dI0 = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady));
+#if !JVET_P0057_BDOF_PROF_HARMONIZATION
+      mm_dI0 = _mm256_srai_epi32(_mm256_add_epi32(mm_dI0, mm_dIoffset), 1);
+#endif
+      mm_dI0 = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI0));
+
+      // next two rows
+      vX0 += (dMvStride << 1); vY0 += (dMvStride << 1); gX0 += (gradStride << 1); gY0 += (gradStride << 1);
+      mm_dmvx = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vX0)), _mm_loadu_si128((const __m128i *)(vX0 + dMvStride)), 1);
+      mm_dmvy = _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)vY0)), _mm_loadu_si128((const __m128i *)(vY0 + dMvStride)), 1);
+      mm_gradx = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX0))),
+        _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX0 + gradStride))), 1);
+      mm_grady = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0))),
+        _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY0 + gradStride))), 1);
+      mm_dI = _mm256_add_epi32(_mm256_mullo_epi32(mm_dmvx, mm_gradx), _mm256_mullo_epi32(mm_dmvy, mm_grady));
+#if !JVET_P0057_BDOF_PROF_HARMONIZATION
+      mm_dI = _mm256_srai_epi32(_mm256_add_epi32(mm_dI, mm_dIoffset), 1);
+#endif
+      mm_dI = _mm256_min_epi32(mm_dimax, _mm256_max_epi32(mm_dimin, mm_dI));
+
+      // combine four rows
+      mm_dI = _mm256_packs_epi32(mm_dI0, mm_dI);
+      const Pel* src0 = src + srcStride;
+      mm_src = _mm256_inserti128_si256(
+        _mm256_castsi128_si256(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)src), _mm_loadl_epi64((const __m128i *)(src + (srcStride << 1))))),
+        _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)src0), _mm_loadl_epi64((const __m128i *)(src0 + (srcStride << 1)))),
+        1
+      );
+      mm_dI = _mm256_add_epi16(mm_dI, mm_src);
+      if (!bi)
+      {
+        mm_dI = _mm256_srai_epi16(_mm256_add_epi16(mm_dI, mm_offset), shiftNum);
+        mm_dI = _mm256_min_epi16(vibdimax, _mm256_max_epi16(vibdimin, mm_dI));
+      }
+
+      // store final results
+      __m128i dITmp = _mm256_extractf128_si256(mm_dI, 1);
+      Pel* dst0 = dst;
+      _mm_storel_epi64((__m128i *)dst0, _mm256_castsi256_si128(mm_dI));
+      dst0 += dstStride; _mm_storel_epi64((__m128i *)dst0, dITmp);
+      dst0 += dstStride; _mm_storel_epi64((__m128i *)dst0, _mm_unpackhi_epi64(_mm256_castsi256_si128(mm_dI), _mm256_castsi256_si128(mm_dI)));
+      dst0 += dstStride; _mm_storel_epi64((__m128i *)dst0, _mm_unpackhi_epi64(dITmp, dITmp));
+#else
+      // first row
+      mm_dmvx = _mm_loadu_si128((const __m128i *)vX);
+      mm_dmvy = _mm_loadu_si128((const __m128i *)vY);
+      mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX));
+      mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY));
+      mm_dI0 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady));
+#if !JVET_P0057_BDOF_PROF_HARMONIZATION
+      mm_dI0 = _mm_srai_epi32(_mm_add_epi32(mm_dI0, mm_dIoffset), 1);
+#endif
+      mm_dI0 = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI0));
+
+      // second row
+      mm_dmvx = _mm_loadu_si128((const __m128i *)(vX + dMvStride));
+      mm_dmvy = _mm_loadu_si128((const __m128i *)(vY + dMvStride));
+      mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gX + gradStride)));
+      mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(gY + gradStride)));
+      mm_dI = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady));
+#if !JVET_P0057_BDOF_PROF_HARMONIZATION
+      mm_dI = _mm_srai_epi32(_mm_add_epi32(mm_dI, mm_dIoffset), 1);
+#endif
+      mm_dI = _mm_min_epi32(mm_dimax, _mm_max_epi32(mm_dimin, mm_dI));
+
+      // combine both rows
+      mm_dI = _mm_packs_epi32(mm_dI0, mm_dI);
+      mm_dI = _mm_add_epi16(_mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)src), _mm_loadl_epi64((const __m128i *)(src + srcStride))), mm_dI);
+      if (!bi)
+      {
+        mm_dI = _mm_srai_epi16(_mm_add_epi16(mm_dI, mm_offset), shiftNum);
+        mm_dI = _mm_min_epi16(vibdimax, _mm_max_epi16(vibdimin, mm_dI));
+      }
+
+      _mm_storel_epi64((__m128i *)dst, mm_dI);
+      _mm_storel_epi64((__m128i *)(dst + dstStride), _mm_unpackhi_epi64(mm_dI, mm_dI));
+#endif
+#else
       mm_dmvx = _mm_loadu_si128((const __m128i *)vX);
       mm_dmvy = _mm_loadu_si128((const __m128i *)vY);
       mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX));
@@ -390,24 +515,43 @@ void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride,
 #if !JVET_P0057_BDOF_PROF_HARMONIZATION 
       mm_dI = _mm_srai_epi32(_mm_add_epi32(mm_dI, mm_dIoffset), 1);
 #endif
-#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
-      mm_dI = _mm_min_epi32(vdImax, _mm_max_epi32(vdImin, mm_dI));
-#endif
+
       mm_dI = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(mm_dI, mm_src), mm_offset), shiftNum);
       mm_dI = _mm_packs_epi32(_mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, mm_dI)), vzero);
       _mm_storel_epi64((__m128i *)dst, mm_dI);
 
+#endif
       vX += 4; vY += 4; gX += 4; gY += 4; src += 4; dst += 4;
     }
+
+#if JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
+#if USE_AVX2
+    dMvX += (dMvStride << 2);
+    dMvY += (dMvStride << 2);
+    gradX += (gradStride << 2);
+    gradY += (gradStride << 2);
+    srcPel += (srcStride << 2);
+    dstPel += (dstStride << 2);
+#else
+    dMvX += (dMvStride << 1);
+    dMvY += (dMvStride << 1);
+    gradX += (gradStride << 1);
+    gradY += (gradStride << 1);
+    srcPel += (srcStride << 1);
+    dstPel += (dstStride << 1);
+#endif
+#else
     dMvX += dMvStride;
     dMvY += dMvStride;
     gradX += gradStride;
     gradY += gradStride;
     srcPel += srcStride;
     dstPel += dstStride;
+#endif
   }
 }
 
+#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
 template< X86_VEXT vext, bool l1PROFEnabled = true>
 void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, int srcStride, int width, int height, const Pel* gradX0, const Pel* gradY0, const Pel* gradX1, const Pel* gradY1, int gradStride, const int* dMvX0, const int* dMvY0, const int* dMvX1, const int* dMvY1, int dMvStride, const int8_t w0, const ClpRng& clpRng)
 {
@@ -525,6 +669,7 @@ void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1,
     dst += dstStride;
   }
 }
+#endif
 
 template< X86_VEXT vext >
 void roundIntVector_SIMD(int* v, int size, unsigned int nShift, const int dmvLimit)
@@ -1294,8 +1439,10 @@ void PelBufferOps::_initPelBufOpsX86()
 #endif
   profGradFilter = gradFilter_SSE<vext, false>;
   applyPROF      = applyPROF_SSE<vext>;
+#if !JVET_P0154_PROF_SAMPLE_OFFSET_CLIPPING
   applyBiPROF[1] = applyBiPROF_SSE<vext>;
   applyBiPROF[0] = applyBiPROF_SSE<vext, false>;
+#endif
   roundIntVector = roundIntVector_SIMD<vext>;
 }
 
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index da84544f19fcab1063cf7cbfd7f184b535ea4d89..10d9e6d589914e3953fa2c6d5b07cbb8f2333bd2 100644
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -1552,7 +1552,10 @@ void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx&
 
   cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA]   = false;
   cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false;
-  cuCtx.lfnstLastScanPos = false;
+  cuCtx.lfnstLastScanPos                              = false;
+#if JVET_P1026_MTS_SIGNALLING
+  cuCtx.violatesMtsCoeffConstraint                    = false;
+#endif
 
   ChromaCbfs chromaCbfs;
   if( cu.ispMode && isLuma( partitioner.chType ) )
@@ -1564,7 +1567,11 @@ void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx&
   {
     transform_tree( *cu.cs, partitioner, cuCtx             );
   }
+
   residual_lfnst_mode( cu, cuCtx );
+#if JVET_P1026_MTS_SIGNALLING
+  mts_idx            ( cu, cuCtx );
+#endif
 }
 
 void CABACReader::rqt_root_cbf( CodingUnit& cu )
@@ -2992,7 +2999,11 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID, CUCtx&
     return;
 
   // parse transform skip and explicit rdpcm mode
+#if JVET_P1026_MTS_SIGNALLING
+  ts_flag            ( tu, compID );
+#else
   mts_coding         ( tu, compID );
+#endif
   explicit_rdpcm_mode( tu, compID );
 
 #if JVET_P0058_CHROMA_TS
@@ -3045,6 +3056,13 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID, CUCtx&
     const int lfnstLastScanPosTh = isLuma( compID ) ? LFNST_LAST_SIG_LUMA : LFNST_LAST_SIG_CHROMA;
     cuCtx.lfnstLastScanPos |= cctx.scanPosLast() >= lfnstLastScanPosTh;
   }
+#if JVET_P1026_MTS_SIGNALLING
+  if( isLuma(compID) && ( cctx.posX(cctx.scanPosLast()) >= 16 || cctx.posY(cctx.scanPosLast()) >= 16 ) )
+  {
+    cuCtx.violatesMtsCoeffConstraint = true;
+  }
+#endif
+
   // parse subblocks
   const int stateTransTab = ( tu.cs->slice->getDepQuantEnabledFlag() ? 32040 : 0 );
   int       state         = 0;
@@ -3055,10 +3073,15 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID, CUCtx&
     for( int subSetId = ( cctx.scanPosLast() >> cctx.log2CGSize() ); subSetId >= 0; subSetId--)
     {
       cctx.initSubblock       ( subSetId );
+
+#if JVET_P1026_MTS_SIGNALLING
+      if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
+#else
 #if JVET_P0058_CHROMA_TS
       if( ( tu.mtsIdx[compID] > MTS_SKIP || (tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[compID].height <= 32 && tu.blocks[compID].width <= 32)) && !tu.cu->transQuantBypass && compID == COMPONENT_Y)
 #else
       if( ( tu.mtsIdx > MTS_SKIP || ( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
+#endif
 #endif
       {
         if( ( tu.blocks[ compID ].height == 32 && cctx.cgPosY() >= ( 16 >> cctx.log2CGHeight() ) ) || ( tu.blocks[ compID ].width == 32 && cctx.cgPosX() >= ( 16 >> cctx.log2CGWidth() ) ) )
@@ -3071,6 +3094,74 @@ void CABACReader::residual_coding( TransformUnit& tu, ComponentID compID, CUCtx&
 
 }
 
+#if JVET_P1026_MTS_SIGNALLING
+void CABACReader::ts_flag( TransformUnit& tu, ComponentID compID ) // Decodes the transform-skip flag of one TU component and records the result in tu.mtsIdx.
+{
+#if JVET_P0058_CHROMA_TS
+  int tsFlag = tu.cu->bdpcmMode && isLuma(compID) ? 1 : tu.mtsIdx[compID] == MTS_SKIP ? 1 : 0; // value that is kept when no bin is decoded below
+  int ctxIdx = isLuma(compID) ? 6 : 11; // index handed to Ctx::MTSIndex: 6 for luma, 11 otherwise
+#else
+  int tsFlag = tu.cu->bdpcmMode ? 1 : tu.mtsIdx == MTS_SKIP ? 1 : 0; // value that is kept when no bin is decoded below
+  int ctxIdx = 6; // single index handed to Ctx::MTSIndex in this configuration
+#endif
+
+  if( TU::isTSAllowed ( tu, compID ) ) // a bin is read from the bitstream only when TU::isTSAllowed() holds
+  {
+    RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MTS_FLAGS, tu.blocks[compID], compID );
+    tsFlag = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) );
+  }
+  
+#if JVET_P0058_CHROMA_TS
+  tu.mtsIdx[compID] = tsFlag ? MTS_SKIP : MTS_DCT2_DCT2; // a set flag maps to MTS_SKIP, a cleared one to MTS_DCT2_DCT2
+#else
+  tu.mtsIdx = tsFlag ? MTS_SKIP : MTS_DCT2_DCT2; // a set flag maps to MTS_SKIP, a cleared one to MTS_DCT2_DCT2
+#endif
+  
+  DTRACE(g_trace_ctx, D_SYNTAX, "ts_flag() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), tsFlag); // NOTE(review): etype is always traced as COMPONENT_Y and the "mtsIdx" field carries tsFlag
+}
+
+void CABACReader::mts_idx( CodingUnit& cu, CUCtx& cuCtx ) // Decodes the MTS index for the luma component of the CU's first TU and stores it in that TU.
+{
+  TransformUnit &tu = *cu.firstTU; // the index is read from and written back to cu.firstTU only
+#if JVET_P0058_CHROMA_TS
+  int        mtsIdx = tu.mtsIdx[COMPONENT_Y]; // Transform skip flag has already been decoded
+#else
+  int        mtsIdx = tu.mtsIdx;              // Transform skip flag has already been decoded
+#endif
+  
+  if( CU::isMTSAllowed( cu, COMPONENT_Y ) && !cuCtx.violatesMtsCoeffConstraint && // bins are parsed only if MTS is allowed, no coefficient constraint violation was flagged,
+      cu.lfnstIdx == 0 && mtsIdx != MTS_SKIP && TU::getCbf(tu, COMPONENT_Y) )     // cu.lfnstIdx is 0, the TU is not MTS_SKIP and the luma cbf is set
+  {
+    RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MTS_FLAGS, tu.blocks[COMPONENT_Y], COMPONENT_Y );
+    int ctxIdx = 0; // the first bin is decoded with Ctx::MTSIndex( 0 )
+    int symbol = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) );
+    
+    if( symbol ) // a zero first bin leaves mtsIdx at the value read above
+    {
+      ctxIdx = 7; // the following bins use consecutive context indices starting at 7
+      mtsIdx = MTS_DST7_DST7; // start value; each of up to three further set bins raises it by one
+      for( int i = 0; i < 3; i++, ctxIdx++ )
+      {
+        symbol  = m_BinDecoder.decodeBin( Ctx::MTSIndex( ctxIdx ) );
+        mtsIdx += symbol;
+        
+        if( !symbol ) // the first zero bin ends the code word
+        {
+          break;
+        }
+      }
+    }
+  }
+  
+#if JVET_P0058_CHROMA_TS
+  tu.mtsIdx[COMPONENT_Y] = mtsIdx; // written back even when nothing was parsed (value is then unchanged)
+#else
+  tu.mtsIdx = mtsIdx; // written back even when nothing was parsed (value is then unchanged)
+#endif
+  
+  DTRACE(g_trace_ctx, D_SYNTAX, "mts_idx() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), mtsIdx);
+}
+#else
 void CABACReader::mts_coding( TransformUnit& tu, ComponentID compID )
 {
   const bool  tsAllowed = TU::isTSAllowed ( tu, compID );
@@ -3144,7 +3235,8 @@ void CABACReader::mts_coding( TransformUnit& tu, ComponentID compID )
   DTRACE(g_trace_ctx, D_SYNTAX, "mts_coding() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), tu.mtsIdx);
 #endif
 }
-
+#endif
+  
 void CABACReader::isp_mode( CodingUnit& cu )
 {
   if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || !cu.cs->sps->getUseISP() || cu.bdpcmMode || !CU::canUseISP( cu, getFirstComponentOfChannel( cu.chType ) ) )
@@ -3213,12 +3305,21 @@ void CABACReader::residual_lfnst_mode( CodingUnit& cu,  CUCtx& cuCtx  )
     const bool lumaFlag              = cu.isSepTree() ? (   isLuma( cu.chType ) ? true : false ) : true;
     const bool chromaFlag            = cu.isSepTree() ? ( isChroma( cu.chType ) ? true : false ) : true;
     bool nonZeroCoeffNonTsCorner8x8 = ( lumaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] ) || (chromaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] );
+#if JVET_P1026_MTS_SIGNALLING
+#if JVET_P0058_CHROMA_TS
+    const bool isTrSkip = TU::getCbf(*cu.firstTU, COMPONENT_Y) && cu.firstTU->mtsIdx[COMPONENT_Y] == MTS_SKIP;
+#else
+    const bool isTrSkip = TU::getCbf(*cu.firstTU, COMPONENT_Y) && cu.firstTU->mtsIdx == MTS_SKIP;
+#endif
+    if( !cuCtx.lfnstLastScanPos || nonZeroCoeffNonTsCorner8x8 || isTrSkip )
+#else
 #if JVET_P0058_CHROMA_TS
     const bool isNonDCT2 = (TU::getCbf(*cu.firstTU, ComponentID(COMPONENT_Y)) && cu.firstTU->mtsIdx[COMPONENT_Y] != MTS_DCT2_DCT2);
 #else
     const bool isNonDCT2 = (TU::getCbf(*cu.firstTU, ComponentID(COMPONENT_Y)) && cu.firstTU->mtsIdx != MTS_DCT2_DCT2);
 #endif
     if( !cuCtx.lfnstLastScanPos || nonZeroCoeffNonTsCorner8x8 || isNonDCT2 )
+#endif
     {
       cu.lfnstIdx = 0;
       return;
@@ -3252,10 +3353,14 @@ int CABACReader::last_sig_coeff( CoeffCodingContext& cctx, TransformUnit& tu, Co
   unsigned maxLastPosX = cctx.maxLastPosX();
   unsigned maxLastPosY = cctx.maxLastPosY();
 
+#if JVET_P1026_MTS_SIGNALLING
+  if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
+#else
 #if JVET_P0058_CHROMA_TS
   if( ( tu.mtsIdx[compID] > MTS_SKIP || (tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[compID].width <= 32 && tu.blocks[compID].height <= 32)) && !tu.cu->transQuantBypass && compID == COMPONENT_Y)
 #else
   if( ( tu.mtsIdx > MTS_SKIP || ( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
+#endif
 #endif
   {
     maxLastPosX = ( tu.blocks[ compID ].width  == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosX;
diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h
index 6425ce8149ae370719618aef9b333f40f5c9b307..7406715f55a77b9fe5c6a65041d8b08f4ea4ace4 100644
--- a/source/Lib/DecoderLib/CABACReader.h
+++ b/source/Lib/DecoderLib/CABACReader.h
@@ -135,7 +135,12 @@ public:
 
   // residual coding (clause 7.3.8.11)
   void        residual_coding           ( TransformUnit&                tu,     ComponentID     compID, CUCtx& cuCtx );
+#if JVET_P1026_MTS_SIGNALLING
+  void        ts_flag                   ( TransformUnit&                tu,     ComponentID     compID ); // decodes the per-component transform-skip flag into tu.mtsIdx
+  void        mts_idx                   ( CodingUnit&                   cu,     CUCtx&          cuCtx  ); // decodes the luma MTS index into cu.firstTU
+#else
   void        mts_coding                ( TransformUnit&                tu,     ComponentID     compID );
+#endif
   void        residual_lfnst_mode       ( CodingUnit&                   cu,     CUCtx&          cuCtx  );
   void        isp_mode                  ( CodingUnit&                   cu );
   void        explicit_rdpcm_mode       ( TransformUnit&                tu,     ComponentID     compID );
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index a9c149fc599dd61de9b0dcb4103cb1818cd021cf..322689c4604b43ba6515e58f27b74d0cd6c52d06 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -405,7 +405,9 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetMana
     READ_CODE( 2, uiCode, "pps_dep_quant_enabled_idc");        pcPPS->setPPSDepQuantEnabledIdc(uiCode);
     READ_CODE( 2, uiCode, "pps_ref_pic_list_sps_idc[0]");      pcPPS->setPPSRefPicListSPSIdc0(uiCode);
     READ_CODE( 2, uiCode, "pps_ref_pic_list_sps_idc[1]");      pcPPS->setPPSRefPicListSPSIdc1(uiCode);
+#if !JVET_P0206_TMVP_flags
     READ_CODE( 2, uiCode, "pps_temporal_mvp_enabled_idc");     pcPPS->setPPSTemporalMVPEnabledIdc(uiCode);
+#endif
     READ_CODE( 2, uiCode, "pps_mvd_l1_zero_idc");              pcPPS->setPPSMvdL1ZeroIdc(uiCode);
     READ_CODE( 2, uiCode, "pps_collocated_from_l0_idc");       pcPPS->setPPSCollocatedFromL0Idc(uiCode);
     READ_UVLC( uiCode, "pps_six_minus_max_num_merge_cand_plus1"); pcPPS->setPPSSixMinusMaxNumMergeCandPlus1(uiCode);
@@ -417,7 +419,9 @@ void HLSyntaxReader::parsePPS( PPS* pcPPS, ParameterSetManager *parameterSetMana
     pcPPS->setPPSDepQuantEnabledIdc(0);
     pcPPS->setPPSRefPicListSPSIdc0(0);
     pcPPS->setPPSRefPicListSPSIdc1(0);
+#if !JVET_P0206_TMVP_flags
     pcPPS->setPPSTemporalMVPEnabledIdc(0);
+#endif
     pcPPS->setPPSMvdL1ZeroIdc(0);
     pcPPS->setPPSCollocatedFromL0Idc(0);
     pcPPS->setPPSSixMinusMaxNumMergeCandPlus1(0);
@@ -2090,13 +2094,20 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
 
     if(!pcSlice->isIntra())
     {
+#if JVET_P0206_TMVP_flags
+      if (sps->getSPSTemporalMVPEnabledFlag())
+#else
       if (sps->getSPSTemporalMVPEnabledFlag() && !pps->getPPSTemporalMVPEnabledIdc())
+#endif
       {
         READ_FLAG( uiCode, "slice_temporal_mvp_enabled_flag" );
         pcSlice->setEnableTMVPFlag( uiCode == 1 ? true : false );
       }
       else
       {
+#if JVET_P0206_TMVP_flags
+        pcSlice->setEnableTMVPFlag(false);
+#else
         if (!sps->getSPSTemporalMVPEnabledFlag())
         {
           pcSlice->setEnableTMVPFlag(false);
@@ -2105,6 +2116,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
         {
           pcSlice->setEnableTMVPFlag((pps->getPPSTemporalMVPEnabledIdc() - 1) == 1 ? true: false);
         }
+#endif
       }
     }
 
diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp
index 38a373a251eb8b8b70562d761159928a6b993197..edf3050c903c4dc6f19fdc5435e500ec4f0e6b1c 100644
--- a/source/Lib/EncoderLib/CABACWriter.cpp
+++ b/source/Lib/EncoderLib/CABACWriter.cpp
@@ -1325,7 +1325,10 @@ void CABACWriter::cu_residual( const CodingUnit& cu, Partitioner& partitioner, C
 
   cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA]   = false;
   cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] = false;
-  cuCtx.lfnstLastScanPos = false;
+  cuCtx.lfnstLastScanPos                              = false;
+#if JVET_P1026_MTS_SIGNALLING
+  cuCtx.violatesMtsCoeffConstraint                    = false;
+#endif
 
   if( cu.ispMode && isLuma( partitioner.chType ) )
   {
@@ -1338,6 +1341,9 @@ void CABACWriter::cu_residual( const CodingUnit& cu, Partitioner& partitioner, C
   }
 
   residual_lfnst_mode( cu, cuCtx );
+#if JVET_P1026_MTS_SIGNALLING
+  mts_idx            ( cu, cuCtx );
+#endif
 }
 
 void CABACWriter::rqt_root_cbf( const CodingUnit& cu )
@@ -2766,7 +2772,11 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID,
     return;
 
   // code transform skip and explicit rdpcm mode
+#if JVET_P1026_MTS_SIGNALLING
+  ts_flag            ( tu, compID );
+#else
   mts_coding         ( tu, compID );
+#endif
   explicit_rdpcm_mode( tu, compID );
 
 #if JVET_P0058_CHROMA_TS
@@ -2832,6 +2842,13 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID,
     const int lfnstLastScanPosTh = isLuma( compID ) ? LFNST_LAST_SIG_LUMA : LFNST_LAST_SIG_CHROMA;
     cuCtx->lfnstLastScanPos |= cctx.scanPosLast() >= lfnstLastScanPosTh;
   }
+#if JVET_P1026_MTS_SIGNALLING
+  if( cuCtx && isLuma(compID) && ( cctx.posX(cctx.scanPosLast()) >= 16 || cctx.posY(cctx.scanPosLast()) >= 16 ) )
+  {
+    cuCtx->violatesMtsCoeffConstraint = true;
+  }
+#endif
+  
   // code last coeff position
   last_sig_coeff( cctx, tu, compID );
 
@@ -2845,11 +2862,16 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID,
   for( int subSetId = ( cctx.scanPosLast() >> cctx.log2CGSize() ); subSetId >= 0; subSetId--)
   {
     cctx.initSubblock       ( subSetId, sigGroupFlags[subSetId] );
+
+#if JVET_P1026_MTS_SIGNALLING
+    if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
+#else
 #if JVET_P0058_CHROMA_TS
     if( ( tu.mtsIdx[compID] > MTS_SKIP || (tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[compID].height <= 32 && tu.blocks[compID].width <= 32)) && !tu.cu->transQuantBypass && compID == COMPONENT_Y)
 #else
     if( ( tu.mtsIdx > MTS_SKIP || ( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].height <= 32 && tu.blocks[ compID ].width <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
-#endif    
+#endif
+#endif
     {
       if( ( tu.blocks[ compID ].height == 32 && cctx.cgPosY() >= ( 16 >> cctx.log2CGHeight() ) )
        || ( tu.blocks[ compID ].width  == 32 && cctx.cgPosX() >= ( 16 >> cctx.log2CGWidth()  ) ) )
@@ -2861,6 +2883,59 @@ void CABACWriter::residual_coding( const TransformUnit& tu, ComponentID compID,
   }
 }
 
+#if JVET_P1026_MTS_SIGNALLING
+void CABACWriter::ts_flag( const TransformUnit& tu, ComponentID compID ) // Encodes the transform-skip flag of one TU component.
+{
+#if JVET_P0058_CHROMA_TS
+  int tsFlag = tu.mtsIdx[compID] == MTS_SKIP ? 1 : 0; // flag is 1 exactly when this component's mtsIdx is MTS_SKIP
+  int ctxIdx = isLuma(compID) ? 6 : 11; // index handed to Ctx::MTSIndex: 6 for luma, 11 otherwise
+#else
+  int tsFlag = tu.mtsIdx == MTS_SKIP ? 1 : 0; // flag is 1 exactly when mtsIdx is MTS_SKIP
+  int ctxIdx = 6; // single index handed to Ctx::MTSIndex in this configuration
+#endif
+  
+  if( TU::isTSAllowed ( tu, compID ) ) // the bin is written only when TU::isTSAllowed() holds
+  {
+    m_BinEncoder.encodeBin( tsFlag, Ctx::MTSIndex( ctxIdx ) );
+  }
+  DTRACE( g_trace_ctx, D_SYNTAX, "ts_flag() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), tsFlag ); // NOTE(review): etype is always traced as COMPONENT_Y and the "mtsIdx" field carries tsFlag
+}
+
+void CABACWriter::mts_idx( const CodingUnit& cu, CUCtx& cuCtx ) // Encodes the MTS index of the luma component of the CU's first TU.
+{
+  TransformUnit &tu = *cu.firstTU; // only cu.firstTU is consulted
+#if JVET_P0058_CHROMA_TS
+  int        mtsIdx = tu.mtsIdx[COMPONENT_Y]; // luma entry of the per-component index array
+#else
+  int        mtsIdx = tu.mtsIdx; // single index stored on the TU
+#endif
+  
+  if( CU::isMTSAllowed( cu, COMPONENT_Y ) && !cuCtx.violatesMtsCoeffConstraint && // bins are written only if MTS is allowed, no coefficient constraint violation was flagged,
+      cu.lfnstIdx == 0 && mtsIdx != MTS_SKIP && TU::getCbf(tu, COMPONENT_Y) )     // cu.lfnstIdx is 0, the TU is not MTS_SKIP and the luma cbf is set
+  {
+    int symbol = mtsIdx != MTS_DCT2_DCT2 ? 1 : 0; // first bin: 0 for MTS_DCT2_DCT2, 1 for any other index
+    int ctxIdx = 0; // the first bin is coded with Ctx::MTSIndex( 0 )
+    
+    m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) );
+    
+    if( symbol ) // further bins follow only for a non-DCT2 index
+    {
+      ctxIdx = 7; // the following bins use consecutive context indices starting at 7
+      for( int i = 0; i < 3; i++, ctxIdx++ )
+      {
+        symbol = mtsIdx > i + MTS_DST7_DST7 ? 1 : 0; // bin i is 1 while mtsIdx exceeds MTS_DST7_DST7 + i
+        m_BinEncoder.encodeBin( symbol, Ctx::MTSIndex( ctxIdx ) );
+        
+        if( !symbol ) // the first zero bin ends the code word
+        {
+          break;
+        }
+      }
+    }
+  }
+  DTRACE( g_trace_ctx, D_SYNTAX, "mts_idx() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), mtsIdx);
+}
+#else
 void CABACWriter::mts_coding( const TransformUnit& tu, ComponentID compID )
 {
   const bool  tsAllowed = TU::isTSAllowed ( tu, compID );
@@ -2929,6 +3004,7 @@ void CABACWriter::mts_coding( const TransformUnit& tu, ComponentID compID )
   DTRACE( g_trace_ctx, D_SYNTAX, "mts_coding() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), tu.mtsIdx);
 #endif
 }
+#endif
 
 void CABACWriter::isp_mode( const CodingUnit& cu )
 {
@@ -2992,12 +3068,22 @@ void CABACWriter::residual_lfnst_mode( const CodingUnit& cu, CUCtx& cuCtx )
     const bool lumaFlag                   = cu.isSepTree() ? (   isLuma( cu.chType ) ? true : false ) : true;
     const bool chromaFlag                 = cu.isSepTree() ? ( isChroma( cu.chType ) ? true : false ) : true;
           bool nonZeroCoeffNonTsCorner8x8 = ( lumaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_LUMA] ) || (chromaFlag && cuCtx.violatesLfnstConstrained[CHANNEL_TYPE_CHROMA] );
+
+#if JVET_P1026_MTS_SIGNALLING
+#if JVET_P0058_CHROMA_TS
+    const bool isTrSkip = TU::getCbf(*cu.firstTU, COMPONENT_Y) && cu.firstTU->mtsIdx[COMPONENT_Y] == MTS_SKIP;
+#else
+    const bool isTrSkip = TU::getCbf(*cu.firstTU, COMPONENT_Y) && cu.firstTU->mtsIdx == MTS_SKIP;
+#endif
+    if( !cuCtx.lfnstLastScanPos || nonZeroCoeffNonTsCorner8x8 || isTrSkip )
+#else
 #if JVET_P0058_CHROMA_TS
     const bool isNonDCT2 = (TU::getCbf(*cu.firstTU, ComponentID(COMPONENT_Y)) && cu.firstTU->mtsIdx[COMPONENT_Y] != MTS_DCT2_DCT2);
 #else
     const bool isNonDCT2 = (TU::getCbf(*cu.firstTU, ComponentID(COMPONENT_Y)) && cu.firstTU->mtsIdx != MTS_DCT2_DCT2);
 #endif
     if( !cuCtx.lfnstLastScanPos || nonZeroCoeffNonTsCorner8x8 || isNonDCT2 )
+#endif
     {
       return;
     }
@@ -3039,10 +3125,14 @@ void CABACWriter::last_sig_coeff( CoeffCodingContext& cctx, const TransformUnit&
   unsigned maxLastPosX = cctx.maxLastPosX();
   unsigned maxLastPosY = cctx.maxLastPosY();
 
+#if JVET_P1026_MTS_SIGNALLING
+  if( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
+#else
 #if JVET_P0058_CHROMA_TS
   if ((tu.mtsIdx[compID] > MTS_SKIP || (tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[compID].width <= 32 && tu.blocks[compID].height <= 32)) && !tu.cu->transQuantBypass && compID == COMPONENT_Y)
 #else
   if( ( tu.mtsIdx > MTS_SKIP || ( tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tu.blocks[ compID ].width <= 32 && tu.blocks[ compID ].height <= 32 ) ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y )
+#endif
 #endif
   {
     maxLastPosX = ( tu.blocks[compID].width  == 32 ) ? g_uiGroupIdx[ 15 ] : maxLastPosX;
diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h
index 72c63a2a66663d55bfdb37750c63dacaff95a747..7ffde4fb19a451bbaca0b2d9507f1b19b1d2a6f9 100644
--- a/source/Lib/EncoderLib/CABACWriter.h
+++ b/source/Lib/EncoderLib/CABACWriter.h
@@ -146,7 +146,12 @@ public:
 
   // residual coding (clause 7.3.8.11)
   void        residual_coding           ( const TransformUnit&          tu,       ComponentID       compID, CUCtx* cuCtx = nullptr );
+#if JVET_P1026_MTS_SIGNALLING
+  void        ts_flag                   ( const TransformUnit&          tu,       ComponentID       compID ); // encodes the per-component transform-skip flag
+  void        mts_idx                   ( const CodingUnit&             cu,       CUCtx&            cuCtx  ); // encodes the luma MTS index of cu.firstTU
+#else
   void        mts_coding                ( const TransformUnit&          tu,       ComponentID       compID );
+#endif
   void        residual_lfnst_mode       ( const CodingUnit&             cu,       CUCtx&            cuCtx );
   void        isp_mode                  ( const CodingUnit&             cu );
   void        explicit_rdpcm_mode       ( const TransformUnit&          tu,       ComponentID       compID );
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index 364fb348946225003f4ef069dec4917fedf3dec9..db791a5c84d7f6c5a4d522c65014020f7ecafdbc 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -591,7 +591,9 @@ protected:
   int       m_PPSDepQuantEnabledIdc;
   int       m_PPSRefPicListSPSIdc0;
   int       m_PPSRefPicListSPSIdc1;
+#if !JVET_P0206_TMVP_flags
   int       m_PPSTemporalMVPEnabledIdc;
+#endif
   int       m_PPSMvdL1ZeroIdc;
   int       m_PPSCollocatedFromL0Idc;
   uint32_t  m_PPSSixMinusMaxNumMergeCandPlus1;
@@ -1513,8 +1515,10 @@ public:
   int          getPPSRefPicListSPSIdc0 ()                            { return m_PPSRefPicListSPSIdc0; }
   void         setPPSRefPicListSPSIdc1 ( int u )                     { m_PPSRefPicListSPSIdc1 = u; }
   int          getPPSRefPicListSPSIdc1 ()                            { return m_PPSRefPicListSPSIdc1; }
+#if !JVET_P0206_TMVP_flags
   void         setPPSTemporalMVPEnabledIdc ( int u )                 { m_PPSTemporalMVPEnabledIdc = u; }
   int          getPPSTemporalMVPEnabledIdc ()                        { return m_PPSTemporalMVPEnabledIdc; }
+#endif
   void         setPPSMvdL1ZeroIdc ( int u )                          { m_PPSMvdL1ZeroIdc = u; }
   int          getPPSMvdL1ZeroIdc ()                                 { return m_PPSMvdL1ZeroIdc; }
   void         setPPSCollocatedFromL0Idc ( int u )                   { m_PPSCollocatedFromL0Idc = u; }
diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp
index c3b588dbee0506d52da2fcbf9f7277778ab19f12..eb349662a354055268937183908290d59b040c32 100644
--- a/source/Lib/EncoderLib/EncGOP.cpp
+++ b/source/Lib/EncoderLib/EncGOP.cpp
@@ -2319,7 +2319,9 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
     if (m_pcEncLib->getTMVPModeId() == 2)
     {
+#if !JVET_P0206_TMVP_flags
       assert (m_pcEncLib->getPPSTemporalMVPEnabledIdc() == 0);
+#endif
       if (iGOPid == 0) // first picture in SOP (i.e. forward B)
       {
         pcSlice->setEnableTMVPFlag(0);
@@ -2330,7 +2332,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
         pcSlice->setEnableTMVPFlag(1);
       }
     }
+#if JVET_P0206_TMVP_flags
+    else if (m_pcEncLib->getTMVPModeId() == 1)
+#else
     else if (m_pcEncLib->getTMVPModeId() == 1 && m_pcEncLib->getPPSTemporalMVPEnabledIdc() != 1)
+#endif
     {
       pcSlice->setEnableTMVPFlag(1);
     }
@@ -3419,7 +3425,7 @@ static inline double calcWeightedSquaredError(const CPelBuf& org,        const C
 
 uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift
 #if ENABLE_QPA
-                                    , const uint32_t chromaShift /*= 0*/
+                                    , const uint32_t chromaShiftHor /*= 0*/, const uint32_t chromaShiftVer /*= 0*/
 #endif
                                       )
 {
@@ -3439,7 +3445,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1,
       const uint32_t   W = pic0.width;  // image width
       const uint32_t   H = pic0.height; // image height
       const double     R = double(W * H) / (1920.0 * 1080.0);
-      const uint32_t   B = Clip3<uint32_t>(0, 128 >> chromaShift, 4 * uint32_t(16.0 * sqrt(R) + 0.5)); // WPSNR block size in integer multiple of 4 (for SIMD, = 64 at full-HD)
+      const uint32_t   B = Clip3<uint32_t>(0, 128 >> chromaShiftVer, 4 * uint32_t(16.0 * sqrt(R) + 0.5)); // WPSNR block size in integer multiple of 4 (for SIMD, = 64 at full-HD)
 
       uint32_t x, y;
 
@@ -3474,7 +3480,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1,
       }
 
       // integer weighted distortion
-      sumAct = 16.0 * sqrt ((3840.0 * 2160.0) / double((W << chromaShift) * (H << chromaShift))) * double(1 << BD);
+      sumAct = 16.0 * sqrt ((3840.0 * 2160.0) / double((W << chromaShiftHor) * (H << chromaShiftVer))) * double(1 << BD);
 
       return (wmse <= 0.0) ? 0 : uint64_t(wmse * pow(sumAct, BETA) + 0.5);
     }
@@ -3725,7 +3731,7 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
     const CPelBuf orgPB(o.bufAt(0, 0), o.stride, width, height);
     const uint32_t    bitDepth = sps.getBitDepth(toChannelType(compID));
 #if ENABLE_QPA
-    const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, useWPSNR ? bitDepth : 0, ::getComponentScaleX(compID, format));
+    const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, useWPSNR ? bitDepth : 0, ::getComponentScaleX(compID, format), ::getComponentScaleY(compID, format));
 #else
     const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, 0);
 #endif
@@ -3748,7 +3754,7 @@ void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUni
       const CPelBuf& upscaledOrg = sps.getUseReshaper() ? pcPic->M_BUFS( 0, PIC_TRUE_ORIGINAL_INPUT ).get( compID ) : pcPic->M_BUFS( 0, PIC_ORIGINAL_INPUT ).get( compID );
 
 #if ENABLE_QPA
-      const uint64_t upscaledSSD = xFindDistortionPlane( upscaledRec.get( compID ), upscaledOrg, useWPSNR ? bitDepth : 0, ::getComponentScaleX( compID, format ) );
+      const uint64_t upscaledSSD = xFindDistortionPlane( upscaledRec.get( compID ), upscaledOrg, useWPSNR ? bitDepth : 0, ::getComponentScaleX( compID, format ), ::getComponentScaleY( compID, format ) );
 #else
       const uint64_t scaledSSD = xFindDistortionPlane( upscaledRec.get( compID ), upscaledOrg, 0 );
 #endif
@@ -4158,7 +4164,7 @@ void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture*
     {
       CHECK(!(conversion == IPCOLOURSPACE_UNCHANGED), "Unspecified error");
 #if ENABLE_QPA
-      uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), useWPSNR ? bitDepth : 0, ::getComponentScaleX(ch, format) );
+      uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), useWPSNR ? bitDepth : 0, ::getComponentScaleX(ch, format), ::getComponentScaleY(ch, format) );
 #else
       uiSSDtemp += xFindDistortionPlane( acPicRecFields[fieldNum].get(ch), apcPicOrgFields[fieldNum]->getOrigBuf().get(ch), 0 );
 #endif
diff --git a/source/Lib/EncoderLib/EncGOP.h b/source/Lib/EncoderLib/EncGOP.h
index e9667c5d0f0afacbd2dfcd95d8f3aa60a6d2610e..1db3355bf687c1630876348228d6beda5b7bec64 100644
--- a/source/Lib/EncoderLib/EncGOP.h
+++ b/source/Lib/EncoderLib/EncGOP.h
@@ -287,7 +287,7 @@ protected:
 
   uint64_t xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift
 #if ENABLE_QPA
-                            , const uint32_t chromaShift = 0
+                            , const uint32_t chromaShiftHor = 0, const uint32_t chromaShiftVer = 0
 #endif
                              );
 #if WCG_WPSNR
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index c195a983db58cd5103eafb08aecad9034b33e26f..9ee126d417ba8954d4d0cde259506f3f985e0856 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -1168,7 +1168,9 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps)
   pps.setPPSDepQuantEnabledIdc(getPPSDepQuantEnabledIdc());
   pps.setPPSRefPicListSPSIdc0(getPPSRefPicListSPSIdc0());
   pps.setPPSRefPicListSPSIdc1(getPPSRefPicListSPSIdc1());
+#if !JVET_P0206_TMVP_flags
   pps.setPPSTemporalMVPEnabledIdc(getPPSTemporalMVPEnabledIdc());
+#endif
   pps.setPPSMvdL1ZeroIdc(getPPSMvdL1ZeroIdc());
   pps.setPPSCollocatedFromL0Idc(getPPSCollocatedFromL0Idc());
   pps.setPPSSixMinusMaxNumMergeCandPlus1(getPPSSixMinusMaxNumMergeCandPlus1());
diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp
index a3f34e285a551a3d088addf5b9455007c7a22665..9c47b307ecae5bb40652896fa017a9166aebd045 100644
--- a/source/Lib/EncoderLib/InterSearch.cpp
+++ b/source/Lib/EncoderLib/InterSearch.cpp
@@ -6583,7 +6583,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 #else
       const bool tsAllowed  = TU::isTSAllowed ( tu, compID );
 #endif
+#if JVET_P1026_MTS_SIGNALLING
+      const bool mtsAllowed = CU::isMTSAllowed( *tu.cu, compID );
+#else
       const bool mtsAllowed = TU::isMTSAllowed( tu, compID );
+#endif
+      
       uint8_t nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests
       std::vector<TrMode> trModes;
       trModes.push_back( TrMode( 0, true ) ); //DCT2
diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp
index 062ecbafcec255080b22a7d993d7e23938aee605..d2abffdab0c46afb2d6792fb32d7aca9f808f9d4 100644
--- a/source/Lib/EncoderLib/IntraSearch.cpp
+++ b/source/Lib/EncoderLib/IntraSearch.cpp
@@ -362,10 +362,9 @@ bool IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner,
     m_ispCandListHor.clear();
     m_ispCandListVer.clear();
     m_regIntraRDListWithCosts.clear();
-    m_ispTestedModes.clear();
-    //save the number of subpartitions
-    m_ispTestedModes.numTotalParts[0] = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT));
-    m_ispTestedModes.numTotalParts[1] = (int)width >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT));
+    int numTotalPartsHor = (int)width  >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT));
+    int numTotalPartsVer = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT));
+    m_ispTestedModes.init(numTotalPartsHor, numTotalPartsVer);
   }
 
 #if ADAPTIVE_COLOR_TRANSFORM 
@@ -2328,6 +2327,15 @@ void IntraSearch::xEncCoeffQT( CodingStructure &cs, Partitioner &partitioner, co
     }
     if( TU::getCbf( currTU, compID ) )
     {
+#if JVET_P1026_MTS_SIGNALLING
+      if( isLuma(compID) )
+      {
+        CUCtx cuCtx;
+        m_CABACEstimator->residual_coding( currTU, compID, &cuCtx );
+        m_CABACEstimator->mts_idx( *currTU.cu, cuCtx );
+      }
+      else
+#endif
       m_CABACEstimator->residual_coding( currTU, compID );
     }
   }
@@ -3103,7 +3111,11 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
     tu.depth = currDepth;
 
     const bool tsAllowed  = TU::isTSAllowed( tu, COMPONENT_Y );
+#if JVET_P1026_MTS_SIGNALLING
+    const bool mtsAllowed = CU::isMTSAllowed( cu, COMPONENT_Y );
+#else
     const bool mtsAllowed = TU::isMTSAllowed( tu, COMPONENT_Y );
+#endif
     std::vector<TrMode> trModes;
 
     if( sps.getUseLFNST() )
@@ -4874,15 +4886,18 @@ void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode,
   static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>* rdModeLists[2] = { &m_ispCandListHor, &m_ispCandListVer };
 
   ISPType nextISPcandSplitType;
-  if (!m_ispTestedModes.stopTestingHorSplit && !m_ispTestedModes.stopTestingVerSplit)
+  auto& ispTestedModes = m_ispTestedModes;
+  const bool horSplitIsTerminated = ispTestedModes.splitIsFinished[HOR_INTRA_SUBPARTITIONS - 1];
+  const bool verSplitIsTerminated = ispTestedModes.splitIsFinished[VER_INTRA_SUBPARTITIONS - 1];
+  if (!horSplitIsTerminated && !verSplitIsTerminated)
   {
     nextISPcandSplitType = !lastMode ? HOR_INTRA_SUBPARTITIONS : lastMode->ispMod == HOR_INTRA_SUBPARTITIONS ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS;
   }
-  else if (!m_ispTestedModes.stopTestingHorSplit && m_ispTestedModes.stopTestingVerSplit)
+  else if (!horSplitIsTerminated && verSplitIsTerminated)
   {
     nextISPcandSplitType = HOR_INTRA_SUBPARTITIONS;
   }
-  else if (m_ispTestedModes.stopTestingHorSplit && !m_ispTestedModes.stopTestingVerSplit)
+  else if (horSplitIsTerminated && !verSplitIsTerminated)
   {
     nextISPcandSplitType = VER_INTRA_SUBPARTITIONS;
   }
@@ -4891,70 +4906,70 @@ void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode,
     return;   // no more modes will be tested
   }
 
-  int maxNumSubPartitions = m_ispTestedModes.numTotalParts[nextISPcandSplitType - 1];
+  int maxNumSubPartitions = ispTestedModes.numTotalParts[nextISPcandSplitType - 1];
 
-  if (m_ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2)
+  if (ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2)
   {
     // Split stop criteria after checking the performance of previously tested intra modes
     const int thresholdSplit1 = maxNumSubPartitions;
+    bool stopThisSplit = false;
 
-    int mode1 = m_ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 0);
+    int mode1 = ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 0);
     mode1 = mode1 == DC_IDX ? -1 : mode1;
-    int numSubPartsBestMode1 = mode1 != -1 ? m_ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode1) : -1;
-    int mode2 = m_ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 1);
+    int numSubPartsBestMode1 = mode1 != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode1) : -1;
+    int mode2 = ispTestedModes.getTestedIntraMode((ISPType)nextISPcandSplitType, 1);
     mode2 = mode2 == DC_IDX ? -1 : mode2;
-    int numSubPartsBestMode2 = mode2 != -1 ? m_ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode2) : -1;
+    int numSubPartsBestMode2 = mode2 != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)nextISPcandSplitType, mode2) : -1;
 
     // 1) The 2 most promising modes do not reach a certain number of sub-partitions
     if (numSubPartsBestMode1 != -1 && numSubPartsBestMode2 != -1)
     {
       if (numSubPartsBestMode1 < thresholdSplit1 && numSubPartsBestMode2 < thresholdSplit1)
       {
-        m_ispTestedModes.stopTestingVerSplit = nextISPcandSplitType == VER_INTRA_SUBPARTITIONS ? true : m_ispTestedModes.stopTestingVerSplit;
-        m_ispTestedModes.stopTestingHorSplit = nextISPcandSplitType == HOR_INTRA_SUBPARTITIONS ? true : m_ispTestedModes.stopTestingHorSplit;
-        return;
+        stopThisSplit = true;
       }
     }
 
-    // 2) One split is better than the other after PLANAR and one angle have been tested
-    ISPType otherSplit = nextISPcandSplitType == HOR_INTRA_SUBPARTITIONS ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS;
-    int  numSubPartsBestAngleOtherSplit = mode2 != -1 ? m_ispTestedModes.getNumCompletedSubParts(otherSplit, mode2) : -1;
-    bool stopThisSplit = false;
-    if (numSubPartsBestAngleOtherSplit != -1 && numSubPartsBestMode2 != -1)
+    if (!stopThisSplit)
     {
-      if (numSubPartsBestAngleOtherSplit > numSubPartsBestMode2)
+      // 2) One split type may be discarded by comparing the number of sub-partitions of the best angle modes of both splits 
+      ISPType otherSplit = nextISPcandSplitType == HOR_INTRA_SUBPARTITIONS ? VER_INTRA_SUBPARTITIONS : HOR_INTRA_SUBPARTITIONS;
+      int  numSubPartsBestMode2OtherSplit = mode2 != -1 ? ispTestedModes.getNumCompletedSubParts(otherSplit, mode2) : -1;
+      if (numSubPartsBestMode2OtherSplit != -1 && numSubPartsBestMode2 != -1)
       {
-        stopThisSplit = true;
-      }
-      else if (numSubPartsBestAngleOtherSplit == numSubPartsBestMode2 && numSubPartsBestAngleOtherSplit == maxNumSubPartitions)
-      {
-        double rdCostBestAngleThisSplit = m_ispTestedModes.getRDCost(nextISPcandSplitType, mode2, maxNumSubPartitions);
-        double rdCostBestAngleOtherSplit = m_ispTestedModes.getRDCost(otherSplit, mode2, maxNumSubPartitions);
-
-        if (rdCostBestAngleThisSplit == MAX_DOUBLE || rdCostBestAngleOtherSplit < rdCostBestAngleThisSplit * 1.3)
+        if (numSubPartsBestMode2OtherSplit > numSubPartsBestMode2)
         {
           stopThisSplit = true;
         }
+        else if (numSubPartsBestMode2OtherSplit == numSubPartsBestMode2 && numSubPartsBestMode2OtherSplit == maxNumSubPartitions)
+        {
+          double rdCostBestMode2ThisSplit = ispTestedModes.getRDCost(nextISPcandSplitType, mode2);
+          double rdCostBestMode2OtherSplit = ispTestedModes.getRDCost(otherSplit, mode2);
+          double threshold = 1.3;
+          if (rdCostBestMode2ThisSplit == MAX_DOUBLE || rdCostBestMode2OtherSplit < rdCostBestMode2ThisSplit * threshold)
+          {
+            stopThisSplit = true;
+          }
+        }
       }
     }
     if (stopThisSplit)
     {
-      m_ispTestedModes.stopTestingVerSplit = nextISPcandSplitType == VER_INTRA_SUBPARTITIONS ? true : m_ispTestedModes.stopTestingVerSplit;
-      m_ispTestedModes.stopTestingHorSplit = nextISPcandSplitType == HOR_INTRA_SUBPARTITIONS ? true : m_ispTestedModes.stopTestingHorSplit;
+      ispTestedModes.splitIsFinished[nextISPcandSplitType - 1] = true;
       return;
     }
   }
 
   // Now a new mode is retrieved from the list and it has to be decided whether it should be tested or not
-  if (m_ispTestedModes.candIndexInList[nextISPcandSplitType - 1] < rdModeLists[nextISPcandSplitType - 1]->size())
+  if (ispTestedModes.candIndexInList[nextISPcandSplitType - 1] < rdModeLists[nextISPcandSplitType - 1]->size())
   {
-    ModeInfo candidate = rdModeLists[nextISPcandSplitType - 1]->at(m_ispTestedModes.candIndexInList[nextISPcandSplitType - 1]);
-    m_ispTestedModes.candIndexInList[nextISPcandSplitType - 1]++;
+    ModeInfo candidate = rdModeLists[nextISPcandSplitType - 1]->at(ispTestedModes.candIndexInList[nextISPcandSplitType - 1]);
+    ispTestedModes.candIndexInList[nextISPcandSplitType - 1]++;
 
     // extra modes are only tested if ISP has won so far
-    if (m_ispTestedModes.candIndexInList[nextISPcandSplitType - 1] > m_ispTestedModes.numOrigModesToTest)
+    if (ispTestedModes.candIndexInList[nextISPcandSplitType - 1] > ispTestedModes.numOrigModesToTest)
     {
-      if (m_ispTestedModes.bestSplitSoFar != candidate.ispMod || m_ispTestedModes.bestModeSoFar == PLANAR_IDX)
+      if (ispTestedModes.bestSplitSoFar != candidate.ispMod || ispTestedModes.bestModeSoFar == PLANAR_IDX)
       {
         return;
       }
@@ -4963,7 +4978,7 @@ void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode,
     bool testCandidate = true;
 
     // we look for a reference mode that has already been tested within the window and decide to test the new one according to the reference mode costs
-    if (candidate.modeId >= DC_IDX && maxNumSubPartitions > 2 && m_ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2)
+    if (candidate.modeId >= DC_IDX && maxNumSubPartitions > 2 && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2)
     {
       const int angWindowSize = 5;
       int       numSubPartsLeftMode, numSubPartsRightMode, numSubPartsRefMode, leftIntraMode = -1, rightIntraMode = -1;
@@ -4973,8 +4988,8 @@ void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode,
 
       xFindAlreadyTestedNearbyIntraModes((int)candidate.modeId, &leftIntraMode, &rightIntraMode, (ISPType)candidate.ispMod, windowSize);
 
-      numSubPartsLeftMode = leftIntraMode != -1 ? m_ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, leftIntraMode) : -1;
-      numSubPartsRightMode = rightIntraMode != -1 ? m_ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, rightIntraMode) : -1;
+      numSubPartsLeftMode = leftIntraMode != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, leftIntraMode) : -1;
+      numSubPartsRightMode = rightIntraMode != -1 ? ispTestedModes.getNumCompletedSubParts((ISPType)candidate.ispMod, rightIntraMode) : -1;
 
       numSubPartsRefMode = std::max(numSubPartsLeftMode, numSubPartsRightMode);
 
@@ -5023,8 +5038,10 @@ void IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost)
     double thSkipISP = 1.4;
     if (bestNonISPCost > bestCostSoFar * thSkipISP)
     {
-      m_ispTestedModes.stopTestingHorSplit = true;
-      m_ispTestedModes.stopTestingVerSplit = true;
+      for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++)
+      {
+        m_ispTestedModes.splitIsFinished[splitIdx] = true;
+      }
       return;
     }
   }
diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h
index 544db3f1d9bcc3615373693cc91f414501317edc..12517db466cd7daec959b16925fbca13556db3d0 100644
--- a/source/Lib/EncoderLib/IntraSearch.h
+++ b/source/Lib/EncoderLib/IntraSearch.h
@@ -233,8 +233,7 @@ private:
     double                                      bestCost[2];
     int                                         numTestedModes[2];
     int                                         candIndexInList[2];
-    bool                                        stopTestingHorSplit;
-    bool                                        stopTestingVerSplit;
+    bool                                        splitIsFinished[2];
     int                                         numOrigModesToTest;
 
     // set a tested mode results
@@ -267,13 +266,11 @@ private:
       return modeHasBeenTested[iModeIdx][st] ? intraMode[iModeIdx][st].numCompSubParts : -1;
     }
 
-    double getRDCost(ISPType splitType, int iModeIdx, int maxNumSubParts)
+    double getRDCost(ISPType splitType, int iModeIdx) // returns the RD cost recorded for intra mode iModeIdx under split splitType
     {
       const unsigned st = splitType - 1;
       CHECKD(st > 1, "The split type is invalid!");
-      return modeHasBeenTested[iModeIdx][st] && intraMode[iModeIdx][st].numCompSubParts == maxNumSubParts
-        ? intraMode[iModeIdx][st].rdCost
-        : -1;
+      return modeHasBeenTested[iModeIdx][st] ? intraMode[iModeIdx][st].rdCost : MAX_DOUBLE; // MAX_DOUBLE is returned when this mode/split pair has not been tested
     }
 
     // get a tested intra mode index
@@ -287,16 +284,16 @@ private:
     // set everything to default values
     void clear()
     {
-      numTestedModes[0] = numTestedModes[1] = 0;
-      candIndexInList[0] = candIndexInList[1] = 0;
-      stopTestingHorSplit = false;
-      stopTestingVerSplit = false;
-      testedModes[0].clear();
-      testedModes[1].clear();
-      bestCost[0] = MAX_DOUBLE;
-      bestCost[1] = MAX_DOUBLE;
-      bestMode[0] = -1;
-      bestMode[1] = -1;
+      for (int splitIdx = 0; splitIdx < NUM_INTRA_SUBPARTITIONS_MODES - 1; splitIdx++)
+      {
+        numTestedModes [splitIdx] = 0;
+        candIndexInList[splitIdx] = 0;
+        numTotalParts  [splitIdx] = 0;
+        splitIsFinished[splitIdx] = false;
+        testedModes    [splitIdx].clear();
+        bestCost       [splitIdx] = MAX_DOUBLE;
+        bestMode       [splitIdx] = -1;
+      }
       bestModeSoFar = -1;
       bestSplitSoFar = NOT_INTRA_SUBPARTITIONS;
       numOrigModesToTest = -1;
@@ -307,6 +304,15 @@ private:
       intraMode[idx][0].clear();
       intraMode[idx][1].clear();
     }
+    void init(const int numTotalPartsHor, const int numTotalPartsVer) // resets all state via clear(), then stores the sub-partition count of each split
+    {
+      clear();
+      const int horSplit = HOR_INTRA_SUBPARTITIONS - 1, verSplit = VER_INTRA_SUBPARTITIONS - 1; // split enum value minus one is the index into the per-split arrays
+      numTotalParts  [horSplit] = numTotalPartsHor;
+      numTotalParts  [verSplit] = numTotalPartsVer;
+      splitIsFinished[horSplit] = (numTotalParts[horSplit] == 0); // a split with zero sub-partitions is flagged as finished from the start
+      splitIsFinished[verSplit] = (numTotalParts[verSplit] == 0);
+    }
   };
 
   static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> m_ispCandListHor, m_ispCandListVer;
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index adb300ef0742c29b37e2add19ed6d7ca56d1193f..39b5b733dcd4f656db2b2cc696ba959caf47cc51 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -236,7 +236,9 @@ void HLSWriter::codePPS( const PPS* pcPPS, const SPS* pcSPS )
     WRITE_CODE( pcPPS->getPPSDepQuantEnabledIdc(), 2,                        "pps_dep_quant_enabled_idc");
     WRITE_CODE( pcPPS->getPPSRefPicListSPSIdc0(), 2,                         "pps_ref_pic_list_sps_idc[0]");
     WRITE_CODE( pcPPS->getPPSRefPicListSPSIdc1(), 2,                         "pps_ref_pic_list_sps_idc[1]");
+#if !JVET_P0206_TMVP_flags
     WRITE_CODE( pcPPS->getPPSTemporalMVPEnabledIdc(), 2,                     "pps_temporal_mvp_enabled_idc");
+#endif
     WRITE_CODE( pcPPS->getPPSMvdL1ZeroIdc(), 2,                              "pps_mvd_l1_zero_idc");
     WRITE_CODE( pcPPS->getPPSCollocatedFromL0Idc(), 2,                       "pps_collocated_from_l0_idc");
     WRITE_UVLC( pcPPS->getPPSSixMinusMaxNumMergeCandPlus1(),                 "pps_six_minus_max_num_merge_cand_plus1");
@@ -1324,7 +1326,11 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
 
     if(!pcSlice->isIntra())
     {
+#if JVET_P0206_TMVP_flags
+      if( pcSlice->getSPS()->getSPSTemporalMVPEnabledFlag())
+#else
       if( pcSlice->getSPS()->getSPSTemporalMVPEnabledFlag() && !pcSlice->getPPS()->getPPSTemporalMVPEnabledIdc() )
+#endif
       {
         WRITE_FLAG( pcSlice->getEnableTMVPFlag() ? 1 : 0, "slice_temporal_mvp_enabled_flag" );
       }