diff --git a/cfg/encoder_lowdelay_P_vtm.cfg b/cfg/encoder_lowdelay_P_vtm.cfg
index 2d24145ce7acb6aabf6cc9e5b58d88f1aa0d3db0..9b3089579746d5ef9d60de6861d0896e9a4125c0 100644
--- a/cfg/encoder_lowdelay_P_vtm.cfg
+++ b/cfg/encoder_lowdelay_P_vtm.cfg
@@ -130,6 +130,7 @@ LMCSEnable                   : 1      # LMCS: 0: disable, 1:enable
 LMCSSignalType               : 0      # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG
 LMCSUpdateCtrl               : 2      # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP
 MIP                          : 1
+PROF                         : 1
 
 # Fast tools
 PBIntraFast                  : 1
diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg
index 1c7925030439df8d5d21787f90c455d2d77d3a1c..148f03230bde4390f01110b46104acdc695b9252 100644
--- a/cfg/encoder_lowdelay_vtm.cfg
+++ b/cfg/encoder_lowdelay_vtm.cfg
@@ -134,6 +134,7 @@ LMCSEnable                   : 1      # LMCS: 0: disable, 1:enable
 LMCSSignalType               : 0      # Input signal type: 0:SDR, 1:HDR-PQ, 2:HDR-HLG
 LMCSUpdateCtrl               : 2      # LMCS model update control: 0:RA, 1:AI, 2:LDB/LDP
 MIP                          : 1
+PROF                         : 1
 
 # Fast tools
 PBIntraFast                  : 1
diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg
index 7938ee72a22e3e306fe3f352ed1508229168130f..2e4c7a0332fbd01f2bda17df62aded26c0a047bd 100644
--- a/cfg/encoder_randomaccess_vtm.cfg
+++ b/cfg/encoder_randomaccess_vtm.cfg
@@ -152,6 +152,7 @@ LMCSUpdateCtrl               : 0      # LMCS model update control: 0:RA, 1:AI, 2
 MIP                          : 1
 DMVR                         : 1
 SMVD                         : 1
+PROF                         : 1
 
 # Fast tools
 PBIntraFast                  : 1
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 9d54934350bdf8d3f7fda545951cb022b7f26cc3..cb741fa353b14f8e95c114d3e9dff9240737067a 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -254,6 +254,9 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setSubPuMvpMode                                      ( m_SubPuMvpMode );
   m_cEncLib.setAffine                                            ( m_Affine );
   m_cEncLib.setAffineType                                        ( m_AffineType );
+#if JVET_O0070_PROF
+  m_cEncLib.setPROF                                              ( m_PROF );
+#endif
   m_cEncLib.setBIO                                               (m_BIO);
   m_cEncLib.setUseLMChroma                                       ( m_LMChroma );
   m_cEncLib.setCclmCollocatedChromaFlag                          ( m_cclmCollocatedChromaFlag );
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index 053b1c4b73ae7837e5fece3ab9e136438a3e666f..8e2167f99801d0a549f39842786d328e937c1b78 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -860,6 +860,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("MMVD",                                           m_MMVD,                                            true, "Enable Merge mode with Motion Vector Difference (0:off, 1:on)  [default: 1]")
   ("Affine",                                         m_Affine,                                         false, "Enable affine prediction (0:off, 1:on)  [default: off]")
   ("AffineType",                                     m_AffineType,                                     true,  "Enable affine type prediction (0:off, 1:on)  [default: on]" )
+#if JVET_O0070_PROF
+  ("PROF",                                           m_PROF,                                           false, "Enable Prediction refinement with optical flow for affine mode (0:off, 1:on)  [default: off]")
+#endif
   ("BIO",                                            m_BIO,                                             false, "Enable bi-directional optical flow")
   ("IMV",                                             m_ImvMode,                                            1, "Adaptive MV precision Mode (IMV)\n"
                                                                                                                "\t0: disabled\n"
@@ -2515,20 +2518,26 @@ bool EncAppCfg::xCheckParameter()
   xConfirmPara( m_uiMinQT[1] < 1<<MIN_CU_LOG2,                                              "Minimum QT size should be larger than or equal to 4");
   xConfirmPara( m_uiCTUSize < 16,                                                           "Maximum partition width size should be larger than or equal to 16");
   xConfirmPara( m_uiCTUSize < 16,                                                           "Maximum partition height size should be larger than or equal to 16");
+#if !JVET_O0640_PICTURE_SIZE_CONSTRAINT
   xConfirmPara( (m_iSourceWidth  % (1<<MIN_CU_LOG2))!=0,                                    "Resulting coded frame width must be a multiple of the minimum unit size");
   xConfirmPara( (m_iSourceHeight % (1<<MIN_CU_LOG2))!=0,                                    "Resulting coded frame height must be a multiple of the minimum unit size");
   xConfirmPara( (m_iSourceWidth  % (1<<MIN_CU_LOG2))!=0,                                    "Resulting coded frame width must be a multiple of the minimum unit size");
   xConfirmPara( (m_iSourceHeight % (1<<MIN_CU_LOG2))!=0,                                    "Resulting coded frame height must be a multiple of the minimum unit size");
   xConfirmPara( (m_iSourceWidth  % (1<<MIN_CU_LOG2))!=0,                                    "Resulting coded frame width must be a multiple of the minimum unit size");
   xConfirmPara( (m_iSourceHeight % (1<<MIN_CU_LOG2))!=0,                                    "Resulting coded frame height must be a multiple of the minimum unit size");
+#endif
   xConfirmPara( m_uiMaxCUDepth < 1,                                                         "MaxPartitionDepth must be greater than zero");
   xConfirmPara( (m_uiMaxCUWidth  >> m_uiMaxCUDepth) < 4,                                    "Minimum partition width size should be larger than or equal to 8");
   xConfirmPara( (m_uiMaxCUHeight >> m_uiMaxCUDepth) < 4,                                    "Minimum partition height size should be larger than or equal to 8");
   xConfirmPara( m_uiMaxCUWidth < 16,                                                        "Maximum partition width size should be larger than or equal to 16");
   xConfirmPara( m_uiMaxCUHeight < 16,                                                       "Maximum partition height size should be larger than or equal to 16");
+#if JVET_O0640_PICTURE_SIZE_CONSTRAINT
+  xConfirmPara( (m_iSourceWidth  % (std::max(8, int(m_uiMaxCUWidth  >> (m_uiMaxCUDepth - 1))))) != 0, "Resulting coded frame width must be a multiple of Max(8, the minimum CU size)");
+  xConfirmPara( (m_iSourceHeight % (std::max(8, int(m_uiMaxCUHeight >> (m_uiMaxCUDepth - 1))))) != 0, "Resulting coded frame height must be a multiple of Max(8, the minimum CU size)");
+#else
   xConfirmPara( (m_iSourceWidth  % (m_uiMaxCUWidth  >> (m_uiMaxCUDepth-1)))!=0,             "Resulting coded frame width must be a multiple of the minimum CU size");
   xConfirmPara( (m_iSourceHeight % (m_uiMaxCUHeight >> (m_uiMaxCUDepth-1)))!=0,             "Resulting coded frame height must be a multiple of the minimum CU size");
-
+#endif
 #if MAX_TB_SIZE_SIGNALLING
   xConfirmPara( m_log2MaxTbSize > 6, "Log2MaxTbSize must be 6 or smaller." );
 #endif
@@ -2549,6 +2558,10 @@ bool EncAppCfg::xCheckParameter()
   if ( m_Affine == 0 )
   {
     m_maxNumAffineMergeCand = m_SubPuMvpMode;
+#if JVET_O0070_PROF
+    if (m_PROF) msg(WARNING, "PROF is forcefully disabled when Affine is off \n");
+    m_PROF = false;
+#endif
   }
 
   xConfirmPara( m_MTS < 0 || m_MTS > 3, "MTS must be greater than 0 smaller than 4" );
@@ -3359,6 +3372,9 @@ void EncAppCfg::xPrintParameter()
     {
       msg( VERBOSE, "AffineType:%d ", m_AffineType );
     }
+#if JVET_O0070_PROF
+    msg(VERBOSE, "PROF:%d ", m_PROF);
+#endif
     msg(VERBOSE, "SubPuMvp:%d+%d ", m_SubPuMvpMode & 1, (m_SubPuMvpMode & 2) == 2);
     msg( VERBOSE, "DualITree:%d ", m_dualTree );
     msg( VERBOSE, "IMV:%d ", m_ImvMode );
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index 6a0f421b677cc2204b621467b9466285ab4e3323..afb9f1024b4376a5e85781d5fe5be468d656453b 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -253,6 +253,9 @@ protected:
   int       m_SubPuMvpMode;
   bool      m_Affine;
   bool      m_AffineType;
+#if JVET_O0070_PROF
+  bool      m_PROF;
+#endif
   bool      m_BIO;
   int       m_LMChroma;
   bool      m_cclmCollocatedChromaFlag;
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.h b/source/Lib/CommonLib/AdaptiveLoopFilter.h
index 42be89b58354401bf9e554e2edc844d0e997d5b8..da2c5e064614d95f4cd2e3ba49fe81a2eadc6982 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.h
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.h
@@ -115,7 +115,7 @@ protected:
   static const int             m_classToFilterMapping[NUM_FIXED_FILTER_SETS][MAX_NUM_ALF_CLASSES];
   static const int             m_fixedFilterSetCoeff[ALF_FIXED_FILTER_NUM][MAX_NUM_ALF_LUMA_COEFF];
   short                        m_fixedFilterSetCoeffDec[NUM_FIXED_FILTER_SETS][MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
-#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
+#if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB || JVET_O_MAX_NUM_ALF_APS_8
   short                        m_coeffApsLuma[ALF_CTB_MAX_NUM_APS][MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES];
   short                        m_clippApsLuma[ALF_CTB_MAX_NUM_APS][MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES];
 #else
diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp
index 663d34dd00339d5b7f8c8c109f8feb41c280ec22..7af9b1fdf06e828e665bca22880b3f2c5230ff8f 100644
--- a/source/Lib/CommonLib/Buffer.cpp
+++ b/source/Lib/CommonLib/Buffer.cpp
@@ -42,6 +42,83 @@
 #include "Buffer.h"
 #include "InterpolationFilter.h"
 
+#if JVET_O0070_PROF
+void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, int shiftNum, Pel offset, const ClpRng& clpRng)
+{ // Per sample: dst = ClipPel((src + ((dMvX*gradX + dMvY*gradY + 1) >> 1) + offset) >> shiftNum, clpRng).
+  int idx = 0; // running index into dMvX/dMvY: advances once per sample and is never reset per row
+  const int dIshift = 1;
+  const int dIoffset = 1 << (dIshift - 1); // rounding term added before the >> dIshift below
+
+  for (int h = 0; h < height; h++)
+  {
+    for (int w = 0; w < width; w++)
+    {
+      int32_t dI = dMvX[idx] * gradX[w] + dMvY[idx] * gradY[w]; // refinement term: MV delta times gradient, x and y summed
+      dI = (dI + dIoffset) >> dIshift;
+
+      dI = (src[w] + dI + offset) >> shiftNum; // add refinement to the source sample, then apply the caller-supplied offset and shift
+      dst[w] = (Pel)ClipPel(dI, clpRng);
+
+      idx++;
+    }
+    gradX += gradStride; // only grad/dst/src pointers step by their strides; dMvStride is not used in this function
+    gradY += gradStride; // NOTE(review): dMvX/dMvY are read as one contiguous width*height run - confirm callers lay them out that way
+    dst += dstStride;
+    src += srcStride;
+  }
+}
+
+template<bool l1PROFEnabled = true>
+void applyBiPROFCore (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, int srcStride, int width, int height, const Pel* gradX0, const Pel* gradY0, const Pel* gradX1, const Pel* gradY1, int gradStride, const int* dMvX0, const int* dMvY0, const int* dMvX1, const int* dMvY1, int dMvStride, const int8_t w0, const ClpRng& clpRng)
+{ // Weighted blend of src0/src1 into dst; src0 always gets a gradient-based refinement term, src1 only when l1PROFEnabled.
+  int idx = 16; // starts at 16 so that the `idx -= 16` taken at h == 0 brings it to 0
+  int32_t dI0 = 0;
+  int32_t dI1 = 0;
+  const int dIshift = 1;
+  const int dIoffset = 1 << (dIshift - 1); // rounding term added before the >> dIshift below
+
+  const int clipbd = clpRng.bd;
+  const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + g_GbiLog2WeightBase; // final right shift applied to the weighted sum
+  const int offset = (1 << (shiftNum - 1)) + (IF_INTERNAL_OFFS << g_GbiLog2WeightBase); // rounding for shiftNum plus IF_INTERNAL_OFFS scaled by the weight base
+
+  const int8_t w1 = g_GbiWeightBase - w0; // weight for src1 is the remainder of g_GbiWeightBase after w0
+
+  for (int h = 0; h < height; h++)
+  {
+    if (!(h & 3)) idx -= 16; // every 4th row: rewind to the first row of the 16-entry dMv table
+    idx += 4; // pre-compensates the `idx -= 4` taken at w == 0 below
+
+    for (int w = 0; w < width; w++)
+    {
+      if (!(w & 3)) idx -= 4; // every 4th column: rewind to the start of the current table row
+      dI0 = dMvX0[idx] * gradX0[w] + dMvY0[idx] * gradY0[w]; // net effect of the adjustments: idx == (h & 3) * 4 + (w & 3)
+      dI0 = (dI0 + dIoffset) >> dIshift;
+      if (l1PROFEnabled) // template constant, so only one of the two branches is ever taken per instantiation
+      {
+        dI1 = dMvX1[idx] * gradX1[w] + dMvY1[idx] * gradY1[w];
+        dI1 = (dI1 + dIoffset) >> dIshift;
+        dst[w] = (Pel)ClipPel(rightShift(((src0[w] + dI0) * w0 + (src1[w] + dI1) * w1 + offset), shiftNum), clpRng);
+      }
+      else // src1 is blended without a refinement term; dMvX1/dMvY1/gradX1/gradY1 are not read
+        dst[w] = (Pel)ClipPel(rightShift(((src0[w] + dI0) * w0 + src1[w] * w1 + offset), shiftNum), clpRng);
+
+      idx++;
+    }
+
+    gradX0 += gradStride; // dMvStride is not used in this function; the dMv tables are addressed only through idx
+    gradY0 += gradStride;
+    if (l1PROFEnabled) // second gradient pair is advanced only in the instantiation that reads it
+    {
+      gradX1 += gradStride;
+      gradY1 += gradStride;
+    }
+    dst += dstStride;
+    src0 += srcStride; // both sources share a single stride parameter
+    src1 += srcStride;
+  }
+}
+#endif
+
 template< typename T >
 void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, int rshift, int offset, const ClpRng& clpRng )
 {
@@ -86,6 +163,9 @@ void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Str
   }
 }
 
+#if JVET_O0070_PROF
+template<bool PAD = true>
+#endif
 void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth)
 {
   Pel* srcTmp = pSrc + srcStride + 1;
@@ -97,14 +177,23 @@ void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStr
   {
     for (int x = 0; x < (width - 2 * BIO_EXTEND_SIZE); x++)
     {
+#if JVET_O0570_GRAD_SIMP
+      gradYTmp[x] = ( srcTmp[x + srcStride] >> shift1 ) - ( srcTmp[x - srcStride] >> shift1 );
+      gradXTmp[x] = ( srcTmp[x + 1] >> shift1 ) - ( srcTmp[x - 1] >> shift1 );
+#else
       gradYTmp[x] = (srcTmp[x + srcStride] - srcTmp[x - srcStride]) >> shift1;
       gradXTmp[x] = (srcTmp[x + 1] - srcTmp[x - 1]) >> shift1;
+#endif
     }
     gradXTmp += gradStride;
     gradYTmp += gradStride;
     srcTmp += srcStride;
   }
 
+#if JVET_O0070_PROF
+  if (PAD)
+  {
+#endif
   gradXTmp = gradX + gradStride + 1;
   gradYTmp = gradY + gradStride + 1;
   for (int y = 0; y < (height - 2 * BIO_EXTEND_SIZE); y++)
@@ -124,6 +213,9 @@ void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStr
   ::memcpy(gradXTmp + (height - 2 * BIO_EXTEND_SIZE)*gradStride, gradXTmp + (height - 2 * BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
   ::memcpy(gradYTmp - gradStride, gradYTmp, sizeof(Pel)*(width));
   ::memcpy(gradYTmp + (height - 2 * BIO_EXTEND_SIZE)*gradStride, gradYTmp + (height - 2 * BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
+#if JVET_O0070_PROF
+  }
+#endif
 }
 
 void calcBIOParCore(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth)
@@ -280,6 +372,13 @@ PelBufferOps::PelBufferOps()
   removeHighFreq4 = removeHighFreq;
 #endif
 
+#if JVET_O0070_PROF
+  profGradFilter = gradFilterCore <false>; // PAD = false: the section of gradFilterCore guarded by `if (PAD)` is skipped
+  applyPROF      = applyPROFCore;
+  applyBiPROF[1] = applyBiPROFCore; // no template argument given, so the default l1PROFEnabled = true is used
+  applyBiPROF[0] = applyBiPROFCore <false>; // l1PROFEnabled = false: src1 is blended without its refinement term
+  roundIntVector = nullptr; // no implementation is assigned here - NOTE(review): presumably installed elsewhere (e.g. a SIMD init); confirm before calling
+#endif
 }
 
 PelBufferOps g_pelBufOP = PelBufferOps();
diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h
index 5c287d84f7402454d793450f58b932a65c7a0d36..b595ee53445ef1336d4684dc9090d74a1dad354f 100644
--- a/source/Lib/CommonLib/Buffer.h
+++ b/source/Lib/CommonLib/Buffer.h
@@ -79,6 +79,12 @@ struct PelBufferOps
   void ( *removeHighFreq8)        ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height);
   void ( *removeHighFreq4)        ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height);
 #endif
+#if JVET_O0070_PROF
+  void (*profGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth);
+  void (*applyPROF)      (Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, int shiftNum, Pel offset, const ClpRng& clpRng);
+  void (*applyBiPROF[2]) (Pel* dst, int dstStride, const Pel* src0, const Pel* src1, int srcStride, int width, int height, const Pel* gradX0, const Pel* gradY0, const Pel* gradX1, const Pel* gradY1, int gradStride, const int* dMvX0, const int* dMvY0, const int* dMvX1, const int* dMvY1, int dMvStride, const int8_t gbiWeightL0, const ClpRng& clpRng);
+  void (*roundIntVector) (int* v, int size, unsigned int nShift, const int dmvLimit);
+#endif
 };
 
 extern PelBufferOps g_pelBufOP;
diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp
index 36d3109af203229fbbe75f7a39754ad409055632..71b0c4ce17c710cd2907479ca59f0995b40815ae 100644
--- a/source/Lib/CommonLib/CodingStructure.cpp
+++ b/source/Lib/CommonLib/CodingStructure.cpp
@@ -67,6 +67,12 @@ CodingStructure::CodingStructure(CUCache& cuCache, PUCache& puCache, TUCache& tu
   , m_cuCache ( cuCache )
   , m_puCache ( puCache )
   , m_tuCache ( tuCache )
+#if JVET_O0070_PROF
+  , bestParent ( nullptr )
+#endif
+#if JVET_O1170_CHECK_BV_AT_DECODER
+  , resetIBCBuffer (false)
+#endif
 {
   for( uint32_t i = 0; i < MAX_NUM_COMPONENT; i++ )
   {
@@ -1439,4 +1445,4 @@ IbcLumaCoverage CodingStructure::getIbcLumaCoverage(const CompArea& chromaArea)
 
   return coverage;
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h
index db56903aa1a01590686b33e8729793912e17a96e..8589a75a78e47024d8e7fdadc2982458059a6700 100644
--- a/source/Lib/CommonLib/CodingStructure.h
+++ b/source/Lib/CommonLib/CodingStructure.h
@@ -97,7 +97,11 @@ public:
   bool        isLossless;
   const SPS *sps;
   const PPS *pps;
+#if JVET_O_MAX_NUM_ALF_APS_8
+  APS*       alfApss[ALF_CTB_MAX_NUM_APS];
+#else
   APS*       alfApss[MAX_NUM_APS];
+#endif
   APS *      lmcsAps;
   const VPS *vps;
   const PreCalcValues* pcv;
@@ -195,10 +199,6 @@ public:
 
   LutMotionCand motionLut;
 
-#if JVET_O1170_CHECK_BV_AT_DECODER
-  bool resetIBCBuffer;
-#endif
-
   void addMiToLut(static_vector<MotionInfo, MAX_NUM_HMVP_CANDS>& lut, const MotionInfo &mi);
 
 private:
@@ -234,7 +234,13 @@ private:
   MotionInfo *m_motionBuf;
 
 public:
-
+#if JVET_O0070_PROF
+  CodingStructure *bestParent;
+#endif
+#if JVET_O1170_CHECK_BV_AT_DECODER
+  bool resetIBCBuffer;
+#endif
+  
   MotionBuf getMotionBuf( const     Area& _area );
   MotionBuf getMotionBuf( const UnitArea& _area ) { return getMotionBuf( _area.Y() ); }
   MotionBuf getMotionBuf()                        { return getMotionBuf(  area.Y() ); }
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index d7347880934615c86d8d58ed3bdaaa8c9f2a0cb8..15f3f0650b406e841b62af05b841fec9d09baee7 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -190,7 +190,11 @@ static const int MAX_NUM_ALF_COEFF           =                     MAX_ALF_FILTE
 static const int MAX_ALF_PADDING_SIZE        =                      4;
 
 static const int ALF_FIXED_FILTER_NUM        =                     64;
+#if JVET_O_MAX_NUM_ALF_APS_8
+static const int ALF_CTB_MAX_NUM_APS         =                      8;
+#else
 static const int ALF_CTB_MAX_NUM_APS         =                      6;
+#endif
 static const int NUM_FIXED_FILTER_SETS       =                     16;
 static const int NUM_TOTAL_FILTER_SETS       =                     NUM_FIXED_FILTER_SETS + ALF_CTB_MAX_NUM_APS;
 
@@ -371,6 +375,10 @@ static const int MAX_NUM_GT2_BINS_2x2SUBBLOCK =                     2; ///< max
 static const int BIO_EXTEND_SIZE              =                     1;
 static const int BIO_TEMP_BUFFER_SIZE         =                     (MAX_CU_SIZE + 2 * BIO_EXTEND_SIZE) * (MAX_CU_SIZE + 2 * BIO_EXTEND_SIZE);
 
+#if JVET_O0070_PROF
+static const int PROF_BORDER_EXT_W            =                     1;
+static const int PROF_BORDER_EXT_H            =                     1;
+#endif
 static const int GBI_NUM =                                          5; ///< the number of weight options
 static const int GBI_DEFAULT =                                      ((uint8_t)(GBI_NUM >> 1)); ///< Default weighting index representing for w=0.5
 static const int GBI_SIZE_CONSTRAINT =                            256; ///< disabling GBi if cu size is smaller than 256
diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h
index ba1335b4bac1eb3d09ad3ff929f6bff71dfcb79b..af67152c7a99756347c8692c9679de86f34f3946 100644
--- a/source/Lib/CommonLib/ContextModelling.h
+++ b/source/Lib/CommonLib/ContextModelling.h
@@ -129,11 +129,23 @@ public:
       }
     }
 #undef UPDATE
+
+
+#if JVET_O0617_SIG_FLAG_CONTEXT_REDUCTION
+    int ctxOfs = std::min((sumAbs+1)>>1, 3) + ( diag < 2 ? 4 : 0 );
+#else
     int ctxOfs = std::min( sumAbs, 5 ) + ( diag < 2 ? 6 : 0 );
+#endif
+
     if( m_chType == CHANNEL_TYPE_LUMA )
     {
+#if JVET_O0617_SIG_FLAG_CONTEXT_REDUCTION
+      ctxOfs += diag < 5 ? 4 : 0;
+#else
       ctxOfs += diag < 5 ? 6 : 0;
+#endif
     }
+
     m_tmplCpDiag = diag;
     m_tmplCpSum1 = sumAbs - numPos;
     return m_sigFlagCtxSet[std::max( 0, state-1 )]( ctxOfs );
diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp
index d1d3c9cff6db554e56b8cd59c9f550aac8c7dea5..8903da8fae4c4653fdab9fbff1ff9bb953a7b9ad 100755
--- a/source/Lib/CommonLib/Contexts.cpp
+++ b/source/Lib/CommonLib/Contexts.cpp
@@ -538,6 +538,50 @@ const CtxSet ContextSetCfg::SigCoeffGroup[] =
 
 const CtxSet ContextSetCfg::SigFlag[] =
 {
+#if JVET_O0617_SIG_FLAG_CONTEXT_REDUCTION
+  ContextSetCfg::addCtxSet
+  ({
+    {  88, 166, 182, 169, 101, 167, 168, 155, 194, 213, 183, 156, },
+    { 132, 152, 168, 140, 177, 182, 154, 155, 151, 213, 169, 156, },
+    {  89, 138, 139, 140, 150, 139, 140, 141, 138, 185, 141, 157, },
+    {  12,   9,   9,  10,   9,   9,   9,   9,   8,   8,   8,  10, },
+  }),
+  ContextSetCfg::addCtxSet
+  ({
+    {  27, 167, 168, 140, 180, 199, 214, 186, },
+    { 133, 138, 139, 140, 181, 214, 200, 157, },
+    { 134, 153, 154, 155, 167, 186, 186, 143, },
+    {   9,   9,   9,  13,   5,   5,   8,   9, },
+  }),
+  ContextSetCfg::addCtxSet
+  ({
+    { 152, 156, 186, 202, 182, 249, 247, 207, 182, 223, 223, 223, },
+    { 123, 142, 172, 218, 138, 250, 248, 223, 125, 223, 223, 223, },
+    {  93, 142, 143, 175, 153, 223, 223, 238, 154, 223, 223, 223, },
+    {   9,  12,   8,   8,   8,   8,   8,   5,   8,   0,   0,   0, },
+  }),
+  ContextSetCfg::addCtxSet
+  ({
+    { 182, 171, 143, 190, 183, 223, 223, 223, },
+    { 168, 156, 216, 249, 169, 223, 223, 223, },
+    { 138, 173, 157, 223, 170, 223, 223, 223, },
+    {   8,  12,   8,   8,   4,   0,   0,   0, },
+  }),
+  ContextSetCfg::addCtxSet
+  ({
+    { 123, 175, 223, 223, 212, 223, 223, 223,   0, 223, 223, 223, },
+    { 123, 223, 205, 223, 138, 223, 223, 223, 196, 223, 223, 223, },
+    { 107, 206, 223, 223,  93, 223, 223, 238,  55, 223, 223, 223, },
+    {   8,   8,   8,   8,   8,   0,   4,   4,   0,   0,   0,   0, },
+  }),
+  ContextSetCfg::addCtxSet
+  ({
+    { 167, 187, 249, 207, 181, 223, 223, 223, },
+    { 167, 157, 191, 223, 152, 223, 223, 223, },
+    { 152, 236, 223, 223, 123, 223, 223, 223, },
+    {   8,   8,   8,   8,   4,   0,   0,   0, },
+  }),
+#else
   ContextSetCfg::addCtxSet
   ({
     {  88, 166, 152, 182, 168, 154, 116, 167, 182, 168, 183, 155, 208, 213, 183, 183, 169, 185, },
@@ -580,6 +624,7 @@ const CtxSet ContextSetCfg::SigFlag[] =
     { 137, 250, 223, 237, 234, 223, 123, 223, 223, 223, 223, 223, },
     {   8,   8,   1,   8,   8,   8,   4,   0,   0,   0,   0,   0, },
   })
+#endif
 };
 
 const CtxSet ContextSetCfg::ParFlag[] =
diff --git a/source/Lib/CommonLib/DepQuant.cpp b/source/Lib/CommonLib/DepQuant.cpp
index 152fe036d39b71d6d30e1c68b7ba0a52d2e9cd90..0e97c92f19bd72f750b604bd0b7ecc65f1676f1d 100644
--- a/source/Lib/CommonLib/DepQuant.cpp
+++ b/source/Lib/CommonLib/DepQuant.cpp
@@ -401,12 +401,20 @@ namespace DQIntern
       const int diag        = m_scanId2BlkPos[nextScanIdx].x + m_scanId2BlkPos[nextScanIdx].y;
       if( m_chType == CHANNEL_TYPE_LUMA )
       {
+#if JVET_O0617_SIG_FLAG_CONTEXT_REDUCTION
+        scanInfo.sigCtxOffsetNext = ( diag < 2 ? 8 : diag < 5 ?  4 : 0 );
+#else
         scanInfo.sigCtxOffsetNext = ( diag < 2 ? 12 : diag < 5 ?  6 : 0 );
+#endif
         scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 16 : diag < 3 ? 11 : diag < 10 ? 6 : 1 );
       }
       else
       {
+#if JVET_O0617_SIG_FLAG_CONTEXT_REDUCTION
+        scanInfo.sigCtxOffsetNext = ( diag < 2 ? 4 : 0 );
+#else
         scanInfo.sigCtxOffsetNext = ( diag < 2 ? 6 : 0 );
+#endif
         scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 6 : 1 );
       }
       scanInfo.nextInsidePos      = nextScanIdx & m_sbbMask;
@@ -452,7 +460,11 @@ namespace DQIntern
     static const unsigned sm_numCtxSetsSig    = 3;
     static const unsigned sm_numCtxSetsGtx    = 2;
     static const unsigned sm_maxNumSigSbbCtx  = 2;
+#if JVET_O0617_SIG_FLAG_CONTEXT_REDUCTION
+    static const unsigned sm_maxNumSigCtx     = 12;
+#else
     static const unsigned sm_maxNumSigCtx     = 18;
+#endif
     static const unsigned sm_maxNumGtxCtx     = 21;
 
   private:
@@ -570,7 +582,11 @@ namespace DQIntern
     {
       BinFracBits*    bits    = m_sigFracBits [ ctxSetId ];
       const CtxSet&   ctxSet  = Ctx::SigFlag  [ chType + 2*ctxSetId ];
+#if JVET_O0617_SIG_FLAG_CONTEXT_REDUCTION
+      const unsigned  numCtx  = ( chType == CHANNEL_TYPE_LUMA ? 12 : 8 );
+#else
       const unsigned  numCtx  = ( chType == CHANNEL_TYPE_LUMA ? 18 : 12 );
+#endif
       for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ )
       {
         bits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) );
@@ -679,7 +695,11 @@ namespace DQIntern
   {
     CHECKD( lambda <= 0.0, "Lambda must be greater than 0" );
 
+#if JVET_O0919_TS_MIN_QP
+    const int         qpDQ                  = cQP.Qp(tu.mtsIdx==MTS_SKIP && isLuma(compID)) + 1;
+#else
     const int         qpDQ                  = cQP.Qp + 1;
+#endif
     const int         qpPer                 = qpDQ / 6;
     const int         qpRem                 = qpDQ - 6 * qpPer;
     const SPS&        sps                   = *tu.cs->sps;
@@ -748,7 +768,11 @@ namespace DQIntern
     }
 
     //----- set dequant parameters -----
+#if JVET_O0919_TS_MIN_QP
+    const int         qpDQ                  = cQP.Qp(tu.mtsIdx==MTS_SKIP && isLuma(compID)) + 1;
+#else
     const int         qpDQ                  = cQP.Qp + 1;
+#endif
     const int         qpPer                 = qpDQ / 6;
     const int         qpRem                 = qpDQ - 6 * qpPer;
     const SPS&        sps                   = *tu.cs->sps;
@@ -1163,7 +1187,11 @@ namespace DQIntern
         }
 #undef UPDATE
         TCoeff sumGt1 = sumAbs1 - sumNum;
+#if JVET_O0617_SIG_FLAG_CONTEXT_REDUCTION
+        m_sigFracBits = m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + std::min( (sumAbs1+1)>>1, 3 )];
+#else
         m_sigFracBits = m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + (sumAbs1 < 5 ? sumAbs1 : 5)];
+#endif
         m_coeffFracBits = m_gtxFracBitsArray[scanInfo.gtxCtxOffsetNext + (sumGt1 < 4 ? sumGt1 : 4)];
 
         TCoeff  sumAbs = m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos] >> 8;
@@ -1277,7 +1305,11 @@ namespace DQIntern
       TCoeff  sumNum  =   tinit        & 7;
       TCoeff  sumAbs1 = ( tinit >> 3 ) & 31;
       TCoeff  sumGt1  = sumAbs1        - sumNum;
+#if JVET_O0617_SIG_FLAG_CONTEXT_REDUCTION
+      m_sigFracBits   = m_sigFracBitsArray[ scanInfo.sigCtxOffsetNext + std::min( (sumAbs1+1)>>1, 3 ) ];
+#else
       m_sigFracBits   = m_sigFracBitsArray[ scanInfo.sigCtxOffsetNext + ( sumAbs1 < 5 ? sumAbs1 : 5 ) ];
+#endif
       m_coeffFracBits = m_gtxFracBitsArray[ scanInfo.gtxCtxOffsetNext + ( sumGt1  < 4 ? sumGt1  : 4 ) ];
     }
   }
@@ -1721,7 +1753,11 @@ void DepQuant::quant( TransformUnit &tu, const ComponentID &compID, const CCoeff
   if( tu.cs->slice->getDepQuantEnabledFlag() && (tu.mtsIdx != MTS_SKIP || !isLuma(compID)) )
   {
     //===== scaling matrix ====
+#if JVET_O0919_TS_MIN_QP
+    const int         qpDQ            = cQP.Qp(tu.mtsIdx==MTS_SKIP && isLuma(compID)) + 1;
+#else
     const int         qpDQ            = cQP.Qp + 1;
+#endif
     const int         qpPer           = qpDQ / 6;
     const int         qpRem           = qpDQ - 6 * qpPer;
     const CompArea    &rect           = tu.blocks[compID];
@@ -1744,7 +1780,11 @@ void DepQuant::dequant( const TransformUnit &tu, CoeffBuf &dstCoeff, const Compo
 {
   if( tu.cs->slice->getDepQuantEnabledFlag() && (tu.mtsIdx != MTS_SKIP || !isLuma(compID)) )
   {
+#if JVET_O0919_TS_MIN_QP
+    const int         qpDQ            = cQP.Qp(tu.mtsIdx==MTS_SKIP && isLuma(compID)) + 1;
+#else
     const int         qpDQ            = cQP.Qp + 1;
+#endif
     const int         qpPer           = qpDQ / 6;
     const int         qpRem           = qpDQ - 6 * qpPer;
     const CompArea    &rect           = tu.blocks[compID];
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index 0a9d1d834c67eb7174508a52d17c33343864e620..ae31a9a74c9bc98dc72441fd5c763ff6798cace7 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -57,6 +57,11 @@ InterPrediction::InterPrediction()
 , m_maxCompIDToPred ( MAX_NUM_COMPONENT )
 , m_pcRdCost        ( nullptr )
 , m_storedMv        ( nullptr )
+#if JVET_O0070_PROF
+, m_skipPROF (false)
+, m_encOnly  (false)
+, m_isBi     (false)
+#endif
 , m_gradX0(nullptr)
 , m_gradY0(nullptr)
 , m_gradX1(nullptr)
@@ -471,6 +476,9 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList&
     if ( pu.cu->affine )
     {
       CHECK( bioApplied, "BIO is not allowed with affine" );
+#if JVET_O0070_PROF
+      m_iRefListIdx = eRefPicList;
+#endif
       xPredAffineBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx ), mv, pcYuvPred, bi, pu.cu->slice->clpRng( compID ) );
     }
     else
@@ -527,7 +535,13 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
       if (biocheck0
         && biocheck1
         && PU::isBiPredFromDifferentDir(pu)
+#if JVET_O0634_BDOF_SIZE_CONSTRAINT
+        && (pu.Y().height >= 8)
+        && (pu.Y().width >= 8)
+        && ((pu.Y().height * pu.Y().width) >= 128)
+#else
         && pu.Y().height != 4
+#endif
        )
       {
         bioApplied = true;
@@ -915,13 +929,99 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
   const int iVerMax = ( sps.getPicHeightInLumaSamples()    + iOffset -      pu.Y().y - 1 ) << iMvShift;
   const int iVerMin = (      -(int)pu.cs->pcv->maxCUHeight - iOffset - (int)pu.Y().y + 1 ) << iMvShift;
 
+#if !JVET_O0070_PROF
   PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]);
+#endif
   const int vFilterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA;
 
   const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL;
   bool      wrapRef = false;
   const bool subblkMVSpreadOverLimit = isSubblockVectorSpreadOverLimit( iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY, pu.interDir );
 
+#if JVET_O0070_PROF
+  bool enablePROF = (sps.getUsePROF()) && (!m_skipPROF) && (compID == COMPONENT_Y);
+  enablePROF &= !((pu.cu->affineType == AFFINEMODEL_6PARAM && _mv[0] == _mv[1] && _mv[0] == _mv[2]) || (pu.cu->affineType == AFFINEMODEL_4PARAM && _mv[0] == _mv[1]));
+  enablePROF &= !subblkMVSpreadOverLimit;
+  const int profThres = 1 << (iBit + (m_isBi ? 1 : 0));
+  enablePROF &= !m_encOnly || pu.cu->slice->getCheckLDC() || iDMvHorX > profThres || iDMvHorY > profThres || iDMvVerX > profThres || iDMvVerY > profThres || iDMvHorX < -profThres || iDMvHorY < -profThres || iDMvVerX < -profThres || iDMvVerY < -profThres;
+
+  if (compID == COMPONENT_Y)
+  {
+    m_applyPROF[m_iRefListIdx] = enablePROF;
+  }
+
+  bool isLast = enablePROF ? false : !bi;
+
+  const int cuExtW = pu.blocks[compID].width + PROF_BORDER_EXT_W * 2;
+  const int cuExtH = pu.blocks[compID].height + PROF_BORDER_EXT_H * 2;
+
+  PelBuf gradXExt(m_gradBuf[m_iRefListIdx][0], cuExtW, cuExtH);
+  PelBuf gradYExt(m_gradBuf[m_iRefListIdx][1], cuExtW, cuExtH);
+
+  const int MAX_FILTER_SIZE = std::max<int>(NTAPS_LUMA, NTAPS_CHROMA);
+  const int dstExtW = ((blockWidth + PROF_BORDER_EXT_W * 2 + 7) >> 3) << 3;
+  const int dstExtH = blockHeight + PROF_BORDER_EXT_H * 2;
+  PelBuf dstExtBuf(m_filteredBlockTmp[1][compID], dstExtW, dstExtH);
+
+  const int refExtH = dstExtH + MAX_FILTER_SIZE - 1;
+  PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], dstExtW, refExtH);
+
+  PelBuf &dstBuf = dstPic.bufs[compID];
+
+  int *dMvScaleHor = m_dMvBuf[m_iRefListIdx];
+  int *dMvScaleVer = m_dMvBuf[m_iRefListIdx] + 16;
+
+  if (enablePROF && !bi)
+  {
+    int* dMvH = dMvScaleHor;
+    int* dMvV = dMvScaleVer;
+    int quadHorX = iDMvHorX << 2;
+    int quadHorY = iDMvHorY << 2;
+    int quadVerX = iDMvVerX << 2;
+    int quadVerY = iDMvVerY << 2;
+
+    dMvH[0] = ((iDMvHorX + iDMvVerX) << 1) - ((quadHorX + quadVerX) << 1);
+    dMvV[0] = ((iDMvHorY + iDMvVerY) << 1) - ((quadHorY + quadVerY) << 1);
+
+    for (int w = 1; w < blockWidth; w++)
+    {
+      dMvH[w] = dMvH[w - 1] + quadHorX;
+      dMvV[w] = dMvV[w - 1] + quadHorY;
+    }
+
+    dMvH += blockWidth;
+    dMvV += blockWidth;
+    for (int h = 1; h < blockHeight; h++)
+    {
+      for (int w = 0; w < blockWidth; w++)
+      {
+        dMvH[w] = dMvH[w - blockWidth] + quadVerX;
+        dMvV[w] = dMvV[w - blockWidth] + quadVerY;
+      }
+      dMvH += blockWidth;
+      dMvV += blockWidth;
+    }
+
+    const int bdlimit = std::max<int>(6, clpRng.bd - 6);
+    const int dmvLimit = 1 << bdlimit;
+
+    if (!g_pelBufOP.roundIntVector)
+    {
+      for (int idx = 0; idx < blockWidth * blockHeight; idx++)
+      {
+        roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], shift);
+        dMvScaleHor[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleHor[idx]);
+        dMvScaleVer[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleVer[idx]);
+      }
+    }
+    else
+    {
+      int sz = blockWidth * blockHeight;
+      g_pelBufOP.roundIntVector(dMvScaleHor, sz, shift, dmvLimit);
+      g_pelBufOP.roundIntVector(dMvScaleVer, sz, shift, dmvLimit);
+    }
+  }
+#endif
   // get prediction block by block
   for ( int h = 0; h < cxHeight; h += blockHeight )
   {
@@ -1007,23 +1107,108 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
       }
 
       const CPelBuf refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, pu.blocks[compID].offset(xInt + w, yInt + h), pu.blocks[compID] ), wrapRef );
+#if !JVET_O0070_PROF
       PelBuf &dstBuf = dstPic.bufs[compID];
+#endif
+
+#if JVET_O0070_PROF
+      Pel* ref = (Pel*) refBuf.buf;
+      Pel* dst = dstBuf.buf + w + h * dstBuf.stride;
+
+      int refStride = refBuf.stride;
+      int dstStride = dstBuf.stride;
+
+      int bw = blockWidth;
+      int bh = blockHeight;
+
+      if (enablePROF)
+      {
+        dst = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
+        dstStride = dstExtBuf.stride;
+      }
+#endif
 
       if ( yFrac == 0 )
       {
+#if JVET_O0070_PROF
+        m_if.filterHor( compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, xFrac, isLast, chFmt, clpRng);
+#else
         m_if.filterHor( compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, blockWidth, blockHeight, xFrac, !bi, chFmt, clpRng );
+#endif
       }
       else if ( xFrac == 0 )
       {
+#if JVET_O0070_PROF
+        m_if.filterVer( compID, (Pel*) ref, refStride, dst, dstStride, bw, bh, yFrac, true, isLast, chFmt, clpRng);
+#else
         m_if.filterVer( compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, blockWidth, blockHeight, yFrac, true, !bi, chFmt, clpRng );
+#endif
       }
       else
       {
+#if JVET_O0070_PROF
+        m_if.filterHor( compID, (Pel*)ref - ((vFilterSize>>1) -1)*refStride, refStride, tmpBuf.buf, tmpBuf.stride, bw, bh+vFilterSize-1, xFrac, false,      chFmt, clpRng);
+#else
         m_if.filterHor( compID, (Pel*) refBuf.buf - ((vFilterSize>>1) -1)*refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, blockWidth, blockHeight+vFilterSize-1, xFrac, false,      chFmt, clpRng);
+#endif
         JVET_J0090_SET_CACHE_ENABLE( false );
+#if JVET_O0070_PROF
+        m_if.filterVer( compID, tmpBuf.buf + ((vFilterSize>>1) -1)*tmpBuf.stride, tmpBuf.stride, dst, dstStride, bw, bh, yFrac, false, isLast, chFmt, clpRng);
+#else
         m_if.filterVer( compID, tmpBuf.buf + ((vFilterSize>>1) -1)*tmpBuf.stride, tmpBuf.stride, dstBuf.buf + w + h * dstBuf.stride, dstBuf.stride, blockWidth, blockHeight, yFrac, false, !bi, chFmt, clpRng);
+#endif
         JVET_J0090_SET_CACHE_ENABLE( true );
       }
+#if JVET_O0070_PROF
+      if (enablePROF)
+      {
+        const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
+        const int xOffset = xFrac >> 3;
+        const int yOffset = yFrac >> 3;
+
+        const int refOffset = (blockHeight + 1) * refStride;
+        const int dstOffset = (blockHeight + 1)* dstStride;
+
+        const Pel* refPel = ref - (1 - yOffset) * refStride + xOffset - 1;
+        Pel* dstPel = dst - dstStride - 1;
+        for (int pw = 0; pw < blockWidth + 2; pw++)
+        {
+          dstPel[pw] = leftShift_round(refPel[pw], shift) - (Pel)IF_INTERNAL_OFFS;
+          dstPel[pw+dstOffset] = leftShift_round(refPel[pw+refOffset], shift) - (Pel)IF_INTERNAL_OFFS;
+        }
+
+        refPel = ref + yOffset * refBuf.stride + xOffset;
+        dstPel = dst;
+        for (int ph = 0; ph < blockHeight; ph++, refPel += refStride, dstPel += dstStride)
+        {
+          dstPel[-1] = leftShift_round(refPel[-1], shift) - (Pel)IF_INTERNAL_OFFS;
+          dstPel[blockWidth] = leftShift_round(refPel[blockWidth], shift) - (Pel)IF_INTERNAL_OFFS;
+        }
+
+        PelBuf gradXBuf = gradXExt.subBuf(w, h, blockWidth + 2, blockHeight + 2);
+        PelBuf gradYBuf = gradYExt.subBuf(w, h, blockWidth + 2, blockHeight + 2);
+        g_pelBufOP.profGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, gradXBuf.stride, gradXBuf.buf, gradYBuf.buf, clpRng.bd);
+
+        const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
+        const Pel offset = (1 << (shiftNum - 1)) + IF_INTERNAL_OFFS;
+        Pel* src = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
+        Pel* gX = gradXBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
+        Pel* gY = gradYBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
+
+        Pel * dstY = dstBuf.bufAt(w, h);
+
+        if (!bi)
+        {
+          g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, shiftNum, offset, clpRng);
+        }
+        else
+        {
+          PelBuf srcExtBuf(src, dstExtBuf.stride, Size(blockWidth, blockHeight));
+          PelBuf destBuf(dstY, dstBuf.stride, Size(blockWidth, blockHeight));
+          destBuf.copyFrom(srcExtBuf);
+        }
+      }
+#endif
     }
   }
 }
@@ -1231,6 +1416,17 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB
 
   if( iRefIdx0 >= 0 && iRefIdx1 >= 0 )
   {
+#if JVET_O0070_PROF
+    if (pu.cu->affine && (m_applyPROF[0] || m_applyPROF[1]))
+    {
+      xApplyBiPROF(pu, pcYuvSrc0.bufs[COMPONENT_Y], pcYuvSrc1.bufs[COMPONENT_Y], pcYuvDst.bufs[COMPONENT_Y], clpRngs.comp[COMPONENT_Y]);
+      pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx, true);
+#if JVET_O0108_DIS_DMVR_BDOF_CIIP
+      CHECK(yuvDstTmp, "yuvDstTmp is disallowed with PROF");
+#endif
+      return;
+    }
+#endif
 #if JVET_O0681_DIS_BPWA_CIIP
     if( pu.cu->GBiIdx != GBI_DEFAULT && (yuvDstTmp || !pu.mhIntraFlag) )
 #else
@@ -1337,6 +1533,124 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB
   }
 }
 
+#if JVET_O0070_PROF
+void InterPrediction::xApplyBiPROF(const PredictionUnit &pu, const CPelBuf& pcYuvSrc0, const CPelBuf& pcYuvSrc1, PelBuf& pcYuvDst, const ClpRng& clpRng)
+{ // Builds the per-sample MV offset tables for every list flagged in m_applyPROF, then passes both sources, the stored gradients and the tables to g_pelBufOP.applyBiPROF, which writes pcYuvDst.
+  int blockWidth = AFFINE_MIN_BLOCK_SIZE; // the offset table covers one blockWidth x blockHeight sub-block
+  int blockHeight = AFFINE_MIN_BLOCK_SIZE;
+
+  CHECK(!m_applyPROF[0] && !m_applyPROF[1], "xApplyBiPROF() applies PROF for at least one list."); // caller must have flagged at least one list
+  const int width = pu.Y().width; // luma PU dimensions
+  const int height = pu.Y().height;
+
+  const int bit = MAX_CU_DEPTH; // fixed-point scale (in bits) applied to the control-point MV differences below
+  const int shift = bit - 4 + MV_FRACTIONAL_BITS_INTERNAL; // same form as the shift expression in xPredAffineBlk, with bit in place of iBit
+  const int bdlimit = std::max<int>(6, clpRng.bd - 6); // clip exponent: bit depth - 6, but never below 6
+  const int dmvLimit = 1 << bdlimit; // scalar path clips offsets to [-dmvLimit, dmvLimit - 1]
+
+  for (int list = 0; list < 2; list++)
+  {
+    if (m_applyPROF[list]) // lists not flagged keep whatever is already in m_dMvBuf[list]
+    {
+      Mv mvLT = pu.mvAffi[list][0]; // affine control-point MVs; mvLB is only read for the 6-parameter model
+      Mv mvRT = pu.mvAffi[list][1];
+      Mv mvLB = pu.mvAffi[list][2];
+
+      int dMvHorX, dMvHorY, dMvVerX, dMvVerY;
+      dMvHorX = (mvRT - mvLT).getHor() << (bit - g_aucLog2[width]); // horizontal MV gradient, scaled by 2^(bit - g_aucLog2[width])
+      dMvHorY = (mvRT - mvLT).getVer() << (bit - g_aucLog2[width]);
+      if (pu.cu->affineType == AFFINEMODEL_6PARAM) // 6-parameter model: vertical gradient comes from the third control point
+      {
+        dMvVerX = (mvLB - mvLT).getHor() << (bit - g_aucLog2[height]);
+        dMvVerY = (mvLB - mvLT).getVer() << (bit - g_aucLog2[height]);
+      }
+      else // otherwise the vertical gradient is the horizontal one rotated by 90 degrees: (x, y) -> (-y, x)
+      {
+        dMvVerX = -dMvHorY;
+        dMvVerY = dMvHorX;
+      }
+
+      int *dMvScaleHor = m_dMvBuf[list]; // horizontal offsets occupy the start of the per-list buffer
+      int *dMvScaleVer = m_dMvBuf[list] + 16; // vertical offsets start 16 ints in (buffer is declared as 16 * 2 ints per list)
+
+      int* dMvH = dMvScaleHor; // write cursors, advanced one row at a time
+      int* dMvV = dMvScaleVer;
+      int  quadHorX = dMvHorX << 2; // per-sample steps: 4x the gradients
+      int  quadHorY = dMvHorY << 2;
+      int  quadVerX = dMvVerX << 2;
+      int  quadVerY = dMvVerY << 2;
+
+      dMvH[0] = ((dMvHorX + dMvVerX) << 1) - ((quadHorX + quadVerX) << 1); // entry for sample (0,0); algebraically -6 * (dMvHorX + dMvVerX)
+      dMvV[0] = ((dMvHorY + dMvVerY) << 1) - ((quadHorY + quadVerY) << 1);
+
+      for (int w = 1; w < blockWidth; w++) // first row: accumulate the horizontal step
+      {
+        dMvH[w] = dMvH[w - 1] + quadHorX;
+        dMvV[w] = dMvV[w - 1] + quadHorY;
+      }
+
+      dMvH += blockWidth;
+      dMvV += blockWidth;
+      for (int h = 1; h < blockHeight; h++) // remaining rows: row above plus the vertical step
+      {
+        for (int w = 0; w < blockWidth; w++)
+        {
+          dMvH[w] = dMvH[w - blockWidth] + quadVerX;
+          dMvV[w] = dMvV[w - blockWidth] + quadVerY;
+        }
+        dMvH += blockWidth;
+        dMvV += blockWidth;
+      }
+
+      if (!g_pelBufOP.roundIntVector) // no vector routine installed: scalar round-and-clip
+      {
+        for (int idx = 0; idx < blockWidth * blockHeight; idx++)
+        {
+          roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], shift);
+          dMvScaleHor[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleHor[idx]);
+          dMvScaleVer[idx] = Clip3(-dmvLimit, dmvLimit - 1, dMvScaleVer[idx]);
+        }
+      }
+      else // NOTE(review): presumably performs the same rounding and clipping as the scalar path - confirm
+      {
+        int sz = blockWidth * blockHeight;
+        g_pelBufOP.roundIntVector(dMvScaleHor, sz, shift, dmvLimit);
+        g_pelBufOP.roundIntVector(dMvScaleVer, sz, shift, dmvLimit);
+      }
+    }
+  }
+
+  const int cuExtW = width + PROF_BORDER_EXT_W * 2; // gradient planes carry a PROF_BORDER_EXT_W/H border on every side
+  const int cuExtH = height + PROF_BORDER_EXT_H * 2;
+
+  PelBuf gradXExt0 = PelBuf(m_gradBuf[REF_PIC_LIST_0][0], cuExtW, cuExtH); // views over m_gradBuf; presumably filled earlier by xPredAffineBlk - verify
+  PelBuf gradYExt0 = PelBuf(m_gradBuf[REF_PIC_LIST_0][1], cuExtW, cuExtH);
+  PelBuf gradXExt1 = PelBuf(m_gradBuf[REF_PIC_LIST_1][0], cuExtW, cuExtH);
+  PelBuf gradYExt1 = PelBuf(m_gradBuf[REF_PIC_LIST_1][1], cuExtW, cuExtH);
+
+  Pel* gX0 = gradXExt0.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H); // skip the border: first interior gradient sample
+  Pel* gY0 = gradYExt0.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
+  Pel* gX1 = gradXExt1.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
+  Pel* gY1 = gradYExt1.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
+  
+  int *dMvX0 = m_dMvBuf[REF_PIC_LIST_0]; // same horizontal/vertical split of m_dMvBuf as used when filling the tables above
+  int *dMvY0 = m_dMvBuf[REF_PIC_LIST_0] + 16;
+  int *dMvX1 = m_dMvBuf[REF_PIC_LIST_1];
+  int *dMvY1 = m_dMvBuf[REF_PIC_LIST_1] + 16;
+
+  const Pel* srcY0 = pcYuvSrc0.bufAt(0, 0);
+  const Pel* srcY1 = pcYuvSrc1.bufAt(0, 0);
+  Pel* dstY = pcYuvDst.bufAt(0, 0);
+
+  if(m_applyPROF[0] && m_applyPROF[1]) // both lists flagged: variant [1]. NOTE(review): every call passes pcYuvSrc0.stride as the only source stride - assumes pcYuvSrc1 shares it
+    g_pelBufOP.applyBiPROF[1](dstY, pcYuvDst.stride, srcY0, srcY1, pcYuvSrc0.stride, width, height, gX0, gY0, gX1, gY1, gradXExt0.stride, dMvX0, dMvY0, dMvX1, dMvY1, blockWidth, getGbiWeight(pu.cu->GBiIdx, REF_PIC_LIST_0), clpRng);
+  else if (m_applyPROF[0]) // only list 0 flagged: variant [0] with list 0 as the first operand
+    g_pelBufOP.applyBiPROF[0](dstY, pcYuvDst.stride, srcY0, srcY1, pcYuvSrc0.stride, width, height, gX0, gY0, gX1, gY1, gradXExt0.stride, dMvX0, dMvY0, dMvX1, dMvY1, blockWidth, getGbiWeight(pu.cu->GBiIdx, REF_PIC_LIST_0), clpRng);
+  else // only list 1 flagged: same variant with the operand roles swapped and list 1's GBi weight
+    g_pelBufOP.applyBiPROF[0](dstY, pcYuvDst.stride, srcY1, srcY0, pcYuvSrc0.stride, width, height, gX1, gY1, gX0, gY0, gradXExt0.stride, dMvX1, dMvY1, dMvX0, dMvY0, blockWidth, getGbiWeight(pu.cu->GBiIdx, REF_PIC_LIST_1), clpRng);
+}
+#endif
+
 void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList
   , const bool luma, const bool chroma
 #if JVET_O0108_DIS_DMVR_BDOF_CIIP
@@ -1440,7 +1754,13 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBu
         if (biocheck0
           && biocheck1
           && PU::isBiPredFromDifferentDir(pu)
+#if JVET_O0634_BDOF_SIZE_CONSTRAINT
+          && (pu.Y().height >= 8)
+          && (pu.Y().width >= 8)
+          && ((pu.Y().height * pu.Y().width) >= 128)
+#else
           && pu.Y().height != 4
+#endif
           )
         {
           bioApplied = true;
diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h
index a6bcd6939dbfded81abc982e5b2036e825746148..fc9703d303aa1dae42bdc7ec7cddd1736aac2594 100644
--- a/source/Lib/CommonLib/InterPrediction.h
+++ b/source/Lib/CommonLib/InterPrediction.h
@@ -108,6 +108,15 @@ protected:
                              Mv(-2, 2), Mv(-1, 2), Mv(0, 2), Mv(1, 2), Mv(2, 2) };
   uint64_t m_SADsArray[((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)];
 
+#if JVET_O0070_PROF
+  Pel                  m_gradBuf[2][2][(MAX_CU_SIZE + 2) * (MAX_CU_SIZE + 2)];
+  int                  m_dMvBuf[2][16 * 2];
+  bool                 m_applyPROF[2];
+  bool                 m_skipPROF;
+  bool                 m_encOnly;
+  bool                 m_isBi;
+#endif
+
   Pel*                 m_gradX0;
   Pel*                 m_gradY0;
   Pel*                 m_gradX1;
@@ -149,6 +158,9 @@ protected:
   void xWeightedAverage         ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, PelUnitBuf* yuvDstTmp = NULL );
 #else
   void xWeightedAverage         ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied );
+#endif
+#if JVET_O0070_PROF
+  void xApplyBiPROF             (const PredictionUnit& pu, const CPelBuf& pcYuvSrc0, const CPelBuf& pcYuvSrc1, PelBuf& pcYuvDst, const ClpRng& clpRng);
 #endif
   void xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng );
 
diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp
index b3e3d9f9b588d2d0366592c1a5dc553c4b3977ae..a32727d252c738d8f5f1874d7b631bc489735548 100644
--- a/source/Lib/CommonLib/Picture.cpp
+++ b/source/Lib/CommonLib/Picture.cpp
@@ -961,7 +961,11 @@ Slice *Picture::swapSliceObject(Slice * p, uint32_t i)
   slices[i] = p;
   pTmp->setSPS(0);
   pTmp->setPPS(0);
+#if JVET_O_MAX_NUM_ALF_APS_8
+  memset(pTmp->getAlfAPSs(), 0, sizeof(*pTmp->getAlfAPSs())*ALF_CTB_MAX_NUM_APS);
+#else
   memset(pTmp->getAlfAPSs(), 0, sizeof(*pTmp->getAlfAPSs())*MAX_NUM_APS);
+#endif
 
   pTmp->setLmcsAPS(0);
   return pTmp;
diff --git a/source/Lib/CommonLib/Quant.cpp b/source/Lib/CommonLib/Quant.cpp
index 94ca3df437c07fc540e513e4b95ad6d5e498382d..71ca554f0148e2ad52516b588d51bfb7591be9d3 100644
--- a/source/Lib/CommonLib/Quant.cpp
+++ b/source/Lib/CommonLib/Quant.cpp
@@ -65,6 +65,9 @@
 QpParam::QpParam(const int           qpy,
                  const ChannelType   chType,
                  const int           qpBdOffset,
+#if JVET_O0919_TS_MIN_QP
+                 const int           minQpPrimeTsMinus4,
+#endif
                  const int           chromaQPOffset,
                  const ChromaFormat  chFmt,
                  const int           dqp )
@@ -91,9 +94,26 @@ QpParam::QpParam(const int           qpy,
 
   baseQp = Clip3( 0, MAX_QP+qpBdOffset, baseQp + dqp );
 
+#if JVET_O0919_TS_MIN_QP
+  Qps[0] =baseQp;
+  pers[0]=baseQp/6;
+  rems[0]=baseQp%6;
+
+  int baseQpTS = baseQp;
+
+  if( isLuma( chType ) )
+  {
+    baseQpTS = std::max(baseQpTS , 4 + minQpPrimeTsMinus4);
+  }
+
+  Qps[1]  = baseQpTS;
+  pers[1] = baseQpTS / 6;
+  rems[1] = baseQpTS % 6;
+#else
   Qp =baseQp;
   per=baseQp/6;
   rem=baseQp%6;
+#endif
 }
 
 QpParam::QpParam(const TransformUnit& tu, const ComponentID &compIDX, const int QP /*= -MAX_INT*/)
@@ -126,7 +146,11 @@ QpParam::QpParam(const TransformUnit& tu, const ComponentID &compIDX, const int
 
   int dqp = 0;
 
+#if JVET_O0919_TS_MIN_QP
+  *this = QpParam(QP <= -MAX_INT ? tu.cu->qp : QP, toChannelType(compID), tu.cs->sps->getQpBDOffset(toChannelType(compID)), tu.cs->sps->getMinQpPrimeTsMinus4(toChannelType(compID)), chromaQpOffset, tu.chromaFormat, dqp);
+#else
   *this = QpParam(QP <= -MAX_INT ? tu.cu->qp : QP, toChannelType(compID), tu.cs->sps->getQpBDOffset(toChannelType(compID)), chromaQpOffset, tu.chromaFormat, dqp);
+#endif
 }
 
 
@@ -397,8 +421,13 @@ void Quant::dequant(const TransformUnit &tu,
   const bool needSqrtAdjustment     = TU::needsBlockSizeTrafoScale( tu, compID );
   const int  iTransformShift        = (bClipTransformShiftTo0 ? std::max<int>(0, originalTransformShift) : originalTransformShift) + (needSqrtAdjustment?-1:0);
 
+#if JVET_O0919_TS_MIN_QP
+  const int QP_per = cQP.per(isTransformSkip);
+  const int QP_rem = cQP.rem(isTransformSkip);
+#else
   const int QP_per = cQP.per;
   const int QP_rem = cQP.rem;
+#endif
 
   const int  rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
 
@@ -943,7 +972,11 @@ void Quant::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf
     CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
     const uint32_t uiLog2TrWidth = g_aucLog2[uiWidth];
     const uint32_t uiLog2TrHeight = g_aucLog2[uiHeight];
+#if JVET_O0919_TS_MIN_QP
+    int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem(useTransformSkip), uiLog2TrWidth, uiLog2TrHeight);
+#else
     int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrWidth, uiLog2TrHeight);
+#endif
 
     const bool enableScalingLists             = getUseScalingList(uiWidth, uiHeight, useTransformSkip);
 
@@ -951,7 +984,11 @@ void Quant::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf
     // compensated by a bit-shift (the quantised result will be sqrt(2) * larger than required).
     // The quantScale table and shift is used to compensate for this.
     const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID );
+#if JVET_O0919_TS_MIN_QP
+    const int defaultQuantisationCoefficient    = g_quantScales[needSqrtAdjustment?1:0][cQP.rem(useTransformSkip)];
+#else
     const int defaultQuantisationCoefficient    = g_quantScales[needSqrtAdjustment?1:0][cQP.rem];
+#endif
     int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange) + ( needSqrtAdjustment?-1:0);
 
     if (useTransformSkip && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
@@ -960,7 +997,11 @@ void Quant::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf
     }
 
 
+#if JVET_O0919_TS_MIN_QP
+    const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + iTransformShift;
+#else
     const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
+#endif
     // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
 
     const int64_t iAdd = int64_t(tu.cs->slice->isIRAP() ? 171 : 85) << int64_t(iQBits - 9);
@@ -1014,7 +1055,11 @@ bool Quant::xNeedRDOQ(TransformUnit &tu, const ComponentID &compID, const CCoeff
 
   const uint32_t uiLog2TrWidth  = g_aucLog2[uiWidth];
   const uint32_t uiLog2TrHeight = g_aucLog2[uiHeight];
+#if JVET_O0919_TS_MIN_QP
+  int *piQuantCoeff         = getQuantCoeff(scalingListType, cQP.rem(useTransformSkip), uiLog2TrWidth, uiLog2TrHeight);
+#else
   int *piQuantCoeff         = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrWidth, uiLog2TrHeight);
+#endif
 
   const bool enableScalingLists             = getUseScalingList(uiWidth, uiHeight, (useTransformSkip != 0));
 
@@ -1024,7 +1069,11 @@ bool Quant::xNeedRDOQ(TransformUnit &tu, const ComponentID &compID, const CCoeff
     * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
     */
   const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID );
+#if JVET_O0919_TS_MIN_QP
+  const int defaultQuantisationCoefficient    = g_quantScales[needSqrtAdjustment?1:0][cQP.rem(useTransformSkip)];
+#else
   const int defaultQuantisationCoefficient    = g_quantScales[needSqrtAdjustment?1:0][cQP.rem];
+#endif
   int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange) + (needSqrtAdjustment?-1:0);
 
   if (useTransformSkip && sps.getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
@@ -1033,7 +1082,11 @@ bool Quant::xNeedRDOQ(TransformUnit &tu, const ComponentID &compID, const CCoeff
   }
 
 
+#if JVET_O0919_TS_MIN_QP
+  const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + iTransformShift;
+#else
   const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
+#endif
   assert(iQBits>=0);
   // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
 
@@ -1066,13 +1119,22 @@ void Quant::transformSkipQuantOneSample(TransformUnit &tu, const ComponentID &co
   const int            iTransformShift                = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
   const int            scalingListType                = getScalingListType(tu.cu->predMode, compID);
   const bool           enableScalingLists             = getUseScalingList(uiWidth, uiHeight, true);
+#if JVET_O0919_TS_MIN_QP
+  const bool useTransformSkip      = tu.mtsIdx == MTS_SKIP && isLuma(compID);
+  const int            defaultQuantisationCoefficient = g_quantScales[0][cQP.rem(useTransformSkip)];
+#else
   const int            defaultQuantisationCoefficient = g_quantScales[0][cQP.rem];
+#endif
 
   CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" );
 
   const uint32_t uiLog2TrWidth      = g_aucLog2[uiWidth];
   const uint32_t uiLog2TrHeight     = g_aucLog2[uiHeight];
+#if JVET_O0919_TS_MIN_QP
+  const int *const piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem(useTransformSkip), uiLog2TrWidth, uiLog2TrHeight);
+#else
   const int *const piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrWidth, uiLog2TrHeight);
+#endif
 
   /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
   * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
@@ -1080,7 +1142,11 @@ void Quant::transformSkipQuantOneSample(TransformUnit &tu, const ComponentID &co
   * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
   */
 
+#if JVET_O0919_TS_MIN_QP
+  const int iQBits = QUANT_SHIFT + cQP.per(useTransformSkip) + iTransformShift;
+#else
   const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
+#endif
   // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
   const int iAdd = int64_t(bUseHalfRoundingPoint ? 256 : (tu.cs->slice->isIRAP() ? 171 : 85)) << int64_t(iQBits - 9);
   TCoeff transformedCoefficient;
@@ -1117,8 +1183,13 @@ void Quant::invTrSkipDeQuantOneSample(TransformUnit &tu, const ComponentID &comp
   const CompArea      &rect                   = tu.blocks[compID];
   const uint32_t           uiWidth                = rect.width;
   const uint32_t           uiHeight               = rect.height;
+#if JVET_O0919_TS_MIN_QP
+  const int            QP_per                 = cQP.per(tu.mtsIdx==MTS_SKIP && isLuma(compID));
+  const int            QP_rem                 = cQP.rem(tu.mtsIdx==MTS_SKIP && isLuma(compID));
+#else
   const int            QP_per                 = cQP.per;
   const int            QP_rem                 = cQP.rem;
+#endif
   const int            maxLog2TrDynamicRange  = sps.getMaxLog2TrDynamicRange(toChannelType(compID));
   const int            channelBitDepth        = sps.getBitDepth(toChannelType(compID));
   const int            iTransformShift        = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
diff --git a/source/Lib/CommonLib/Quant.h b/source/Lib/CommonLib/Quant.h
index b53de50b4884da9a9f8f4b2c5238beaf5eb56078..16eba76cbc871781b021e5b6ebfe1909bc67c25f 100644
--- a/source/Lib/CommonLib/Quant.h
+++ b/source/Lib/CommonLib/Quant.h
@@ -65,17 +65,30 @@ struct TrQuantParams
 };
 
 /// QP struct
+#if JVET_O0919_TS_MIN_QP
+class QpParam
+#else
 struct QpParam
+#endif
 {
+#if JVET_O0919_TS_MIN_QP
+  int Qps[2];
+  int pers[2];
+  int rems[2];
+#else
   int Qp;
   int per;
   int rem;
+#endif
 
 private:
 
   QpParam(const int           qpy,
           const ChannelType   chType,
           const int           qpBdOffset,
+#if JVET_O0919_TS_MIN_QP
+          const int           minQpPrimeTsMinus4,
+#endif
           const int           chromaQPOffset,
           const ChromaFormat  chFmt,
           const int           dqp );
@@ -84,6 +97,12 @@ public:
 
   QpParam(const TransformUnit& tu, const ComponentID &compID, const int QP = -MAX_INT);
 
+#if JVET_O0919_TS_MIN_QP
+  int Qp ( const bool ts ) const { return Qps [ts?1:0]; }
+  int per( const bool ts ) const { return pers[ts?1:0]; }
+  int rem( const bool ts ) const { return rems[ts?1:0]; }
+#endif
+
 }; // END STRUCT DEFINITION QpParam
 
 /// transform and quantization class
diff --git a/source/Lib/CommonLib/QuantRDOQ.cpp b/source/Lib/CommonLib/QuantRDOQ.cpp
index 30e3ba2e25340ca6c46e851055c8e5b475cb26b3..add6b384f439d91995bc054809056ede253450b6 100644
--- a/source/Lib/CommonLib/QuantRDOQ.cpp
+++ b/source/Lib/CommonLib/QuantRDOQ.cpp
@@ -628,13 +628,25 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
 
 
   const bool needSqrtAdjustment= TU::needsBlockSizeTrafoScale( tu, compID );
+#if JVET_O0919_TS_MIN_QP
+  const bool   isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID);
+  const double *const pdErrScale = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
+  const int    *const piQCoef    = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight);
+#else
   const double *const pdErrScale = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem);
   const int    *const piQCoef    = getQuantCoeff(scalingListType, cQP.rem, uiLog2BlockWidth, uiLog2BlockHeight);
   const bool   isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID);
+#endif
   const bool   enableScalingLists             = getUseScalingList(uiWidth, uiHeight, isTransformSkip);
+#if JVET_O0919_TS_MIN_QP
+  const int    defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)];
+  const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1), cQP.rem(isTransformSkip));
+  const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0);                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
+#else
   const int    defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem];
   const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, (uiLog2BlockWidth-1), (uiLog2BlockHeight-1), cQP.rem);
   const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift + (needSqrtAdjustment?-1:0);                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
+#endif
 
 
   const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
@@ -1078,10 +1090,17 @@ void QuantRDOQ::xRateDistOptQuant(TransformUnit &tu, const ComponentID &compID,
 
   if( cctx.signHiding() && uiAbsSum>=2)
   {
+#if JVET_O0919_TS_MIN_QP
+    const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]);
+    int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16
+                                  / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
+                             + 0.5);
+#else
     const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem]);
     int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per)) / m_dLambda / 16
                                / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
                              + 0.5);
+#endif
 
     int lastCG = -1;
     int absSum = 0 ;
@@ -1252,9 +1271,16 @@ void QuantRDOQ::xRateDistOptQuantTS( TransformUnit &tu, const ComponentID &compI
 #endif
 
   const bool   needsSqrt2Scale = TU::needsSqrt2Scale( tu, compID );  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
+#if JVET_O0919_TS_MIN_QP
+  const bool   isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID);
+  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + transformShift + ( needsSqrt2Scale ? -1 : 0 );  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
+  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem(isTransformSkip)];
+  const double errorScale              = xGetErrScaleCoeff( TU::needsSqrt2Scale( tu, compID ), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth );
+#else
   const int    qBits = QUANT_SHIFT + qp.per + transformShift + (needsSqrt2Scale?-1:0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
   const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem];
   const double errorScale              = xGetErrScaleCoeff( TU::needsSqrt2Scale( tu, compID ), width, height, qp.rem, maxLog2TrDynamicRange, channelBitDepth );
+#endif
 
   const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1;
 
@@ -1500,13 +1526,25 @@ void QuantRDOQ::forwardRDPCM( TransformUnit &tu, const ComponentID &compID, cons
 #endif
 
   const bool   needsSqrt2Scale = TU::needsSqrt2Scale(tu, compID);  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
+#if JVET_O0919_TS_MIN_QP
+  const bool   isTransformSkip = tu.mtsIdx==MTS_SKIP && isLuma(compID);
+  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + transformShift + ( needsSqrt2Scale ? -1 : 0 );  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
+  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];
+  const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth);
+#else
   const int    qBits = QUANT_SHIFT + qp.per + transformShift + (needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
   const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem];
   const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem, maxLog2TrDynamicRange, channelBitDepth);
+#endif
 
   TrQuantParams trQuantParams;
+#if JVET_O0919_TS_MIN_QP
+  trQuantParams.rightShift = (IQUANT_SHIFT - (transformShift + qp.per(isTransformSkip)));
+  trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];
+#else
   trQuantParams.rightShift = (IQUANT_SHIFT - (transformShift + qp.per));
   trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem];
+#endif
 
   const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;
 
diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp
index b7bccee56532eb6a1520331742354b75d3be1129..e2d06bda5921ad3659b0fc6a7affead2ff84d56e 100644
--- a/source/Lib/CommonLib/Slice.cpp
+++ b/source/Lib/CommonLib/Slice.cpp
@@ -1453,6 +1453,9 @@ SPS::SPS()
 , m_LFNST                     ( false )
 , m_Affine                    ( false )
 , m_AffineType                ( false )
+#if JVET_O0070_PROF
+, m_PROF                      ( false )
+#endif
 , m_MHIntra                   ( false )
 , m_Triangle                  ( false )
 #if LUMA_ADAPTIVE_DEBLOCKING_FILTER_QP_OFFSET
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index 1446966ad0d511659282272a6e578eb98149b6ee..cc195899c8e3deb1852af8ad2d5a38dac5b14509 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -735,6 +735,9 @@ private:
   // Parameter
   BitDepths         m_bitDepths;
   int               m_qpBDOffset[MAX_NUM_CHANNEL_TYPE];
+#if JVET_O0919_TS_MIN_QP
+  int               m_minQpMinus4[MAX_NUM_CHANNEL_TYPE]; //  QP_internal - QP_input;
+#endif
   int               m_pcmBitDepths[MAX_NUM_CHANNEL_TYPE];
   bool              m_bPCMFilterDisableFlag;
 
@@ -797,6 +800,9 @@ private:
   bool              m_SMVD;
   bool              m_Affine;
   bool              m_AffineType;
+#if JVET_O0070_PROF
+  bool              m_PROF;
+#endif
   bool              m_GBi;                        //
   bool              m_MHIntra;
   bool              m_Triangle;
@@ -933,6 +939,10 @@ public:
   int                     getDifferentialLumaChromaBitDepth() const                                       { return int(m_bitDepths.recon[CHANNEL_TYPE_LUMA]) - int(m_bitDepths.recon[CHANNEL_TYPE_CHROMA]); }
   int                     getQpBDOffset(ChannelType type) const                                           { return m_qpBDOffset[type];                                           }
   void                    setQpBDOffset(ChannelType type, int i)                                          { m_qpBDOffset[type] = i;                                              }
+#if JVET_O0919_TS_MIN_QP
+  int                     getMinQpPrimeTsMinus4(ChannelType type) const                                         { return m_minQpMinus4[type];                                           }
+  void                    setMinQpPrimeTsMinus4(ChannelType type, int i)                                        { m_minQpMinus4[type] = i;                                              }
+#endif
 
   void                    setSAOEnabledFlag(bool bVal)                                                    { m_saoEnabledFlag = bVal;                                                    }
   bool                    getSAOEnabledFlag() const                                                       { return m_saoEnabledFlag;                                                    }
@@ -1016,6 +1026,10 @@ public:
   bool      getUseAffine          ()                                      const     { return m_Affine; }
   void      setUseAffineType      ( bool b )                                        { m_AffineType = b; }
   bool      getUseAffineType      ()                                      const     { return m_AffineType; }
+#if JVET_O0070_PROF
+  void      setUsePROF            ( bool b )                                        { m_PROF = b; }
+  bool      getUsePROF            ()                                      const     { return m_PROF; }
+#endif
   void      setUseLMChroma        ( bool b )                                        { m_LMChroma = b; }
   bool      getUseLMChroma        ()                                      const     { return m_LMChroma; }
   void      setCclmCollocatedChromaFlag( bool b )                                   { m_cclmCollocatedChromaFlag = b; }
@@ -1559,7 +1573,11 @@ private:
   uint32_t                   m_uiMaxTTSizeIChroma;
   uint32_t                   m_uiMaxBTSize;
 
+#if JVET_O_MAX_NUM_ALF_APS_8
+  APS*                       m_alfApss[ALF_CTB_MAX_NUM_APS];
+#else
   APS*                       m_alfApss[MAX_NUM_APS];
+#endif
   bool                       m_tileGroupAlfEnabledFlag[MAX_NUM_COMPONENT];
   int                        m_tileGroupNumAps;
   std::vector<int>           m_tileGroupLumaApsId;
@@ -2067,7 +2085,11 @@ protected:
   ParameterSetMap<APS> m_apsMap;
   ParameterSetMap<DPS> m_dpsMap;
 
+#if JVET_O_MAX_NUM_ALF_APS_8
+  APS* m_apss[ALF_CTB_MAX_NUM_APS];
+#else
   APS* m_apss[MAX_NUM_APS];
+#endif
 
   int m_activeDPSId; // -1 for nothing active
   int m_activeSPSId; // -1 for nothing active
diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp
index ab45af5ed2e5cae336fec0a6d378c24c5bfd5bc4..d45791d5d0dc44ec8d24bd8292d3c2fbc87faf4a 100644
--- a/source/Lib/CommonLib/TrQuant.cpp
+++ b/source/Lib/CommonLib/TrQuant.cpp
@@ -774,6 +774,11 @@ void TrQuant::getTrTypes(const TransformUnit tu, const ComponentID compID, int &
   trTypeHor = DCT2;
   trTypeVer = DCT2;
 
+#if JVET_O0538_SPS_CONTROL_ISP_SBT
+  if (!tu.cs->sps->getUseMTS())
+    return;
+#endif
+
   if (isImplicitMTS || isISP)
   {
     int  width = tu.blocks[compID].width;
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 9e4a29405954f4db0f8489466768a2c5e68eb1f6..d5138ac17133ff8b7bb0d3191ae30e3a646ad1f1 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -50,15 +50,27 @@
 #include <assert.h>
 #include <cassert>
 
-#define JVET_O0455_IBC_MAX_MERGE_NUM                      1 // JVET-O0455: Control the max number of IBC merge candidates independently from regular merge candidates
+#define JVET_O0640_PICTURE_SIZE_CONSTRAINT                1 // JVET-O0640: Picture width and height shall be a multiple of Max(8, minCU size)
+
+#define JVET_O_MAX_NUM_ALF_APS_8                          1 // JVET-O: number of ALF APSs is reduced to 8
+
+#define JVET_O0070_PROF                                   1 // JVET-O0070 method 4-2.1a: Prediction refinement with optical flow for affine mode
+
+#define JVET_O0570_GRAD_SIMP                              1 // JVET-O0570/JVET-O0211, SIMD friendly spatial gradient calculation
 
 #define JVET_O1170_IBC_VIRTUAL_BUFFER                     1 // JVET-O1170/O1171: IBC virtual buffer
 #if JVET_O1170_IBC_VIRTUAL_BUFFER
 #define JVET_O1170_CHECK_BV_AT_DECODER                    1 // For decoder to check if a BV is valid or not
 #endif
 
+#define JVET_O0538_SPS_CONTROL_ISP_SBT                    1 // JVET-O0538: SPS control for ISP and SBT transform
+
+#define JVET_O0634_BDOF_SIZE_CONSTRAINT                   1 // JVET-O0634: BDOF applied CU size align with DMVR
+
 #define JVET_O0213_RESTRICT_LFNST_TO_MAX_TB_SIZE          1 // JVET-O0213: Block size restriction of LFNST to maximum transform size
 
+#define JVET_O0617_SIG_FLAG_CONTEXT_REDUCTION             1 // JVET-O0617: Significant flag context reduction
+
 #define JVET_O0244_DELTA_POC                              1 // JVET-O0244: weighted prediction in SPS and delta POC
 
 #define JVET_O1153_INTRA_CHROMAMODE_CODING                1  //JVET-O1153: simplified intra chromamode coding
@@ -91,6 +103,8 @@
 
 #define JVET_O0366_AFFINE_BCW                             1 // JVET-O0366: Simplifications on BCW index derivation process
 
+#define JVET_O0919_TS_MIN_QP                              1 // JVET-O0919: Minimum QP for Transform Skip Mode
+
 #define JVET_O1168_CU_CHROMA_QP_OFFSET                    1 // JVET-O1168: cu chroma QP offset
 
 #define JVET_O0368_LFNST_WITH_DCT2_ONLY                   1 // JVET-O0368/O0292/O0521/O0466: disable LFNST for non-DCT2 MTS candidates normatively
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
index ae39695890e2df4acf95c4e5e35f739a9c9971e7..6e82e3f411a3b3666cc0ec707988be68d023abd2 100644
--- a/source/Lib/CommonLib/x86/BufferX86.h
+++ b/source/Lib/CommonLib/x86/BufferX86.h
@@ -43,7 +43,7 @@
 #include "CommonDefX86.h"
 #include "CommonLib/Unit.h"
 #include "CommonLib/Buffer.h"
-
+#include "CommonLib/InterpolationFilter.h"
 
 #if ENABLE_SIMD_OPT_BUFFER
 #ifdef TARGET_SIMD_X86
@@ -278,10 +278,222 @@ void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1St
   }
 }
 
+#if JVET_O0070_PROF
+template< X86_VEXT vext >  // Uni-prediction PROF: out = clip((src + ((dMvX*gradX + dMvY*gradY + 1) >> 1) + offset) >> shiftNum), four samples per step.
+void applyPROF_SSE(Pel* dstPel, int dstStride, const Pel* srcPel, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, int shiftNum, Pel offset, const ClpRng& clpRng)
+{
+  CHECKD((width & 3), "block width error!");  // the inner loop consumes exactly four samples per iteration
+
+  __m128i mm_dmvx, mm_dmvy, mm_gradx, mm_grady, mm_dI, mm_src;
+  __m128i mm_dIoffset = _mm_set1_epi32(1);       // rounding term for the >> 1 applied to the refinement
+  __m128i mm_offset = _mm_set1_epi32(offset);    // rounding term added before the final >> shiftNum
+  __m128i vibdimin  = _mm_set1_epi32(clpRng.min);
+  __m128i vibdimax  = _mm_set1_epi32(clpRng.max);
+  __m128i vzero     = _mm_setzero_si128();
+
+  for (int h = 0; h < height; h++)
+  {
+    const int* vX = dMvX;  // per-row cursors; the row base pointers are advanced by their strides at the end of each row
+    const int* vY = dMvY;
+    const Pel* gX = gradX;
+    const Pel* gY = gradY;
+    const Pel* src = srcPel;
+    Pel*       dst = dstPel;
+
+    for (int w = 0; w < width; w += 4)
+    {
+      mm_dmvx = _mm_loadu_si128((const __m128i *)vX);  // four 32-bit dMv values (unaligned load)
+      mm_dmvy = _mm_loadu_si128((const __m128i *)vY);
+      mm_gradx = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX));  // low 64 bits read as four 16-bit values, sign-extended to 32 bits
+      mm_grady = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY));
+      mm_src = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)src));
+
+      mm_dI = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx, mm_gradx), _mm_mullo_epi32(mm_dmvy, mm_grady));  // dMvX*gradX + dMvY*gradY
+      mm_dI = _mm_srai_epi32(_mm_add_epi32(mm_dI, mm_dIoffset), 1);  // (x + 1) >> 1, arithmetic shift
+      mm_dI = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(mm_dI, mm_src), mm_offset), shiftNum);  // add the sample, round, shift down
+      mm_dI = _mm_packs_epi32(_mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, mm_dI)), vzero);  // clamp to [clpRng.min, clpRng.max], pack to 16 bit
+      _mm_storel_epi64((__m128i *)dst, mm_dI);  // only the low 64 bits (four packed results) are written
+
+      vX += 4; vY += 4; gX += 4; gY += 4; src += 4; dst += 4;
+    }
+    dMvX += dMvStride;
+    dMvY += dMvStride;
+    gradX += gradStride;
+    gradY += gradStride;
+    srcPel += srcStride;
+    dstPel += dstStride;
+  }
+}
+
+template< X86_VEXT vext, bool l1PROFEnabled = true>  // Bi-prediction PROF with weights w0/w1; when l1PROFEnabled is false, src1 is weighted without any gradient refinement.
+void applyBiPROF_SSE(Pel* dst, int dstStride, const Pel* src0, const Pel* src1, int srcStride, int width, int height, const Pel* gradX0, const Pel* gradY0, const Pel* gradX1, const Pel* gradY1, int gradStride, const int* dMvX0, const int* dMvY0, const int* dMvX1, const int* dMvY1, int dMvStride, const int8_t w0, const ClpRng& clpRng)
+{
+  const int rShift = IF_INTERNAL_PREC - clpRng.bd;
+  const int shiftNum = (rShift > 2 ? rShift : 2) + g_GbiLog2WeightBase;  // max(2, rShift) plus the log2 of the weight base
+  const int offset = (1 << (shiftNum - 1)) + (IF_INTERNAL_OFFS << g_GbiLog2WeightBase);  // rounding term plus the scaled IF_INTERNAL_OFFS
+  const int8_t w1 = g_GbiWeightBase - w0;  // the two weights sum to g_GbiWeightBase
+
+  __m128i mm_offset = _mm_set1_epi32(offset);
+  __m128i mm_w0 = _mm_set1_epi32(w0);
+  __m128i mm_w1 = _mm_set1_epi32(w1);
+  __m128i vibdimin = _mm_set1_epi32(clpRng.min);
+  __m128i vibdimax = _mm_set1_epi32(clpRng.max);
+  __m128i vzero = _mm_setzero_si128();
+
+  __m128i mm_dmvx0, mm_dmvy0, mm_dmvx1, mm_dmvy1, mm_gradx0, mm_grady0, mm_gradx1, mm_grady1, mm_src0, mm_src1;
+  __m128i mm_dI0, mm_dI1, mm_dI;
+  __m128i mm_dIoffset = _mm_set1_epi32(1);  // rounding term for the >> 1 applied to each refinement
+  const int *mmMvX0, *mmMvY0, *mmMvX1, *mmMvY1;  // assigned on the first row (h == 0) by the rewind below; the list-1 pair only when l1PROFEnabled
+  const Pel *gX0, *gY0, *gX1, *gY1;
+
+  for (int h = 0; h < height; h++)
+  {
+    if (!(h & 3)) 
+    {
+      mmMvX0 = dMvX0;  // every fourth row, rewind to the first dMv row: the dMv rows are reused with a period of four rows
+      mmMvY0 = dMvY0;
+      if (l1PROFEnabled) 
+      {
+        mmMvX1 = dMvX1;
+        mmMvY1 = dMvY1;
+      }
+    }
+
+    mm_dmvx0 = _mm_loadu_si128((const __m128i *)mmMvX0);  // four dMv values loaded once per row and reused for every four-sample group in it
+    mm_dmvy0 = _mm_loadu_si128((const __m128i *)mmMvY0);
+    gX0 = gradX0;
+    gY0 = gradY0;
+
+    if (l1PROFEnabled) 
+    {
+      mm_dmvx1 = _mm_loadu_si128((const __m128i *)mmMvX1);
+      mm_dmvy1 = _mm_loadu_si128((const __m128i *)mmMvY1);
+      gX1 = gradX1;
+      gY1 = gradY1;
+    }
+
+    const Pel* pSrc0 = src0;
+    const Pel* pSrc1 = src1;
+    Pel*       pDst = dst;
+
+    for (int w = 0; w < width; w += 4)
+    {
+      mm_src0 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)pSrc0));  // low 64 bits read as four 16-bit values, sign-extended to 32 bits
+      mm_src1 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)pSrc1));
+      mm_gradx0 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX0));
+      mm_grady0 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY0));
+      mm_dI0 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx0, mm_gradx0), _mm_mullo_epi32(mm_dmvy0, mm_grady0));  // dMvX0*gradX0 + dMvY0*gradY0
+      mm_dI0 = _mm_srai_epi32(_mm_add_epi32(mm_dI0, mm_dIoffset), 1);  // (x + 1) >> 1, arithmetic shift
+      mm_dI0 = _mm_mullo_epi32(_mm_add_epi32(mm_src0, mm_dI0), mm_w0);  // refined list-0 sample times w0
+      gX0 += 4; gY0 += 4;
+
+      if (l1PROFEnabled) 
+      {
+        mm_gradx1 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gX1));
+        mm_grady1 = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)gY1));
+        mm_dI1 = _mm_add_epi32(_mm_mullo_epi32(mm_dmvx1, mm_gradx1), _mm_mullo_epi32(mm_dmvy1, mm_grady1));
+        mm_dI1 = _mm_srai_epi32(_mm_add_epi32(mm_dI1, mm_dIoffset), 1);
+        mm_dI1 = _mm_mullo_epi32(_mm_add_epi32(mm_src1, mm_dI1), mm_w1);  // refined list-1 sample times w1
+        gX1 += 4; gY1 += 4;
+      } 
+      else
+        mm_dI1 = _mm_mullo_epi32(mm_src1, mm_w1);  // no refinement for list 1: plain weighting of src1
+
+      mm_dI = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(mm_dI0, mm_dI1), mm_offset), shiftNum);  // sum both contributions, round, shift down
+      mm_dI = _mm_packs_epi32(_mm_min_epi32(vibdimax, _mm_max_epi32(vibdimin, mm_dI)), vzero);  // clamp to [clpRng.min, clpRng.max], pack to 16 bit
+      _mm_storel_epi64((__m128i *)pDst, mm_dI);  // only the low 64 bits (four packed results) are written
+
+      pSrc0 += 4; pSrc1 += 4; pDst += 4;
+    }
+
+    mmMvX0 += dMvStride;  // next dMv row; reset again by the rewind above once h reaches a multiple of four
+    mmMvY0 += dMvStride;
+    gradX0 += gradStride;
+    gradY0 += gradStride;
+
+    if (l1PROFEnabled) 
+    {
+      mmMvX1 += dMvStride;
+      mmMvY1 += dMvStride;
+      gradX1 += gradStride;
+      gradY1 += gradStride;
+    }
+
+    src0 += srcStride;
+    src1 += srcStride;
+    dst += dstStride;
+  }
+}
+
 template< X86_VEXT vext >
+void roundIntVector_SIMD(int* v, int size, unsigned int nShift, const int dmvLimit)  // In place: v[i] = clamp((v[i] + (1 << (nShift - 1)) - (v[i] > 0 ? 1 : 0)) >> nShift, -dmvLimit, dmvLimit - 1)
+{
+  CHECKD(size % 16 != 0, "Size must be multiple of 16!");  // so every vector width used below (16, 8 or 4 ints) divides size exactly
+#ifdef USE_AVX512
+  if (vext >= AVX512 && size >= 16)
+  {
+    __m512i dMvMin = _mm512_set1_epi32(-dmvLimit);     // 512-bit broadcast (a 256-bit broadcast cannot initialise a __m512i)
+    __m512i dMvMax = _mm512_set1_epi32(dmvLimit - 1 );
+    __m512i nOffset = _mm512_set1_epi32((1 << (nShift - 1)));
+    __m512i vones = _mm512_set1_epi32(1);
+    __m512i vzero = _mm512_setzero_si512();
+    for (int i = 0; i < size; i += 16, v += 16)
+    {
+      __m512i src = _mm512_loadu_si512(v);
+      __mmask16 mask = _mm512_cmpge_epi32_mask(src, vzero);  // lanes with v >= 0; for v == 0 the extra -1 does not change the shifted result when nShift >= 1
+      src = _mm512_add_epi32(src, nOffset);
+      __m512i dst = _mm512_srai_epi32(_mm512_mask_sub_epi32(src, mask, src, vones), nShift);  // subtract 1 only in masked lanes, then arithmetic shift
+      dst = _mm512_min_epi32(dMvMax, _mm512_max_epi32(dMvMin, dst));
+      _mm512_storeu_si512(v, dst);
+    }
+  }
+  else
+#endif
+#ifdef USE_AVX2
+  if (vext >= AVX2 && size >= 8)
+  {
+    __m256i dMvMin = _mm256_set1_epi32(-dmvLimit);
+    __m256i dMvMax = _mm256_set1_epi32(dmvLimit - 1);
+    __m256i nOffset = _mm256_set1_epi32(1 << (nShift - 1));
+    __m256i vzero = _mm256_setzero_si256();
+    for (int i = 0; i < size; i += 8, v += 8)
+    {
+      __m256i src = _mm256_lddqu_si256((__m256i*)v);
+      __m256i of  = _mm256_cmpgt_epi32(src, vzero);  // all-ones (-1) in lanes where v > 0, else 0
+      __m256i dst = _mm256_srai_epi32(_mm256_add_epi32(_mm256_add_epi32(src, nOffset), of), nShift);  // adding `of` subtracts 1 from positive lanes
+      dst = _mm256_min_epi32(dMvMax, _mm256_max_epi32(dMvMin, dst));
+      _mm256_storeu_si256((__m256i*)v, dst);
+    }
+  }
+  else
+#endif
+  {
+    __m128i dMvMin = _mm_set1_epi32(-dmvLimit);
+    __m128i dMvMax = _mm_set1_epi32(dmvLimit - 1);
+    __m128i nOffset = _mm_set1_epi32((1 << (nShift - 1)));
+    __m128i vzero = _mm_setzero_si128();
+    for (int i = 0; i < size; i += 4, v += 4)
+    {
+      __m128i src = _mm_loadu_si128((__m128i*)v);
+      __m128i of  = _mm_cmpgt_epi32(src, vzero);  // all-ones (-1) in lanes where v > 0, else 0
+      __m128i dst = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(src, nOffset), of), nShift);  // adding `of` subtracts 1 from positive lanes
+      dst = _mm_min_epi32(dMvMax, _mm_max_epi32(dMvMin, dst));
+      _mm_storeu_si128((__m128i*)v, dst);
+    }
+  }
+}
+#endif
+
+#if JVET_O0070_PROF
+template< X86_VEXT vext, bool PAD = true>
+#else
+template< X86_VEXT vext >
+#endif
 void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth)
 {
+#if !JVET_O0570_GRAD_SIMP
   __m128i vzero = _mm_setzero_si128();
+#endif
   Pel* srcTmp = src + srcStride + 1;
   Pel* gradXTmp = gradX + gradStride + 1;
   Pel* gradYTmp = gradY + gradStride + 1;
@@ -289,33 +501,84 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri
   int widthInside = width - 2 * BIO_EXTEND_SIZE;
   int heightInside = height - 2 * BIO_EXTEND_SIZE;
   int shift1 = std::max<int>(6, bitDepth - 6);
-
+#if JVET_O0570_GRAD_SIMP
+  __m128i mmShift1 = _mm_cvtsi32_si128( shift1 );
+#endif
   assert((widthInside & 3) == 0);
 
-  for (int y = 0; y < heightInside; y++)
+#if JVET_O0570_GRAD_SIMP
+  if ( ( widthInside & 7 ) == 0 )
   {
-    int x = 0;
-    for (; x < widthInside; x += 4)
+#endif
+    for (int y = 0; y < heightInside; y++)
     {
-      __m128i mmPixTop = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - srcStride)));
-      __m128i mmPixBottom = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + srcStride)));
-      __m128i mmPixLeft = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - 1)));
-      __m128i mmPixRight = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + 1)));
+      int x = 0;
+#if JVET_O0570_GRAD_SIMP
+      for ( ; x < widthInside; x += 8 )
+      {
+        __m128i mmPixTop    = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x - srcStride ) ), mmShift1 );
+        __m128i mmPixBottom = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x + srcStride ) ), mmShift1 );
+        __m128i mmPixLeft   = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x - 1 ) ), mmShift1 );
+        __m128i mmPixRight  = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x + 1 ) ), mmShift1 );
 
-      __m128i mmGradVer = _mm_sra_epi32(_mm_sub_epi32(mmPixBottom, mmPixTop), _mm_cvtsi32_si128(shift1));
-      __m128i mmGradHor = _mm_sra_epi32(_mm_sub_epi32(mmPixRight, mmPixLeft), _mm_cvtsi32_si128(shift1));
-      mmGradVer = _mm_packs_epi32(mmGradVer, vzero);
-      mmGradHor = _mm_packs_epi32(mmGradHor, vzero);
+        __m128i mmGradVer = _mm_sub_epi16( mmPixBottom, mmPixTop );
+        __m128i mmGradHor = _mm_sub_epi16( mmPixRight, mmPixLeft );
 
-      _mm_storel_epi64((__m128i *)(gradYTmp + x), mmGradVer);
-      _mm_storel_epi64((__m128i *)(gradXTmp + x), mmGradHor);
+        _mm_storeu_si128( ( __m128i * ) ( gradYTmp + x ), mmGradVer );
+        _mm_storeu_si128( ( __m128i * ) ( gradXTmp + x ), mmGradHor );
+      }
+#else
+      for (; x < widthInside; x += 4)
+      {
+        __m128i mmPixTop = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - srcStride)));
+        __m128i mmPixBottom = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + srcStride)));
+        __m128i mmPixLeft = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - 1)));
+        __m128i mmPixRight = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + 1)));
+
+        __m128i mmGradVer = _mm_sra_epi32(_mm_sub_epi32(mmPixBottom, mmPixTop), _mm_cvtsi32_si128(shift1));
+        __m128i mmGradHor = _mm_sra_epi32(_mm_sub_epi32(mmPixRight, mmPixLeft), _mm_cvtsi32_si128(shift1));
+        mmGradVer = _mm_packs_epi32(mmGradVer, vzero);
+        mmGradHor = _mm_packs_epi32(mmGradHor, vzero);
+
+        _mm_storel_epi64((__m128i *)(gradYTmp + x), mmGradVer);
+        _mm_storel_epi64((__m128i *)(gradXTmp + x), mmGradHor);
+      }
+#endif
+      gradXTmp += gradStride;
+      gradYTmp += gradStride;
+      srcTmp += srcStride;
     }
-
-    gradXTmp += gradStride;
-    gradYTmp += gradStride;
-    srcTmp += srcStride;
+#if JVET_O0570_GRAD_SIMP
   }
+  else
+  {
+    __m128i mmPixTop = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp - srcStride ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp ) ) ), mmShift1 );
+    for ( int y = 0; y < heightInside; y += 2 )
+    {
+      __m128i mmPixBottom = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp + srcStride ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp + ( srcStride << 1 ) ) ) ), mmShift1 );
+      __m128i mmPixLeft   = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp - 1 ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp - 1 + srcStride ) ) ), mmShift1 );
+      __m128i mmPixRight  = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp + 1 ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp + 1 + srcStride ) ) ), mmShift1 );
+
+      __m128i mmGradVer = _mm_sub_epi16( mmPixBottom, mmPixTop );
+      __m128i mmGradHor = _mm_sub_epi16( mmPixRight, mmPixLeft );
+
+      _mm_storel_epi64( (__m128i *) gradYTmp, mmGradVer );
+      _mm_storel_epi64( (__m128i *) ( gradYTmp + gradStride ), _mm_unpackhi_epi64( mmGradVer, mmGradHor ) );
+      _mm_storel_epi64( (__m128i *) gradXTmp, mmGradHor );
+      _mm_storel_epi64( (__m128i *) ( gradXTmp + gradStride ), _mm_unpackhi_epi64( mmGradHor, mmGradVer ) );
+
+      mmPixTop = mmPixBottom;
+      gradXTmp += gradStride << 1;
+      gradYTmp += gradStride << 1;
+      srcTmp   += srcStride << 1;
+    }
+  }
+#endif
 
+#if JVET_O0070_PROF
+  if (PAD)
+  {
+#endif
   gradXTmp = gradX + gradStride + 1;
   gradYTmp = gradY + gradStride + 1;
   for (int y = 0; y < heightInside; y++)
@@ -335,6 +598,9 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri
   ::memcpy(gradXTmp + heightInside*gradStride, gradXTmp + (heightInside - 1)*gradStride, sizeof(Pel)*(width));
   ::memcpy(gradYTmp - gradStride, gradYTmp, sizeof(Pel)*(width));
   ::memcpy(gradYTmp + heightInside*gradStride, gradYTmp + (heightInside - 1)*gradStride, sizeof(Pel)*(width));
+#if JVET_O0070_PROF
+  }
+#endif
 }
 
 template< X86_VEXT vext >
@@ -934,6 +1200,13 @@ void PelBufferOps::_initPelBufOpsX86()
   removeHighFreq8 = removeHighFreq_SSE<vext, 8>;
   removeHighFreq4 = removeHighFreq_SSE<vext, 4>;
 #endif
+#if JVET_O0070_PROF
+  profGradFilter = gradFilter_SSE<vext, false>;
+  applyPROF      = applyPROF_SSE<vext>;
+  applyBiPROF[1] = applyBiPROF_SSE<vext>;
+  applyBiPROF[0] = applyBiPROF_SSE<vext, false>;
+  roundIntVector = roundIntVector_SIMD<vext>;
+#endif
 }
 
 template void PelBufferOps::_initPelBufOpsX86<SIMDX86>();
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index ea18a3700b54f48b2e38f21104ca2a13978448ee..1289aeaa535a7efa74f84384425c757e6064cf81 100755
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -3032,7 +3032,11 @@ void CABACReader::residual_lfnst_mode( CodingUnit& cu )
   if( cu.ispMode != NOT_INTRA_SUBPARTITIONS || cu.mipFlag == true ||
     ( CS::isDualITree( *cu.cs ) && cu.chType == CHANNEL_TYPE_CHROMA && std::min( cu.blocks[ 1 ].width, cu.blocks[ 1 ].height ) < 4 )
 #if JVET_O0213_RESTRICT_LFNST_TO_MAX_TB_SIZE
+#if JVET_O0545_MAX_TB_SIGNALLING
+    || ( cu.blocks[ 0 ].width > cu.cs->sps->getMaxTbSize() || cu.blocks[ 0 ].height > cu.cs->sps->getMaxTbSize() )
+#else
     || ( cu.blocks[ 0 ].width > MAX_TB_SIZEY || cu.blocks[ 0 ].height > MAX_TB_SIZEY )
+#endif
 #endif
     )
   {
diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp
index 6bfde1cb5e2e09fa89f18e0020379ccc0571a1cf..a8b680961d7c05a057bdd5c29dd37484560c3795 100644
--- a/source/Lib/DecoderLib/DecLib.cpp
+++ b/source/Lib/DecoderLib/DecLib.cpp
@@ -206,10 +206,8 @@ bool tryDecodePicture( Picture* pcEncPic, const int expectedPoc, const std::stri
                   {
                     std::copy( pic->getAlfCtuEnableFlag()[compIdx].begin(), pic->getAlfCtuEnableFlag()[compIdx].end(), pcEncPic->getAlfCtuEnableFlag()[compIdx].begin() );
                   }
-#if JVET_N0415_CTB_ALF
                   pcEncPic->resizeAlfCtbFilterIndex(pic->cs->pcv->sizeInCtus);
                   memcpy( pcEncPic->getAlfCtbFilterIndex(), pic->getAlfCtbFilterIndex(), sizeof(short)*pic->cs->pcv->sizeInCtus );
-#endif
 
 #if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
                   std::copy( pic->getAlfCtuAlternative(COMPONENT_Cb).begin(), pic->getAlfCtuAlternative(COMPONENT_Cb).end(), pcEncPic->getAlfCtuAlternative(COMPONENT_Cb).begin() );
@@ -742,7 +740,11 @@ void DecLib::xActivateParameterSets()
   if (m_bFirstSliceInPicture)
   {
     APS** apss = m_parameterSetManager.getAPSs();
+#if JVET_O_MAX_NUM_ALF_APS_8
+    memset(apss, 0, sizeof(*apss) * ALF_CTB_MAX_NUM_APS);
+#else
     memset(apss, 0, sizeof(*apss) * MAX_NUM_APS);
+#endif
     const PPS *pps = m_parameterSetManager.getPPS(m_apcSlicePilot->getPPSId()); // this is a temporary PPS object. Do not store this value
     CHECK(pps == 0, "No PPS present");
 
@@ -937,7 +939,11 @@ void DecLib::xActivateParameterSets()
     {
       EXIT("Error - a new PPS has been decoded while processing a picture");
     }
+#if JVET_O_MAX_NUM_ALF_APS_8
+    for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++)
+#else
     for (int i = 0; i < MAX_NUM_APS; i++)
+#endif
     {
       APS* aps = m_parameterSetManager.getAPS(i, ALF_APS);
       if (aps && m_parameterSetManager.getAPSChangedFlag(i, ALF_APS))
@@ -1195,7 +1201,7 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
   if (endCtuIdx == numberOfCtusInFrame)
     EXIT("Cannot find the last CTU index of the current slice");
 
-  while (pcSlice->getSliceCurEndBrickIdx() == tileMap.getBrickIdxBsMap(endCtuIdx) && endCtuIdx < numberOfCtusInFrame)
+  while ( (endCtuIdx < numberOfCtusInFrame) && (pcSlice->getSliceCurEndBrickIdx() == tileMap.getBrickIdxBsMap(endCtuIdx)) )
   {
     endCtuIdx++;
   }
@@ -1437,6 +1443,7 @@ void DecLib::xDecodeVPS( InputNALUnit& nalu )
   VPS* vps = new VPS();
   m_HLSReader.setBitstream( &nalu.getBitstream() );
   m_HLSReader.parseVPS( vps );
+  delete vps;
 }
 
 void DecLib::xDecodeDPS( InputNALUnit& nalu )
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index 03fbd78e7725625814697478597aaaddaf669940..992ef6f58986835074fecc0eade0fc3fd3d14717 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -1102,6 +1102,11 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   pcSPS->setBitDepth(CHANNEL_TYPE_CHROMA, 8 + uiCode);
   pcSPS->setQpBDOffset(CHANNEL_TYPE_CHROMA,  (int) (6*uiCode) );
 
+#if JVET_O0919_TS_MIN_QP
+  READ_UVLC(     uiCode, "min_qp_prime_ts_minus4" );
+  pcSPS->setMinQpPrimeTsMinus4(CHANNEL_TYPE_LUMA, uiCode);
+#endif
+
   READ_UVLC( uiCode,    "log2_max_pic_order_cnt_lsb_minus4" );   pcSPS->setBitsForPOC( 4 + uiCode );
   CHECK(uiCode > 12, "Invalid code");
   READ_FLAG( uiCode, "sps_idr_rpl_present_flag" ); pcSPS->setIDRRefParamListPresent( (bool) uiCode);                 
@@ -1184,6 +1189,12 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   READ_UVLC(uiCode, "log2_min_luma_coding_block_size_minus2");
   int log2MinCUSize = uiCode + 2;
   pcSPS->setLog2MinCodingBlockSize(log2MinCUSize);
+
+#if JVET_O0640_PICTURE_SIZE_CONSTRAINT
+  CHECK((pcSPS->getPicWidthInLumaSamples()  % (std::max(8, int(pcSPS->getMaxCUWidth()  >> (pcSPS->getMaxCodingDepth() - 1))))) != 0, "Coded frame width must be a multiple of Max(8, the minimum unit size)");
+  CHECK((pcSPS->getPicHeightInLumaSamples() % (std::max(8, int(pcSPS->getMaxCUHeight() >> (pcSPS->getMaxCodingDepth() - 1))))) != 0, "Coded frame height must be a multiple of Max(8, the minimum unit size)");
+#endif
+
   READ_FLAG(uiCode, "partition_constraints_override_enabled_flag"); pcSPS->setSplitConsOverrideEnabledFlag(uiCode);
   READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_intra_tile_group_luma");      minQT[0] = 1 << (uiCode + pcSPS->getLog2MinCodingBlockSize());
   READ_UVLC(uiCode, "sps_log2_diff_min_qt_min_cb_inter_tile_group");      minQT[1] = 1 << (uiCode + pcSPS->getLog2MinCodingBlockSize());
@@ -1307,6 +1318,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   if ( pcSPS->getUseAffine() )
   {
     READ_FLAG( uiCode,  "affine_type_flag" );                       pcSPS->setUseAffineType          ( uiCode != 0 );
+#if JVET_O0070_PROF
+    READ_FLAG( uiCode, "sps_prof_enabled_flag");                    pcSPS->setUsePROF                ( uiCode != 0 );
+#endif
 #if JVET_O0438_SPS_AFFINE_AMVR_FLAG
     READ_FLAG( uiCode,  "sps_affine_amvr_enabled_flag" );           pcSPS->setAffineAmvrEnabledFlag  ( uiCode != 0 );
 #endif
@@ -1804,7 +1818,11 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
       if (uiCode)
       {
 #if JVET_O0288_UNIFY_ALF_SLICE_TYPE_REMOVAL
+#if JVET_O_MAX_NUM_ALF_APS_8
+        READ_CODE(3, uiCode, "tile_group_num_APS");
+#else
         xReadTruncBinCode(uiCode, ALF_CTB_MAX_NUM_APS + 1);
+#endif
 #else
         if (pcSlice->isIntra())
         {
@@ -1812,7 +1830,11 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
         }
         else
         {
+#if JVET_O_MAX_NUM_ALF_APS_8
+          READ_CODE(3, uiCode, "tile_group_num_APS");
+#else
           xReadTruncBinCode(uiCode, ALF_CTB_MAX_NUM_APS + 1);
+#endif
         }
 #endif
         int numAps = uiCode;
@@ -1820,7 +1842,11 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
         std::vector<int> apsId(numAps, -1);
         for (int i = 0; i < numAps; i++)
         {
+#if JVET_O_MAX_NUM_ALF_APS_8
+          READ_CODE(3, uiCode, "tile_group_aps_id");
+#else
           READ_CODE(5, uiCode, "tile_group_aps_id");
+#endif
           apsId[i] = uiCode;
         }
 		
@@ -1841,7 +1867,11 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
         if (alfChromaIdc)
         {
 #if JVET_O0288_UNIFY_ALF_SLICE_TYPE_REMOVAL
+#if JVET_O_MAX_NUM_ALF_APS_8
+          READ_CODE(3, uiCode, "tile_group_aps_id_chroma");
+#else
           READ_CODE(5, uiCode, "tile_group_aps_id_chroma");
+#endif
 #else
           if (pcSlice->isIntra() && pcSlice->getTileGroupNumAps() == 1)
           {
@@ -1849,7 +1879,11 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para
           }
           else
           {
+#if JVET_O_MAX_NUM_ALF_APS_8
+            READ_CODE(3, uiCode, "tile_group_aps_id_chroma");
+#else
             READ_CODE(5, uiCode, "tile_group_aps_id_chroma");
+#endif
           }
 #endif
           pcSlice->setTileGroupApsIdChroma(uiCode);
diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp
index b86518d2c0323170de0afc46b056a4e9d6da48f4..c8e8f8f89b0246d1055e3ab167ca50dd7b86e341 100755
--- a/source/Lib/EncoderLib/CABACWriter.cpp
+++ b/source/Lib/EncoderLib/CABACWriter.cpp
@@ -2915,7 +2915,11 @@ void CABACWriter::residual_lfnst_mode( const CodingUnit& cu, CUCtx& cuCtx )
   if( cu.ispMode != NOT_INTRA_SUBPARTITIONS || cu.mipFlag == true ||
     ( CS::isDualITree( *cu.cs ) && cu.chType == CHANNEL_TYPE_CHROMA && std::min( cu.blocks[ 1 ].width, cu.blocks[ 1 ].height ) < 4 )
 #if JVET_O0213_RESTRICT_LFNST_TO_MAX_TB_SIZE
+#if JVET_O0545_MAX_TB_SIGNALLING
+    || ( cu.blocks[ 0 ].width > cu.cs->sps->getMaxTbSize() || cu.blocks[ 0 ].height > cu.cs->sps->getMaxTbSize() )
+#else
     || ( cu.blocks[ 0 ].width > MAX_TB_SIZEY || cu.blocks[ 0 ].height > MAX_TB_SIZEY )
+#endif
 #endif
     )
   {
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
index 97956172c11aea7968071fdc8191c530a11b7eed..a3c85e561c39afd65f859d1c686060d30ed4b4ab 100644
--- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
+++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
@@ -501,7 +501,11 @@ void EncAdaptiveLoopFilter::create( const EncCfg* encCfg, const int picWidth, co
     m_diffFilterCoeff[i] = new int[MAX_NUM_ALF_LUMA_COEFF];
   }
 
+#if JVET_O_MAX_NUM_ALF_APS_8
+  m_apsIdStart = ALF_CTB_MAX_NUM_APS;
+#else
   m_apsIdStart = (int)MAX_NUM_APS;
+#endif
   m_ctbDistortionFixedFilter = new double[m_numCTUsInPic];
   for (int comp = 0; comp < MAX_NUM_COMPONENT; comp++)
   {
@@ -654,15 +658,27 @@ void EncAdaptiveLoopFilter::ALFProcess(CodingStructure& cs, const double *lambda
 {
   if (cs.slice->getPendingRasInit() || cs.slice->isIDRorBLA())
   {
+#if JVET_O_MAX_NUM_ALF_APS_8
+    memset(cs.slice->getAlfAPSs(), 0, sizeof(*cs.slice->getAlfAPSs())*ALF_CTB_MAX_NUM_APS);
+    m_apsIdStart = ALF_CTB_MAX_NUM_APS;
+#else
     memset(cs.slice->getAlfAPSs(), 0, sizeof(*cs.slice->getAlfAPSs())*MAX_NUM_APS);
     m_apsIdStart = (int)MAX_NUM_APS;
+#endif
     m_apsMap->clear();
+#if JVET_O_MAX_NUM_ALF_APS_8
+    for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++)
+#else
     for (int i = 0; i < MAX_NUM_APS; i++)
+#endif
     {
       APS* alfAPS = m_apsMap->getPS((i << NUM_APS_TYPE_LEN) + ALF_APS);
       m_apsMap->clearChangedFlag((i << NUM_APS_TYPE_LEN) + ALF_APS);
       if (alfAPS)
+      {
+        alfAPS->getAlfAPSParam().reset();
         alfAPS = nullptr;
+      }
     }
   }
   AlfParam alfParam;
@@ -2963,16 +2979,28 @@ void EncAdaptiveLoopFilter::setCtuEnableFlag( uint8_t** ctuFlags, ChannelType ch
 std::vector<int> EncAdaptiveLoopFilter::getAvaiApsIdsLuma(CodingStructure& cs, int &newApsId)
 {
   APS** apss = cs.slice->getAlfAPSs();
+#if JVET_O_MAX_NUM_ALF_APS_8
+  for (int i = 0; i < ALF_CTB_MAX_NUM_APS; i++)
+#else
   for (int i = 0; i < MAX_NUM_APS; i++)
+#endif
   {
     apss[i] = m_apsMap->getPS((i << NUM_APS_TYPE_LEN) + ALF_APS);
   }
 
   std::vector<int> result;
   int apsIdChecked = 0, curApsId = m_apsIdStart;
+#if JVET_O_MAX_NUM_ALF_APS_8
+  if (curApsId < ALF_CTB_MAX_NUM_APS)
+#else
   if (curApsId < int(MAX_NUM_APS))
+#endif
   {
+#if JVET_O_MAX_NUM_ALF_APS_8
+    while (apsIdChecked < ALF_CTB_MAX_NUM_APS && !cs.slice->isIntra() && result.size() < ALF_CTB_MAX_NUM_APS && !cs.slice->getPendingRasInit() && !cs.slice->isIDRorBLA())
+#else
     while (apsIdChecked < MAX_NUM_APS && !cs.slice->isIntra() && result.size() < (ALF_CTB_MAX_NUM_APS - 1) && !cs.slice->getPendingRasInit() && !cs.slice->isIDRorBLA())
+#endif
     {
       APS* curAPS = cs.slice->getAlfAPSs()[curApsId];
       if (curAPS && curAPS->getTemporalId() <= cs.slice->getTLayer() && curAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_LUMA])
@@ -2980,7 +3008,11 @@ std::vector<int> EncAdaptiveLoopFilter::getAvaiApsIdsLuma(CodingStructure& cs, i
         result.push_back(curApsId);
       }
       apsIdChecked++;
+#if JVET_O_MAX_NUM_ALF_APS_8
+      curApsId = (curApsId + 1) % ALF_CTB_MAX_NUM_APS;
+#else
       curApsId = (curApsId + 1) % MAX_NUM_APS;
+#endif
     }
   }
   cs.slice->setTileGroupNumAps((int)result.size());
@@ -2988,10 +3020,17 @@ std::vector<int> EncAdaptiveLoopFilter::getAvaiApsIdsLuma(CodingStructure& cs, i
   newApsId = m_apsIdStart - 1;
   if (newApsId < 0)
   {
+#if JVET_O_MAX_NUM_ALF_APS_8
+    newApsId = ALF_CTB_MAX_NUM_APS - 1;
+#else
     newApsId = (int)MAX_NUM_APS - 1;
+#endif
   }
-
+#if JVET_O_MAX_NUM_ALF_APS_8
+  CHECK(newApsId >= ALF_CTB_MAX_NUM_APS, "Wrong APS index assignment in getAvaiApsIdsLuma");
+#else
   CHECK(newApsId >= (int)MAX_NUM_APS, "Wrong APS index assignment in getAvaiApsIdsLuma");
+#endif
   return result;
 }
 void  EncAdaptiveLoopFilter::initDistortion()
@@ -3059,6 +3098,12 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
     int numIter = useNewFilter ? 2 : 1;
     for (int numTemporalAps = 0; numTemporalAps <= apsIds.size(); numTemporalAps++)
     {
+#if JVET_O_MAX_NUM_ALF_APS_8
+      if (numTemporalAps + useNewFilter >= ALF_CTB_MAX_NUM_APS)
+      {
+        continue;
+      }
+#endif
       cs.slice->setTileGroupNumAps(numTemporalAps + useNewFilter);
       int numFilterSet = NUM_FIXED_FILTER_SETS + numTemporalAps + useNewFilter;
       if (numTemporalAps == apsIds.size() && numTemporalAps > 0 && useNewFilter && newApsId == apsIds.back()) //last temporalAPS is occupied by new filter set and this temporal APS becomes unavailable
@@ -3069,7 +3114,11 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
       {
         m_alfParamTemp = alfParamNewFilters;
         m_alfParamTemp.enabledFlag[CHANNEL_TYPE_LUMA] = true;
+#if JVET_O_MAX_NUM_ALF_APS_8
+        double curCost = 3 * m_lambda[CHANNEL_TYPE_LUMA];
+#else
         double curCost = getTBlength(numTemporalAps + useNewFilter, ALF_CTB_MAX_NUM_APS + 1) * m_lambda[CHANNEL_TYPE_LUMA];
+#endif
         if (iter > 0)  //re-derive new filter-set
         {
           double dDistOrgNewFilter = 0;
@@ -3229,9 +3278,17 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
           }
         } //for(ctbIdx)
 #if JVET_O0288_UNIFY_ALF_SLICE_TYPE_REMOVAL
+#if JVET_O_MAX_NUM_ALF_APS_8
+        int tmpBits = bitsNewFilter + 3 * (numFilterSet - NUM_FIXED_FILTER_SETS);
+#else
         int tmpBits = bitsNewFilter + 5 * (numFilterSet - NUM_FIXED_FILTER_SETS) + getTBlength(numFilterSet - NUM_FIXED_FILTER_SETS, ALF_CTB_MAX_NUM_APS + 1);
+#endif
+#else
+#if JVET_O_MAX_NUM_ALF_APS_8
+        int tmpBits = bitsNewFilter + 3 * (numFilterSet - NUM_FIXED_FILTER_SETS) + (cs.slice->isIntra() ? 1 : 3);
 #else
         int tmpBits = bitsNewFilter + 5 * (numFilterSet - NUM_FIXED_FILTER_SETS) + (cs.slice->isIntra() ? 1 : getTBlength(numFilterSet - NUM_FIXED_FILTER_SETS, ALF_CTB_MAX_NUM_APS + 1));
+#endif
 #endif
         curCost += tmpBits * m_lambda[COMPONENT_Y];
         if (curCost < costMin)
@@ -3291,6 +3348,7 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
         newAPS->setAPSType(ALF_APS);
       }
       newAPS->setAlfAPSParam(alfParamNewFiltersBest);
+      newAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_CHROMA] = false;
       m_apsMap->setChangedFlag((newApsId << NUM_APS_TYPE_LEN) + ALF_APS);
       m_apsIdStart = newApsId;
     }
@@ -3330,7 +3388,11 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
       curId--;
       if (curId < 0)
       {
+#if JVET_O_MAX_NUM_ALF_APS_8
+        curId = ALF_CTB_MAX_NUM_APS - 1;
+#else
         curId = (int)MAX_NUM_APS - 1;
+#endif
       }
       if (std::find(bestApsIds.begin(), bestApsIds.end(), curId) == bestApsIds.end())
       {
@@ -3338,7 +3400,11 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
       }
     }
   }
+#if JVET_O_MAX_NUM_ALF_APS_8
+  for (int curApsId = 0; curApsId < ALF_CTB_MAX_NUM_APS; curApsId++)
+#else
   for (int curApsId = 0; curApsId < MAX_NUM_APS; curApsId++)
+#endif
   {
     if ((cs.slice->getPendingRasInit() || cs.slice->isIDRorBLA() || cs.slice->isIntra()) && curApsId != newApsIdChroma)
     {
@@ -3346,9 +3412,17 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
     }
     APS* curAPS = m_apsMap->getPS((curApsId << NUM_APS_TYPE_LEN) + ALF_APS);
 #if JVET_O0288_UNIFY_ALF_SLICE_TYPE_REMOVAL
+#if JVET_O_MAX_NUM_ALF_APS_8
+    double curCost = m_lambda[CHANNEL_TYPE_CHROMA] * 3;
+#else
     double curCost = m_lambda[CHANNEL_TYPE_CHROMA] * 5;
+#endif
+#else
+#if JVET_O_MAX_NUM_ALF_APS_8
+    double curCost = (cs.slice->isIntra() && cs.slice->getTileGroupNumAps() == 1) ? 0 : (m_lambda[CHANNEL_TYPE_CHROMA] * 3);
 #else
     double curCost = (cs.slice->isIntra() && cs.slice->getTileGroupNumAps() == 1) ? 0 : (m_lambda[CHANNEL_TYPE_CHROMA] * 5);
+#endif
 #endif
     if (curApsId == newApsIdChroma)
     {
@@ -3501,6 +3575,10 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
         newAPS->getAlfAPSParam().reset();
       }
       newAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_CHROMA] = true;
+      if (!alfParamNewFiltersBest.newFilterFlag[CHANNEL_TYPE_LUMA])
+      {
+        newAPS->getAlfAPSParam().newFilterFlag[CHANNEL_TYPE_LUMA] = false;
+      }
 #if JVET_O0090_ALF_CHROMA_FILTER_ALTERNATIVES_CTB
       newAPS->getAlfAPSParam().numAlternativesChroma = alfParamNewFilters.numAlternativesChroma;
       for( int altIdx = 0; altIdx < MAX_NUM_ALF_ALTERNATIVES_CHROMA; ++altIdx )
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index 1e4eac65681e3d329b4d2a915d273abd1459615b..53f499ce905001924a86f96635d6d4fb98ed8fde 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -276,6 +276,9 @@ protected:
   int       m_SubPuMvpMode;
   bool      m_Affine;
   bool      m_AffineType;
+#if JVET_O0070_PROF
+  bool      m_PROF;  // PROF enable flag (JVET_O0070_PROF); written by setPROF(), returned by getPROF()
+#endif
   bool      m_BIO;
 
   bool      m_SMVD;
@@ -819,6 +822,10 @@ public:
   bool      getAffine                       ()         const { return m_Affine; }
   void      setAffineType( bool b )                          { m_AffineType = b; }
   bool      getAffineType()                            const { return m_AffineType; }
+#if JVET_O0070_PROF
+  void      setPROF                         (bool b)         { m_PROF = b; }  // store the PROF enable flag in m_PROF
+  bool      getPROF                         ()         const { return m_PROF; }  // return the PROF enable flag held in m_PROF
+#endif
   void      setBIO(bool b)                                   { m_BIO = b; }
   bool      getBIO()                                   const { return m_BIO; }
 
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index ec041e0b1dd662cde38d747146cc91a938189bd2..fe5b1633dd3eeefe92f4f1847336c1a60cf289bc 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -1243,7 +1243,13 @@ void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS,
       bestSubCS->sharedBndPos.y = (m_shareState == SHARING) ? m_shareBndPosY : tempSubCS->area.Y().lumaPos().y;
       bestSubCS->sharedBndSize.width = (m_shareState == SHARING) ? m_shareBndSizeW : tempSubCS->area.lwidth();
       bestSubCS->sharedBndSize.height = (m_shareState == SHARING) ? m_shareBndSizeH : tempSubCS->area.lheight();
+#if JVET_O0070_PROF
+      tempSubCS->bestParent = bestSubCS->bestParent = bestCS;
+#endif
       xCompressCU( tempSubCS, bestSubCS, partitioner );
+#if JVET_O0070_PROF
+      tempSubCS->bestParent = bestSubCS->bestParent = nullptr;
+#endif
 
       if( bestSubCS->cost == MAX_DOUBLE )
       {
@@ -1395,7 +1401,11 @@ void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestC
 
 #if JVET_O0213_RESTRICT_LFNST_TO_MAX_TB_SIZE
   const int  maxLfnstIdx         = ( CS::isDualITree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) )
+#if JVET_O0545_MAX_TB_SIGNALLING
+                                   || ( partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize() ) ? 0 : 2;
+#else
                                    || ( partitioner.currArea().lwidth() > MAX_TB_SIZEY || partitioner.currArea().lheight() > MAX_TB_SIZEY ) ? 0 : 2;
+#endif
 #else
   const int  maxLfnstIdx         = CS::isDualITree( *tempCS ) && partitioner.chType == CHANNEL_TYPE_CHROMA && ( partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8 ) ? 0 : 2;
 #endif
diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp
index c348ae0e23623207e95894ef085a82df0ac29ec8..c6dcaa5c6b03e0c659b03e8cad17ef6ed9a1ca89 100644
--- a/source/Lib/EncoderLib/EncGOP.cpp
+++ b/source/Lib/EncoderLib/EncGOP.cpp
@@ -2494,7 +2494,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
       if (pcSlice->getSPS()->getALFEnabledFlag() && pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Y))
       {
+#if JVET_O_MAX_NUM_ALF_APS_8
+        for (int apsId = 0; apsId < ALF_CTB_MAX_NUM_APS; apsId++)
+#else
         for (int apsId = 0; apsId < MAX_NUM_APS; apsId++)   //HD: shouldn't this be looping over slice_alf_aps_id_luma[ i ]? By looping over MAX_NUM_APS, it is possible unused ALF APS is written. Please check!
+#endif
         {
           ParameterSetMap<APS> *apsMap = m_pcEncLib->getApsMap();
 
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index 6fdade2d5d17d78b44901c3ceb22bfa2e3dded80..dd743359da715987bb89226d26cce6ec2fc741fc 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -904,6 +904,9 @@ void EncLib::xInitSPS(SPS &sps)
   sps.setBDOFEnabledFlag                    ( m_BIO );
   sps.setUseAffine             ( m_Affine );
   sps.setUseAffineType         ( m_AffineType );
+#if JVET_O0070_PROF
+  sps.setUsePROF               ( m_PROF );
+#endif
   sps.setUseLMChroma           ( m_LMChroma ? true : false );
   sps.setCclmCollocatedChromaFlag( m_cclmCollocatedChromaFlag );
   sps.setUseMTS                ( m_IntraMTS || m_InterMTS || m_ImplicitMTS );
@@ -980,6 +983,9 @@ void EncLib::xInitSPS(SPS &sps)
   {
     sps.setBitDepth      (ChannelType(channelType), m_bitDepth[channelType] );
     sps.setQpBDOffset  (ChannelType(channelType), (6 * (m_bitDepth[channelType] - 8)));
+#if JVET_O0919_TS_MIN_QP
+    sps.setMinQpPrimeTsMinus4(ChannelType(channelType), (6 * (m_bitDepth[channelType] - m_inputBitDepth[channelType])));
+#endif
     sps.setPCMBitDepth (ChannelType(channelType), m_PCMBitDepth[channelType]         );
   }
 
diff --git a/source/Lib/EncoderLib/EncLib.h b/source/Lib/EncoderLib/EncLib.h
index d1ea9637df1cdfaa0f45384972bd31cc9dab3190..71bcf0d1b5abc5faa62e6359660f179ffc9b0ef7 100644
--- a/source/Lib/EncoderLib/EncLib.h
+++ b/source/Lib/EncoderLib/EncLib.h
@@ -138,7 +138,11 @@ private:
   CacheModel                m_cacheModel;
 #endif
 
+#if JVET_O_MAX_NUM_ALF_APS_8
+  APS*                      m_apss[ALF_CTB_MAX_NUM_APS];
+#else
   APS*                      m_apss[MAX_NUM_APS];
+#endif
 
   APS*                      m_lmcsAPS;
 
diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp
index b1314953456596e4be9fd29616d3493cb45d0dcc..aa185316bef82966e93cd0a1f7feed517fb2d049 100644
--- a/source/Lib/EncoderLib/InterSearch.cpp
+++ b/source/Lib/EncoderLib/InterSearch.cpp
@@ -2339,6 +2339,13 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
   }
 
   {
+#if JVET_O0070_PROF
+    if (pu.cu->cs->bestParent != nullptr && pu.cu->cs->bestParent->getCU(CHANNEL_TYPE_LUMA) != nullptr && pu.cu->cs->bestParent->getCU(CHANNEL_TYPE_LUMA)->affine == false)
+    {
+      m_skipPROF = true;
+    }
+    m_encOnly = true;
+#endif
     // motion estimation only evaluates luma component
     m_maxCompIDToPred = MAX_NUM_COMPONENT;
 //    m_maxCompIDToPred = COMPONENT_Y;
@@ -3090,6 +3097,10 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner)
       PU::spanMotionInfo( pu, mergeCtx );
     }
 
+#if JVET_O0070_PROF
+    m_skipPROF = false;
+    m_encOnly = false;
+#endif
     //  MC
     PelUnitBuf predBuf = pu.cs->getPredBuf(pu);
     if ( gbiIdx == GBI_DEFAULT || !m_affineMotion.affine4ParaAvail || !m_affineMotion.affine6ParaAvail )
@@ -3324,6 +3335,9 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf&
   const bool bi = pu.cu->slice->testWeightPred() && pu.cu->slice->getSliceType()==P_SLICE;
   Mv mv[3];
   memcpy(mv, acMvCand, sizeof(mv));
+#if JVET_O0070_PROF
+  m_iRefListIdx = eRefPicList;
+#endif
   xPredAffineBlk(COMPONENT_Y, pu, picRef, mv, predBuf, bi, pu.cu->slice->clpRng(COMPONENT_Y));
   if( bi )
   {
@@ -4989,6 +5003,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
   {
 	  tryBipred = 1;
     pu.interDir = 3;
+#if JVET_O0070_PROF
+    m_isBi = true;
+#endif
     // Set as best list0 and list1
     iRefIdxBi[0] = iRefIdx[0];
     iRefIdxBi[1] = iRefIdx[1];
@@ -5189,6 +5206,9 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit&       pu,
       }
     } // for loop-iter
     }
+#if JVET_O0070_PROF
+    m_isBi = false;
+#endif
   } // if (B_SLICE)
 
   pu.mv    [REF_PIC_LIST_0] = Mv();
@@ -5462,6 +5482,9 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
 
   PelUnitBuf  origBufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) );
   enum DFunc distFunc = (pu.cu->transQuantBypass || pu.cs->slice->getDisableSATDForRD()) ? DF_SAD : DF_HAD;
+#if JVET_O0070_PROF
+  m_iRefListIdx = eRefPicList;
+#endif
 
   // if Bi, set to ( 2 * Org - ListX )
   if ( bBi )
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index 7b93acc8c47fcc1bb37ac741664ffcc4bbb16621..f562e86d0da6e16d0c77114beddc91d16f2845d5 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -748,6 +748,10 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
   const bool         chromaEnabled         = isChromaEnabled(format);
   WRITE_UVLC( chromaEnabled ? (pcSPS->getBitDepth(CHANNEL_TYPE_CHROMA) - 8):0,  "bit_depth_chroma_minus8" );
 
+#if JVET_O0919_TS_MIN_QP
+  WRITE_UVLC( pcSPS->getMinQpPrimeTsMinus4(CHANNEL_TYPE_LUMA),                      "min_qp_prime_ts_minus4" );
+#endif
+
   WRITE_UVLC( pcSPS->getBitsForPOC()-4,                 "log2_max_pic_order_cnt_lsb_minus4" );
   WRITE_FLAG( pcSPS->getIDRRefParamListPresent(),                 "sps_idr_rpl_present_flag" );
   // KJS: Marakech decision: sub-layers added back
@@ -904,6 +908,9 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
   if ( pcSPS->getUseAffine() )
   {
     WRITE_FLAG( pcSPS->getUseAffineType() ? 1 : 0,                                             "affine_type_flag" );
+#if JVET_O0070_PROF
+    WRITE_FLAG( pcSPS->getUsePROF() ? 1 : 0,                                                   "sps_prof_enabled_flag" );
+#endif
 #if JVET_O0438_SPS_AFFINE_AMVR_FLAG
     WRITE_FLAG( pcSPS->getAffineAmvrEnabledFlag() ? 1 : 0,                                     "sps_affine_amvr_enabled_flag" );
 #endif
@@ -1271,7 +1278,11 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
       if (alfEnabled)
       {
 #if JVET_O0288_UNIFY_ALF_SLICE_TYPE_REMOVAL
+#if JVET_O_MAX_NUM_ALF_APS_8
+        WRITE_CODE(pcSlice->getTileGroupNumAps(), 3, "tile_group_num_aps");
+#else
         xWriteTruncBinCode(pcSlice->getTileGroupNumAps(), ALF_CTB_MAX_NUM_APS + 1);
+#endif
 #else
         if (pcSlice->isIntra())
         {
@@ -1279,13 +1290,21 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
         }
         else
         {
+#if JVET_O_MAX_NUM_ALF_APS_8
+          WRITE_CODE(pcSlice->getTileGroupNumAps(), 3, "tile_group_num_aps");
+#else
           xWriteTruncBinCode(pcSlice->getTileGroupNumAps(), ALF_CTB_MAX_NUM_APS + 1);
+#endif
         }
 #endif
         const std::vector<int>&   apsId = pcSlice->getTileGroupApsIdLuma();
         for (int i = 0; i < pcSlice->getTileGroupNumAps(); i++)
         {
+#if JVET_O_MAX_NUM_ALF_APS_8
+          WRITE_CODE(apsId[i], 3, "tile_group_aps_id");
+#else
           WRITE_CODE(apsId[i], 5, "tile_group_aps_id");
+#endif
         }
 
         const int alfChromaIdc = pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cb) + pcSlice->getTileGroupAlfEnabledFlag(COMPONENT_Cr) * 2 ;
@@ -1300,7 +1319,11 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
         if (alfChromaIdc)
         {
 #if JVET_O0288_UNIFY_ALF_SLICE_TYPE_REMOVAL
+#if JVET_O_MAX_NUM_ALF_APS_8
+          WRITE_CODE(pcSlice->getTileGroupApsIdChroma(), 3, "tile_group_aps_id_chroma");
+#else
           WRITE_CODE(pcSlice->getTileGroupApsIdChroma(), 5, "tile_group_aps_id_chroma");
+#endif
 #else
           if (pcSlice->isIntra()&& pcSlice->getTileGroupNumAps() == 1)
           {
@@ -1308,7 +1331,11 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
           }
           else
           {
+#if JVET_O_MAX_NUM_ALF_APS_8
+            WRITE_CODE(pcSlice->getTileGroupApsIdChroma(), 3, "tile_group_aps_id_chroma");
+#else
             WRITE_CODE(pcSlice->getTileGroupApsIdChroma(), 5, "tile_group_aps_id_chroma");
+#endif
           }
 #endif
         }