diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h
index e714d2869888ebeb6cf2a80c2cb0288acdac23a8..216f8b57598cf0b37c6d87620ec93d4ba107fe0d 100644
--- a/source/Lib/CommonLib/Buffer.h
+++ b/source/Lib/CommonLib/Buffer.h
@@ -686,9 +686,17 @@ struct UnitBuf
   const AreaBuf<T>& Cr() const { return bufs[2]; }
 
   void fill                 ( const T &val );
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  void copyFrom             ( const UnitBuf<const T> &other, const bool lumaOnly = false, const bool chromaOnly = false );
+#else
   void copyFrom             ( const UnitBuf<const T> &other );
+#endif
   void reconstruct          ( const UnitBuf<const T> &pred, const UnitBuf<const T> &resi, const ClpRngs& clpRngs );
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  void copyClip             ( const UnitBuf<const T> &src, const ClpRngs& clpRngs, const bool lumaOnly = false, const bool chromaOnly = false );
+#else
   void copyClip             ( const UnitBuf<const T> &src, const ClpRngs& clpRngs );
+#endif
   void subtract             ( const UnitBuf<const T> &other );
   void addWeightedAvg       ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const uint8_t gbiIdx = GBI_DEFAULT, const bool chromaOnly = false, const bool lumaOnly = false);
   void addAvg               ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const bool chromaOnly = false, const bool lumaOnly = false);
@@ -718,11 +726,22 @@ void UnitBuf<T>::fill( const T &val )
 }
 
 template<typename T>
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+void UnitBuf<T>::copyFrom(const UnitBuf<const T> &other, const bool lumaOnly, const bool chromaOnly )   // copies other's component buffers into this one; lumaOnly / chromaOnly narrow the component range and must not both be set
+#else
 void UnitBuf<T>::copyFrom( const UnitBuf<const T> &other )
+#endif
 {
   CHECK( chromaFormat != other.chromaFormat, "Incompatible formats" );
 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" );
+  const size_t compStart = chromaOnly ? 1 : 0;             // chromaOnly: begin at bufs[1], leaving bufs[0] untouched
+  const size_t compEnd   = lumaOnly ? 1 : bufs.size();     // lumaOnly: end after bufs[0]; no narrowing cast, same form as copyClip
+  for( size_t i = compStart; i < compEnd; i++ )
+#else
   for( unsigned i = 0; i < bufs.size(); i++ )
+#endif
   {
     bufs[i].copyFrom( other.bufs[i] );
   }
@@ -742,11 +761,22 @@ void UnitBuf<T>::subtract( const UnitBuf<const T> &other )
 }
 
 template<typename T>
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+void UnitBuf<T>::copyClip(const UnitBuf<const T> &src, const ClpRngs &clpRngs, const bool lumaOnly, const bool chromaOnly )   // per-component copyClip from src using clpRngs.comp[i]; lumaOnly / chromaOnly narrow the component range and must not both be set
+#else
 void UnitBuf<T>::copyClip(const UnitBuf<const T> &src, const ClpRngs& clpRngs)
+#endif
 {
   CHECK( chromaFormat != src.chromaFormat, "Incompatible formats" );
 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" );
+  const size_t compStart = chromaOnly ? 1 : 0;             // chromaOnly: begin at bufs[1], leaving bufs[0] untouched
+  const size_t compEnd   = lumaOnly ? 1 : bufs.size();     // lumaOnly: end after bufs[0]; otherwise run through the last component
+  for( size_t i = compStart; i < compEnd; i++ )
+#else
   for( unsigned i = 0; i < bufs.size(); i++ )
+#endif
   {
     bufs[i].copyClip( src.bufs[i], clpRngs.comp[i] );
   }
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index ef51d6960c16156e393fe28174a96d9b31f482c6..2cb8fc51a357c061b40cfbab162e186fa969bfc5 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -264,7 +264,11 @@ bool InterPrediction::xCheckIdenticalMotion( const PredictionUnit &pu )
   return false;
 }
 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/, const bool luma /*= true*/, const bool chroma /*= true*/)
+#else
 void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/ )
+#endif
 {
 
   // compute the location of the current PU
@@ -331,7 +335,11 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R
       PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(pu, subPu));
       subPu.mmvdEncOptMode = 0;
       subPu.mvRefine = false;
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      motionCompensation(subPu, subPredBuf, eRefPicList, luma, chroma);
+#else
       motionCompensation(subPu, subPredBuf, eRefPicList);
+#endif
       secDim = later - secStep;
     }
   }
@@ -433,7 +441,12 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList&
     {
       CHECK( bioApplied, "BIO is not allowed with affine" );
       m_iRefListIdx = eRefPicList;
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      bool genChromaMv = (!luma && chroma && compID == COMPONENT_Cb);
+      xPredAffineBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx )->unscaledPic, mv, pcYuvPred, bi, pu.cu->slice->clpRng( compID ), genChromaMv, pu.cu->slice->getScalingRatio( eRefPicList, iRefIdx ));
+#else
       xPredAffineBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx )->unscaledPic, mv, pcYuvPred, bi, pu.cu->slice->clpRng( compID ), pu.cu->slice->getScalingRatio( eRefPicList, iRefIdx ));
+#endif
     }
     else
     {
@@ -452,7 +465,11 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList&
   }
 }
 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+void InterPrediction::xPredInterBi(PredictionUnit &pu, PelUnitBuf &pcYuvPred, const bool luma, const bool chroma, PelUnitBuf *yuvPredTmp /*= NULL*/)
+#else
 void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred, PelUnitBuf* yuvPredTmp /*= NULL*/)
+#endif
 {
   const PPS   &pps   = *pu.cs->pps;
   const Slice &slice = *pu.cs->slice;
@@ -537,12 +554,20 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred, Pe
       if (dmvrApplied)
       {
         if (yuvPredTmp)
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+          xPredInterUni(pu, eRefPicList, pcMbBuf, true, false, luma, chroma);
+#else
           xPredInterUni(pu, eRefPicList, pcMbBuf, true, false, true, true);
+#endif
         continue;
       }
       xPredInterUni ( pu, eRefPicList, pcMbBuf, true
         , bioApplied
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+        , luma, chroma
+#else
         , true, true
+#endif
       );
     }
     else
@@ -551,14 +576,22 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred, Pe
       {
         xPredInterUni ( pu, eRefPicList, pcMbBuf, true
           , bioApplied
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+          , luma, chroma
+#else
           , true, true
+#endif
         );
       }
       else
       {
         xPredInterUni( pu, eRefPicList, pcMbBuf, pu.cu->triangle
           , bioApplied
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+          , luma, chroma
+#else
           , true, true
+#endif
         );
       }
     }
@@ -569,15 +602,27 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred, Pe
   CPelUnitBuf srcPred1 = ( pu.chromaFormat == CHROMA_400 ?
                            CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y())) :
                            CPelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvPred.Y()), PelBuf(m_acYuvPred[1][1], pcYuvPred.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvPred.Cr())) );
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  bool lumaOnly   = luma && !chroma;
+  bool chromaOnly = !luma && chroma;
+#endif
   if( !pu.cu->triangle && (!dmvrApplied) && (!bioApplied) && pps.getWPBiPred() && slice.getSliceType() == B_SLICE && pu.cu->GBiIdx==GBI_DEFAULT)
   {
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+    xWeightedPredictionBi( pu, srcPred0, srcPred1, pcYuvPred, m_maxCompIDToPred, lumaOnly, chromaOnly );
+#else
     xWeightedPredictionBi( pu, srcPred0, srcPred1, pcYuvPred, m_maxCompIDToPred );
+#endif
     if (yuvPredTmp)
       yuvPredTmp->copyFrom(pcYuvPred);
   }
   else if( !pu.cu->triangle && pps.getUseWP() && slice.getSliceType() == P_SLICE )
   {
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+    xWeightedPredictionUni( pu, srcPred0, REF_PIC_LIST_0, pcYuvPred, -1, m_maxCompIDToPred, lumaOnly, chromaOnly );
+#else
     xWeightedPredictionUni( pu, srcPred0, REF_PIC_LIST_0, pcYuvPred, -1, m_maxCompIDToPred );
+#endif
     if (yuvPredTmp)
       yuvPredTmp->copyFrom(pcYuvPred);
   }
@@ -593,7 +638,11 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred, Pe
     }
     else
     {
-      xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, yuvPredTmp);
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, lumaOnly, chromaOnly, yuvPredTmp );
+#else
+      xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied, yuvPredTmp );
+#endif
     }
   }
 }
@@ -790,7 +839,11 @@ bool InterPrediction::isSubblockVectorSpreadOverLimit( int a, int b, int c, int
   return false;
 }
 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+void InterPrediction::xPredAffineBlk(const ComponentID &compID, const PredictionUnit &pu, const Picture *refPic, const Mv *_mv, PelUnitBuf &dstPic, const bool &bi, const ClpRng &clpRng, bool genChromaMv, const std::pair<int, int> scalingRatio)
+#else
 void InterPrediction::xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const std::pair<int, int> scalingRatio )
+#endif
 {
 
   JVET_J0090_SET_REF_PICTURE( refPic, compID );
@@ -946,6 +999,68 @@ void InterPrediction::xPredAffineBlk( const ComponentID& compID, const Predictio
 #endif
     }
   }
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  int iScaleXLuma = ::getComponentScaleX(COMPONENT_Y, chFmt);   // scale factors of COMPONENT_Y under chFmt; only read inside the genChromaMv block below
+  int iScaleYLuma = ::getComponentScaleY(COMPONENT_Y, chFmt);
+
+  if (genChromaMv && pu.chromaFormat != CHROMA_444)   // fills m_storedMv on request, except for 4:4:4; presumably so a chroma-only pass has the MVs a luma pass would otherwise have stored -- confirm against the loop after this block
+  {
+    CHECK(compID == COMPONENT_Y, "Chroma only subblock MV calculation should not apply to Luma");
+    int lumaBlockWidth  = AFFINE_MIN_BLOCK_SIZE;
+    int lumaBlockHeight = AFFINE_MIN_BLOCK_SIZE;
+
+    CHECK(lumaBlockWidth > (width >> iScaleXLuma), "Sub Block width  > Block width");
+    CHECK(lumaBlockHeight > (height >> iScaleYLuma), "Sub Block height > Block height");
+
+    const int cxWidthLuma  = width >> iScaleXLuma;    // block size expressed with the COMPONENT_Y scale factors
+    const int cxHeightLuma = height >> iScaleYLuma;
+    const int iHalfBWLuma  = lumaBlockWidth >> 1;     // offset from a sub-block's corner to its centre
+    const int iHalfBHLuma  = lumaBlockHeight >> 1;
+
+    int iDMvHorXLuma, iDMvHorYLuma, iDMvVerXLuma, iDMvVerYLuma;
+    iDMvHorXLuma = (mvRT - mvLT).getHor() << (iBit - floorLog2(cxWidthLuma));   // horizontal MV change terms: (mvRT - mvLT) shifted by iBit - log2(width)
+    iDMvHorYLuma = (mvRT - mvLT).getVer() << (iBit - floorLog2(cxWidthLuma));
+    if (pu.cu->affineType == AFFINEMODEL_6PARAM)
+    {
+      iDMvVerXLuma = (mvLB - mvLT).getHor() << (iBit - floorLog2(cxHeightLuma));   // 6-parameter model: vertical terms come from (mvLB - mvLT)
+      iDMvVerYLuma = (mvLB - mvLT).getVer() << (iBit - floorLog2(cxHeightLuma));
+    }
+    else
+    {
+      iDMvVerXLuma = -iDMvHorYLuma;   // any other model: vertical terms are derived from the horizontal ones
+      iDMvVerYLuma = iDMvHorXLuma;
+    }
+
+    const bool subblkMVSpreadOverLimitLuma = isSubblockVectorSpreadOverLimit(iDMvHorXLuma, iDMvHorYLuma, iDMvVerXLuma, iDMvVerYLuma, pu.interDir);   // selects per-sub-block MVs (false) or one shared MV (true) in the loop below
+
+    // derive and store one MV per AFFINE_MIN_BLOCK_SIZE sub-block; no prediction samples are generated in this loop
+    for (int h = 0; h < cxHeightLuma; h += lumaBlockHeight)
+    {
+      for (int w = 0; w < cxWidthLuma; w += lumaBlockWidth)
+      {
+        int iMvScaleTmpHor, iMvScaleTmpVer;
+        if (!subblkMVSpreadOverLimitLuma)
+        {
+          iMvScaleTmpHor = iMvScaleHor + iDMvHorXLuma * (iHalfBWLuma + w) + iDMvVerXLuma * (iHalfBHLuma + h);   // MV evaluated at this sub-block's centre
+          iMvScaleTmpVer = iMvScaleVer + iDMvHorYLuma * (iHalfBWLuma + w) + iDMvVerYLuma * (iHalfBHLuma + h);
+        }
+        else
+        {
+          iMvScaleTmpHor = iMvScaleHor + iDMvHorXLuma * (cxWidthLuma >> 1) + iDMvVerXLuma * (cxHeightLuma >> 1);   // spread over limit: every sub-block gets the MV evaluated at the block centre
+          iMvScaleTmpVer = iMvScaleVer + iDMvHorYLuma * (cxWidthLuma >> 1) + iDMvVerYLuma * (cxHeightLuma >> 1);
+        }
+
+        roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift);
+        Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer);
+        tmpMv.clipToStorageBitDepth();   // the clipped components are read back and stored below
+        iMvScaleTmpHor = tmpMv.getHor();
+        iMvScaleTmpVer = tmpMv.getVer();
+
+        m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer);   // slot = sub-block row * MVBUFFER_SIZE + sub-block column
+      }
+    }
+  }
+#endif
   // get prediction block by block
   for ( int h = 0; h < cxHeight; h += blockHeight )
   {
@@ -1247,8 +1362,16 @@ void InterPrediction::xCalcBlkGradient(int sx, int sy, int    *arraysGx2, int
   g_pelBufOP.calcBlkGradient(sx, sy, arraysGx2, arraysGxGy, arraysGxdI, arraysGy2, arraysGydI, sGx2, sGy2, sGxGy, sGxdI, sGydI, width, height, unitSize);
 }
 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, bool lumaOnly, bool chromaOnly, PelUnitBuf* yuvDstTmp /*= NULL*/)
+#else
 void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, PelUnitBuf* yuvDstTmp /*= NULL*/)
+#endif
 {
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  CHECK( (chromaOnly && lumaOnly), "should not happen" );
+#endif
+
   const int iRefIdx0 = pu.refIdx[0];
   const int iRefIdx1 = pu.refIdx[1];
 
@@ -1264,9 +1387,17 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB
     if( pu.cu->GBiIdx != GBI_DEFAULT && (yuvDstTmp || !pu.mhIntraFlag) )
     {
       CHECK(bioApplied, "GBi is disallowed with BIO");
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx, chromaOnly, lumaOnly);
+#else
       pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx);
+#endif
       if (yuvDstTmp)
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+        yuvDstTmp->addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly);
+#else
         yuvDstTmp->addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, false);
+#endif
       return;
     }
     if (bioApplied)
@@ -1299,13 +1430,30 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB
       getWpScaling(pu.cu->slice, iRefIdx0, iRefIdx1, pwp0, pwp1);
       if (!bioApplied)
       {
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+        if (!chromaOnly)
+#endif
         addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Y);
       }
-      addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cb);
-      addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cr);
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      if (!lumaOnly)
+      {
+#endif
+        addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cb);
+        addWeightBiComponent(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, pcYuvDst, true, COMPONENT_Cr);
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      }
+#endif
     }
     else
     {
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      if (!bioApplied && (lumaOnly || chromaOnly))
+      {
+        pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly);
+      }
+      else
+#endif
       pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied);
     }
     if (yuvDstTmp)
@@ -1316,7 +1464,11 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB
         yuvDstTmp->bufs[2].copyFrom(pcYuvDst.bufs[2]);
       }
       else
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+        yuvDstTmp->copyFrom(pcYuvDst, lumaOnly, chromaOnly);
+#else
         yuvDstTmp->copyFrom(pcYuvDst);
+#endif
     }
   }
   else if( iRefIdx0 >= 0 && iRefIdx1 < 0 )
@@ -1326,9 +1478,17 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB
       pcYuvDst.copyFrom( pcYuvSrc0 );
     }
     else
-    pcYuvDst.copyClip( pcYuvSrc0, clpRngs );
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      pcYuvDst.copyClip( pcYuvSrc0, clpRngs, lumaOnly, chromaOnly );
+#else
+      pcYuvDst.copyClip( pcYuvSrc0, clpRngs );
+#endif
     if (yuvDstTmp)
-      yuvDstTmp->copyFrom(pcYuvDst);
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      yuvDstTmp->copyFrom( pcYuvDst, lumaOnly, chromaOnly );
+#else
+      yuvDstTmp->copyFrom( pcYuvDst );
+#endif
   }
   else if( iRefIdx0 < 0 && iRefIdx1 >= 0 )
   {
@@ -1337,9 +1497,17 @@ void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitB
       pcYuvDst.copyFrom( pcYuvSrc1 );
     }
     else
-    pcYuvDst.copyClip( pcYuvSrc1, clpRngs );
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      pcYuvDst.copyClip( pcYuvSrc1, clpRngs, lumaOnly, chromaOnly );
+#else
+      pcYuvDst.copyClip( pcYuvSrc1, clpRngs );
+#endif
     if (yuvDstTmp)
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      yuvDstTmp->copyFrom(pcYuvDst, lumaOnly, chromaOnly);
+#else
       yuvDstTmp->copyFrom(pcYuvDst);
+#endif
   }
 }
 
@@ -1514,15 +1682,27 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBu
     {
       xPredInterUni         ( pu,          eRefPicList, predBuf, true
         , false
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+        , luma, chroma
+#else
         , true, true
+#endif
+      );
+      xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+        , (luma && !chroma), (!luma && chroma)
+#endif
       );
-      xWeightedPredictionUni( pu, predBuf, eRefPicList, predBuf, -1, m_maxCompIDToPred );
     }
     else
     {
       xPredInterUni( pu, eRefPicList, predBuf, false
         , false
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+        , luma, chroma
+#else
         , true, true
+#endif
       );
     }
   }
@@ -1593,20 +1773,36 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBu
     if (pu.mergeType != MRG_TYPE_DEFAULT_N && pu.mergeType != MRG_TYPE_IBC)
     {
       CHECK(predBufWOBIO != NULL, "the case should not happen!");
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      xSubPuMC( pu, predBuf, eRefPicList, luma, chroma );
+#else
       xSubPuMC( pu, predBuf, eRefPicList );
+#endif
     }
     else if( xCheckIdenticalMotion( pu ) )
     {
       xPredInterUni( pu, REF_PIC_LIST_0, predBuf, false
         , false
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+        , luma, chroma
+#else
         , true, true
+#endif
       );
       if (predBufWOBIO)
+#if 0//JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP -- NOTE(review): the literal 0 compiles out this component-selective copy, so the #else branch copies every component regardless of luma/chroma; confirm the disablement is intended, then enable or delete it
+        predBufWOBIO->copyFrom(predBuf, (luma && !chroma), (chroma && !luma));
+#else
         predBufWOBIO->copyFrom(predBuf);
+#endif
     }
     else
     {
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+      xPredInterBi(pu, predBuf, luma, chroma, predBufWOBIO);
+#else
       xPredInterBi(pu, predBuf, predBufWOBIO);
+#endif
     }
   }
   return;
diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h
index b28b49dd3c93969837a8ef870ff9575f4ba593e8..0a03a6567f449f0b99e898e982175a6cf9b64f6f 100644
--- a/source/Lib/CommonLib/InterPrediction.h
+++ b/source/Lib/CommonLib/InterPrediction.h
@@ -124,7 +124,11 @@ protected:
                                   , const bool& bioApplied
                                   , const bool luma, const bool chroma
   );
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  void xPredInterBi             ( PredictionUnit& pu, PelUnitBuf &pcYuvPred, const bool luma = true, const bool chroma = true, PelUnitBuf* yuvPredTmp = NULL );
+#else
   void xPredInterBi             ( PredictionUnit& pu, PelUnitBuf &pcYuvPred, PelUnitBuf* yuvPredTmp = NULL );
+#endif
   void xPredInterBlk            ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
                                  , const bool& bioApplied
                                  , bool isIBC
@@ -140,15 +144,26 @@ protected:
   void xBioGradFilter           (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth);
   void xCalcBIOPar              (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth);
   void xCalcBlkGradient         (int sx, int sy, int    *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize);
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  void xWeightedAverage         ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, const bool lumaOnly = false, const bool chromaOnly = false, PelUnitBuf* yuvDstTmp = NULL );
+#else
   void xWeightedAverage         ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied, PelUnitBuf* yuvDstTmp = NULL );
+#endif
   void xApplyBiPROF             (const PredictionUnit& pu, const CPelBuf& pcYuvSrc0, const CPelBuf& pcYuvSrc1, PelBuf& pcYuvDst, const ClpRng& clpRng);
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  void xPredAffineBlk           ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const bool genChromaMv = false, const std::pair<int, int> scalingRatio = SCALE_1X );
+#else
   void xPredAffineBlk           ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const std::pair<int, int> scalingRatio = SCALE_1X );
-
+#endif
   void xWeightedTriangleBlk     ( const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const bool splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 );
 
   static bool xCheckIdenticalMotion( const PredictionUnit& pu );
 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  void xSubPuMC(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X, const bool luma = true, const bool chroma = true);
+#else
   void xSubPuMC(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X);
+#endif
   void xSubPuBio(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X, PelUnitBuf* yuvDstTmp = NULL);
   void destroy();
 
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index fbb7edbfbfc8055154a63dc3c684cf7e9afc3d78..435427f2b2b7067f3a8952d4381a7613a8b19369 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -50,6 +50,8 @@
 #include <assert.h>
 #include <cassert>
 
+#define JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP             1 // JVET-P0445: encoder speed up for sub-block based merge candidate search
+
 #define JVET_P0057_BDOF_PROF_HARMONIZATION                1 // JVET-P0057: harmonization of BDOF and PROF on motion refinement precis
 
 #define JVET_P0400_REMOVE_SHARED_MERGE_LIST               1 // JVET-P0400: removeal of shared merge list
diff --git a/source/Lib/CommonLib/WeightPrediction.cpp b/source/Lib/CommonLib/WeightPrediction.cpp
index 007fb08fc5a487d02b8c18df6826acc73719dfe3..d39c65b4d283324ad6f19a6fe3d3f28b77bad47e 100644
--- a/source/Lib/CommonLib/WeightPrediction.cpp
+++ b/source/Lib/CommonLib/WeightPrediction.cpp
@@ -161,13 +161,25 @@ void WeightPrediction::addWeightBi(const CPelUnitBuf          &pcYuvSrc0,
                                    const WPScalingParam *const wp1,
                                          PelUnitBuf           &rpcYuvDst,
                                    const bool                  bRoundLuma /*= true*/,
-                                   const ComponentID           maxNumComp)
+                                   const ComponentID           maxNumComp
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+                                  , bool                       lumaOnly
+                                  , bool                       chromaOnly
+#endif
+)
 {
   const bool enableRounding[MAX_NUM_COMPONENT] = { bRoundLuma, true, true };
 
   const uint32_t numValidComponent = (const uint32_t)pcYuvSrc0.bufs.size();
 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" );
+  int firstComponent = chromaOnly ? 1 : 0;
+  int lastComponent = lumaOnly ? 0 : maxNumComp;
+  for (int componentIndex = firstComponent; componentIndex < numValidComponent && componentIndex <= lastComponent; componentIndex++)
+#else
   for (int componentIndex = 0; componentIndex < numValidComponent && componentIndex <= maxNumComp; componentIndex++)
+#endif
   {
     const ComponentID compID = ComponentID(componentIndex);
 
@@ -275,11 +287,24 @@ void  WeightPrediction::addWeightUni(const CPelUnitBuf          &pcYuvSrc0,
                                      const ClpRngs              &clpRngs,
                                      const WPScalingParam *const wp0,
                                            PelUnitBuf           &rpcYuvDst,
-                                     const ComponentID           maxNumComp)
+                                     const ComponentID           maxNumComp
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+                                    , bool                       lumaOnly
+                                    , bool                       chromaOnly
+#endif
+)
 {
   const uint32_t numValidComponent = (const uint32_t)pcYuvSrc0.bufs.size();
 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  CHECK( lumaOnly && chromaOnly, "Not allowed to have both lumaOnly and chromaOnly selected" );
+  int firstComponent = chromaOnly ? 1 : 0;
+  int lastComponent  = lumaOnly ? 0 : maxNumComp;
+  for (int componentIndex = firstComponent; componentIndex < numValidComponent && componentIndex <= lastComponent;
+       componentIndex++)
+#else
   for (int componentIndex = 0; componentIndex < numValidComponent && componentIndex <= maxNumComp; componentIndex++)
+#endif
   {
     const ComponentID compID = ComponentID(componentIndex);
 
@@ -371,7 +396,12 @@ void  WeightPrediction::xWeightedPredictionUni(const PredictionUnit       &pu,
                                                const RefPicList           &eRefPicList,
                                                      PelUnitBuf           &pcYuvPred,
                                                const int                   iRefIdx_input/* = -1*/,
-                                               const ComponentID           maxNumComp)
+                                               const ComponentID           maxNumComp
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+                                              , bool                       lumaOnly
+                                              , bool                       chromaOnly
+#endif
+)
 {
   WPScalingParam  *pwp, *pwpTmp;
 
@@ -391,14 +421,23 @@ void  WeightPrediction::xWeightedPredictionUni(const PredictionUnit       &pu,
   {
     getWpScaling(pu.cs->slice, -1, iRefIdx, pwpTmp, pwp, maxNumComp);
   }
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+  addWeightUni(pcYuvSrc, pu.cu->slice->clpRngs(), pwp, pcYuvPred, maxNumComp, lumaOnly, chromaOnly);
+#else
   addWeightUni(pcYuvSrc, pu.cu->slice->clpRngs(), pwp, pcYuvPred, maxNumComp);
+#endif
 }
 
 void  WeightPrediction::xWeightedPredictionBi(const PredictionUnit       &pu,
                                               const CPelUnitBuf          &pcYuvSrc0,
                                               const CPelUnitBuf          &pcYuvSrc1,
                                                     PelUnitBuf           &rpcYuvDst,
-                                              const ComponentID           maxNumComp)
+                                              const ComponentID           maxNumComp
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+                                              , bool                      lumaOnly
+                                              , bool                      chromaOnly
+#endif
+)
 {
   const int iRefIdx0 = pu.refIdx[0];
   const int iRefIdx1 = pu.refIdx[1];
@@ -413,15 +452,27 @@ void  WeightPrediction::xWeightedPredictionBi(const PredictionUnit       &pu,
 
   if (iRefIdx0 >= 0 && iRefIdx1 >= 0)
   {
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+    addWeightBi(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, rpcYuvDst, true, maxNumComp, lumaOnly, chromaOnly);
+#else
     addWeightBi(pcYuvSrc0, pcYuvSrc1, pu.cu->slice->clpRngs(), pwp0, pwp1, rpcYuvDst, true, maxNumComp);
+#endif
   }
   else if (iRefIdx0 >= 0 && iRefIdx1 < 0)
   {
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+    addWeightUni(pcYuvSrc0, pu.cu->slice->clpRngs(), pwp0, rpcYuvDst, maxNumComp, lumaOnly, chromaOnly);
+#else
     addWeightUni(pcYuvSrc0, pu.cu->slice->clpRngs(), pwp0, rpcYuvDst, maxNumComp);
+#endif
   }
   else if (iRefIdx0 < 0 && iRefIdx1 >= 0)
   {
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+    addWeightUni(pcYuvSrc1, pu.cu->slice->clpRngs(), pwp1, rpcYuvDst, maxNumComp, lumaOnly, chromaOnly);
+#else
     addWeightUni(pcYuvSrc1, pu.cu->slice->clpRngs(), pwp1, rpcYuvDst, maxNumComp);
+#endif
   }
   else
   {
diff --git a/source/Lib/CommonLib/WeightPrediction.h b/source/Lib/CommonLib/WeightPrediction.h
index a038dd2571dbbe69de1b33f475b6ed05e5bfb31c..5df9ad1cc5a100641aeaf9267607a473f1d39681 100644
--- a/source/Lib/CommonLib/WeightPrediction.h
+++ b/source/Lib/CommonLib/WeightPrediction.h
@@ -69,7 +69,12 @@ public:
                                 const WPScalingParam *const wp1,
                                       PelUnitBuf           &rpcYuvDst,
                                 const bool                  bRoundLuma = true,
-                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT );
+                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+                                , bool                      lumaOnly = false
+                                , bool                      chromaOnly = false
+#endif
+                                );
 
   void addWeightBiComponent(    const CPelUnitBuf          &pcYuvSrc0,
                                 const CPelUnitBuf          &pcYuvSrc1,
@@ -84,20 +89,35 @@ public:
                                 const ClpRngs              &clpRngs,
                                 const WPScalingParam *const wp0,
                                       PelUnitBuf           &rpcYuvDst,
-                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT);
+                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+                                , bool                      lumaOnly = false
+                                , bool                      chromaOnly = false
+#endif
+                                );
 
   void  xWeightedPredictionUni( const PredictionUnit       &pu,
                                 const CPelUnitBuf          &pcYuvSrc,
                                 const RefPicList           &eRefPicList,
                                       PelUnitBuf           &pcYuvPred,
                                 const int                   iRefIdx=-1,
-                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT);
+                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+                                , bool                      lumaOnly = false
+                                , bool                      chromaOnly = false
+#endif
+                                );
 
   void  xWeightedPredictionBi(  const PredictionUnit       &pu,
                                 const CPelUnitBuf          &pcYuvSrc0,
                                 const CPelUnitBuf          &pcYuvSrc1,
                                       PelUnitBuf           &pcYuvDst,
-                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT );
+                                const ComponentID           maxNumComp = MAX_NUM_COMPONENT 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+                                , bool                      lumaOnly = false
+                                , bool                      chromaOnly = false
+#endif
+                                );
 };
 
 #endif
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index 10d54df714e00d0fe6efb503aaa74413b5297f2c..5485e57ba51f56b8006fdda253116306f0498aaf 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -3149,7 +3149,11 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
 
         distParam.cur = acMergeBuffer[uiMergeCand].Y();
 
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+        m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand], REF_PIC_LIST_X, true, false );
+#else
         m_pcInterSearch->motionCompensation( pu, acMergeBuffer[uiMergeCand] );
+#endif
 
         Distortion uiSad = distParam.distFunc( distParam );
         uint32_t   uiBitsCand = uiMergeCand + 1;
@@ -3248,7 +3252,12 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
       }
       if ( mrgTempBufSet )
       {
+#if JVET_P0445_SUBBLOCK_MERGE_ENC_SPEEDUP
+        tempCS->getPredBuf().copyFrom(acMergeBuffer[uiMergeCand], true, false);   // Copy Luma Only
+        m_pcInterSearch->motionCompensation(pu, REF_PIC_LIST_X, false, true);
+#else
         tempCS->getPredBuf().copyFrom( acMergeBuffer[uiMergeCand] );
+#endif
       }
       else
       {