diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg
index 0b61084fb0bdc8c4265d4f2fea0f1c42dca97bad..2f0ff3dad0747818d5482af32a690a4d4119efea 100644
--- a/cfg/encoder_randomaccess_vtm.cfg
+++ b/cfg/encoder_randomaccess_vtm.cfg
@@ -150,6 +150,7 @@ IBC                          : 0      # turned off in CTC
 AllowDisFracMMVD             : 1
 AffineAmvr                   : 1
 LumaReshapeEnable            : 1      # luma reshaping. 0: disable 1:enable 
+DMVR                         : 1
 
 # Fast tools
 PBIntraFast                  : 1
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index f36dce781232edbc556714e74c5d8625d73cf1c3..fee7d084f33243a393120727c94c3a155b8b802a 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -268,6 +268,9 @@ void EncApp::xInitLibCfg()
 #endif
 #if JVET_M0247_AFFINE_AMVR_ENCOPT
   m_cEncLib.setUseAffineAmvrEncOpt                               ( m_AffineAmvrEncOpt );
+#endif
+#if JVET_M0147_DMVR
+  m_cEncLib.setDMVR                                              ( m_DMVR );
 #endif
   m_cEncLib.setIBCMode                                           ( m_IBCMode );
   m_cEncLib.setIBCLocalSearchRangeX                              ( m_IBCLocalSearchRangeX );
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index 13d7daadf087da8c4e2eeb6ca598fbabb8a7b5b8..23fce78dc386bcf8d9336f5468fc7818e2c5cc7c 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -875,6 +875,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #endif
 #if JVET_M0247_AFFINE_AMVR_ENCOPT
   ("AffineAmvrEncOpt",                                m_AffineAmvrEncOpt,                               false, "Enable encoder optimization of affine AMVR")
+#endif
+#if JVET_M0147_DMVR
+  ("DMVR",                                            m_DMVR,                                           false, "Decoder-side Motion Vector Refinement") // off by default; cfg files opt in with "DMVR : 1"
 #endif
   ( "IBC",                                            m_IBCMode,                                           0u, "IBCMode (0x1:enabled, 0x0:disabled)  [default: disabled]")
   ( "IBCLocalSearchRangeX",                           m_IBCLocalSearchRangeX,                            128u, "Search range of IBC local search in x direction")
@@ -1971,6 +1974,9 @@ bool EncAppCfg::xCheckParameter()
     xConfirmPara( m_GBi, "GBi is only allowed with NEXT profile" );
     xConfirmPara( m_GBiFast, "GBiFast is only allowed with NEXT profile" );
     xConfirmPara( m_Triangle, "Triangle is only allowed with NEXT profile" );
+#if JVET_M0147_DMVR
+    xConfirmPara(m_DMVR, "DMVR only allowed with NEXT profile");
+#endif
     // ADD_NEW_TOOL : (parameter check) add a check for next tools here
   }
   else
@@ -3202,6 +3208,9 @@ void EncAppCfg::xPrintParameter()
 #if JVET_M0247_AFFINE_AMVR_ENCOPT
     m_AffineAmvrEncOpt = m_AffineAmvr ? m_AffineAmvrEncOpt : false;
     msg( VERBOSE, "AffineAmvrEncOpt:%d ", m_AffineAmvrEncOpt );
+#endif
+#if JVET_M0147_DMVR
+    msg(VERBOSE, "DMVR:%d ", m_DMVR);
 #endif
   }
     msg(VERBOSE, "IBC:%d ", m_IBCMode);
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index bdda6164b3033631666272351b5a54f62ddb325f..03bc50438e24395c64383dcd438f8bc8669903f2 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -249,6 +249,9 @@ protected:
 #if JVET_M0247_AFFINE_AMVR_ENCOPT
   bool      m_AffineAmvrEncOpt;
 #endif
+#if JVET_M0147_DMVR
+  bool      m_DMVR;
+#endif
 
   unsigned  m_IBCMode;
   unsigned  m_IBCLocalSearchRangeX;
diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp
index a5c7197e8966d0558b5acfa5103c7d06e593386d..ad437016a5712f2c411c35c5281ec191b84889da 100644
--- a/source/Lib/CommonLib/Buffer.cpp
+++ b/source/Lib/CommonLib/Buffer.cpp
@@ -299,6 +299,10 @@ PelBufferOps::PelBufferOps()
   calcBIOPar      = calcBIOParCore;
   calcBlkGradient = calcBlkGradientCore;
 
+#if JVET_M0147_DMVR
+  copyBuffer = copyBufferCore;
+  padding = paddingCore;
+#endif
 #if ENABLE_SIMD_OPT_GBI
   removeWeightHighFreq8 = removeWeightHighFreq;
   removeWeightHighFreq4 = removeWeightHighFreq;
@@ -313,6 +317,42 @@ PelBufferOps g_pelBufOP = PelBufferOps();
 #endif
 #endif
 
+#if JVET_M0147_DMVR
+void copyBufferCore(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height) // copy a width x height block of Pel from src to dst, one row at a time; strides are counted in Pel elements
+{
+  int numBytes = width * sizeof(Pel); // size of one row in bytes
+  for (int i = 0; i < height; i++)
+  {
+    memcpy(dst + i * dstStride, src + i * srcStride, numBytes); // row i: only 'width' samples are copied, the remainder of each stride is left untouched
+  }
+}
+
+void paddingCore(Pel *ptr, int iStride, int iWidth, int iHeight, int padSize) // replicate the border samples of the iWidth x iHeight block at ptr by padSize samples on every side; writes outside the block itself
+{
+  /* Left and right padding: extend every row with copies of its first / last sample */
+  Pel *ptrTemp1 = ptr; // first sample of row 0
+  Pel *ptrTemp2 = ptr + (iWidth - 1); // last sample of row 0
+  int offset = 0;
+  for (int i = 0; i < iHeight; i++)
+  {
+    offset = iStride * i; // start of row i relative to ptr
+    for (int j = 1; j <= padSize; j++)
+    {
+      *(ptrTemp1 - j + offset) = *(ptrTemp1 + offset); // j-th sample to the left of row i
+      *(ptrTemp2 + j + offset) = *(ptrTemp2 + offset); // j-th sample to the right of row i
+    }
+  }
+  /* Top and bottom padding: copy the first / last row, including the side padding written above */
+  int numBytes = (iWidth + padSize + padSize) * sizeof(Pel); // bytes in one row after left/right padding
+  ptrTemp1 = (ptr - padSize); // start of the padded first row
+  ptrTemp2 = (ptr + (iStride * (iHeight - 1)) - padSize); // start of the padded last row
+  for (int i = 1; i <= padSize; i++)
+  {
+    memcpy(ptrTemp1 - (i * iStride), (ptrTemp1), numBytes); // i-th row above the block
+    memcpy(ptrTemp2 + (i * iStride), (ptrTemp2), numBytes); // i-th row below the block
+  }
+}
+#endif
 template<>
 void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t gbiIdx)
 {
diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h
index e80b015059c292db541c232746837caa33656641..50b6571771024191c73e4516247caa25b9b6448f 100644
--- a/source/Lib/CommonLib/Buffer.h
+++ b/source/Lib/CommonLib/Buffer.h
@@ -77,6 +77,10 @@ struct PelBufferOps
   void(*calcBIOPar)    (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG);
 #endif
   void(*calcBlkGradient)(int sx, int sy, int    *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize);
+#if JVET_M0147_DMVR
+  void(*copyBuffer)(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height);
+  void(*padding)(Pel *dst, int stride, int width, int height, int padSize);
+#endif
 #if ENABLE_SIMD_OPT_GBI
   void ( *removeWeightHighFreq8)  ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
   void ( *removeWeightHighFreq4)  ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
@@ -90,6 +94,12 @@ extern PelBufferOps g_pelBufOP;
 #endif
 #endif
 
+
+#if JVET_M0147_DMVR
+void paddingCore(Pel *ptr, int iStride, int iWidth, int iHeight, int padSize);
+void copyBufferCore(Pel *src, int srcStride, Pel *Dst, int dstStride, int width, int height);
+#endif
+
 template<typename T>
 struct AreaBuf : public Size
 {
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index 9c1c06d633da7e46e064dd434267a25ec199d25d..bc2d375cb4704db3fc0cf74e161ceb2750fc336e 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -332,6 +332,14 @@ static const uint32_t LUMA_LEVEL_TO_DQP_LUT_MAXSIZE =                1024; ///<
 #if !JVET_M0464_UNI_MTS
 static const int NUM_EMT_CU_FLAG_CTX =                              6;      ///< number of context models for EMT CU-level flag
 #endif
+#if JVET_M0147_DMVR
+static const int DMVR_SUBCU_WIDTH = 16;      ///< upper bound on the width (luma samples) of the sub-blocks refined independently by DMVR
+static const int DMVR_SUBCU_HEIGHT = 16;      ///< upper bound on the height (luma samples) of the sub-blocks refined independently by DMVR
+static const int DMVR_SUBCU_WIDTH_LOG2 = 4;      ///< log2 of DMVR_SUBCU_WIDTH
+static const int DMVR_SUBCU_HEIGHT_LOG2 = 4;      ///< log2 of DMVR_SUBCU_HEIGHT
+static const int MAX_NUM_SUBCU_DMVR = ((MAX_CU_SIZE * MAX_CU_SIZE) >> (DMVR_SUBCU_WIDTH_LOG2 + DMVR_SUBCU_HEIGHT_LOG2));      ///< number of DMVR sub-blocks in a MAX_CU_SIZE x MAX_CU_SIZE block
+static const int DMVR_NUM_ITERATION = 2;      ///< number of integer refinement iterations; also used as the per-side search / padding margin in samples
+#endif
 
 //QTBT high level parameters
 //for I slice luma CTB configuration para.
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index cd524e4c7d019bf30961f3722be968b92082887a..9108117e1ea4deefcd042e06a73e6edcfdcdfb9e 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -82,7 +82,15 @@ InterPrediction::InterPrediction()
       m_filteredBlockTmp[i][c] = nullptr;
     }
   }
-
+#if JVET_M0147_DMVR
+  m_cYuvPredTempDMVRL1 = nullptr;
+  m_cYuvPredTempDMVRL0 = nullptr;
+  for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++)
+  {
+    m_cRefSamplesDMVRL0[ch] = nullptr;
+    m_cRefSamplesDMVRL1[ch] = nullptr;
+  }
+#endif
 }
 
 InterPrediction::~InterPrediction()
@@ -128,6 +136,19 @@ void InterPrediction::destroy()
   xFree(m_gradY0);   m_gradY0 = nullptr;
   xFree(m_gradX1);   m_gradX1 = nullptr;
   xFree(m_gradY1);   m_gradY1 = nullptr;
+#if JVET_M0147_DMVR
+  xFree(m_cYuvPredTempDMVRL0);
+  m_cYuvPredTempDMVRL0 = nullptr;
+  xFree(m_cYuvPredTempDMVRL1);
+  m_cYuvPredTempDMVRL1 = nullptr;
+  for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++)
+  {
+    xFree(m_cRefSamplesDMVRL0[ch]);
+    m_cRefSamplesDMVRL0[ch] = nullptr;
+    xFree(m_cRefSamplesDMVRL1[ch]);
+    m_cRefSamplesDMVRL1[ch] = nullptr;
+  }
+#endif
 }
 
 void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
@@ -148,6 +169,10 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
     {
       int extWidth = MAX_CU_SIZE + (2 * BIO_EXTEND_SIZE + 2) + 16;
       int extHeight = MAX_CU_SIZE + (2 * BIO_EXTEND_SIZE + 2) + 1;
+#if JVET_M0147_DMVR
+      extWidth = extWidth > (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1) + 16) ? extWidth : MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1) + 16;
+      extHeight = extHeight > (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1) + 1) ? extHeight : MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1) + 1;
+#endif
       for( uint32_t i = 0; i < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; i++ )
       {
         m_filteredBlockTmp[i][c] = ( Pel* ) xMalloc( Pel, ( extWidth + 4 ) * ( extHeight + 7 + 4 ) );
@@ -175,6 +200,15 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
     m_gradY1 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE);
   }
 
+#if JVET_M0147_DMVR
+  m_cYuvPredTempDMVRL0 = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1)) * (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1)));
+  m_cYuvPredTempDMVRL1 = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1)) * (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1)));
+  for (uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++)
+  {    
+    m_cRefSamplesDMVRL0[ch] = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1) + NTAPS_LUMA) * (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1) + NTAPS_LUMA));
+    m_cRefSamplesDMVRL1[ch] = (Pel*)xMalloc(Pel, (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1) + NTAPS_LUMA) * (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1) + NTAPS_LUMA));
+  }
+#endif
 #if !JVET_J0090_MEMORY_BANDWITH_MEASURE
   m_if.initInterpolationFilter( true );
 #endif
@@ -328,6 +362,9 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R
       PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(pu, subPu));
 #if JVET_M0823_MMVD_ENCOPT
       subPu.mmvdEncOptMode = 0;
+#endif
+#if JVET_M0147_DMVR
+      subPu.mvRefine = false;
 #endif
       motionCompensation(subPu, subPredBuf, eRefPicList);
       secDim = later - secStep;
@@ -468,6 +505,10 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
   if (pu.mmvdEncOptMode == 2 && pu.mmvdMergeFlag) {
     bioApplied = false;
   }
+#endif
+#if JVET_M0147_DMVR
+  bool bDMVRApplied = false;
+  bDMVRApplied = (pu.mvRefine) && PU::checkDMVRCondition(pu);
 #endif
   for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
   {
@@ -487,6 +528,10 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
 
     if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0)
     {
+#if JVET_M0147_DMVR
+      if (bDMVRApplied)
+        continue; // mc will happen in processDMVR
+#endif
       xPredInterUni ( pu, eRefPicList, pcMbBuf, true
         , bioApplied
         , true, true
@@ -510,6 +555,12 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
       }
     }
   }
+#if JVET_M0147_DMVR
+  if (bDMVRApplied)
+  {
+    xProcessDMVR(pu, pcYuvPred, slice.clpRngs(), bioApplied);
+  }
+#endif
 
 
   CPelUnitBuf srcPred0 = ( pu.chromaFormat == CHROMA_400 ?
@@ -528,14 +579,33 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
   }
   else
   {
+#if JVET_M0147_DMVR
+    if (bDMVRApplied == false)
+    {
+#endif
     xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied );
+#if JVET_M0147_DMVR
+    }
+#endif
   }
 }
 
+#if JVET_M0147_DMVR
 void InterPrediction::xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
                                      , const bool& bioApplied
                                      , bool isIBC
+                                     , SizeType dmvrWidth
+                                     , SizeType dmvrHeight
+                                     , bool bilinearMC
+                                     , Pel *srcPadBuf
+                                     , int32_t srcPadStride
                                     )
+#else
+void InterPrediction::xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
+                                     , const bool& bioApplied
+                                     , bool isIBC
+                                    )
+#endif
 {
   JVET_J0090_SET_REF_PICTURE( refPic, compID );
   const ChromaFormat  chFmt = pu.chromaFormat;
@@ -559,9 +629,28 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
   CPelBuf refBuf;
   {
     Position offset = pu.blocks[compID].pos().offset( _mv.getHor() >> shiftHor, _mv.getVer() >> shiftVer );
+#if JVET_M0147_DMVR
+    if (dmvrWidth)
+    {
+      refBuf = refPic->getRecoBuf(CompArea(compID, chFmt, offset, Size(dmvrWidth, dmvrHeight)));
+    }
+    else
+#endif
     refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, pu.blocks[compID].size() ) );
   }
 
+#if JVET_M0147_DMVR
+  if (NULL != srcPadBuf)
+  {
+    refBuf.buf = srcPadBuf;
+    refBuf.stride = srcPadStride;
+  }
+  if (dmvrWidth)
+  {
+    width = dmvrWidth;
+    height = dmvrHeight;
+  }
+#endif
   // backup data
   int backupWidth = width;
   int backupHeight = height;
@@ -580,21 +669,49 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
 
   if( yFrac == 0 )
   {
+#if JVET_M0147_DMVR
+    m_if.filterHor(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng, bilinearMC, bilinearMC);
+#else
     m_if.filterHor(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng);
+#endif
   }
   else if( xFrac == 0 )
   {
+#if JVET_M0147_DMVR
+    m_if.filterVer(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng, bilinearMC, bilinearMC);
+#else
     m_if.filterVer(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng);
+#endif
   }
   else
   {
+#if JVET_M0147_DMVR
+    PelBuf tmpBuf = dmvrWidth ? PelBuf(m_filteredBlockTmp[0][compID], Size(dmvrWidth, dmvrHeight)) : PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]);
+    if (dmvrWidth == 0)
+      tmpBuf.stride = dstBuf.stride;
+#else
     PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]);
     tmpBuf.stride = dstBuf.stride;
+#endif
 
     int vFilterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA;
+#if JVET_M0147_DMVR
+    if (bilinearMC)
+    {
+      vFilterSize = NTAPS_BILINEAR;
+    }
+#endif
+#if JVET_M0147_DMVR
+    m_if.filterHor(compID, (Pel*)refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng, bilinearMC, bilinearMC);
+#else
     m_if.filterHor(compID, (Pel*)refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng);
+#endif
     JVET_J0090_SET_CACHE_ENABLE( false );
+#if JVET_M0147_DMVR
+    m_if.filterVer(compID, (Pel*)tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng, bilinearMC, bilinearMC);
+#else
     m_if.filterVer(compID, (Pel*)tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng);
+#endif
   }
   JVET_J0090_SET_CACHE_ENABLE( true );
   if (bioApplied && compID == COMPONENT_Y)
@@ -632,6 +749,13 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
     }
 #else
     refBuf.buf = refBuf.buf - refBuf.stride - 1;
+#if JVET_M0147_DMVR
+    if (srcPadBuf)
+    {
+      refBuf.buf = srcPadBuf - srcPadStride - 1;
+      refBuf.stride = srcPadStride;
+    }
+#endif
     dstBuf.buf = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + dstBuf.stride + 1;
     bioSampleExtendBilinearFilter(refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, width - 2, height - 2, 1, xFrac, yFrac, rndRes, chFmt, clpRng);
 #endif
@@ -1250,9 +1374,15 @@ void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRef
   for( auto &pu : CU::traversePUs( cu ) )
   {
     PelUnitBuf predBuf = cu.cs->getPredBuf( pu );
+#if JVET_M0147_DMVR
+    pu.mvRefine = true;
+#endif
     motionCompensation( pu, predBuf, eRefPicList 
       , luma, chroma
     );
+#if JVET_M0147_DMVR
+    pu.mvRefine = false;
+#endif
   }
 }
 
@@ -1452,6 +1582,448 @@ void InterPrediction::xWeightedTriangleBlk( const PredictionUnit &pu, const uint
   }
 }
 
+#if JVET_M0147_DMVR
+const uint64_t   MAX_UINT64 = 0xFFFFFFFFFFFFFFFFU; // largest uint64_t; used as initial minimum cost and as the "not yet computed" marker in the DMVR SAD array
+void InterPrediction::xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId) // for every component: copy the reference block of list refId (plus interpolation-filter margin) into pcPad and pad it on all sides
+{
+  int offset, width, height;
+  int padsize;
+  Mv cMv;
+  const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
+  int mvshift = (MV_FRACTIONAL_BITS_INTERNAL); // number of fractional MV bits assumed for pu.mv
+  for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
+  {
+    cMv = Mv(pu.mv[refId].getHor(), pu.mv[refId].getVer()); // restart from the PU's MV for each component
+    pcPad.bufs[compID].stride = (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1) + NTAPS_LUMA); // same stride for luma and chroma
+    int filtersize = (compID == (COMPONENT_Y)) ? NTAPS_LUMA : NTAPS_CHROMA; // interpolation filter length for this component
+    width = pcPad.bufs[compID].width;
+    height = pcPad.bufs[compID].height;
+    offset = (DMVR_NUM_ITERATION) * (pcPad.bufs[compID].stride + 1); // leave DMVR_NUM_ITERATION rows and columns free in front of the copied data for the padding
+    padsize = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, pu.chromaFormat); // padding per side, reduced by the horizontal scale of the component
+    int mvshiftTemp = mvshift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); // NOTE(review): the X scale is also applied to the vertical MV below - confirm for formats where X and Y scales differ
+    width += (filtersize - 1); // extend the fetched area by the filter support
+    height += (filtersize - 1);
+    cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp), // move the MV to the top-left sample needed by the interpolation filter
+      -(((filtersize >> 1) - 1) << mvshiftTemp));
+    clipMv(cMv, pu.lumaPos(), pu.lumaSize(),*pu.cs->sps);
+    /* Pre-fetch similar to HEVC*/
+    {
+      CPelBuf refBuf;
+      Position Rec_offset = pu.blocks[compID].pos().offset(cMv.getHor() >> mvshiftTemp, cMv.getVer() >> mvshiftTemp); // integer-sample position addressed by the shifted MV
+      refBuf = refPic->getRecoBuf(CompArea((ComponentID)compID, pu.chromaFormat, Rec_offset, pu.blocks[compID].size())); // NOTE(review): area is requested with the unextended block size, while the copy below reads width x height samples through the raw pointer
+      PelBuf &dstBuf = pcPad.bufs[compID];
+      g_pelBufOP.copyBuffer((Pel *)refBuf.buf, refBuf.stride, ((Pel *)dstBuf.buf) + offset, dstBuf.stride, width, height);
+    }
+    /* pad the copied block on all sides by padsize samples */
+    {
+      g_pelBufOP.padding(pcPad.bufs[compID].buf + offset, pcPad.bufs[compID].stride, width, height, padsize);
+    }
+  }
+}
+inline int32_t div_for_maxq7(int64_t N, int64_t D) // three-step restoring division: for D > 0 returns sign(N) * min(7, floor(|N| / (2 * D))); the sign of D is not checked
+{
+  int32_t sign, q;
+  sign = 0;
+  if (N < 0)
+  {
+    sign = 1; // remember the sign and continue with |N|
+    N = -N;
+  }
+
+  q = 0;
+  D = (D << 3); // compare against 8 * D -> quotient bit 2
+  if (N >= D)
+  {
+    N -= D;
+    q++;
+  }
+  q = (q << 1);
+
+  D = (D >> 1); // compare the remainder against 4 * D -> quotient bit 1
+  if (N >= D)
+  {
+    N -= D;
+    q++;
+  }
+  q = (q << 1);
+
+  if (N >= (D >> 1)) // compare the remainder against 2 * D -> quotient bit 0
+    q++;
+
+  if (sign)
+    return (-q);
+  return(q);
+}
+
+void xSubPelErrorSrfc(uint64_t *sadBuffer, int32_t *deltaMv) // derive a sub-pel offset per direction from 5 costs: sadBuffer[0] centre, [1]/[3] horizontal pair, [2]/[4] vertical pair; writes deltaMv[0] (hor) and deltaMv[1] (ver)
+{
+  int64_t iNum, iDenom;
+  int32_t iMvDeltaSubPel;
+  int32_t MvSubPelLvl = 4;/*1: half pel, 2: Qpel, 3:1/8, 4: 1/16*/
+                                                        /* horizontal: position of the minimum of the parabola through sadBuffer[1], [0], [3] */
+    iNum = (int64_t)((sadBuffer[1] - sadBuffer[3]) << MvSubPelLvl); // unsigned difference may wrap; the cast to int64_t yields the signed value on two's-complement targets
+    iDenom = (int64_t)((sadBuffer[1] + sadBuffer[3] - (sadBuffer[0] << 1)));
+
+    if (0 != iDenom) // deltaMv[0] is left unchanged when the denominator is zero
+    {
+      if ((sadBuffer[1] != sadBuffer[0]) && (sadBuffer[3] != sadBuffer[0]))
+      {
+        iMvDeltaSubPel = div_for_maxq7(iNum, iDenom); // magnitude limited to 7 by div_for_maxq7
+        deltaMv[0] = (iMvDeltaSubPel);
+      }
+      else
+      {
+        if (sadBuffer[1] == sadBuffer[0])
+        {
+          deltaMv[0] = -8;// half pel towards the sadBuffer[1] side (its cost ties with the centre)
+        }
+        else
+        {
+          deltaMv[0] = 8;// half pel towards the sadBuffer[3] side (its cost ties with the centre)
+        }
+      }
+    }
+
+    /* vertical: same computation with sadBuffer[2] and sadBuffer[4] */
+    iNum = (int64_t)((sadBuffer[2] - sadBuffer[4]) << MvSubPelLvl);
+    iDenom = (int64_t)((sadBuffer[2] + sadBuffer[4] - (sadBuffer[0] << 1)));
+    if (0 != iDenom) // deltaMv[1] is left unchanged when the denominator is zero
+    {
+      if ((sadBuffer[2] != sadBuffer[0]) && (sadBuffer[4] != sadBuffer[0]))
+      {
+        iMvDeltaSubPel = div_for_maxq7(iNum, iDenom);
+        deltaMv[1] = (iMvDeltaSubPel);
+      }
+      else
+      {
+        if (sadBuffer[2] == sadBuffer[0])
+        {
+          deltaMv[1] = -8;// half pel towards the sadBuffer[2] side (its cost ties with the centre)
+        }
+        else
+        {
+          deltaMv[1] = 8;// half pel towards the sadBuffer[4] side (its cost ties with the centre)
+        }
+      }
+    }
+  return;
+}
+
+void InterPrediction::xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *delta_mv, uint64_t *pSADsArray, int width, int height) // one integer search step: evaluates the offsets in m_pSearchOffset around the current centre; lowers minCost and sets delta_mv when a cheaper position is found
+{
+  const int32_t refStrideL0 = m_biLinearBufStride;
+  const int32_t refStrideL1 = m_biLinearBufStride;
+  Pel *pRefL0Orig = pRefL0;
+  Pel *pRefL1Orig = pRefL1;
+  for (int nIdx = SAD_BOTTOM; nIdx <= SAD_TOP_LEFT; ++nIdx)
+  {
+    int32_t SadOffset = ((m_pSearchOffset[nIdx].getVer() * ((DMVR_NUM_ITERATION << 1) + 1)) + m_pSearchOffset[nIdx].getHor()); // index of this candidate relative to pSADsArray; rows are (2 * DMVR_NUM_ITERATION + 1) entries wide
+    pRefL0 = pRefL0Orig + m_pSearchOffset[nIdx].hor + (m_pSearchOffset[nIdx].ver * refStrideL0); // L0 is displaced by the candidate offset
+    pRefL1 = pRefL1Orig - m_pSearchOffset[nIdx].hor - (m_pSearchOffset[nIdx].ver * refStrideL1); // L1 is displaced by the mirrored (negated) offset
+    if (*(pSADsArray + SadOffset) == MAX_UINT64) // compute the cost only if this position has not been evaluated before
+    {
+      const uint64_t cost = xDMVRCost(bd, pRefL0, refStrideL0, pRefL1, refStrideL1, width, height);
+      *(pSADsArray + SadOffset) = cost;
+    }
+    if (nIdx == SAD_LEFT) // choose the diagonal candidate from the neighbour costs; presumably all four cross positions have been evaluated by now - confirm against the SAD_* enum order
+    {
+      int32_t down = -1, right = -1;
+      if (pSADsArray[(((DMVR_NUM_ITERATION << 1) + 1))] < pSADsArray[-(((DMVR_NUM_ITERATION << 1) + 1))]) // entry one row after the centre is cheaper than the one a row before
+      {
+        down = 1;
+      }
+      if (pSADsArray[1] < pSADsArray[-1]) // entry one column after the centre is cheaper than the one before
+      {
+        right = 1;
+      }
+      m_pSearchOffset[SAD_TOP_LEFT].set(right, down); // overwrite the last search offset with the chosen diagonal
+    }
+    if (*(pSADsArray + SadOffset) < minCost) // strict comparison: ties keep the earlier candidate
+    {
+      minCost = *(pSADsArray + SadOffset);
+      delta_mv[0] = m_pSearchOffset[nIdx].getHor();
+      delta_mv[1] = m_pSearchOffset[nIdx].getVer();
+    }
+  }
+}
+
+void InterPrediction::xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bBIOApplied // motion compensation with pu.mv for both lists, reading samples from the padded buffers pcPad0 / pcPad1 and writing to pcYuvSrc0 / pcYuvSrc1
+  , const Mv mergeMV[NUM_REF_PIC_LIST_01] // MVs the padded buffers were fetched with; used to locate the block inside them
+)
+{
+  int offset, deltaIntMvX, deltaIntMvY;
+
+  PelUnitBuf pcYUVTemp = pcYuvSrc0; // destination / source pair for list 0; switched to the list 1 pair at the end of the first pass
+  PelUnitBuf pcPadTemp = pcPad0;
+  /*always high precision MVs are used*/
+  int mvshift = 4; // fractional MV bits (1/16 sample)
+
+  for (int k = 0; k < NUM_REF_PIC_LIST_01; k++)
+  {
+    RefPicList refId = (RefPicList)k;
+    Mv cMv = pu.mv[refId];
+    m_iRefListIdx = refId;
+    const Picture* refPic = pu.cu->slice->getRefPic(refId, pu.refIdx[refId]);
+    clipMv(cMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
+
+    Mv startMv = mergeMV[refId];
+    clipMv(startMv, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps); // clipped the same way as cMv so that the integer difference below is consistent
+
+    for (int compID = 0; compID < MAX_NUM_COMPONENT; compID++)
+    {
+      int mvshiftTemp = mvshift + getComponentScaleX((ComponentID)compID, pu.chromaFormat); // NOTE(review): the X scale is also used for the vertical component below - confirm for formats where X and Y scales differ
+      int leftPixelExtra; // samples the interpolation filter needs before the block origin
+      if (compID == COMPONENT_Y)
+      {
+        leftPixelExtra = (NTAPS_LUMA >> 1) - 1;
+      }
+      else
+      {
+        leftPixelExtra = (NTAPS_CHROMA >> 1) - 1;
+      }
+
+      deltaIntMvX = (cMv.getHor() >> mvshiftTemp) - // integer-sample displacement of pu.mv relative to the starting MV
+        (startMv.getHor() >> mvshiftTemp);
+      deltaIntMvY = (cMv.getVer() >> mvshiftTemp) -
+        (startMv.getVer() >> mvshiftTemp);
+
+      CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");
+
+      offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (pcPadTemp.bufs[compID].stride + 1); // skip the padding and the filter margin at the start of the padded buffer
+      offset += (deltaIntMvY)* pcPadTemp.bufs[compID].stride; // then move by the integer displacement
+      offset += (deltaIntMvX);
+      PelBuf &srcBuf = pcPadTemp.bufs[compID];
+      xPredInterBlk((ComponentID)compID, pu, refPic, cMv, pcYUVTemp, true, pu.cs->slice->getClpRngs().comp[compID],
+        bBIOApplied, false, 0, 0, 0, (srcBuf.buf + offset), pcPadTemp.bufs[compID].stride); // isIBC = false, dmvrWidth = dmvrHeight = 0, bilinearMC = 0, source = padded buffer
+    }
+    pcYUVTemp = pcYuvSrc1;
+    pcPadTemp = pcPad1;
+  }
+}
+
+uint64_t InterPrediction::xDMVRCost(int iBitDepth, Pel* pOrg, uint32_t uiRefStride, const Pel* pRef, uint32_t uiOrgStride, int iWidth, int iHeight) // distortion between the blocks at pOrg and pRef; NOTE(review): the stride parameter names are swapped relative to the pointers they follow, the caller in this file passes the same stride for both
+{
+  DistParam cDistParam;
+  cDistParam.applyWeight = false;
+  cDistParam.useMR = false;
+  m_pcRdCost->setDistParam(cDistParam, pOrg, pRef, uiOrgStride, uiRefStride, iBitDepth, COMPONENT_Y, iWidth, iHeight , 1); // luma only; the trailing 1 is presumably a row sub-sampling shift - confirm against RdCost::setDistParam
+  uint64_t uiCost = cDistParam.distFunc(cDistParam); // run the distortion function selected by setDistParam
+  return uiCost;
+}
+
+void xDMVRSubPixelErrorSurface(bool notZeroCost, int16_t *total_delta_mv, int16_t *delta_mv, uint64_t *pSADsArray) // adds a sub-pel offset to total_delta_mv, but only when notZeroCost is set and delta_mv is (0, 0)
+{
+
+  int sadStride = (((DMVR_NUM_ITERATION << 1) + 1)); // row length of the SAD array
+  uint64_t sadbuffer[5];
+  int32_t deltaMv[2] = { 0,0 }; // stays zero in a direction where xSubPelErrorSrfc finds a zero denominator
+  if (notZeroCost && delta_mv[0] == 0 && delta_mv[1] == 0)
+  {
+    sadbuffer[0] = pSADsArray[0]; // centre
+    sadbuffer[1] = pSADsArray[-1]; // previous column
+    sadbuffer[2] = pSADsArray[-sadStride]; // previous row
+    sadbuffer[3] = pSADsArray[1]; // next column
+    sadbuffer[4] = pSADsArray[sadStride]; // next row
+    xSubPelErrorSrfc(sadbuffer, deltaMv);
+    total_delta_mv[0] += deltaMv[0];
+    total_delta_mv[1] += deltaMv[1];
+  }
+}
+
+void InterPrediction::xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs) // bilinear luma prediction of the block enlarged by DMVR_NUM_ITERATION samples per side, for L0 and L1, read from the padded reference buffers
+{
+  const int iRefIdx0 = pu.refIdx[0];
+  const int iRefIdx1 = pu.refIdx[1];
+  /*use merge MV as starting MV*/
+  Mv StartingMVL0(pu.mv[REF_PIC_LIST_0]);
+  Mv StartingMVL1(pu.mv[REF_PIC_LIST_1]);
+
+  /*Clip the starting MVs*/
+  clipMv(StartingMVL0, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
+  clipMv(StartingMVL1, pu.lumaPos(), pu.lumaSize(), *pu.cs->sps);
+
+  /*L0 MC for refinement*/
+  {
+    int offset;
+    int leftPixelExtra = (NTAPS_LUMA >> 1) - 1; // luma filter margin in front of the block origin
+    offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride + 1); // block origin inside the padded buffer
+    offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y].stride; // step back DMVR_NUM_ITERATION rows ...
+    offset += (-(int)DMVR_NUM_ITERATION); // ... and columns: the net offset is leftPixelExtra * (stride + 1)
+    PelBuf srcBuf = m_cYuvRefBuffDMVRL0.bufs[COMPONENT_Y];
+    PelUnitBuf yuvPredTempL0 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL0, // destination: m_cYuvPredTempDMVRL0 viewed with stride MAX_CU_SIZE + 2 * DMVR_NUM_ITERATION
+      (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1)), pu.lwidth() + (DMVR_NUM_ITERATION << 1), pu.lheight() + (DMVR_NUM_ITERATION << 1)));
+
+    xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_0, iRefIdx0), StartingMVL0, yuvPredTempL0, true, clpRngs.comp[COMPONENT_Y],
+      false, false, pu.lwidth() + (DMVR_NUM_ITERATION << 1), pu.lheight() + (DMVR_NUM_ITERATION << 1), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride // bioApplied = false, isIBC = false, enlarged size, bilinearMC = true
+    );
+  }
+
+  /*L1 MC for refinement*/
+  {
+    int offset;
+    int leftPixelExtra = (NTAPS_LUMA >> 1) - 1; // luma filter margin in front of the block origin
+    offset = (DMVR_NUM_ITERATION + leftPixelExtra) * (m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride + 1); // block origin inside the padded buffer
+    offset += (-(int)DMVR_NUM_ITERATION)* (int)m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y].stride; // step back DMVR_NUM_ITERATION rows ...
+    offset += (-(int)DMVR_NUM_ITERATION); // ... and columns: the net offset is leftPixelExtra * (stride + 1)
+    PelBuf srcBuf = m_cYuvRefBuffDMVRL1.bufs[COMPONENT_Y];
+    PelUnitBuf yuvPredTempL1 = PelUnitBuf(pu.chromaFormat, PelBuf(m_cYuvPredTempDMVRL1, // destination: m_cYuvPredTempDMVRL1 viewed with stride MAX_CU_SIZE + 2 * DMVR_NUM_ITERATION
+      (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1)), pu.lwidth() + (DMVR_NUM_ITERATION << 1), pu.lheight() + (DMVR_NUM_ITERATION << 1)));
+
+    xPredInterBlk(COMPONENT_Y, pu, pu.cu->slice->getRefPic(REF_PIC_LIST_1, iRefIdx1), StartingMVL1, yuvPredTempL1, true, clpRngs.comp[COMPONENT_Y],
+      false, false, pu.lwidth() + (DMVR_NUM_ITERATION << 1), pu.lheight() + (DMVR_NUM_ITERATION << 1), true, ((Pel *)srcBuf.buf) + offset, srcBuf.stride // bioApplied = false, isIBC = false, enlarged size, bilinearMC = true
+    );
+  }
+}
+
+void InterPrediction::xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bBIOApplied)
+{  
+  bool bDMVRApplied = true;
+  
+  int iterationCount = DMVR_NUM_ITERATION;
+  /*Always High Precision*/
+  int mvShift = MV_FRACTIONAL_BITS_INTERNAL;
+  
+  /*use merge MV as starting MV*/
+  Mv mergeMv[] = { pu.mv[REF_PIC_LIST_0] , pu.mv[REF_PIC_LIST_1] };
+  
+  m_biLinearBufStride = (MAX_CU_SIZE + (DMVR_NUM_ITERATION << 1));  
+
+  int dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
+  int dx = std::min<int>(pu.lumaSize().width,  DMVR_SUBCU_WIDTH);
+  /*L0 Padding*/
+  m_cYuvRefBuffDMVRL0 = (pu.chromaFormat == CHROMA_400 ?
+    PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y())) :
+    PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL0[0], pcYuvDst.Y()),
+      PelBuf(m_cRefSamplesDMVRL0[1], pcYuvDst.Cb()), PelBuf(m_cRefSamplesDMVRL0[2], pcYuvDst.Cr())));
+
+  xPrefetchPad(pu, m_cYuvRefBuffDMVRL0, REF_PIC_LIST_0);
+
+  /*L1 Padding*/
+  m_cYuvRefBuffDMVRL1 = (pu.chromaFormat == CHROMA_400 ?
+    PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y())) :
+    PelUnitBuf(pu.chromaFormat, PelBuf(m_cRefSamplesDMVRL1[0], pcYuvDst.Y()), PelBuf(m_cRefSamplesDMVRL1[1], pcYuvDst.Cb()),
+      PelBuf(m_cRefSamplesDMVRL1[2], pcYuvDst.Cr())));
+
+  xPrefetchPad(pu, m_cYuvRefBuffDMVRL1, REF_PIC_LIST_1);
+
+  xinitMC(pu, clpRngs);
+
+  // point mc buffer to cetre point to avoid multiplication to reach each iteration to the begining
+  Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + (iterationCount * m_biLinearBufStride) + iterationCount;
+  Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + (iterationCount * m_biLinearBufStride) + iterationCount;
+
+  Position puPos = pu.lumaPos();
+  
+  int bd = pu.cs->slice->getClpRngs().comp[COMPONENT_Y].bd;
+  if (bDMVRApplied)
+  {
+    int num = 0;
+
+    int yStart = 0;
+    for (int y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
+    {
+      for (int x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
+      {       
+        uint64_t minCost = MAX_UINT64;
+        bool notZeroCost = true;
+        int16_t totalDeltaMV[2] = { 0,0 };
+        int16_t deltaMV[2] = { 0, 0 };
+        uint64_t  *pSADsArray;
+        for (int i = 0; i < (((DMVR_NUM_ITERATION << 1) + 1) * ((DMVR_NUM_ITERATION << 1) + 1)); i++)
+        {
+          m_SADsArray[i] = MAX_UINT64;
+        }
+        pSADsArray = &m_SADsArray[(((DMVR_NUM_ITERATION << 1) + 1) * ((DMVR_NUM_ITERATION << 1) + 1)) >> 1];
+
+        Pel *addrL0Centre = biLinearPredL0 + yStart * m_biLinearBufStride + xStart;
+        Pel *addrL1Centre = biLinearPredL1 + yStart * m_biLinearBufStride + xStart;
+        for (int i = 0; i < iterationCount; i++)
+        {
+          deltaMV[0] = 0;
+          deltaMV[1] = 0;
+          Pel *addrL0 = addrL0Centre + totalDeltaMV[0] + (totalDeltaMV[1] * m_biLinearBufStride);
+          Pel *addrL1 = addrL1Centre - totalDeltaMV[0] - (totalDeltaMV[1] * m_biLinearBufStride);
+          if (i == 0)
+          {
+            minCost = xDMVRCost(clpRngs.comp[COMPONENT_Y].bd, addrL0, m_biLinearBufStride, addrL1, m_biLinearBufStride, dx, dy);
+            if (minCost < ((4 * dx * (dy >> 1/*for alternate line*/))))
+            {
+              notZeroCost = false;
+              break;
+            }
+            pSADsArray[0] = minCost;
+          }
+          if (!minCost)
+          {
+            notZeroCost = false;
+            break;
+          }
+
+          xBIPMVRefine(bd, addrL0, addrL1, minCost, deltaMV, pSADsArray, dx, dy);
+
+          if (deltaMV[0] == 0 && deltaMV[1] == 0)
+          {
+            break;
+          }
+          totalDeltaMV[0] += deltaMV[0];
+          totalDeltaMV[1] += deltaMV[1];
+          pSADsArray += ((deltaMV[1] * (((DMVR_NUM_ITERATION << 1) + 1))) + deltaMV[0]);
+        }
+
+        totalDeltaMV[0] = (totalDeltaMV[0] << mvShift);
+        totalDeltaMV[1] = (totalDeltaMV[1] << mvShift);
+        xDMVRSubPixelErrorSurface(notZeroCost, totalDeltaMV, deltaMV, pSADsArray);
+        
+        pu.mvdL0SubPu[num] = Mv(totalDeltaMV[0], totalDeltaMV[1]);
+
+        num++;
+      }
+    }
+  }
+
+  {
+    PredictionUnit subPu = pu;
+    subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(puPos.x, puPos.y, dx, dy)));
+    PelUnitBuf           m_cYuvRefBuffSubCuDMVRL0;
+    PelUnitBuf           m_cYuvRefBuffSubCuDMVRL1;
+    PelUnitBuf srcPred0 = (pu.chromaFormat == CHROMA_400 ?
+      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y())) :
+      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[0][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[0][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[0][2], pcYuvDst.Cr())));
+    PelUnitBuf srcPred1 = (pu.chromaFormat == CHROMA_400 ?
+      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y())) :
+      PelUnitBuf(pu.chromaFormat, PelBuf(m_acYuvPred[1][0], pcYuvDst.Y()), PelBuf(m_acYuvPred[1][1], pcYuvDst.Cb()), PelBuf(m_acYuvPred[1][2], pcYuvDst.Cr())));
+
+    srcPred0 = srcPred0.subBuf(UnitAreaRelative(pu, subPu));
+    srcPred1 = srcPred1.subBuf(UnitAreaRelative(pu, subPu));
+    PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(pu, subPu));
+
+    int x = 0, y = 0;
+    int xStart = 0, yStart = 0;
+    int num = 0;    
+
+    int dstStride[MAX_NUM_COMPONENT] = { pcYuvDst.bufs[COMPONENT_Y].stride, pcYuvDst.bufs[COMPONENT_Cb].stride, pcYuvDst.bufs[COMPONENT_Cr].stride };
+    for (y = puPos.y; y < (puPos.y + pu.lumaSize().height); y = y + dy, yStart = yStart + dy)
+    {
+      for (x = puPos.x, xStart = 0; x < (puPos.x + pu.lumaSize().width); x = x + dx, xStart = xStart + dx)
+      {
+        subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, dx, dy)));
+
+        subPu.mv[0] = mergeMv[REF_PIC_LIST_0] + pu.mvdL0SubPu[num];
+        subPu.mv[1] = mergeMv[REF_PIC_LIST_1] - pu.mvdL0SubPu[num];
+        m_cYuvRefBuffSubCuDMVRL0 = m_cYuvRefBuffDMVRL0.subBuf(UnitAreaRelative(pu, subPu));
+        m_cYuvRefBuffSubCuDMVRL1 = m_cYuvRefBuffDMVRL1.subBuf(UnitAreaRelative(pu, subPu));
+        xFinalPaddedMCForDMVR(subPu, srcPred0, srcPred1, m_cYuvRefBuffSubCuDMVRL0, m_cYuvRefBuffSubCuDMVRL1, bBIOApplied, mergeMv);
+
+        subPredBuf.bufs[COMPONENT_Y].buf  = pcYuvDst.bufs[COMPONENT_Y].buf + xStart + yStart * dstStride[COMPONENT_Y];
+        subPredBuf.bufs[COMPONENT_Cb].buf = pcYuvDst.bufs[COMPONENT_Cb].buf + (xStart >> 1) + ((yStart >> 1) * dstStride[COMPONENT_Cb]);
+        subPredBuf.bufs[COMPONENT_Cr].buf = pcYuvDst.bufs[COMPONENT_Cr].buf + (xStart >> 1) + ((yStart >> 1) * dstStride[COMPONENT_Cr]);
+        xWeightedAverage(subPu, srcPred0, srcPred1, subPredBuf, subPu.cu->slice->getSPS()->getBitDepths(), subPu.cu->slice->clpRngs(), bBIOApplied);
+        num++;
+      }
+    }
+  }
+}
+#endif
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
 void InterPrediction::cacheAssign( CacheModel *cache )
 {
diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h
index 5574f28e05bdd8a2abb7b1ab9a54ea7a1a26f9c5..44c11d9d919b1fd2fe00d8934ab40be93c8ce51f 100644
--- a/source/Lib/CommonLib/InterPrediction.h
+++ b/source/Lib/CommonLib/InterPrediction.h
@@ -94,6 +94,33 @@ protected:
   int                  m_iRefListIdx;
   PelStorage           m_triangleBuf;
   Mv*                  m_storedMv;
+#if JVET_M0147_DMVR
+ /* bilinear-filtered prediction buffers (L0 and L1) read by the DMVR refinement search */
+  Pel*                 m_cYuvPredTempDMVRL0;
+  Pel*                 m_cYuvPredTempDMVRL1;
+  int                  m_biLinearBufStride;     // row stride used when indexing the two buffers above
+  /* padded reference data: PelUnitBuf views built over the per-component sample pointers declared below */
+  PelUnitBuf           m_cYuvRefBuffDMVRL0;
+  PelUnitBuf           m_cYuvRefBuffDMVRL1;
+  Pel*                 m_cRefSamplesDMVRL0[MAX_NUM_COMPONENT];
+  Pel*                 m_cRefSamplesDMVRL1[MAX_NUM_COMPONENT];
+  enum SAD_POINT_INDEX                          // labels for SAD positions: four sides, four corners and the centre
+  {
+    NOT_AVAILABLE = -1,
+    SAD_BOTTOM = 0,
+    SAD_TOP,
+    SAD_RIGHT,
+    SAD_LEFT,
+    SAD_TOP_LEFT,
+    SAD_TOP_RIGHT,
+    SAD_BOTTOM_LEFT,
+    SAD_BOTTOM_RIGHT,
+    SAD_CENTER,
+    SAD_COUNT                                   // evaluates to 9, the number of labelled positions (NOT_AVAILABLE excluded)
+  };
+  Mv m_pSearchOffset[5] = { Mv(0, 1), Mv(0, -1), Mv(1, 0), Mv(-1, 0), Mv(0, 0) }; // four axis-aligned unit offsets followed by the zero offset; presumably the candidates tried by xBIPMVRefine - TODO confirm
+  uint64_t m_SADsArray[((DMVR_NUM_ITERATION << 1) + 1) * ((DMVR_NUM_ITERATION << 1) + 1)]; // one cost slot per cell of a (2 * DMVR_NUM_ITERATION + 1)^2 grid; the DMVR search resets every slot to MAX_UINT64 per sub-block and indexes relative to the middle slot
+#endif // JVET_M0147_DMVR
  
   Pel*                 m_gradX0;
   Pel*                 m_gradY0;
@@ -112,10 +139,22 @@ protected:
                                   , const bool luma, const bool chroma
   );
   void xPredInterBi             ( PredictionUnit& pu, PelUnitBuf &pcYuvPred );
+#if JVET_M0147_DMVR
   void xPredInterBlk            ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
                                  , const bool& bioApplied
                                  , bool isIBC
+                                 , SizeType dmvrWidth = 0
+                                 , SizeType dmvrHeight = 0
+                                 , bool bilinearMC = false
+                                 , Pel *srcPadBuf = NULL
+                                 , int32_t srcPadStride = 0
                                  );
+#else
+  void xPredInterBlk            ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
+                                 , const bool& bioApplied
+                                 , bool isIBC
+                                 );
+#endif
 
   void xAddBIOAvg4              (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng);
 #if JVET_M0063_BDOF_FIX
@@ -169,6 +208,16 @@ public:
 #else
   void    weightedTriangleBlk        ( PredictionUnit &pu, bool weights, const bool splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1 );
 #endif
+#if JVET_M0147_DMVR
+  void xPrefetchPad(PredictionUnit& pu, PelUnitBuf &pcPad, RefPicList refId);
+  void xFinalPaddedMCForDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bBIOApplied
+    , const Mv startMV[NUM_REF_PIC_LIST_01]
+  );
+  void xBIPMVRefine(int bd, Pel *pRefL0, Pel *pRefL1, uint64_t& minCost, int16_t *delta_mv, uint64_t *pSADsArray, int width, int height);
+  uint64_t xDMVRCost(int iBitDepth, Pel* pRef, uint32_t uiRefStride, const Pel* pOrg, uint32_t uiOrgStride, int iWidth, int iHeight);
+  void xinitMC(PredictionUnit& pu, const ClpRngs &clpRngs);
+  void xProcessDMVR(PredictionUnit& pu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bBIOApplied );
+#endif
 
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   void    cacheAssign( CacheModel *cache );
diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp
index 4daa5056e7e2a603f5773e009e481101d6bde309..b33cd496629a813b0a267018d04fc7ee7409796e 100644
--- a/source/Lib/CommonLib/InterpolationFilter.cpp
+++ b/source/Lib/CommonLib/InterpolationFilter.cpp
@@ -131,6 +131,27 @@ const TFilterCoeff InterpolationFilter::m_bilinearFilter[LUMA_INTERPOLATION_FILT
   { 4, 60, },
 };
 
+#if JVET_M0147_DMVR
+const TFilterCoeff InterpolationFilter::m_bilinearFilterPrec4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR] = // 2-tap bilinear taps indexed by frac; row i is { 16 - i, i }, so every row sums to 16 (1 << IF_FILTER_PREC_BILINEAR)
+{
+  { 16,  0, }, // frac 0: filterHor/filterVer route frac == 0 to m_filterCopy, so this row is not read through them
+  { 15,  1, },
+  { 14,  2, },
+  { 13, 3, },
+  { 12, 4, },
+  { 11, 5, },
+  { 10, 6, },
+  { 9, 7, },
+  { 8, 8, },   // half-sample position: equal weights
+  { 7, 9, },
+  { 6, 10, },
+  { 5, 11, },
+  { 4, 12, },
+  { 3, 13, },
+  { 2, 14, },
+  { 1, 15, }
+};
+#endif // JVET_M0147_DMVR
 // ====================================================================================================================
 // Private member functions
 // ====================================================================================================================
@@ -197,7 +218,11 @@ InterpolationFilter::InterpolationFilter()
 //
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 template<bool isFirst, bool isLast>
+#if JVET_M0147_DMVR
+void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool biMC10BitOut)
+#else
 void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height )
+#endif
 {
   int row, col;
 
@@ -223,6 +248,40 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int
   {
     const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
 
+#if JVET_M0147_DMVR
+    if (biMC10BitOut)
+    {
+      int shift10BitOut, offset;
+      if ((clpRng.bd - IF_INTERNAL_PREC_BILINEAR) > 0)
+      {
+        shift10BitOut = (clpRng.bd - IF_INTERNAL_PREC_BILINEAR);
+        offset = (1 << (shift10BitOut - 1));
+        for (row = 0; row < height; row++)
+        {
+          for (col = 0; col < width; col++)
+          {
+            dst[col] = (src[col] + offset) >> shift10BitOut;
+          }
+          src += srcStride;
+          dst += dstStride;
+        }
+      }
+      else
+      {
+        shift10BitOut = (IF_INTERNAL_PREC_BILINEAR - clpRng.bd);
+        for (row = 0; row < height; row++)
+        {
+          for (col = 0; col < width; col++)
+          {
+            dst[col] = src[col] << shift10BitOut;
+          }
+          src += srcStride;
+          dst += dstStride;
+        }
+      }      
+    }
+    else
+#endif 
     for (row = 0; row < height; row++)
     {
       for (col = 0; col < width; col++)
@@ -240,6 +299,40 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int
   {
     const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
 
+#if JVET_M0147_DMVR
+    if (biMC10BitOut)
+    {   
+      int shift10BitOut, offset;
+      if ((clpRng.bd - IF_INTERNAL_PREC_BILINEAR) > 0)
+      {
+        shift10BitOut = (clpRng.bd - IF_INTERNAL_PREC_BILINEAR);
+        offset = (1 << (shift10BitOut - 1));
+        for (row = 0; row < height; row++)
+        {
+          for (col = 0; col < width; col++)
+          {
+            dst[col] = (src[col] + offset) >> shift10BitOut;
+          }
+          src += srcStride;
+          dst += dstStride;
+        }
+      }
+      else
+      {
+        shift10BitOut = (IF_INTERNAL_PREC_BILINEAR - clpRng.bd);
+        for (row = 0; row < height; row++)
+        {
+          for (col = 0; col < width; col++)
+          {
+            dst[col] = src[col] << shift10BitOut;
+          }
+          src += srcStride;
+          dst += dstStride;
+        }
+      }
+    }
+    else
+#endif
     for (row = 0; row < height; row++)
     {
       for (col = 0; col < width; col++)
@@ -282,7 +375,11 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int
 //
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 template<int N, bool isVertical, bool isFirst, bool isLast>
+#if JVET_M0147_DMVR
+void InterpolationFilter::filter(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMC10BitOut)
+#else
 void InterpolationFilter::filter(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff)
+#endif
 {
   int row, col;
 
@@ -327,6 +424,13 @@ void InterpolationFilter::filter(const ClpRng& clpRng, Pel const *src, int srcSt
     offset = (isFirst) ? -IF_INTERNAL_OFFS << shift : 0;
   }
 
+#if JVET_M0147_DMVR
+  if (biMC10BitOut)
+  {
+    shift = IF_FILTER_PREC_BILINEAR - (IF_INTERNAL_PREC_BILINEAR - clpRng.bd);
+    offset = 1 << (shift - 1);
+  }
+#endif
   for (row = 0; row < height; row++)
   {
     for (col = 0; col < width; col++)
@@ -387,20 +491,36 @@ void InterpolationFilter::filter(const ClpRng& clpRng, Pel const *src, int srcSt
  * \param  coeff      Pointer to filter taps
  */
 template<int N>
+#if JVET_M0147_DMVR
+void InterpolationFilter::filterHor(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isLast, TFilterCoeff const *coeff, bool biMC10BitOut)
+#else
 void InterpolationFilter::filterHor(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isLast, TFilterCoeff const *coeff)
+#endif
 {
 //#if ENABLE_SIMD_OPT_MCIF
   if( N == 8 )
   {
+#if JVET_M0147_DMVR
+    m_filterHor[0][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMC10BitOut);
+#else
     m_filterHor[0][1][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff );
+#endif
   }
   else if( N == 4 )
   {
+#if JVET_M0147_DMVR
+    m_filterHor[1][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMC10BitOut);
+#else
     m_filterHor[1][1][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff );
+#endif 
   }
   else if( N == 2 )
   {
+#if JVET_M0147_DMVR
+    m_filterHor[2][1][isLast](clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMC10BitOut);
+#else
     m_filterHor[2][1][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff );
+#endif
   }
   else
   {
@@ -424,20 +544,36 @@ void InterpolationFilter::filterHor(const ClpRng& clpRng, Pel const *src, int sr
  * \param  coeff      Pointer to filter taps
  */
 template<int N>
+#if JVET_M0147_DMVR
+void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff, bool biMC10BitOut)
+#else
 void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff)
+#endif
 {
 //#if ENABLE_SIMD_OPT_MCIF
   if( N == 8 )
   {
+#if JVET_M0147_DMVR
+    m_filterVer[0][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMC10BitOut);
+#else
     m_filterVer[0][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff );
+#endif
   }
   else if( N == 4 )
   {
+#if JVET_M0147_DMVR
+    m_filterVer[1][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMC10BitOut);
+#else
     m_filterVer[1][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff );
+#endif
   }
   else if( N == 2 )
   {
+#if JVET_M0147_DMVR
+    m_filterVer[2][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff, biMC10BitOut);
+#else
     m_filterVer[2][isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height, coeff );
+#endif
   }
   else{
     THROW( "Invalid tap number" );
@@ -463,29 +599,49 @@ void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int sr
  * \param  fmt        Chroma format
  * \param  bitDepth   Bit depth
  */
+#if JVET_M0147_DMVR // Horizontal filtering entry point: selects copy / bilinear / luma / chroma taps from frac, compID and nFilterIdx; the DMVR build adds biMC10BitOut, which is forwarded unchanged to the selected routine
+void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMC10BitOut)
+#else
 void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx )
+#endif // JVET_M0147_DMVR
 {
   if( frac == 0 )
   {
+#if JVET_M0147_DMVR
+    m_filterCopy[true][isLast](clpRng, src, srcStride, dst, dstStride, width, height, biMC10BitOut); // frac == 0: no filter taps, dispatch to the copy routine (isFirst is fixed to true for the horizontal pass)
+#else
     m_filterCopy[true][isLast]( clpRng, src, srcStride, dst, dstStride, width, height );
+#endif // JVET_M0147_DMVR
   }
   else if( isLuma( compID ) )
   {
     CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" );
     if( nFilterIdx == 1 )
     {
+#if JVET_M0147_DMVR
+      filterHor<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilterPrec4[frac], biMC10BitOut); // nFilterIdx == 1: the DMVR build takes bilinear taps from m_bilinearFilterPrec4 instead of m_bilinearFilter
+#else
       filterHor<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilter[frac]);
+#endif // JVET_M0147_DMVR
     }
     else
     {
+#if JVET_M0147_DMVR
+      filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac], biMC10BitOut); // any other nFilterIdx: regular luma taps
+#else
       filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac] );
+#endif // JVET_M0147_DMVR
     }
   }
   else
   {
     const uint32_t csx = getComponentScaleX( compID, fmt );
     CHECK( frac < 0 || csx >= 2 || ( frac << ( 1 - csx ) ) >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" );
+#if JVET_M0147_DMVR
+    filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac << ( 1 - csx )], biMC10BitOut); // chroma: the table index is frac scaled by the horizontal chroma subsampling (csx)
+#else
     filterHor<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac << ( 1 - csx )] );
+#endif // JVET_M0147_DMVR
   }
 }
 
@@ -506,29 +662,49 @@ void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, i
  * \param  fmt        Chroma format
  * \param  bitDepth   Bit depth
  */
+#if JVET_M0147_DMVR // Vertical filtering entry point: selects copy / bilinear / luma / chroma taps from frac, compID and nFilterIdx; the DMVR build adds biMC10BitOut, which is forwarded unchanged to the selected routine
+void InterpolationFilter::filterVer( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx, bool biMC10BitOut)
+#else
 void InterpolationFilter::filterVer( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx)
+#endif // JVET_M0147_DMVR
 {
   if( frac == 0 )
   {
+#if JVET_M0147_DMVR
+    m_filterCopy[isFirst][isLast](clpRng, src, srcStride, dst, dstStride, width, height, biMC10BitOut); // frac == 0: no filter taps, dispatch to the copy routine selected by isFirst/isLast
+#else
     m_filterCopy[isFirst][isLast]( clpRng, src, srcStride, dst, dstStride, width, height );
+#endif // JVET_M0147_DMVR
   }
   else if( isLuma( compID ) )
   {
     CHECK( frac < 0 || frac >= LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" );
     if (nFilterIdx == 1)
     {
+#if JVET_M0147_DMVR
+      filterVer<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilterPrec4[frac], biMC10BitOut);         // nFilterIdx == 1: the DMVR build takes bilinear taps from m_bilinearFilterPrec4 instead of m_bilinearFilter
+#else
       filterVer<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilter[frac]);
+#endif // JVET_M0147_DMVR
     }
     else
     {
+#if JVET_M0147_DMVR
+      filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac], biMC10BitOut); // any other nFilterIdx: regular luma taps
+#else
       filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac] );
+#endif // JVET_M0147_DMVR
     }
   }
   else
   {
     const uint32_t csy = getComponentScaleY( compID, fmt );
     CHECK( frac < 0 || csy >= 2 || ( frac << ( 1 - csy ) ) >= CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS, "Invalid fraction" );
+#if JVET_M0147_DMVR
+    filterVer<NTAPS_CHROMA>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilter[frac << (1 - csy)], biMC10BitOut); // chroma: the table index is frac scaled by the vertical chroma subsampling (csy)
+#else
     filterVer<NTAPS_CHROMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_chromaFilter[frac << ( 1 - csy )] );
+#endif // JVET_M0147_DMVR
   }
 }
 
diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h
index e4ca95491de7a1f841fb35aed7a2ebf7b2053036..f8c5a3de0275f8ac8554e470097c79679173d049 100644
--- a/source/Lib/CommonLib/InterpolationFilter.h
+++ b/source/Lib/CommonLib/InterpolationFilter.h
@@ -48,7 +48,10 @@
 #define IF_INTERNAL_PREC 14 ///< Number of bits for internal precision
 #define IF_FILTER_PREC    6 ///< Log2 of sum of filter taps
 #define IF_INTERNAL_OFFS (1<<(IF_INTERNAL_PREC-1)) ///< Offset used internally
-
+#if JVET_M0147_DMVR
+#define IF_INTERNAL_PREC_BILINEAR 10 ///< Number of bits of internal precision the filters scale to when biMC10BitOut is set
+#define IF_FILTER_PREC_BILINEAR   4  ///< Log2 of the sum of the m_bilinearFilterPrec4 taps; kept at 4 bits so that intermediate values do not exceed 16 bits for SIMD (bit exact)
+#endif // JVET_M0147_DMVR
 /**
  * \brief Interpolation filter class
  */
@@ -57,17 +60,36 @@ class InterpolationFilter
   static const TFilterCoeff m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps
   static const TFilterCoeff m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Chroma filter taps
   static const TFilterCoeff m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps
+#if JVET_M0147_DMVR
+  static const TFilterCoeff m_bilinearFilterPrec4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps
+#endif
 public:
   template<bool isFirst, bool isLast>
+#if JVET_M0147_DMVR
+  static void filterCopy( const ClpRng& clpRng, const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool biMC10BitOut);
+#else
   static void filterCopy( const ClpRng& clpRng, const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height );
+#endif
 
   template<int N, bool isVertical, bool isFirst, bool isLast>
+#if JVET_M0147_DMVR
+  static void filter(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMC10BitOut);
+#else
   static void filter(const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff);
-
+#endif
   template<int N>
+#if JVET_M0147_DMVR
+  void filterHor(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isLast, TFilterCoeff const *coeff, bool biMC10BitOut);
+#else
   void filterHor(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height,               bool isLast, TFilterCoeff const *coeff);
+#endif
+
   template<int N>
+#if JVET_M0147_DMVR
+  void filterVer(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff, bool biMC10BitOut);
+#else
   void filterVer(const ClpRng& clpRng, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, bool isFirst, bool isLast, TFilterCoeff const *coeff);
+#endif
 
 protected:
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
@@ -76,10 +98,21 @@ protected:
 public:
   InterpolationFilter();
   ~InterpolationFilter() {}
-
+#if JVET_M0147_DMVR
+  void( *m_filterHor[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMC10BitOut);
+#else
   void( *m_filterHor[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff );
+#endif
+#if JVET_M0147_DMVR
+  void( *m_filterVer[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMC10BitOut);
+#else
   void( *m_filterVer[3][2][2] )( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff );
+#endif
+#if JVET_M0147_DMVR
+  void( *m_filterCopy[2][2] )  ( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, bool biMC10BitOut);
+#else
   void( *m_filterCopy[2][2] )  ( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height );
+#endif
 
   void initInterpolationFilter( bool enable );
 #ifdef TARGET_SIMD_X86
@@ -87,9 +120,16 @@ public:
   template <X86_VEXT vext>
   void _initInterpolationFilterX86();
 #endif
-
+#if JVET_M0147_DMVR
+  void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac,               bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMC10BitOut = false);
+#else
   void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac,               bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0);
+#endif
+#if JVET_M0147_DMVR
+  void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0, bool biMC10BitOut = false);
+#else
   void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0);
+#endif
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   void cacheAssign( CacheModel *cache ) { m_cacheModel = cache; }
 #endif
diff --git a/source/Lib/CommonLib/RdCost.cpp b/source/Lib/CommonLib/RdCost.cpp
index 097c0cab7f19d722ea5bb92ccc20d43a52b15e82..5ca4883b2e2c9f6a3074e91885fbe8cbad3df0a8 100644
--- a/source/Lib/CommonLib/RdCost.cpp
+++ b/source/Lib/CommonLib/RdCost.cpp
@@ -336,12 +336,16 @@ void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY,
   rcDP.cur.stride = iRefStride;
   rcDP.cur.width  = width;
   rcDP.cur.height = height;
-
+#if JVET_M0147_DMVR
+  rcDP.subShift = subShiftMode;
+#endif
   rcDP.step       = step;
   rcDP.maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max();
-
+#if JVET_M0147_DMVR
+  CHECK( useHadamard || rcDP.useMR, "only used in xDMVRCost with these default parameters (so far...)" );
+#else
   CHECK( useHadamard || rcDP.useMR || subShiftMode > 0, "only used in xDirectMCCost with these default parameters (so far...)" );
-
+#endif
   if ( bioApplied )
   {
     rcDP.distFunc = m_afpDistortFunc[ DF_SAD_INTERMEDIATE_BITDEPTH ];
diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp
index 921517ac96b8521863097f45fc8ef47395962ca4..c371599d01a9cf0b45b000114bd543e797433287 100644
--- a/source/Lib/CommonLib/Slice.cpp
+++ b/source/Lib/CommonLib/Slice.cpp
@@ -1810,6 +1810,9 @@ SPSNext::SPSNext( SPS& sps )
   , m_MTTEnabled                ( false )
   , m_MHIntra                   ( false )
   , m_Triangle                  ( false )
+#if JVET_M0147_DMVR
+  , m_DMVR                      ( false )
+#endif
 #if ENABLE_WPP_PARALLELISM
   , m_NextDQP                   ( false )
 #endif
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index 298542f744998bba1cb3dd4dcbb3f340fe397349..2b84f963887f8730714a9a9e63ccef6cedddbb30 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -855,6 +855,9 @@ private:
   bool              m_MTTEnabled;                 //
   bool              m_MHIntra;
   bool              m_Triangle;
+#if JVET_M0147_DMVR
+  bool              m_DMVR;
+#endif
 #if ENABLE_WPP_PARALLELISM
   bool              m_NextDQP;
 #endif
@@ -896,6 +899,10 @@ public:
   bool      getUseAffine          ()                                      const     { return m_Affine; }
   void      setUseAffineType      ( bool b )                                        { m_AffineType = b; }
   bool      getUseAffineType      ()                                      const     { return m_AffineType; }
+#if JVET_M0147_DMVR
+  void      setUseDMVR(bool b)                                                      { m_DMVR = b; }
+  bool      getUseDMVR()                                                   const    { return m_DMVR; }
+#endif
   void      setDisableMotCompress ( bool b )                                        { m_DisableMotionCompression = b; }
   bool      getDisableMotCompress ()                                      const     { return m_DisableMotionCompression; }
   bool      getMTTEnabled         ()                                      const     { return m_MTTEnabled; }
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index a337d85141c34daf416d148d5b37ebb416c6513e..c71c077e531eb1c58f36b73df1a55bdbd0a5dc71 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -113,6 +113,8 @@
 #define JVET_M0255_FRACMMVD_SWITCH                        1 // disable fractional MVD in MMVD adaptively
 #define JVET_M0823_MMVD_ENCOPT                            1 // encoder optimization for MMVD
 
+#define JVET_M0147_DMVR                                   1 //Decoder side Motion Vector Refinement
+
 #if JVET_M0464_UNI_MTS
 typedef std::pair<int, bool> TrMode;
 typedef std::pair<int, int>  TrCost;
diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp
index 2f36e96b0d2a65f9441719bf3a30e011c9dcb9aa..138617abf7246764d22a2a6348f93c2ffee6ea68 100644
--- a/source/Lib/CommonLib/Unit.cpp
+++ b/source/Lib/CommonLib/Unit.cpp
@@ -354,6 +354,13 @@ void PredictionUnit::initData()
   mergeType   = MRG_TYPE_DEFAULT_N;
   bv.setZero();
   bvd.setZero();
+#if JVET_M0147_DMVR
+  mvRefine = false;
+  for (uint32_t i = 0; i < MAX_NUM_SUBCU_DMVR; i++)
+  {
+    mvdL0SubPu[i].setZero();
+  }
+#endif
   for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++)
   {
     mvpIdx[i] = MAX_UCHAR;
@@ -407,6 +414,13 @@ PredictionUnit& PredictionUnit::operator=(const InterPredictionData& predData)
   mergeType   = predData.mergeType;
   bv          = predData.bv;
   bvd         = predData.bvd;
+#if JVET_M0147_DMVR
+  mvRefine = predData.mvRefine;
+  for (uint32_t i = 0; i < MAX_NUM_SUBCU_DMVR; i++)
+  {
+    mvdL0SubPu[i] = predData.mvdL0SubPu[i];
+  }
+#endif
   for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++)
   {
     mvpIdx[i]   = predData.mvpIdx[i];
@@ -452,6 +466,13 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other )
   mergeType   = other.mergeType;
   bv          = other.bv;
   bvd         = other.bvd;
+#if JVET_M0147_DMVR
+  mvRefine = other.mvRefine;
+  for (uint32_t i = 0; i < MAX_NUM_SUBCU_DMVR; i++)
+  {
+    mvdL0SubPu[i] = other.mvdL0SubPu[i];
+  }
+#endif
   for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++)
   {
     mvpIdx[i]   = other.mvpIdx[i];
diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h
index 064392af599e821ac9e325f0aad37ee2bae4601d..f2f30097778855feb8d00a7282a7a14ac2544476 100644
--- a/source/Lib/CommonLib/Unit.h
+++ b/source/Lib/CommonLib/Unit.h
@@ -372,6 +372,10 @@ struct InterPredictionData
   Mv        mv      [NUM_REF_PIC_LIST_01];
   int16_t     refIdx  [NUM_REF_PIC_LIST_01];
   MergeType mergeType;
+#if JVET_M0147_DMVR
+  bool      mvRefine;
+  Mv        mvdL0SubPu[MAX_NUM_SUBCU_DMVR];
+#endif
   Mv        mvdAffi [NUM_REF_PIC_LIST_01][3];
   Mv        mvAffi[NUM_REF_PIC_LIST_01][3];
   bool      mhIntraFlag;
diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp
index f22199dbe8b3b7d34d04db19f50ad2d01f4a0c31..0f2c1f3220dc3992c76be87dd4e4ee5698c1dc05 100644
--- a/source/Lib/CommonLib/UnitTools.cpp
+++ b/source/Lib/CommonLib/UnitTools.cpp
@@ -65,7 +65,39 @@ UnitArea CS::getArea( const CodingStructure &cs, const UnitArea &area, const Cha
 {
   return isDualITree( cs ) ? area.singleChan( chType ) : area;
 }
-
+#if JVET_M0147_DMVR
+void CS::setRefinedMotionField(CodingStructure &cs)   // writes the per-sub-block refined MVs of every qualifying PU via spanMotionInfo
+{
+  for (CodingUnit *codingUnit : cs.cus)
+  {
+    for (auto &pu : CU::traversePUs(*codingUnit))
+    {
+      PredictionUnit subPu = pu;   // scratch copy, re-targeted at one sub-block per iteration below
+      int subIdx = 0;              // running index into pu.mvdL0SubPu, in raster order of the sub-blocks
+      const int stepY = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
+      const int stepX = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH);
+      const Position origin = pu.lumaPos();
+      if (PU::checkDMVRCondition(pu))
+      {
+        for (int y = origin.y; y < (origin.y + pu.lumaSize().height); y += stepY)
+        {
+          for (int x = origin.x; x < (origin.x + pu.lumaSize().width); x += stepX)
+          {
+            Mv &mvdL0 = pu.mvdL0SubPu[subIdx++];   // stored L0 delta of this sub-block
+            subPu.UnitArea::operator=(UnitArea(pu.chromaFormat, Area(x, y, stepX, stepY)));
+            subPu.mv[0] = pu.mv[0];
+            subPu.mv[1] = pu.mv[1];
+            subPu.mv[REF_PIC_LIST_0] += mvdL0;     // the delta is added for list 0 ...
+            subPu.mv[REF_PIC_LIST_1] -= mvdL0;     // ... and subtracted for list 1
+            mvdL0.setZero();                       // the delta is cleared once it has been applied
+            PU::spanMotionInfo(subPu);
+          }
+        }
+      }
+    }
+  }
+}
+#endif
 // CU tools
 
 bool CU::isIntra(const CodingUnit &cu)
@@ -1426,6 +1458,27 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx,
   }
   mrgCtx.numValidMergeCand = uiArrayAddr;
 }
+#if JVET_M0147_DMVR
+bool PU::checkDMVRCondition(const PredictionUnit& pu)
+{
+  // The tool has to be enabled in the SPS before any per-PU test matters.
+  if (!pu.cs->sps->getSpsNext().getUseDMVR())
+  {
+    return false;
+  }
+  // Only default-type merge PUs that are neither affine nor MMVD-coded qualify.
+  const bool plainMerge = pu.mergeFlag && pu.mergeType == MRG_TYPE_DEFAULT_N
+                       && !pu.cu->affine && !pu.mmvdMergeFlag && !pu.cu->mmvdSkip;
+  if (!plainMerge)
+  {
+    return false;
+  }
+  // Last: the reference-picture test, then lheight() >= 8 and lheight() * lwidth() >= 64.
+  return PU::isBiPredFromDifferentDirEqDistPoc(pu)
+      && (pu.lheight() >= 8)
+      && ((pu.lheight() * pu.lwidth()) >= 64);
+}
+#endif
 // for ibc pu validation
 bool PU::isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize)
 {
@@ -3820,7 +3873,25 @@ bool PU::isBiPredFromDifferentDir( const PredictionUnit& pu )
 
   return false;
 }
-
+#if JVET_M0147_DMVR
+bool PU::isBiPredFromDifferentDirEqDistPoc(const PredictionUnit& pu)
+{
+  // Both reference indices must be valid (non-negative) for the POC test to apply.
+  if (pu.refIdx[0] < 0 || pu.refIdx[1] < 0)
+  {
+    return false;
+  }
+  const auto &slice    = pu.cu->slice;
+  const int   refPocL0 = slice->getRefPOC(REF_PIC_LIST_0, pu.refIdx[0]);
+  const int   refPocL1 = slice->getRefPOC(REF_PIC_LIST_1, pu.refIdx[1]);
+  const int   curPoc   = slice->getPOC();
+  const int   distL0   = curPoc - refPocL0;
+  const int   distL1   = curPoc - refPocL1;
+  // A negative product means the two POC differences have opposite signs.
+  const bool  oppositeSides = (distL0 * distL1) < 0;
+  return oppositeSides && (abs(distL0) == abs(distL1));
+}
+#endif
 void PU::restrictBiPredMergeCands( const PredictionUnit &pu, MergeCtx& mergeCtx )
 {
   if( PU::isBipredRestriction( pu ) )
diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h
index 5bbf165cae5329d9d182bc57c23661fff01f1ff7..8b222f08869271e2a16021819cea3f1699a007f4 100644
--- a/source/Lib/CommonLib/UnitTools.h
+++ b/source/Lib/CommonLib/UnitTools.h
@@ -49,6 +49,9 @@ namespace CS
   uint64_t getEstBits                   ( const CodingStructure &cs );
   UnitArea getArea                    ( const CodingStructure &cs, const UnitArea &area, const ChannelType chType );
   bool   isDualITree                  ( const CodingStructure &cs );
+#if JVET_M0147_DMVR
+  void   setRefinedMotionField(CodingStructure &cs);
+#endif
 }
 
 
@@ -151,6 +154,9 @@ namespace PU
   );
   bool getInterMergeSubPuRecurCand(const PredictionUnit &pu, MergeCtx &mrgCtx, const int count);
   bool isBiPredFromDifferentDir       (const PredictionUnit &pu);
+#if JVET_M0147_DMVR
+  bool isBiPredFromDifferentDirEqDistPoc(const PredictionUnit &pu);
+#endif
   void restrictBiPredMergeCands       (const PredictionUnit &pu, MergeCtx& mrgCtx);
 #if JVET_M0068_M0171_MMVD_CLEANUP
   void restrictBiPredMergeCandsOne    (PredictionUnit &pu);
@@ -175,6 +181,9 @@ namespace PU
   void getIbcMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred);
   bool getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv);
   bool isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize);
+#if JVET_M0147_DMVR
+  bool checkDMVRCondition(const PredictionUnit& pu);
+#endif
 }
 
 // TU tools
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
index e83a00ef15352cbe221bd78f9ae5eeeb7299fd71..15da0b788cc1fa0a03f52f76b9b97756be0d2663 100644
--- a/source/Lib/CommonLib/x86/BufferX86.h
+++ b/source/Lib/CommonLib/x86/BufferX86.h
@@ -128,6 +128,125 @@ void addAvg_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int s
   }
 }
 
+#if JVET_M0147_DMVR
+template<X86_VEXT vext>
+void copyBufferSimd(Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height)
+{
+  // Copies a width x height block of Pel elements from src to dst, one row at a
+  // time. srcStride / dstStride are the row pitches in elements. Each row is
+  // moved with the widest unaligned vector load/store that still fits, then
+  // with progressively narrower ones, and finally element by element.
+  // NOTE(review): the chunk sizes (16/8/4 elements per 256/128/64 bits) assume
+  // a 16-bit Pel - confirm against the typedef.
+
+  for (int row = 0; row < height; row++)
+  {
+    const Pel *srcRow = src + row * srcStride;
+    Pel       *dstRow = dst + row * dstStride;
+    int        col    = 0;
+
+#ifdef USE_AVX2
+    // 256-bit chunks: 16 elements per iteration
+    for (; (width - col) >= 16; col += 16)
+    {
+      const __m256i chunk16 = _mm256_loadu_si256((const __m256i*)(srcRow + col));
+      _mm256_storeu_si256((__m256i*)(dstRow + col), chunk16);
+    }
+#endif
+    // 128-bit chunks: 8 elements per iteration
+    for (; (width - col) >= 8; col += 8)
+    {
+      const __m128i chunk8 = _mm_loadu_si128((const __m128i*)(srcRow + col));
+      _mm_storeu_si128((__m128i*)(dstRow + col), chunk8);
+    }
+    // 64-bit chunks: 4 elements per iteration
+    for (; (width - col) >= 4; col += 4)
+    {
+      const __m128i chunk4 = _mm_loadl_epi64((const __m128i*)(srcRow + col));
+      _mm_storel_epi64((__m128i*)(dstRow + col), chunk4);
+    }
+    // whatever is left (fewer than 4 elements)
+    for (; col < width; col++)
+    {
+      dstRow[col] = srcRow[col];
+    }
+  }
+}
+// Replicates the border of a width x height block outward by padSize elements on all four sides, in place.
+// dst points at the block's top-left element; stride is the row pitch in elements. Writes reach rows/columns -padSize .. size-1+padSize.
+template<X86_VEXT vext>
+void paddingSimd(Pel *dst, int stride, int width, int height, int padSize)
+{
+  __m128i x;
+#ifdef USE_AVX2
+  __m256i x16;
+#endif
+  int temp, j;
+  for (int i = 1; i <= padSize; i++)   // i-th padding row above (row -i) and below (row height-1+i) the block
+  {
+    j = 0;          // current column within the row
+    temp = width;   // columns still to be copied for this padding row
+#ifdef USE_AVX2
+    while ((temp >> 4) > 0)   // 16 elements per iteration while at least 16 remain
+    {
+      // first block row -> row -i, then last block row -> row height-1+i
+      x16 = _mm256_loadu_si256((const __m256i*)(&(dst[j])));
+      _mm256_storeu_si256((__m256i*)(dst + j - i*stride), x16);
+      x16 = _mm256_loadu_si256((const __m256i*)(dst + j + (height - 1)*stride));
+      _mm256_storeu_si256((__m256i*)(dst + j + (height - 1 + i)*stride), x16);
+
+
+      j = j + 16;
+      temp = temp - 16;
+    }
+#endif
+    while ((temp >> 3) > 0)   // 8 elements per iteration while at least 8 remain
+    {
+      // same top/bottom replication with 128-bit loads and stores
+      x = _mm_loadu_si128((const __m128i*)(&(dst[j])));
+      _mm_storeu_si128((__m128i*)(dst + j - i*stride), x);
+      x = _mm_loadu_si128((const __m128i*)(dst + j + (height - 1)*stride));
+      _mm_storeu_si128((__m128i*)(dst + j + (height - 1 + i)*stride), x);
+
+      j = j + 8;
+      temp = temp - 8;
+    }
+    while ((temp >> 2) > 0)   // 4 elements per iteration (64-bit half-register moves)
+    {
+      x = _mm_loadl_epi64((const __m128i*)(&dst[j]));
+      _mm_storel_epi64((__m128i*)(dst + j - i*stride), x);
+      x = _mm_loadl_epi64((const __m128i*)(dst + j + (height - 1)*stride));
+      _mm_storel_epi64((__m128i*)(dst + j + (height - 1 + i)*stride), x);
+
+      j = j + 4;
+      temp = temp - 4;
+    }
+    while (temp > 0)   // element-wise copy for the remaining columns
+    {
+      dst[j - i*stride] = dst[j];
+      dst[j + (height - 1 + i)*stride] = dst[j + (height - 1)*stride];
+      j++;
+      temp--;
+    }
+  }
+
+  // This pass runs after the top/bottom pass so that the freshly written padding rows are extended sideways too.
+  // Left and right padding
+  Pel* ptr1 = dst - padSize*stride;               // column 0 of the topmost padding row
+  Pel* ptr2 = dst - padSize*stride + width - 1;   // column width-1 of the topmost padding row
+  int offset = 0;
+  for (int i = 0; i < height + 2 * padSize; i++)  // every row, including the padding rows written above
+  {
+    offset = stride * i;
+    for (int j = 1; j <= padSize; j++)
+    {
+      *(ptr1 - j + offset) = *(ptr1 + offset);   // column 0 copied to column -j
+      *(ptr2 + j + offset) = *(ptr2 + offset);   // column width-1 copied to column width-1+j
+    }
+
+  }
+}
+#endif
 template< X86_VEXT vext >
 void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
 {
@@ -848,6 +967,10 @@ void PelBufferOps::_initPelBufOpsX86()
   calcBIOPar      = calcBIOPar_SSE<vext>;
   calcBlkGradient = calcBlkGradient_SSE<vext>;
 
+#if JVET_M0147_DMVR
+  copyBuffer = copyBufferSimd<vext>;
+  padding    = paddingSimd<vext>;
+#endif
   reco8 = reco_SSE<vext, 8>;
   reco4 = reco_SSE<vext, 4>;
 
diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
index 1c3b75f5383f4d4d51f27f44f23fb0f354c043eb..9218115c815f238c2fe4077bd0552a5ee4bbbbbb 100644
--- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h
+++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
@@ -193,7 +193,11 @@ static void fullPelCopyAVX2( const ClpRng& clpRng, const void*_src, int srcStrid
 
 
 template<X86_VEXT vext, bool isFirst, bool isLast>
+#if JVET_M0147_DMVR
+static void simdFilterCopy( const ClpRng& clpRng, const Pel* src, int srcStride, int16_t* dst, int dstStride, int width, int height, bool biMC10BitOut)
+#else
 static void simdFilterCopy( const ClpRng& clpRng, const Pel* src, int srcStride, int16_t* dst, int dstStride, int width, int height )
+#endif
 {
 #if !HM_JEM_CLIP_PEL
   if( vext >= AVX2 && ( width % 16 ) == 0 )
@@ -211,7 +215,11 @@ static void simdFilterCopy( const ClpRng& clpRng, const Pel* src, int srcStride,
   else
 #endif
   { //Scalar
+#if JVET_M0147_DMVR
+    InterpolationFilter::filterCopy<isFirst, isLast>( clpRng, src, srcStride, dst, dstStride, width, height, biMC10BitOut);
+#else
     InterpolationFilter::filterCopy<isFirst, isLast>( clpRng, src, srcStride, dst, dstStride, width, height );
+#endif
   }
 }
 
@@ -979,9 +987,126 @@ static void simdInterpolateN2_M4( const int16_t* src, int srcStride, int16_t *ds
     dst += dstStride;
   }
 }
+#if JVET_M0147_DMVR
+#ifdef USE_AVX2
+static inline __m256i simdInterpolateLuma10Bit2P16(int16_t const *src1, int srcStride, __m256i *mmCoeff, const __m256i & mmOffset, int shift)
+{
+  // 2-tap filter over 16 lanes: (tap0 * mmCoeff[0] + tap1 * mmCoeff[1] + mmOffset) >> shift.
+  const __m256i tap0   = _mm256_loadu_si256((__m256i*)src1);
+  const __m256i tap1   = _mm256_loadu_si256((__m256i*)(src1 + srcStride));
+  // mullo keeps only the low 16 bits of each product; the sum also stays in 16-bit lanes.
+  const __m256i prod0  = _mm256_mullo_epi16(tap0, mmCoeff[0]);
+  const __m256i prod1  = _mm256_mullo_epi16(tap1, mmCoeff[1]);
+  const __m256i acc    = _mm256_add_epi16(prod0, prod1);
+  // add the offset, then arithmetic right shift
+  const __m256i biased = _mm256_add_epi16(acc, mmOffset);
+  return _mm256_srai_epi16(biased, shift);
+}
+#endif
+
+static inline __m128i simdInterpolateLuma10Bit2P8(int16_t const *src1, int srcStride, __m128i *mmCoeff, const __m128i & mmOffset, int shift)
+{
+  // 2-tap filter over 8 lanes: (tap0 * mmCoeff[0] + tap1 * mmCoeff[1] + mmOffset) >> shift.
+  const __m128i tap0   = _mm_loadu_si128((__m128i*)src1);
+  const __m128i tap1   = _mm_loadu_si128((__m128i*)(src1 + srcStride));
+  // mullo keeps only the low 16 bits of each product; the sum also stays in 16-bit lanes.
+  const __m128i prod0  = _mm_mullo_epi16(tap0, mmCoeff[0]);
+  const __m128i prod1  = _mm_mullo_epi16(tap1, mmCoeff[1]);
+  const __m128i acc    = _mm_add_epi16(prod0, prod1);
+  // add the offset, then arithmetic right shift
+  const __m128i biased = _mm_add_epi16(acc, mmOffset);
+  return _mm_srai_epi16(biased, shift);
+}
+
+static inline __m128i simdInterpolateLuma10Bit2P4(int16_t const *src, int srcStride, __m128i *mmCoeff, const __m128i & mmOffset, int shift)
+{
+  // 2-tap filter on the low 64 bits only (4 lanes loaded per tap): (tap0 * c0 + tap1 * c1 + mmOffset) >> shift.
+  const __m128i tap0   = _mm_loadl_epi64((__m128i*)src);
+  const __m128i tap1   = _mm_loadl_epi64((__m128i*)(src + srcStride));
+  // mullo keeps only the low 16 bits of each product; the sum also stays in 16-bit lanes.
+  const __m128i prod0  = _mm_mullo_epi16(tap0, mmCoeff[0]);
+  const __m128i prod1  = _mm_mullo_epi16(tap1, mmCoeff[1]);
+  const __m128i acc    = _mm_add_epi16(prod0, prod1);
+  // add the offset, then arithmetic right shift
+  const __m128i biased = _mm_add_epi16(acc, mmOffset);
+  return _mm_srai_epi16(biased, shift);
+}
+// intermediate values are not expected to exceed 16 bits
+#if USE_AVX2
+static inline __m256i simdClip3_256(__m256i mmMin, __m256i mmMax, __m256i mmPix)
+{
+  // Clamps every signed 16-bit lane of mmPix to [mmMin, mmMax] and returns the result.
+  // The lower bound is applied first and the upper bound second, which matches the former
+  // compare-and-blend sequence lane for lane (also when a lane has mmMin > mmMax).
+  mmPix = _mm256_max_epi16(mmPix, mmMin);
+  return _mm256_min_epi16(mmPix, mmMax);
+}
+#endif
+// 2-tap filter for widths that are a multiple of 4, computed in 16-bit lanes. Per sample:
+// (src[x] * c[0] + src[x + cStride] * c[1] + offset) >> shift, clipped to [clpRng.min, clpRng.max] when isLast.
+template<X86_VEXT vext, bool isLast>
+static void simdInterpolateN2_10BIT_M4(const int16_t* src, int srcStride, int16_t *dst, int dstStride, int cStride, int width, int height, int shift, int offset, const ClpRng& clpRng, int16_t const *c)
+{
+  __m128i mmOffset = _mm_set1_epi16(offset);
+  __m128i mmCoeff[2];
+  __m128i mmMin = _mm_set1_epi16(clpRng.min);
+  __m128i mmMax = _mm_set1_epi16(clpRng.max);
+  for (int n = 0; n < 2; n++)
+    mmCoeff[n] = _mm_set1_epi16(c[n]);
+
+#if USE_AVX2
+  __m256i mm256Offset = _mm256_set1_epi16(offset);
+  __m256i mm256Coeff[2];
+  __m256i mm256Min = _mm256_set1_epi16(clpRng.min);
+  __m256i mm256Max = _mm256_set1_epi16(clpRng.max);
+  for (int n = 0; n < 2; n++)
+    mm256Coeff[n] = _mm256_set1_epi16(c[n]);
+#endif
+  for (int row = 0; row < height; row++)
+  {
+    int col = 0;
+#if USE_AVX2
+    // multiple of 16
+    for (; col < ((width >> 4) << 4); col += 16)
+    {
+      __m256i mmFiltered = simdInterpolateLuma10Bit2P16(src + col, cStride, mm256Coeff, mm256Offset, shift);
+      if (isLast)
+      {
+        mmFiltered = simdClip3_256(mm256Min, mm256Max, mmFiltered);
+      }
+      _mm256_storeu_si256((__m256i *)(dst + col), mmFiltered);
+    }
+#endif
+    // multiple of 8
+    for (; col < ((width >> 3) << 3); col += 8)
+    {
+      __m128i mmFiltered = simdInterpolateLuma10Bit2P8(src + col, cStride, mmCoeff, mmOffset, shift);
+      if (isLast)
+      {
+        mmFiltered = simdClip3(mmMin, mmMax, mmFiltered);
+      }
+      _mm_storeu_si128((__m128i *)(dst + col), mmFiltered);
+    }
+    // last 4 samples: present only when width is not a multiple of 8. Without this guard a
+    // width that is a multiple of 8 would read and write 4 samples past the end of the row.
+    if (col < width)
+    {
+      __m128i mmFiltered = simdInterpolateLuma10Bit2P4(src + col, cStride, mmCoeff, mmOffset, shift);
+      if (isLast) { mmFiltered = simdClip3(mmMin, mmMax, mmFiltered); }
+      _mm_storel_epi64((__m128i *)(dst + col), mmFiltered);
+    }
+    src += srcStride;
+    dst += dstStride;
+  }
+}
+#endif
 
 template<X86_VEXT vext, int N, bool isVertical, bool isFirst, bool isLast>
+#if JVET_M0147_DMVR
+static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff, bool biMC10BitOut)
+#else
 static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, TFilterCoeff const *coeff )
+#endif
 {
   int row, col;
 
@@ -1027,6 +1152,13 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel
     offset = ( isFirst ) ? -IF_INTERNAL_OFFS << shift : 0;
   }
 
+#if JVET_M0147_DMVR
+  if (biMC10BitOut)
+  {
+    shift = IF_FILTER_PREC_BILINEAR - (IF_INTERNAL_PREC_BILINEAR - clpRng.bd);
+    offset = 1 << (shift - 1);
+  }
+#endif
   if( clpRng.bd <= 10 )
   {
     if( N == 8 && !( width & 0x07 ) )
@@ -1075,6 +1207,16 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel
         simdInterpolateVerM4<vext, 4, isLast>( src, srcStride, dst, dstStride, width, height, shift, offset, clpRng, c );
       return;
     }
+#if JVET_M0147_DMVR
+    else if (biMC10BitOut)
+    {
+      if (N == 2 && !(width & 0x03))
+      {
+        simdInterpolateN2_10BIT_M4<vext, isLast>(src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c);
+        return;
+      }
+    }
+#endif
     else if( N == 2 && !( width & 0x07 ) )
     {
       simdInterpolateN2_M8<vext, isLast>( src, srcStride, dst, dstStride, cStride, width, height, shift, offset, clpRng, c );
diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp
index 8602e729de947697c5e824f93694aba6285317ec..6bfbc6672e837f7d4a2c5b3cc886794a9e81296a 100644
--- a/source/Lib/DecoderLib/DecLib.cpp
+++ b/source/Lib/DecoderLib/DecLib.cpp
@@ -525,7 +525,9 @@ void DecLib::executeLoopFilters()
 #endif
   // deblocking filter
   m_cLoopFilter.loopFilterPic( cs );
-
+#if JVET_M0147_DMVR
+  CS::setRefinedMotionField(cs);
+#endif
   if( cs.sps->getSAOEnabledFlag() )
   {
     m_cSAO.SAOProcess( cs, cs.picture->getSAO() );
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index 488c20620f6d784d3346ee67f37ecf8617f7c117..0ed70a77d2e5161f3ab4d2c332c31a3dcc338db9 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -829,6 +829,9 @@ void HLSyntaxReader::parseSPSNext( SPSNext& spsNext, const bool usePCM )
   READ_FLAG( symbol,  "mtt_enabled_flag" );                       spsNext.setMTTMode                ( symbol );
   READ_FLAG( symbol,  "mhintra_flag" );                           spsNext.setUseMHIntra             ( symbol != 0 );
   READ_FLAG( symbol,    "triangle_flag" );                          spsNext.setUseTriangle            ( symbol != 0 );
+#if JVET_M0147_DMVR
+  READ_FLAG(symbol, "dmvr_enable_flag");                          spsNext.setUseDMVR                  (symbol != 0);
+#endif
 #if ENABLE_WPP_PARALLELISM
   READ_FLAG( symbol,  "next_dqp_enabled_flag" );                  spsNext.setUseNextDQP             ( symbol != 0 );
 #else
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index 951fce7285a9266ab7a15fea7cc560ba88145e5f..e6cbe169e188f1c7fe7c9d29329d81a72c806187 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -249,6 +249,9 @@ protected:
 #endif
 #if JVET_M0247_AFFINE_AMVR_ENCOPT
   bool      m_AffineAmvrEncOpt;
+#endif
+#if JVET_M0147_DMVR
+  bool      m_DMVR;
 #endif
   unsigned  m_IBCMode;
   unsigned  m_IBCLocalSearchRangeX;
@@ -783,6 +786,10 @@ public:
   void      setUseAffineAmvrEncOpt          ( bool b )       { m_AffineAmvrEncOpt = b;    }
   bool      getUseAffineAmvrEncOpt          ()         const { return m_AffineAmvrEncOpt; }
 #endif
+#if JVET_M0147_DMVR
+  void      setDMVR                      ( bool b )       { m_DMVR = b; } ///< stores the DMVR on/off setting in m_DMVR
+  bool      getDMVR                      ()         const { return m_DMVR; } ///< returns the stored DMVR on/off setting
+#endif
 
   void      setIBCMode                      (unsigned n)     { m_IBCMode = n; }
   unsigned  getIBCMode                      ()         const { return m_IBCMode; }
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index 0c1b6d192c52dbd60b645bc11a96b330543f0333..00f1682c29ad5ebb8c1ebe355c326e3af7538a0b 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -1774,6 +1774,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
     mergeCtx.subPuMvpMiBuf    = MotionBuf( m_SubPuMiBuf,    bufSize );
   }
 
+#if JVET_M0147_DMVR
+  Mv   refinedMvdL0[MAX_NUM_PARTS_IN_CTU][MRG_MAX_NUM_CANDS];
+#endif
   setMergeBestSATDCost( MAX_DOUBLE );
 
   {
@@ -1924,14 +1927,38 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
         mergeCtx.setMergeInfo( pu, uiMergeCand );
 
         PU::spanMotionInfo( pu, mergeCtx );
+#if JVET_M0147_DMVR
+        pu.mvRefine = true;
+#endif
         distParam.cur = singleMergeTempBuffer->Y();
         m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer);
         acMergeBuffer[uiMergeCand] = m_acRealMergeBuffer[uiMergeCand].getBuf(localUnitArea);
         acMergeBuffer[uiMergeCand].copyFrom(*singleMergeTempBuffer);
+#if JVET_M0147_DMVR
+        pu.mvRefine = false;
+#endif
         if( mergeCtx.interDirNeighbours[uiMergeCand] == 3 && mergeCtx.mrgTypeNeighbours[uiMergeCand] == MRG_TYPE_DEFAULT_N )
         {
           mergeCtx.mvFieldNeighbours[2*uiMergeCand].mv   = pu.mv[0];
           mergeCtx.mvFieldNeighbours[2*uiMergeCand+1].mv = pu.mv[1];
+#if JVET_M0147_DMVR
+          {
+            int dx, dy, i, j, num = 0;
+            dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
+            dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH);
+            if (PU::checkDMVRCondition(pu))
+            {
+              for (i = 0; i < (pu.lumaSize().height); i += dy)
+              {
+                for (j = 0; j < (pu.lumaSize().width); j += dx)
+                {
+                  refinedMvdL0[num][uiMergeCand] = pu.mvdL0SubPu[num];
+                  num++;
+                }
+              }
+            }
+          }
+#endif
         }
 
         Distortion uiSad = distParam.distFunc(distParam);
@@ -2123,6 +2150,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
         mergeCtx.setMmvdMergeCandiInfo(pu, mmvdMergeCand);
 
         PU::spanMotionInfo(pu, mergeCtx);
+#if JVET_M0147_DMVR
+        pu.mvRefine = true;
+#endif
         distParam.cur = singleMergeTempBuffer->Y();
 #if JVET_M0823_MMVD_ENCOPT
         pu.mmvdEncOptMode = (refineStep > 2 ? 2 : 1);
@@ -2130,6 +2160,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
         m_pcInterSearch->motionCompensation(pu, *singleMergeTempBuffer);
 #if JVET_M0823_MMVD_ENCOPT
         pu.mmvdEncOptMode = 0;
+#endif
+#if JVET_M0147_DMVR // store the refined MV
+        pu.mvRefine = false;
 #endif
         Distortion uiSad = distParam.distFunc(distParam);
 
@@ -2275,6 +2308,24 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
 
       if( mrgTempBufSet )
       {
+#if JVET_M0147_DMVR
+        {
+          int dx, dy, i, j, num = 0;
+          dy = std::min<int>(pu.lumaSize().height, DMVR_SUBCU_HEIGHT);
+          dx = std::min<int>(pu.lumaSize().width, DMVR_SUBCU_WIDTH);
+          if (PU::checkDMVRCondition(pu))
+          {
+            for (i = 0; i < (pu.lumaSize().height); i += dy)
+            {
+              for (j = 0; j < (pu.lumaSize().width); j += dx)
+              {
+                pu.mvdL0SubPu[num] = refinedMvdL0[num][uiMergeCand];
+                num++;
+              }
+            }
+          }
+        }
+#endif
         if (pu.mhIntraFlag)
         {
           uint32_t bufIdx = (pu.intraDir[0] > 1) ? (pu.intraDir[0] == HOR_IDX ? 2 : 3) : pu.intraDir[0];
@@ -2315,8 +2366,13 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
       }
       else
       {
+#if JVET_M0147_DMVR
+        pu.mvRefine = true;
+#endif
         m_pcInterSearch->motionCompensation( pu );
-        
+#if JVET_M0147_DMVR
+        pu.mvRefine = false;
+#endif        
       }
       if (!cu.mmvdSkip && !pu.mhIntraFlag && uiNoResidualPass != 0)
       {
diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp
index aae805f73efbf366d37cb19589499e4a2d3f88cf..a6d39bd8fd6bced9c4dc39d55d5e035d64d75add 100644
--- a/source/Lib/EncoderLib/EncGOP.cpp
+++ b/source/Lib/EncoderLib/EncGOP.cpp
@@ -2309,6 +2309,9 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
 
       m_pcLoopFilter->loopFilterPic( cs );
 
+#if JVET_M0147_DMVR 
+      CS::setRefinedMotionField(cs);
+#endif
       DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "final", 1 ) ) );
 
       if( pcSlice->getSPS()->getSAOEnabledFlag() )
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index f6eec6f24b8a55d62fc674c585ad7700390a76e6..0a8d44941516caa69e72c720008494fef59add51 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -930,6 +930,9 @@ void EncLib::xInitSPS(SPS &sps)
 #endif
 #if JVET_M0246_AFFINE_AMVR
   sps.setAffineAmvrEnabledFlag              ( m_AffineAmvr );
+#endif
+#if JVET_M0147_DMVR
+  sps.getSpsNext().setUseDMVR               ( m_DMVR );
 #endif
   sps.getSpsNext().setIBCMode               ( m_IBCMode );
 
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index adee3159949ffe85b7bdd1e6e37920378e249262..5c4dd6896b3196cec7d6469995f5989cabae4228 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -570,6 +570,9 @@ void HLSWriter::codeSPSNext( const SPSNext& spsNext, const bool usePCM )
   WRITE_FLAG( spsNext.getMTTEnabled() ? 1 : 0,                                                  "mtt_enabled_flag" );
   WRITE_FLAG( spsNext.getUseMHIntra() ? 1 : 0,                                                  "mhintra_flag" );
   WRITE_FLAG( spsNext.getUseTriangle() ? 1: 0,                                                  "triangle_flag" );
+#if JVET_M0147_DMVR
+  WRITE_FLAG(spsNext.getUseDMVR() ? 1 : 0,                                                      "dmvr_enable_flag");
+#endif
 #if ENABLE_WPP_PARALLELISM
   WRITE_FLAG( spsNext.getUseNextDQP(),                                                          "next_dqp_enabled_flag" );
 #else