diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp
index 73c21e22779daab047821e0830ee7c880ace63be..7af9b1fdf06e828e665bca22880b3f2c5230ff8f 100644
--- a/source/Lib/CommonLib/Buffer.cpp
+++ b/source/Lib/CommonLib/Buffer.cpp
@@ -177,8 +177,13 @@ void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStr
   {
     for (int x = 0; x < (width - 2 * BIO_EXTEND_SIZE); x++)
     {
+#if JVET_O0570_GRAD_SIMP
+      gradYTmp[x] = ( srcTmp[x + srcStride] >> shift1 ) - ( srcTmp[x - srcStride] >> shift1 );
+      gradXTmp[x] = ( srcTmp[x + 1] >> shift1 ) - ( srcTmp[x - 1] >> shift1 );
+#else
       gradYTmp[x] = (srcTmp[x + srcStride] - srcTmp[x - srcStride]) >> shift1;
       gradXTmp[x] = (srcTmp[x + 1] - srcTmp[x - 1]) >> shift1;
+#endif
     }
     gradXTmp += gradStride;
     gradYTmp += gradStride;
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index fc95ff8b85e487da1c7b77001ead05c22a1c4e28..c5c6ca9c2418eace0749e97e68f2ef3a8d7ebd1a 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -53,6 +53,8 @@
 
 #define JVET_O0070_PROF                                   1 // JVET-O0070 method 4-2.1a: Prediction refinement with optical flow for affine mode
 
+#define JVET_O0570_GRAD_SIMP                              1 // JVET-O0570/JVET-O0211, SIMD-friendly spatial gradient calculation
+
 #define JVET_O1170_IBC_VIRTUAL_BUFFER                     1 // JVET-O1170/O1171: IBC virtual buffer
 #if JVET_O1170_IBC_VIRTUAL_BUFFER
 #define JVET_O1170_CHECK_BV_AT_DECODER                    1 // For decoder to check if a BV is valid or not
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
index d9c88fa8e1b305e159cb18bd7f74c563365c3606..6e82e3f411a3b3666cc0ec707988be68d023abd2 100644
--- a/source/Lib/CommonLib/x86/BufferX86.h
+++ b/source/Lib/CommonLib/x86/BufferX86.h
@@ -491,7 +491,9 @@ template< X86_VEXT vext >
 #endif
 void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth)
 {
+#if !JVET_O0570_GRAD_SIMP
   __m128i vzero = _mm_setzero_si128();
+#endif
   Pel* srcTmp = src + srcStride + 1;
   Pel* gradXTmp = gradX + gradStride + 1;
   Pel* gradYTmp = gradY + gradStride + 1;
@@ -499,32 +501,79 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri
   int widthInside = width - 2 * BIO_EXTEND_SIZE;
   int heightInside = height - 2 * BIO_EXTEND_SIZE;
   int shift1 = std::max<int>(6, bitDepth - 6);
-
+#if JVET_O0570_GRAD_SIMP
+  __m128i mmShift1 = _mm_cvtsi32_si128( shift1 );
+#endif
   assert((widthInside & 3) == 0);
 
-  for (int y = 0; y < heightInside; y++)
+#if JVET_O0570_GRAD_SIMP
+  if ( ( widthInside & 7 ) == 0 )
   {
-    int x = 0;
-    for (; x < widthInside; x += 4)
+#endif
+    for (int y = 0; y < heightInside; y++)
     {
-      __m128i mmPixTop = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - srcStride)));
-      __m128i mmPixBottom = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + srcStride)));
-      __m128i mmPixLeft = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - 1)));
-      __m128i mmPixRight = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + 1)));
-
-      __m128i mmGradVer = _mm_sra_epi32(_mm_sub_epi32(mmPixBottom, mmPixTop), _mm_cvtsi32_si128(shift1));
-      __m128i mmGradHor = _mm_sra_epi32(_mm_sub_epi32(mmPixRight, mmPixLeft), _mm_cvtsi32_si128(shift1));
-      mmGradVer = _mm_packs_epi32(mmGradVer, vzero);
-      mmGradHor = _mm_packs_epi32(mmGradHor, vzero);
-
-      _mm_storel_epi64((__m128i *)(gradYTmp + x), mmGradVer);
-      _mm_storel_epi64((__m128i *)(gradXTmp + x), mmGradHor);
-    }
+      int x = 0;
+#if JVET_O0570_GRAD_SIMP
+      for ( ; x < widthInside; x += 8 )
+      {
+        __m128i mmPixTop    = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x - srcStride ) ), mmShift1 );
+        __m128i mmPixBottom = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x + srcStride ) ), mmShift1 );
+        __m128i mmPixLeft   = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x - 1 ) ), mmShift1 );
+        __m128i mmPixRight  = _mm_sra_epi16( _mm_loadu_si128( ( __m128i* ) ( srcTmp + x + 1 ) ), mmShift1 );
 
-    gradXTmp += gradStride;
-    gradYTmp += gradStride;
-    srcTmp += srcStride;
+        __m128i mmGradVer = _mm_sub_epi16( mmPixBottom, mmPixTop );
+        __m128i mmGradHor = _mm_sub_epi16( mmPixRight, mmPixLeft );
+
+        _mm_storeu_si128( ( __m128i * ) ( gradYTmp + x ), mmGradVer );
+        _mm_storeu_si128( ( __m128i * ) ( gradXTmp + x ), mmGradHor );
+      }
+#else
+      for (; x < widthInside; x += 4)
+      {
+        __m128i mmPixTop = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - srcStride)));
+        __m128i mmPixBottom = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + srcStride)));
+        __m128i mmPixLeft = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - 1)));
+        __m128i mmPixRight = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + 1)));
+
+        __m128i mmGradVer = _mm_sra_epi32(_mm_sub_epi32(mmPixBottom, mmPixTop), _mm_cvtsi32_si128(shift1));
+        __m128i mmGradHor = _mm_sra_epi32(_mm_sub_epi32(mmPixRight, mmPixLeft), _mm_cvtsi32_si128(shift1));
+        mmGradVer = _mm_packs_epi32(mmGradVer, vzero);
+        mmGradHor = _mm_packs_epi32(mmGradHor, vzero);
+
+        _mm_storel_epi64((__m128i *)(gradYTmp + x), mmGradVer);
+        _mm_storel_epi64((__m128i *)(gradXTmp + x), mmGradHor);
+      }
+#endif
+      gradXTmp += gradStride;
+      gradYTmp += gradStride;
+      srcTmp += srcStride;
+    }
+#if JVET_O0570_GRAD_SIMP
   }
+  else
+  {
+    __m128i mmPixTop = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp - srcStride ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp ) ) ), mmShift1 );
+    for ( int y = 0; y < heightInside; y += 2 )
+    {
+      __m128i mmPixBottom = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp + srcStride ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp + ( srcStride << 1 ) ) ) ), mmShift1 );
+      __m128i mmPixLeft   = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp - 1 ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp - 1 + srcStride ) ) ), mmShift1 );
+      __m128i mmPixRight  = _mm_sra_epi16( _mm_unpacklo_epi64( _mm_loadl_epi64( (__m128i*) ( srcTmp + 1 ) ), _mm_loadl_epi64( (__m128i*) ( srcTmp + 1 + srcStride ) ) ), mmShift1 );
+
+      __m128i mmGradVer = _mm_sub_epi16( mmPixBottom, mmPixTop );
+      __m128i mmGradHor = _mm_sub_epi16( mmPixRight, mmPixLeft );
+
+      _mm_storel_epi64( (__m128i *) gradYTmp, mmGradVer );
+      _mm_storel_epi64( (__m128i *) ( gradYTmp + gradStride ), _mm_unpackhi_epi64( mmGradVer, mmGradHor ) );
+      _mm_storel_epi64( (__m128i *) gradXTmp, mmGradHor );
+      _mm_storel_epi64( (__m128i *) ( gradXTmp + gradStride ), _mm_unpackhi_epi64( mmGradHor, mmGradVer ) );
+
+      mmPixTop = mmPixBottom;
+      gradXTmp += gradStride << 1;
+      gradYTmp += gradStride << 1;
+      srcTmp   += srcStride << 1;
+    }
+  }
+#endif
 
 #if JVET_O0070_PROF
   if (PAD)