diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp
index 07ffa9a94ab29b9ce80e49aae1e75bbeb025c6c5..9fd64da8014b28cb3ae26cd538f7f7398e7c92b5 100644
--- a/source/Lib/CommonLib/Buffer.cpp
+++ b/source/Lib/CommonLib/Buffer.cpp
@@ -91,18 +91,30 @@ void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Str
   }
 }
 
+#if JVET_M0063_BDOF_FIX
+void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth)
+#else
 void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY)
+#endif
 {
   Pel* srcTmp = pSrc + srcStride + 1;
   Pel* gradXTmp = gradX + gradStride + 1;
   Pel* gradYTmp = gradY + gradStride + 1;
+#if JVET_M0063_BDOF_FIX
+  int  shift1 = std::max<int>(2, (IF_INTERNAL_PREC - bitDepth));
+#endif
 
   for (int y = 0; y < (height - 2 * BIO_EXTEND_SIZE); y++)
   {
     for (int x = 0; x < (width - 2 * BIO_EXTEND_SIZE); x++)
     {
+#if JVET_M0063_BDOF_FIX
+      gradYTmp[x] = (srcTmp[x + srcStride] - srcTmp[x - srcStride]) >> shift1;
+      gradXTmp[x] = (srcTmp[x + 1] - srcTmp[x - 1]) >> shift1;
+#else
       gradYTmp[x] = (srcTmp[x + srcStride] - srcTmp[x - srcStride]) >> 4;
       gradXTmp[x] = (srcTmp[x + 1] - srcTmp[x - 1]) >> 4;
+#endif
     }
     gradXTmp += gradStride;
     gradYTmp += gradStride;
@@ -130,15 +142,29 @@ void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStr
   ::memcpy(gradYTmp + (height - 2 * BIO_EXTEND_SIZE)*gradStride, gradYTmp + (height - 2 * BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
 }
 
+#if JVET_M0063_BDOF_FIX
+void calcBIOParCore(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth)
+#else
 void calcBIOParCore(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG)
+#endif
 {
+#if JVET_M0063_BDOF_FIX
+  int shift4 = std::min<int>(8, (bitDepth - 4));
+  int shift5 = std::min<int>(5, (bitDepth - 7));
+#endif
   for (int y = 0; y < heightG; y++)
   {
     for (int x = 0; x < widthG; x++)
     {
+#if JVET_M0063_BDOF_FIX
+      int temp = (srcY0Temp[x] >> shift4) - (srcY1Temp[x] >> shift4);
+      int tempX = (gradX0[x] + gradX1[x]) >> shift5;
+      int tempY = (gradY0[x] + gradY1[x]) >> shift5;
+#else
       int temp = (srcY0Temp[x] >> 6) - (srcY1Temp[x] >> 6);
       int tempX = (gradX0[x] + gradX1[x]) >> 3;
       int tempY = (gradY0[x] + gradY1[x]) >> 3;
+#endif
       dotProductTemp1[x] = tempX * tempX;
       dotProductTemp2[x] = tempX * tempY;
       dotProductTemp3[x] = -tempX * temp;
diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h
index e16ab2d3f4a9b163cbbbe0f2b33bdb1c70ef416d..db9d69e5e59716d556a6554fceb0a242ce55b8bc 100644
--- a/source/Lib/CommonLib/Buffer.h
+++ b/source/Lib/CommonLib/Buffer.h
@@ -69,8 +69,13 @@ struct PelBufferOps
   void ( *linTf4 )        ( const Pel* src0, int src0Stride,                                  Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
   void ( *linTf8 )        ( const Pel* src0, int src0Stride,                                  Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
   void(*addBIOAvg4)    (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng);
+#if JVET_M0063_BDOF_FIX // function-pointer signatures in this branch take an extra trailing bitDepth argument
+  void(*bioGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth);
+  void(*calcBIOPar)    (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth);
+#else // legacy signatures without the bitDepth argument
   void(*bioGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY);
   void(*calcBIOPar)    (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG);
+#endif // JVET_M0063_BDOF_FIX
   void(*calcBlkGradient)(int sx, int sy, int    *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize);
 #if ENABLE_SIMD_OPT_GBI
   void ( *removeWeightHighFreq8)  ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index 43deb866b992f6c87d36c332a8ba547536f1ba33..f9a9da2d516638c304e6b331e089392af0729388 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -875,7 +875,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
     Pel* gradY = (refList == 0) ? m_gradY0 : m_gradY1;
     Pel* gradX = (refList == 0) ? m_gradX0 : m_gradX1;
 
+#if JVET_M0063_BDOF_FIX
+    xBioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY, clipBitDepths.recon[toChannelType(COMPONENT_Y)]);
+#else
     xBioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY);
+#endif
     Pel* padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 2;
     for (int y = 0; y< height; y++)
     {
@@ -893,7 +897,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
   const int   bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
   const int   shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
   const int   offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
+#if JVET_M0063_BDOF_FIX
+  const int   limit = (bitDepth>12)? 2 : ((int)1 << (4 + IF_INTERNAL_PREC - bitDepth - 5));
+#else
   const int   limit = ((int)1 << (4 + IF_INTERNAL_PREC - bitDepth - 5));
+#endif
 
   int*     dotProductTemp1 = m_dotProduct1;
   int*     dotProductTemp2 = m_dotProduct2;
@@ -901,7 +909,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
   int*     dotProductTemp5 = m_dotProduct5;
   int*     dotProductTemp6 = m_dotProduct6;
 
+#if JVET_M0063_BDOF_FIX
+  xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG, bitDepth);
+#else
   xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG);
+#endif
 
   int xUnit = (width >> 2);
   int yUnit = (height >> 2);
@@ -1066,6 +1078,17 @@ void InterPrediction::xAddBIOAvg4(const Pel* src0, int src0Stride, const Pel* sr
   g_pelBufOP.addBIOAvg4(src0, src0Stride, src1, src1Stride, dst, dstStride, gradX0, gradX1, gradY0, gradY1, gradStride, width, height, tmpx, tmpy, shift, offset, clpRng);
 }
 
+#if JVET_M0063_BDOF_FIX
+void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth)
+{ // Thin dispatch wrapper: does no work itself, only calls through g_pelBufOP.bioGradFilter.
+  g_pelBufOP.bioGradFilter(pSrc, srcStride, width, height, gradStride, gradX, gradY, bitDepth); // every argument, including bitDepth, is forwarded unchanged
+}
+
+void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth)
+{ // Thin dispatch wrapper: does no work itself, only calls through g_pelBufOP.calcBIOPar.
+  g_pelBufOP.calcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, gradStride, widthG, heightG, bitDepth); // every argument, including bitDepth, is forwarded unchanged
+}
+#else
 void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY)
 {
   g_pelBufOP.bioGradFilter(pSrc, srcStride, width, height, gradStride, gradX, gradY);
@@ -1075,6 +1098,7 @@ void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, co
 {
   g_pelBufOP.calcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, gradStride, widthG, heightG);
 }
+#endif
 
 void InterPrediction::xCalcBlkGradient(int sx, int sy, int    *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize)
 {
diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h
index 145431e2777417e854efa484d18fe1fba7418ad4..741cc655a871a28fd86ec8db87b0aace19a1c1af 100644
--- a/source/Lib/CommonLib/InterPrediction.h
+++ b/source/Lib/CommonLib/InterPrediction.h
@@ -118,8 +118,13 @@ protected:
                                  );
 
   void xAddBIOAvg4              (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng);
+#if JVET_M0063_BDOF_FIX // declarations in this branch take an extra trailing bitDepth argument
+  void xBioGradFilter           (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth);
+  void xCalcBIOPar              (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth);
+#else // legacy declarations without the bitDepth argument
   void xBioGradFilter           (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY);
   void xCalcBIOPar              (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG);
+#endif // JVET_M0063_BDOF_FIX
   void xCalcBlkGradient         (int sx, int sy, int    *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize);
   void xWeightedAverage         ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied );
   void xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng );
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 7be8dbd8b367997855609dd4cc68180bd6712fcc..cb70e1dcec69ea1ecc85fef4c8dd0ba875dba20c 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -52,6 +52,8 @@
 
 #define JVET_M0116_ATMVP_LEFT_NB_FOR_OFFSET               1 // Only use left neighbor for ATMVP offset derivation, from M0273, M0240, M0116, M0338, M0204
 
+#define JVET_M0063_BDOF_FIX                               1 // BDOF bitdepth bugfix
+
 #define JVET_M0265_MV_ROUNDING_CLEANUP                    1 // Unify MV roundings and make SW/WD allignment
 
 #define JVET_M0228_REMOVE_CPMV_COMPARE                    1 // Remove CPMV comparisons for construnted affine merge candidates from JVET-M0228, M0166, M0477
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
index bdae6dcc51a4b2c2d87629bf3ff32b6e79e4bc64..e83a00ef15352cbe221bd78f9ae5eeeb7299fd71 100644
--- a/source/Lib/CommonLib/x86/BufferX86.h
+++ b/source/Lib/CommonLib/x86/BufferX86.h
@@ -162,7 +162,11 @@ void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1St
 }
 
 template< X86_VEXT vext >
+#if JVET_M0063_BDOF_FIX
+void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth)
+#else
 void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY)
+#endif
 {
   __m128i vzero = _mm_setzero_si128();
   Pel* srcTmp = src + srcStride + 1;
@@ -171,6 +175,10 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri
 
   int widthInside = width - 2 * BIO_EXTEND_SIZE;
   int heightInside = height - 2 * BIO_EXTEND_SIZE;
+#if JVET_M0063_BDOF_FIX
+  int shift1 = std::max<int>(2, (14 - bitDepth));
+#endif
+
 
   assert((widthInside & 3) == 0);
 
@@ -184,8 +192,13 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri
       __m128i mmPixLeft = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - 1)));
       __m128i mmPixRight = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + 1)));
 
+#if JVET_M0063_BDOF_FIX
+      __m128i mmGradVer = _mm_sra_epi32(_mm_sub_epi32(mmPixBottom, mmPixTop), _mm_cvtsi32_si128(shift1));
+      __m128i mmGradHor = _mm_sra_epi32(_mm_sub_epi32(mmPixRight, mmPixLeft), _mm_cvtsi32_si128(shift1));
+#else
       __m128i mmGradVer = _mm_srai_epi32(_mm_sub_epi32(mmPixBottom, mmPixTop), 4);
       __m128i mmGradHor = _mm_srai_epi32(_mm_sub_epi32(mmPixRight, mmPixLeft), 4);
+#endif
       mmGradVer = _mm_packs_epi32(mmGradVer, vzero);
       mmGradHor = _mm_packs_epi32(mmGradHor, vzero);
 
@@ -220,23 +233,41 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri
 }
 
 template< X86_VEXT vext >
+#if JVET_M0063_BDOF_FIX
+void calcBIOPar_SSE(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth)
+#else
 void calcBIOPar_SSE(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG)
+#endif
 {
+#if JVET_M0063_BDOF_FIX
+  int shift4 = std::min<int>(8, (bitDepth - 4));
+  int shift5 = std::min<int>(5, (bitDepth - 7));
+#endif
   for (int y = 0; y < heightG; y++)
   {
     int x = 0;
     for (; x < ((widthG >> 3) << 3); x += 8)
     {
+#if JVET_M0063_BDOF_FIX
+      __m128i mmSrcY0Temp = _mm_sra_epi16(_mm_loadu_si128((__m128i*)(srcY0Temp + x)), _mm_cvtsi32_si128(shift4));
+      __m128i mmSrcY1Temp = _mm_sra_epi16(_mm_loadu_si128((__m128i*)(srcY1Temp + x)), _mm_cvtsi32_si128(shift4));
+#else
       __m128i mmSrcY0Temp = _mm_srai_epi16(_mm_loadu_si128((__m128i*)(srcY0Temp + x)), 6);
       __m128i mmSrcY1Temp = _mm_srai_epi16(_mm_loadu_si128((__m128i*)(srcY1Temp + x)), 6);
+#endif
       __m128i mmGradX0 = _mm_loadu_si128((__m128i*)(gradX0 + x));
       __m128i mmGradX1 = _mm_loadu_si128((__m128i*)(gradX1 + x));
       __m128i mmGradY0 = _mm_loadu_si128((__m128i*)(gradY0 + x));
       __m128i mmGradY1 = _mm_loadu_si128((__m128i*)(gradY1 + x));
 
       __m128i mmTemp1 = _mm_sub_epi16(mmSrcY1Temp, mmSrcY0Temp);
+#if JVET_M0063_BDOF_FIX
+      __m128i mmTempX = _mm_sra_epi16(_mm_add_epi16(mmGradX0, mmGradX1), _mm_cvtsi32_si128(shift5));
+      __m128i mmTempY = _mm_sra_epi16(_mm_add_epi16(mmGradY0, mmGradY1), _mm_cvtsi32_si128(shift5));
+#else
       __m128i mmTempX = _mm_srai_epi16(_mm_add_epi16(mmGradX0, mmGradX1), 3);
       __m128i mmTempY = _mm_srai_epi16(_mm_add_epi16(mmGradY0, mmGradY1), 3);
+#endif
 
       // m_piDotProductTemp1
       __m128i mm_b = _mm_mulhi_epi16(mmTempX, mmTempX);
@@ -291,16 +322,26 @@ void calcBIOPar_SSE(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX
 
     for (; x < ((widthG >> 2) << 2); x += 4)
     {
+#if JVET_M0063_BDOF_FIX
+      __m128i mmSrcY0Temp = _mm_sra_epi16(_mm_loadl_epi64((__m128i*)(srcY0Temp + x)), _mm_cvtsi32_si128(shift4));
+      __m128i mmSrcY1Temp = _mm_sra_epi16(_mm_loadl_epi64((__m128i*)(srcY1Temp + x)), _mm_cvtsi32_si128(shift4));
+#else
       __m128i mmSrcY0Temp = _mm_srai_epi16(_mm_loadl_epi64((__m128i*)(srcY0Temp + x)), 6);
       __m128i mmSrcY1Temp = _mm_srai_epi16(_mm_loadl_epi64((__m128i*)(srcY1Temp + x)), 6);
+#endif
       __m128i mmGradX0 = _mm_loadl_epi64((__m128i*)(gradX0 + x));
       __m128i mmGradX1 = _mm_loadl_epi64((__m128i*)(gradX1 + x));
       __m128i mmGradY0 = _mm_loadl_epi64((__m128i*)(gradY0 + x));
       __m128i mmGradY1 = _mm_loadl_epi64((__m128i*)(gradY1 + x));
 
       __m128i mmTemp1 = _mm_sub_epi16(mmSrcY1Temp, mmSrcY0Temp);
+#if JVET_M0063_BDOF_FIX
+      __m128i mmTempX = _mm_sra_epi16(_mm_add_epi16(mmGradX0, mmGradX1), _mm_cvtsi32_si128(shift5));
+      __m128i mmTempY = _mm_sra_epi16(_mm_add_epi16(mmGradY0, mmGradY1), _mm_cvtsi32_si128(shift5));
+#else
       __m128i mmTempX = _mm_srai_epi16(_mm_add_epi16(mmGradX0, mmGradX1), 3);
       __m128i mmTempY = _mm_srai_epi16(_mm_add_epi16(mmGradY0, mmGradY1), 3);
+#endif
 
       // m_piDotProductTemp1
       __m128i mm_b = _mm_mulhi_epi16(mmTempX, mmTempX);
@@ -340,9 +381,15 @@ void calcBIOPar_SSE(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX
 
     for (; x < widthG; x++)
     {
+#if JVET_M0063_BDOF_FIX
+      int temp = (srcY0Temp[x] >> shift4) - (srcY1Temp[x] >> shift4);
+      int tempX = (gradX0[x] + gradX1[x]) >> shift5;
+      int tempY = (gradY0[x] + gradY1[x]) >> shift5;
+#else
       int temp = (srcY0Temp[x] >> 6) - (srcY1Temp[x] >> 6);
       int tempX = (gradX0[x] + gradX1[x]) >> 3;
       int tempY = (gradY0[x] + gradY1[x]) >> 3;
+#endif
       dotProductTemp1[x] = tempX * tempX;
       dotProductTemp2[x] = tempX * tempY;
       dotProductTemp3[x] = -tempX * temp;