From 8352de9704e430b66b8058ff684fce10d2365dd5 Mon Sep 17 00:00:00 2001
From: xiuxx <xiaoyu.xiu@interdigital.com>
Date: Wed, 24 Oct 2018 17:18:15 -0700
Subject: [PATCH] JVET_L0256 check-in for the adoption of the BIO

---
 cfg/encoder_randomaccess_vtm.cfg             |   3 +-
 source/App/EncoderApp/EncApp.cpp             |   3 +
 source/App/EncoderApp/EncAppCfg.cpp          |  13 +-
 source/App/EncoderApp/EncAppCfg.h            |   3 +
 source/Lib/CommonLib/Buffer.cpp              | 139 +++++++
 source/Lib/CommonLib/Buffer.h                |   6 +
 source/Lib/CommonLib/CommonDef.h             |   4 +
 source/Lib/CommonLib/InterPrediction.cpp     | 398 ++++++++++++++++++-
 source/Lib/CommonLib/InterPrediction.h       |  41 +-
 source/Lib/CommonLib/InterpolationFilter.cpp |  44 ++
 source/Lib/CommonLib/InterpolationFilter.h   |   8 +
 source/Lib/CommonLib/RdCost.cpp              |  16 +
 source/Lib/CommonLib/RdCost.h                |  11 +
 source/Lib/CommonLib/Slice.cpp               |   3 +
 source/Lib/CommonLib/Slice.h                 |   7 +
 source/Lib/CommonLib/TypeDef.h               |  20 +-
 source/Lib/CommonLib/x86/BufferX86.h         | 309 ++++++++++++++
 source/Lib/CommonLib/x86/RdCostX86.h         |  43 ++
 source/Lib/DecoderLib/VLCReader.cpp          |   3 +
 source/Lib/EncoderLib/EncCfg.h               |   7 +
 source/Lib/EncoderLib/EncLib.cpp             |   3 +
 source/Lib/EncoderLib/VLCWriter.cpp          |   3 +
 22 files changed, 1067 insertions(+), 20 deletions(-)

diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg
index 63ff58df9..29f790277 100644
--- a/cfg/encoder_randomaccess_vtm.cfg
+++ b/cfg/encoder_randomaccess_vtm.cfg
@@ -142,7 +142,8 @@ DepQuant                     : 1
 IMV                          : 2
 ALF                          : 1
 GBi                          : 1 
-GBiFast                      : 1 
+GBiFast                      : 1
+BIO                          : 1 
 
 # Fast tools
 PBIntraFast                  : 1
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 3c6efc537..dba92e5f4 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -227,6 +227,9 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setAffineType                                        ( m_AffineType );
 #if !REMOVE_MV_ADAPT_PREC
   m_cEncLib.setHighPrecisionMv                                   (m_highPrecisionMv);
+#endif
+#if JVET_L0256_BIO
+  m_cEncLib.setBIO                                               (m_BIO);
 #endif
   m_cEncLib.setDisableMotionCompression                          ( m_DisableMotionCompression );
   m_cEncLib.setMTTMode                                           ( m_MTT );
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index 0b7da7327..9b4797641 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -818,8 +818,11 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #if !REMOVE_MV_ADAPT_PREC 
   ("HighPrecMv",                                     m_highPrecisionMv,                                false, "High precision motion vectors for temporal merging (0:off, 1:on)  [default: off]")
 #endif
-  ("Affine",                                          m_Affine,                                        false, "Enable affine prediction (0:off, 1:on)  [default: off]")
-  ( "AffineType",                                     m_AffineType,                                     true,  "Enable affine type prediction (0:off, 1:on)  [default: on]" )
+  ("Affine",                                         m_Affine,                                         false, "Enable affine prediction (0:off, 1:on)  [default: off]")
+  ("AffineType",                                     m_AffineType,                                     true,  "Enable affine type prediction (0:off, 1:on)  [default: on]" )
+#if JVET_L0256_BIO
+  ("BIO",                                            m_BIO,                                             false, "Enable bi-directional optical flow")
+#endif    
   ("DisableMotCompression",                           m_DisableMotionCompression,                       false, "Disable motion data compression for all modes")
   ("IMV",                                             m_ImvMode,                                            2, "Adaptive MV precision Mode (IMV)\n"
                                                                                                                "\t0: disabled IMV\n"
@@ -1913,6 +1916,9 @@ bool EncAppCfg::xCheckParameter()
 #if !REMOVE_MV_ADAPT_PREC
     xConfirmPara( m_highPrecisionMv, "High precision MV for temporal merging can only be used with NEXT profile" );
     xConfirmPara( m_Affine, "Affine is only allowed with NEXT profile" );
+#endif
+#if JVET_L0256_BIO
+    xConfirmPara( m_BIO, "BIO only allowed with NEXT profile" );
 #endif
     xConfirmPara( m_DisableMotionCompression, "Disable motion data compression only allowed with NEXT profile" );
     xConfirmPara( m_MTT, "Multi type tree is only allowed with NEXT profile" );
@@ -3110,6 +3116,9 @@ void EncAppCfg::xPrintParameter()
     if( !m_QTBT ) msg( VERBOSE, "IMVMaxCand:%d ", m_ImvMaxCand );
 #if !REMOVE_MV_ADAPT_PREC 
     msg(VERBOSE, "HighPrecMv:%d ", m_highPrecisionMv);
+#endif
+#if JVET_L0256_BIO
+    msg( VERBOSE, "BIO:%d ", m_BIO );
 #endif
     msg( VERBOSE, "DisMDC:%d ", m_DisableMotionCompression );
     msg( VERBOSE, "MTT:%d ", m_MTT );
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index a9eb00f22..42a6aaf61 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -210,6 +210,9 @@ protected:
   bool      m_AffineType;
 #if !REMOVE_MV_ADAPT_PREC
   bool      m_highPrecisionMv;
+#endif
+#if JVET_L0256_BIO
+  bool      m_BIO;
 #endif
   bool      m_DisableMotionCompression;
   unsigned  m_MTT;
diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp
index f31a22044..2e9c1a42a 100644
--- a/source/Lib/CommonLib/Buffer.cpp
+++ b/source/Lib/CommonLib/Buffer.cpp
@@ -62,6 +62,138 @@ void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T
 #undef ADD_AVG_CORE_INC
 }
 
+#if JVET_L0256_BIO
+void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *pGradX0, const Pel *pGradX1, const Pel *pGradY0, const Pel*pGradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
+{ // Writes to dst the clipped combination of src0 and src1 plus, per sample, a correction built from the gradient differences weighted by tmpx / tmpy.
+  int b = 0; // correction term for the sample currently being processed
+
+  for (int y = 0; y < height; y++)
+  {
+    for (int x = 0; x < width; x += 4) // body is unrolled: four consecutive samples are handled per iteration
+    {
+      b = tmpx * (pGradX0[x] - pGradX1[x]) + tmpy * (pGradY0[x] - pGradY1[x]); // weighted horizontal + vertical gradient difference
+      b = ((b + 1) >> 1); // halve with rounding
+      dst[x] = ClipPel((int16_t)rightShift((src0[x] + src1[x] + b + offset), shift), clpRng); // add offset, rightShift by shift, clip with clpRng
+
+      b = tmpx * (pGradX0[x + 1] - pGradX1[x + 1]) + tmpy * (pGradY0[x + 1] - pGradY1[x + 1]); // same computation for sample x + 1
+      b = ((b + 1) >> 1);
+      dst[x + 1] = ClipPel((int16_t)rightShift((src0[x + 1] + src1[x + 1] + b + offset), shift), clpRng);
+
+      b = tmpx * (pGradX0[x + 2] - pGradX1[x + 2]) + tmpy * (pGradY0[x + 2] - pGradY1[x + 2]); // same computation for sample x + 2
+      b = ((b + 1) >> 1);
+      dst[x + 2] = ClipPel((int16_t)rightShift((src0[x + 2] + src1[x + 2] + b + offset), shift), clpRng);
+
+      b = tmpx * (pGradX0[x + 3] - pGradX1[x + 3]) + tmpy * (pGradY0[x + 3] - pGradY1[x + 3]); // same computation for sample x + 3
+      b = ((b + 1) >> 1);
+      dst[x + 3] = ClipPel((int16_t)rightShift((src0[x + 3] + src1[x + 3] + b + offset), shift), clpRng);
+    }
+    dst += dstStride;       src0 += src0Stride;     src1 += src1Stride; // advance destination and both sources to the next row
+    pGradX0 += gradStride; pGradX1 += gradStride; pGradY0 += gradStride; pGradY1 += gradStride; // all four gradient planes share gradStride
+  }
+}
+
+void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* pGradX, Pel* pGradY)
+{ // Fills pGradX / pGradY with neighbour differences of pSrc for the inner region, then replicates the outermost computed columns and rows into the surrounding border.
+  Pel* piSrcTmp = pSrc + srcStride + 1; // start one row down and one column in so the x - 1 and y - 1 neighbours are addressable
+  Pel* piGradXTmp = pGradX + gradStride + 1; // gradient outputs start at the same (row 1, column 1) position
+  Pel* piGradYTmp = pGradY + gradStride + 1;
+
+  for (int y = 0; y < (height - 2 * JVET_L0256_BIO_EXTEND_SIZE); y++) // computed region is (width - 2 * EXT) x (height - 2 * EXT)
+  {
+    for (int x = 0; x < (width - 2 * JVET_L0256_BIO_EXTEND_SIZE); x++)
+    {
+      piGradYTmp[x] = (piSrcTmp[x + srcStride] - piSrcTmp[x - srcStride]) >> 4; // sample below minus sample above, scaled down by 4 bits
+      piGradXTmp[x] = (piSrcTmp[x + 1] - piSrcTmp[x - 1]) >> 4; // sample to the right minus sample to the left, scaled down by 4 bits
+    }
+    piGradXTmp += gradStride;
+    piGradYTmp += gradStride;
+    piSrcTmp += srcStride;
+  }
+
+  piGradXTmp = pGradX + gradStride + 1; // rewind to the first computed sample for horizontal padding
+  piGradYTmp = pGradY + gradStride + 1;
+  for (int y = 0; y < (height - 2 * JVET_L0256_BIO_EXTEND_SIZE); y++)
+  {
+    piGradXTmp[-1] = piGradXTmp[0]; // copy the first computed column into the column to its left
+    piGradXTmp[width - 2 * JVET_L0256_BIO_EXTEND_SIZE] = piGradXTmp[width - 2 * JVET_L0256_BIO_EXTEND_SIZE - 1]; // copy the last computed column into the column to its right
+    piGradXTmp += gradStride;
+
+    piGradYTmp[-1] = piGradYTmp[0]; // same left / right replication for the vertical gradient plane
+    piGradYTmp[width - 2 * JVET_L0256_BIO_EXTEND_SIZE] = piGradYTmp[width - 2 * JVET_L0256_BIO_EXTEND_SIZE - 1];
+    piGradYTmp += gradStride;
+  }
+
+  piGradXTmp = pGradX + gradStride; // row 1, column 0: vertical padding copies whole rows of `width` samples
+  piGradYTmp = pGradY + gradStride;
+  ::memcpy(piGradXTmp - gradStride, piGradXTmp, sizeof(Pel)*(width)); // first computed row -> row above it
+  ::memcpy(piGradXTmp + (height - 2 * JVET_L0256_BIO_EXTEND_SIZE)*gradStride, piGradXTmp + (height - 2 * JVET_L0256_BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width)); // last computed row -> row below it
+  ::memcpy(piGradYTmp - gradStride, piGradYTmp, sizeof(Pel)*(width)); // same top / bottom replication for the vertical gradient plane
+  ::memcpy(piGradYTmp + (height - 2 * JVET_L0256_BIO_EXTEND_SIZE)*gradStride, piGradYTmp + (height - 2 * JVET_L0256_BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
+}
+
+void calcBIOParCore(const Pel* pSrcY0Temp, const Pel* pSrcY1Temp, const Pel* pGradX0, const Pel* pGradX1, const Pel* pGradY0, const Pel* pGradY1, int* m_piDotProductTemp1, int* m_piDotProductTemp2, int* m_piDotProductTemp3, int* m_piDotProductTemp5, int* m_piDotProductTemp6, const int iSrc0Stride, const int iSrc1Stride, const int iGradStride, const int iWidthG, const int iHeightG)
+{ // For every position of an iWidthG x iHeightG area, stores five products of the summed gradients and the prediction difference into the m_piDotProductTemp* arrays.
+  for (int y = 0; y < iHeightG; y++)
+  {
+    for (int x = 0; x < iWidthG; x++)
+    {
+      int temp = (pSrcY0Temp[x] >> 6) - (pSrcY1Temp[x] >> 6); // difference of the two predictions, each reduced by 6 bits first
+      int tempX = (pGradX0[x] + pGradX1[x]) >> 3; // summed horizontal gradients, reduced by 3 bits
+      int tempY = (pGradY0[x] + pGradY1[x]) >> 3; // summed vertical gradients, reduced by 3 bits
+      m_piDotProductTemp1[x] = tempX * tempX; // Gx * Gx
+      m_piDotProductTemp2[x] = tempX * tempY; // Gx * Gy
+      m_piDotProductTemp3[x] = -tempX * temp; // -Gx * dI
+      m_piDotProductTemp5[x] = tempY * tempY; // Gy * Gy
+      m_piDotProductTemp6[x] = -tempY * temp; // -Gy * dI
+    }
+    pSrcY0Temp += iSrc0Stride; // inputs advance by their own strides
+    pSrcY1Temp += iSrc1Stride;
+    pGradX0 += iGradStride;
+    pGradX1 += iGradStride;
+    pGradY0 += iGradStride;
+    pGradY1 += iGradStride;
+    m_piDotProductTemp1 += iWidthG; // outputs are packed with a row pitch of iWidthG ints
+    m_piDotProductTemp2 += iWidthG;
+    m_piDotProductTemp3 += iWidthG;
+    m_piDotProductTemp5 += iWidthG;
+    m_piDotProductTemp6 += iWidthG;
+  }
+}
+
+void calcBlkGradientCore(int sx, int sy, int     *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize)
+{ // Adds to the five s* accumulators every array entry in a (unitSize + 2 * EXT) square window around the given positions; sx, sy and height are not read here.
+  int     *pGx2 = arraysGx2;
+  int     *pGy2 = arraysGy2;
+  int     *pGxGy = arraysGxGy;
+  int     *pGxdI = arraysGxdI;
+  int     *pGydI = arraysGydI;
+
+  // step back JVET_L0256_BIO_EXTEND_SIZE rows (row pitch is width) so the window starts above the unit
+  pGx2 -= (JVET_L0256_BIO_EXTEND_SIZE*width);
+  pGy2 -= (JVET_L0256_BIO_EXTEND_SIZE*width);
+  pGxGy -= (JVET_L0256_BIO_EXTEND_SIZE*width);
+  pGxdI -= (JVET_L0256_BIO_EXTEND_SIZE*width);
+  pGydI -= (JVET_L0256_BIO_EXTEND_SIZE*width);
+
+  for (int y = -JVET_L0256_BIO_EXTEND_SIZE; y < unitSize + JVET_L0256_BIO_EXTEND_SIZE; y++)
+  {
+    for (int x = -JVET_L0256_BIO_EXTEND_SIZE; x < unitSize + JVET_L0256_BIO_EXTEND_SIZE; x++) // negative x reaches the columns left of the unit
+    {
+      sGx2 += pGx2[x]; // accumulators are added to, not reset: the caller supplies the starting values
+      sGy2 += pGy2[x];
+      sGxGy += pGxGy[x];
+      sGxdI += pGxdI[x];
+      sGydI += pGydI[x];
+    }
+    pGx2 += width;
+    pGy2 += width;
+    pGxGy += width;
+    pGxdI += width;
+    pGydI += width;
+  }
+}
+#endif
+
 #if ENABLE_SIMD_OPT_GBI && JVET_L0646_GBI
 void removeWeightHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height, int shift, int gbiWeight)
 {
@@ -138,6 +270,13 @@ PelBufferOps::PelBufferOps()
   linTf4 = linTfCore<Pel>;
   linTf8 = linTfCore<Pel>;
 
+#if JVET_L0256_BIO
+  addBIOAvg4      = addBIOAvgCore;
+  bioGradFilter   = gradFilterCore;
+  calcBIOPar      = calcBIOParCore;
+  calcBlkGradient = calcBlkGradientCore;
+#endif
+
 #if ENABLE_SIMD_OPT_GBI
   removeWeightHighFreq8 = removeWeightHighFreq;
   removeWeightHighFreq4 = removeWeightHighFreq;
diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h
index fdf3b962f..b8e69315f 100644
--- a/source/Lib/CommonLib/Buffer.h
+++ b/source/Lib/CommonLib/Buffer.h
@@ -68,6 +68,12 @@ struct PelBufferOps
   void ( *reco8 )         ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height,                                   const ClpRng& clpRng );
   void ( *linTf4 )        ( const Pel* src0, int src0Stride,                                  Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
   void ( *linTf8 )        ( const Pel* src0, int src0Stride,                                  Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
+#if JVET_L0256_BIO
+  void(*addBIOAvg4)    (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *pGradX0, const Pel *pGradX1, const Pel *pGradY0, const Pel*pGradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng);
+  void(*bioGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* pGradX, Pel* pGradY);
+  void(*calcBIOPar)    (const Pel* pSrcY0Temp, const Pel* pSrcY1Temp, const Pel* pGradX0, const Pel* pGradX1, const Pel* pGradY0, const Pel* pGradY1, int* m_piDotProductTemp1, int* m_piDotProductTemp2, int* m_piDotProductTemp3, int* m_piDotProductTemp5, int* m_piDotProductTemp6, const int iSrc0Stride, const int iSrc1Stride, const int iGradStride, const int iWidthG, const int iHeightG);
+  void(*calcBlkGradient)(int sx, int sy, int    *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize);
+#endif
 #if ENABLE_SIMD_OPT_GBI
   void ( *removeWeightHighFreq8)  ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
   void ( *removeWeightHighFreq4)  ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index 6e853ecfc..98174dbeb 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -364,6 +364,10 @@ static const unsigned C806_ALF_TEMPPRED_NUM =                      6;
 
 static const int NTAPS_LUMA               =                         8; ///< Number of taps for luma
 static const int NTAPS_CHROMA             =                         4; ///< Number of taps for chroma
+#if JVET_L0256_BIO
+static const int NTAPS_BILINEAR           =                         2; ///< Number of taps for bilinear filter
+#endif
+
 // ====================================================================================================================
 // Macro functions
 // ====================================================================================================================
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index c6ea5e914..4720c41ef 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -55,6 +55,13 @@ InterPrediction::InterPrediction()
   m_currChromaFormat( NUM_CHROMA_FORMAT )
 , m_maxCompIDToPred ( MAX_NUM_COMPONENT )
 , m_pcRdCost        ( nullptr )
+#if JVET_L0256_BIO
+, m_pGradX0(nullptr)
+, m_pGradY0(nullptr)
+, m_pGradX1(nullptr)
+, m_pGradY1(nullptr)
+, m_subPuMC(false)
+#endif
 {
   for( uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++ )
   {
@@ -109,6 +116,13 @@ void InterPrediction::destroy()
       m_filteredBlockTmp[i][c] = nullptr;
     }
   }
+
+#if JVET_L0256_BIO
+  xFree(m_pGradX0);   m_pGradX0 = nullptr;
+  xFree(m_pGradY0);   m_pGradY0 = nullptr;
+  xFree(m_pGradX1);   m_pGradX1 = nullptr;
+  xFree(m_pGradY1);   m_pGradY1 = nullptr;
+#endif
 }
 
 void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
@@ -127,8 +141,13 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
   {
     for( uint32_t c = 0; c < MAX_NUM_COMPONENT; c++ )
     {
+#if JVET_L0256_BIO
+      int extWidth = MAX_CU_SIZE + (2 * JVET_L0256_BIO_EXTEND_SIZE + 2) + 16;
+      int extHeight = MAX_CU_SIZE + (2 * JVET_L0256_BIO_EXTEND_SIZE + 2) + 1;
+#else
       int extWidth  = MAX_CU_SIZE + 16;
       int extHeight = MAX_CU_SIZE + 1;
+#endif
       for( uint32_t i = 0; i < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS; i++ )
       {
         m_filteredBlockTmp[i][c] = ( Pel* ) xMalloc( Pel, ( extWidth + 4 ) * ( extHeight + 7 + 4 ) );
@@ -148,7 +167,13 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
 
 
     m_iRefListIdx = -1;
-    
+  
+#if JVET_L0256_BIO
+    m_pGradX0 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE);
+    m_pGradY0 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE);
+    m_pGradX1 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE);
+    m_pGradY1 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE);
+#endif
   }
 
 #if !JVET_J0090_MEMORY_BANDWITH_MEASURE
@@ -264,6 +289,10 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R
   int  fstStep = (!verMC ? puHeight : puWidth);
   int  secStep = (!verMC ? puWidth : puHeight);
 
+#if JVET_L0256_BIO
+  m_subPuMC = true;
+#endif
+
   for (int fstDim = fstStart; fstDim < fstEnd; fstDim += fstStep)
   {
     for (int secDim = secStart; secDim < secEnd; secDim += secStep)
@@ -299,10 +328,16 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R
       secDim = later - secStep;
     }
   }
+#if JVET_L0256_BIO
+  m_subPuMC = false;
+#endif
 }
 
 
 void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi 
+#if JVET_L0256_BIO
+                                   ,const bool& bBIOApplied /*=false*/
+#endif
 )
 {
   const SPS &sps = *pu.cs->sps;
@@ -332,12 +367,18 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList&
     const ComponentID compID = ComponentID( comp );
     if ( pu.cu->affine )
     {
+#if JVET_L0256_BIO
+      CHECK( bBIOApplied, "BIO is not allowed with affine" );
+#endif
       xPredAffineBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx ), mv, pcYuvPred, bi, pu.cu->slice->clpRng( compID ) );
     }
     else
     {
       xPredInterBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx ), mv[0], pcYuvPred, bi, pu.cu->slice->clpRng( compID )
-                    );
+#if JVET_L0256_BIO
+                    ,bBIOApplied
+#endif
+      );
     }
   }
 }
@@ -347,6 +388,36 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
   const PPS   &pps   = *pu.cs->pps;
   const Slice &slice = *pu.cs->slice;
 
+#if JVET_L0256_BIO
+  bool bBIOApplied = false;
+  if (pu.cs->sps->getSpsNext().getUseBIO())
+  {
+    if (pu.cu->affine || m_subPuMC)
+    {
+      bBIOApplied = false;
+    }
+    else
+    {
+      const bool bBIOcheck0 = !(pps.getWPBiPred() && slice.getSliceType() == B_SLICE);
+      const bool bBIOcheck1 = !(pps.getUseWP() && slice.getSliceType() == P_SLICE);
+      if (bBIOcheck0
+        && bBIOcheck1
+        && PU::isBiPredFromDifferentDir(pu)
+        && !(pu.Y().height == 4 || (pu.Y().width == 4 && pu.Y().height == 8))
+       )
+      {
+        bBIOApplied = true;
+      }
+    }
+
+#if JVET_L0646_GBI
+    if (pu.cu->cs->sps->getSpsNext().getUseGBi() && bBIOApplied && pu.cu->GBiIdx != GBI_DEFAULT)
+    {
+      bBIOApplied = false;
+    }
+#endif
+  }
+#endif
 
   for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
   {
@@ -367,6 +438,9 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
     if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0)
     {
       xPredInterUni ( pu, eRefPicList, pcMbBuf, true
+#if JVET_L0256_BIO
+                     ,bBIOApplied 
+#endif
                      );
     }
     else
@@ -399,13 +473,19 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
   }
   else
   {
+#if JVET_L0256_BIO
+    xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bBIOApplied );
+#else
     xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs() );
+#endif
   }
 }
 
-
 void InterPrediction::xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
-                                    )
+#if JVET_L0256_BIO
+                                     ,const bool& bBIOApplied /*=false*/
+#endif
+)
 {
   JVET_J0090_SET_REF_PICTURE( refPic, compID );
   const ChromaFormat  chFmt = pu.chromaFormat;
@@ -446,24 +526,75 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
     refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, pu.blocks[compID].size() ) );
   }
 
+#if JVET_L0256_BIO
+  // backup data
+  int backupWidth = width;
+  int backupHeight = height;
+  Pel *backupDstBufPtr = dstBuf.buf;
+  int backupDstBufStride = dstBuf.stride;
+
+  if (bBIOApplied && compID == COMPONENT_Y)
+  {
+    width = width + 2 * JVET_L0256_BIO_EXTEND_SIZE + 2;
+    height = height + 2 * JVET_L0256_BIO_EXTEND_SIZE + 2;
+
+    // change MC output
+    dstBuf.stride = width;
+    dstBuf.buf = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + 2 * dstBuf.stride + 2;
+  }
+#endif
+
   if( yFrac == 0 )
   {
+#if JVET_L0256_BIO
+    m_if.filterHor(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng);
+#else
     m_if.filterHor(compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, width, height, xFrac, rndRes, chFmt, clpRng);
+#endif
   }
   else if( xFrac == 0 )
   {
+#if JVET_L0256_BIO
+    m_if.filterVer(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng);
+#else
     m_if.filterVer(compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, width, height, yFrac, true, rndRes, chFmt, clpRng);
+#endif
   }
   else
   {
-      PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]);
+    PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]);
+#if JVET_L0256_BIO
+    tmpBuf.stride = dstBuf.stride;
+#endif
 
     int vFilterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA;
+#if JVET_L0256_BIO
+    m_if.filterHor(compID, (Pel*)refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng);
+#else
     m_if.filterHor(compID, (Pel*) refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, width, height + vFilterSize - 1, xFrac, false,         chFmt, clpRng);
+#endif
     JVET_J0090_SET_CACHE_ENABLE( false );
+#if JVET_L0256_BIO
+    m_if.filterVer(compID, (Pel*)tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng);
+#else
     m_if.filterVer(compID, (Pel*) tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, width, height,                   yFrac, false, rndRes, chFmt, clpRng);
+#endif
     JVET_J0090_SET_CACHE_ENABLE( true );
   }
+#if JVET_L0256_BIO
+  if (bBIOApplied && compID == COMPONENT_Y)
+  {
+    refBuf.buf = refBuf.buf - refBuf.stride - 1;
+    dstBuf.buf = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + dstBuf.stride + 1;
+    bioSampleExtendBilinearFilter(refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, width - 2, height - 2, 1, xFrac, yFrac, rndRes, chFmt, clpRng);
+
+    // restore data 
+    width = backupWidth;
+    height = backupHeight;
+    dstBuf.buf = backupDstBufPtr;
+    dstBuf.stride = backupDstBufStride;
+  }
+#endif
 }
 
 void InterPrediction::xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng )
@@ -612,8 +743,224 @@ int getMSB( unsigned x )
   return msb;
 }
 
+#if JVET_L0256_BIO
+void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &pcYuvSrc0, const CPelUnitBuf &pcYuvSrc1, const int &iRefIdx0, const int &iRefIdx1, PelUnitBuf &pcYuvDst, const BitDepths &clipBitDepths)
+{ // Writes the luma plane of pcYuvDst from the two extended predictions held in m_filteredBlockTmp[2] / [3], deriving a (tmpx, tmpy) correction per 4x4 sub-block.
+  const int     iHeight = pcYuvDst.Y().height;
+  const int     iWidth = pcYuvDst.Y().width;
+  int           iHeightG = iHeight + 2 * JVET_L0256_BIO_EXTEND_SIZE; // block size including the extension on each side
+  int           iWidthG = iWidth + 2 * JVET_L0256_BIO_EXTEND_SIZE;
+  int           offsetPos = iWidthG*JVET_L0256_BIO_EXTEND_SIZE + JVET_L0256_BIO_EXTEND_SIZE; // offset of the first non-extended sample inside an iWidthG-pitched array
+
+  Pel*          pGradX0 = m_pGradX0;
+  Pel*          pGradX1 = m_pGradX1;
+  Pel*          pGradY0 = m_pGradY0;
+  Pel*          pGradY1 = m_pGradY1;
+
+  int           stridePredMC = iWidthG + 2; // row pitch of the predictions stored in m_filteredBlockTmp[2] / [3]
+  const Pel*    pSrcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + stridePredMC + 1; // skip one row and one column of the stored prediction
+  const Pel*    pSrcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + stridePredMC + 1;
+  const int     iSrc0Stride = stridePredMC;
+  const int     iSrc1Stride = stridePredMC;
+
+  Pel*          pDstY = pcYuvDst.Y().buf;
+  const int     iDstStride = pcYuvDst.Y().stride;
+  const Pel*    pSrcY0Temp = pSrcY0;
+  const Pel*    pSrcY1Temp = pSrcY1;
+
+  for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) // per list: compute gradients, then pad the stored prediction
+  {
+    Pel* dstTempPtr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + stridePredMC + 1;
+    Pel* pGradY = (refList == 0) ? m_pGradY0 : m_pGradY1;
+    Pel* pGradX = (refList == 0) ? m_pGradX0 : m_pGradX1;
+
+    g_pelBufOP.bioGradFilter(dstTempPtr, stridePredMC, iWidthG, iHeightG, iWidthG, pGradX, pGradY);
+    Pel* pcPadStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 2;
+    for (int y = 0; y< iHeight; y++)
+    {
+      pcPadStr[-1] = pcPadStr[0]; // overwrite the column left of the block with the block's first column
+      pcPadStr[iWidth] = pcPadStr[iWidth - 1]; // and the column right of the block with its last column
+      pcPadStr += stridePredMC;
+    }
+
+    pcPadStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 1;
+    ::memcpy(pcPadStr - stridePredMC, pcPadStr, sizeof(Pel)*(iWidthG)); // copy the first block row into the row above it
+    ::memcpy(pcPadStr + iHeight*stridePredMC, pcPadStr + (iHeight - 1)*stridePredMC, sizeof(Pel)*(iWidthG)); // copy the last block row into the row below it
+  }
+
+  const ClpRng& clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y);
+  const int   bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
+  const int   shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
+  const int   offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
+  const int   limit = ((int)1 << (4 + IF_INTERNAL_PREC - bitDepth - 5)); // clipping bound applied to tmpx / tmpy below
+
+  int*     m_piDotProductTemp1 = m_piDotProduct1;
+  int*     m_piDotProductTemp2 = m_piDotProduct2;
+  int*     m_piDotProductTemp3 = m_piDotProduct3;
+  int*     m_piDotProductTemp5 = m_piDotProduct5;
+  int*     m_piDotProductTemp6 = m_piDotProduct6;
+
+  g_pelBufOP.calcBIOPar(pSrcY0Temp, pSrcY1Temp, pGradX0, pGradX1, pGradY0, pGradY1, m_piDotProductTemp1, m_piDotProductTemp2, m_piDotProductTemp3, m_piDotProductTemp5, m_piDotProductTemp6, iSrc0Stride, iSrc1Stride, iWidthG, iWidthG, iHeightG);
+
+  int xUnit = (iWidth >> 2); // number of 4x4 sub-blocks per row / column
+  int yUnit = (iHeight >> 2);
+
+  Pel *pDstY0 = pDstY;
+  pGradX0 = m_pGradX0; pGradX1 = m_pGradX1;
+  pGradY0 = m_pGradY0; pGradY1 = m_pGradY1;
+
+  for (int yu = 0; yu < yUnit; yu++)
+  {
+    for (int xu = 0; xu < xUnit; xu++)
+    {
+      if (m_bioPredSubBlkDist[yu*xUnit + xu] < m_bioSubBlkDistThres) // distortion below the sub-block threshold: plain addAvg4, no correction
+      {
+        pSrcY0Temp = pSrcY0 + (stridePredMC + 1) + ((yu*iSrc0Stride + xu) << 2);
+        pSrcY1Temp = pSrcY1 + (stridePredMC + 1) + ((yu*iSrc1Stride + xu) << 2);
+        pDstY0 = pDstY + ((yu*iDstStride + xu) << 2);
+        g_pelBufOP.addAvg4(pSrcY0Temp, iSrc0Stride, pSrcY1Temp, iSrc1Stride, pDstY0, iDstStride, (1 << 2), (1 << 2), shiftNum, offset, clpRng);
+        continue;
+      }
+
+      int     sGxdI = 0, sGydI = 0, sGxGy = 0, sGx2 = 0, sGy2 = 0; // window sums, filled by calcBlkGradient
+      int     tmpx = 0, tmpy = 0;
 
+      m_piDotProductTemp1 = m_piDotProduct1 + offsetPos + ((yu*iWidthG + xu) << 2);
+      m_piDotProductTemp2 = m_piDotProduct2 + offsetPos + ((yu*iWidthG + xu) << 2);
+      m_piDotProductTemp3 = m_piDotProduct3 + offsetPos + ((yu*iWidthG + xu) << 2);
+      m_piDotProductTemp5 = m_piDotProduct5 + offsetPos + ((yu*iWidthG + xu) << 2);
+      m_piDotProductTemp6 = m_piDotProduct6 + offsetPos + ((yu*iWidthG + xu) << 2);
+
+      g_pelBufOP.calcBlkGradient(xu << 2, yu << 2, m_piDotProductTemp1, m_piDotProductTemp2, m_piDotProductTemp3, m_piDotProductTemp5, m_piDotProductTemp6, sGx2, sGy2, sGxGy, sGxdI, sGydI, iWidthG, iHeightG, (1 << 2));
+
+      if (sGx2 > 0) // tmpx stays 0 when there is no horizontal gradient energy
+      {
+        tmpx = rightShiftMSB(sGxdI << 3, sGx2);
+        tmpx = Clip3(-limit, limit, tmpx);
+      }
+      if (sGy2 > 0) // tmpy stays 0 when there is no vertical gradient energy
+      {
+        int     mainsGxGy = sGxGy >> 12; // sGxGy is split into its upper part ...
+        int     secsGxGy = sGxGy & ((1 << 12) - 1); // ... and its low 12 bits before being multiplied by tmpx
+        int     tmpData = tmpx * mainsGxGy;
+        tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1;
+        tmpy = rightShiftMSB(((sGydI << 3) - tmpData), sGy2);
+        tmpy = Clip3(-limit, limit, tmpy);
+      }
+
+      pSrcY0Temp = pSrcY0 + (stridePredMC + 1) + ((yu*iSrc0Stride + xu) << 2);
+      pSrcY1Temp = pSrcY1 + (stridePredMC + 1) + ((yu*iSrc1Stride + xu) << 2); // list-1 pointer uses the list-1 stride, matching the early-out path above
+      pGradX0 = m_pGradX0 + offsetPos + ((yu*iWidthG + xu) << 2);
+      pGradX1 = m_pGradX1 + offsetPos + ((yu*iWidthG + xu) << 2);
+      pGradY0 = m_pGradY0 + offsetPos + ((yu*iWidthG + xu) << 2);
+      pGradY1 = m_pGradY1 + offsetPos + ((yu*iWidthG + xu) << 2);
+
+      pDstY0 = pDstY + ((yu*iDstStride + xu) << 2);
+      g_pelBufOP.addBIOAvg4(pSrcY0Temp, iSrc0Stride, pSrcY1Temp, iSrc1Stride, pDstY0, iDstStride, pGradX0, pGradX1, pGradY0, pGradY1, iWidthG, (1 << 2), (1 << 2), (int)tmpx, (int)tmpy, shiftNum, offset, clpRng);
+    }  // xu
+  }  // yu
+}
+
+void InterPrediction::bioSampleExtendBilinearFilter(Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int dim, int fracX, int fracY, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng)
+{ // Filters only a dim-wide frame (top, left, bottom, right strips) of the width x height area from src into dst; the interior is not written here.
+  Pel const* pSrc = nullptr;
+  Pel*       pDst = nullptr;
+
+  int vFilterSize = NTAPS_LUMA; // used for the extra rows fetched in the two-pass (fracX != 0 && fracY != 0) path
+  int widthTmp = 0;
+  int heightTmp = 0;
+
+  for (int cand = 0; cand < 4; cand++)  // top, left, bottom and right
+  {
+
+    if (cand == 0)  // top
+    {
+      pSrc = src;
+      pDst = dst;
+      widthTmp = width; // full-width strip, dim rows high
+      heightTmp = dim;
+    }
+    else if (cand == 1)  // left
+    {
+      pSrc = src + dim*srcStride; // starts below the top strip
+      pDst = dst + dim*dstStride;
+      widthTmp = dim;
+      heightTmp = height - 2 * dim; // excludes the rows already covered by the top and bottom strips
+    }
+    else if (cand == 2)  // bottom
+    {
+      pSrc = src + (height - dim)*srcStride;
+      pDst = dst + (height - dim)*dstStride;
+      widthTmp = width;
+      heightTmp = dim;
+    }
+    else if (cand == 3)  // right
+    {
+      pSrc = src + dim*srcStride + width - dim;
+      pDst = dst + dim*dstStride + width - dim;
+      widthTmp = dim;
+      heightTmp = height - 2 * dim;
+    }
+
+    if (fracY == 0) // horizontal-only filtering
+    {
+      m_if.filterHor(COMPONENT_Y, pSrc, srcStride, pDst, dstStride, widthTmp, heightTmp, fracX, isLast, fmt, clpRng, 1); // NOTE(review): trailing 1 presumably selects the bilinear filter - confirm in InterpolationFilter
+    }
+    else if (fracX == 0) // vertical-only filtering
+    {
+      m_if.filterVer(COMPONENT_Y, pSrc, srcStride, pDst, dstStride, widthTmp, heightTmp, fracY, true, isLast, fmt, clpRng, 1);
+    }
+    else // both fractions set: horizontal pass into a temporary buffer, then vertical pass into dst
+    {
+      PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][COMPONENT_Y], Size(width, height));
+      tmpBuf.stride = width;
+
+      m_if.filterHor(COMPONENT_Y, pSrc - ((vFilterSize >> 1) - 1) * srcStride, srcStride, tmpBuf.buf, tmpBuf.stride, widthTmp, heightTmp + vFilterSize - 1, fracX, false, fmt, clpRng, 1);
+      m_if.filterVer(COMPONENT_Y, tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, pDst, dstStride, widthTmp, heightTmp, fracY, false, isLast, fmt, clpRng, 1);
+    }
+  }
+}
+
+bool InterPrediction::xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths)
+{ // Stores a distortion between the two predictions for every 4x4 sub-block in m_bioPredSubBlkDist, sets both thresholds, and returns whether the total reaches m_bioDistThres.
+  const int     width = pu.lwidth();
+  const int     height = pu.lheight();
+  const int     clipbd = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
+  const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(clipbd);
+  const int     shift = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
+  const int     xUnit = (width >> 2); // number of 4x4 sub-blocks per row / column
+  const int     yUnit = (height >> 2);
+
+  m_bioDistThres = (shift <= 5) ? (((32 << (clipbd - 8))*width*height) >> (5 - shift)) : (((32 << (clipbd - 8))*width*height) << (shift - 5)); // whole-block threshold, proportional to width * height
+  m_bioSubBlkDistThres = (shift <= 5) ? (((64 << (clipbd - 8)) << 4) >> (5 - shift)) : (((64 << (clipbd - 8)) << 4) << (shift - 5)); // per-sub-block threshold, fixed << 4 factor
+
+  m_bioDistThres >>= distortionShift; // NOTE(review): presumably aligns the thresholds with the precision of the distFunc results - confirm
+  m_bioSubBlkDistThres >>= distortionShift;
+
+  DistParam cDistParam;
+  Distortion dist = 0; // running total over all sub-blocks
+  for (int yu = 0, blkIdx = 0; yu < yUnit; yu++)
+  {
+    for (int xu = 0; xu < xUnit; xu++, blkIdx++) // blkIdx walks the sub-blocks in raster order
+    {
+      const Pel* pPred0 = pYuvSrc0 + ((yu*src0Stride + xu) << 2); // top-left sample of sub-block (xu, yu) in each prediction
+      const Pel* pPred1 = pYuvSrc1 + ((yu*src1Stride + xu) << 2);
+
+      m_pcRdCost->setDistParam(cDistParam, pPred0, pPred1, src0Stride, src1Stride, clipbd, COMPONENT_Y, (1 << 2), (1 << 2), 0, 1, false, true);
+      m_bioPredSubBlkDist[blkIdx] = cDistParam.distFunc(cDistParam); // kept per sub-block; applyBiOptFlow compares it with m_bioSubBlkDistThres
+      dist += m_bioPredSubBlkDist[blkIdx];
+    }
+  }
+
+  return (dist >= m_bioDistThres); // true when the accumulated distortion is at least the block threshold
+}
+#endif
+
+#if JVET_L0256_BIO
+void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bBIOApplied )
+#else
 void InterPrediction::xWeightedAverage( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs )
+#endif
 {
   const int iRefIdx0 = pu.refIdx[0];
   const int iRefIdx1 = pu.refIdx[1];
@@ -623,11 +970,35 @@ void InterPrediction::xWeightedAverage( const PredictionUnit& pu, const CPelUnit
 #if JVET_L0646_GBI
     if( pu.cu->GBiIdx != GBI_DEFAULT )
     {
+#if JVET_L0256_BIO
+      CHECK(bBIOApplied, "GBi is disallowed with BIO");
+#endif
       pcYuvDst.addWeightedAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, pu.cu->GBiIdx);
       return;
     }
 #endif
+#if JVET_L0256_BIO
+    if (bBIOApplied)
+    {
+      const int  src0Stride = pu.lwidth() + 2 * JVET_L0256_BIO_EXTEND_SIZE + 2;
+      const int  src1Stride = pu.lwidth() + 2 * JVET_L0256_BIO_EXTEND_SIZE + 2;
+      const Pel* pSrcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + 2 * src0Stride + 2;
+      const Pel* pSrcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + 2 * src1Stride + 2;
+
+      bool bioEnabled = xCalcBiPredSubBlkDist(pu, pSrcY0, src0Stride, pSrcY1, src1Stride, clipBitDepths);
+      if (bioEnabled)
+      {
+        applyBiOptFlow(pu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths);
+      }
+      else
+      {
+        pcYuvDst.bufs[0].addAvg(CPelBuf(pSrcY0, src0Stride, pu.lumaSize()), CPelBuf(pSrcY1, src1Stride, pu.lumaSize()), clpRngs.comp[0]);
+      }
+    }
+    pcYuvDst.addAvg(pcYuvSrc0, pcYuvSrc1, clpRngs, bBIOApplied);
+#else
     pcYuvDst.addAvg( pcYuvSrc0, pcYuvSrc1, clpRngs );
+#endif
   }
   else if( iRefIdx0 >= 0 && iRefIdx1 < 0 )
   {
@@ -694,8 +1065,25 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &
   );
 }
 
+#if JVET_L0256_BIO
+int InterPrediction::rightShiftMSB(int numer, int denom)
+{
+  // Approximates numer / denom by numer >> floor(log2(denom)); denom must be positive,
+  // otherwise the shift count would be negative, which is undefined behaviour.
+  CHECK(denom <= 0, "rightShiftMSB requires a positive denominator");
+
+  int shiftIdx = 0;
+  for (int rem = denom >> 1; rem != 0; rem >>= 1)
+  {
+    shiftIdx++;
+  }
 
+  // Shifting denom down instead of comparing it against (1 << msbIdx) never forms 1 << 31,
+  // so denominators of 2^30 and above get the correct shift of 30.
 
+  return (numer >> shiftIdx);
+}
+#endif
 
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
 void InterPrediction::cacheAssign( CacheModel *cache )
diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h
index c58fed664..01b13c84e 100644
--- a/source/Lib/CommonLib/InterPrediction.h
+++ b/source/Lib/CommonLib/InterPrediction.h
@@ -60,10 +60,25 @@ class Mv;
 // Class definition
 // ====================================================================================================================
 
+#if JVET_L0256_BIO
+#define BIO_TEMP_BUFFER_SIZE ( ( MAX_CU_SIZE+2*JVET_L0256_BIO_EXTEND_SIZE ) * ( MAX_CU_SIZE+2*JVET_L0256_BIO_EXTEND_SIZE ) ) // outer parentheses keep the product intact when the macro appears inside a larger expression
+#endif
+
 class InterPrediction : public WeightPrediction
 {
 private:
 
+#if JVET_L0256_BIO
+  Distortion  m_bioDistThres;
+  Distortion  m_bioSubBlkDistThres;
+  Distortion  m_bioPredSubBlkDist[MAX_NUM_PARTS_IN_CTU];
+
+  int m_piDotProduct1[BIO_TEMP_BUFFER_SIZE];
+  int m_piDotProduct2[BIO_TEMP_BUFFER_SIZE];
+  int m_piDotProduct3[BIO_TEMP_BUFFER_SIZE];
+  int m_piDotProduct5[BIO_TEMP_BUFFER_SIZE];
+  int m_piDotProduct6[BIO_TEMP_BUFFER_SIZE];
+#endif
 
 protected:
   InterpolationFilter  m_if;
@@ -80,15 +95,37 @@ protected:
   RdCost*              m_pcRdCost;
 
   int                  m_iRefListIdx;
-  
+ 
+#if JVET_L0256_BIO
+  Pel*                 m_pGradX0;
+  Pel*                 m_pGradY0;
+  Pel*                 m_pGradX1;
+  Pel*                 m_pGradY1;
+  bool                 m_subPuMC;
+
+  int             rightShiftMSB(int numer, int    denom);
+  void            applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &pcYuvSrc0, const CPelUnitBuf &pcYuvSrc1, const int &iRefIdx0, const int &iRefIdx1, PelUnitBuf &pcYuvDst, const BitDepths &clipBitDepths);
+  bool            xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths);
+  void            bioSampleExtendBilinearFilter(Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int dim, int fracX, int fracY, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng);
+#endif
 
   void xPredInterUni            ( const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi 
+#if JVET_L0256_BIO
+                                  ,const bool& bBIOApplied = false
+#endif
   );
   void xPredInterBi             ( PredictionUnit& pu, PelUnitBuf &pcYuvPred );
   void xPredInterBlk            ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
+#if JVET_L0256_BIO
+                                  ,const bool& bBIOApplied = false
+#endif
                                  );
-  
+
+#if JVET_L0256_BIO
+  void xWeightedAverage         ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bBIOApplied );
+#else
   void xWeightedAverage         ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs );
+#endif
   void xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng );
 
   static bool xCheckIdenticalMotion( const PredictionUnit& pu );
diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp
index 32e4d9d75..abcef170f 100644
--- a/source/Lib/CommonLib/InterpolationFilter.cpp
+++ b/source/Lib/CommonLib/InterpolationFilter.cpp
@@ -111,6 +111,28 @@ const TFilterCoeff InterpolationFilter::m_chromaFilter[CHROMA_INTERPOLATION_FILT
   {  0,  2, 63, -1 },
 };
 
+#if JVET_L0256_BIO
+const TFilterCoeff InterpolationFilter::m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE][NTAPS_BILINEAR] =
+{ // row f holds the two taps { 64 - 4*f, 4*f }; the taps of every row sum to 64
+  { 64,  0 },
+  { 60,  4 },
+  { 56,  8 },
+  { 52, 12 },
+  { 48, 16 },
+  { 44, 20 },
+  { 40, 24 },
+  { 36, 28 },
+  { 32, 32 },
+  { 28, 36 },
+  { 24, 40 },
+  { 20, 44 },
+  { 16, 48 },
+  { 12, 52 },
+  {  8, 56 },
+  {  4, 60 }
+};
+#endif
+
 // ====================================================================================================================
 // Private member functions
 // ====================================================================================================================
@@ -443,7 +465,11 @@ void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int sr
  * \param  fmt        Chroma format
  * \param  bitDepth   Bit depth
  */
+#if JVET_L0256_BIO
+void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx )
+#else
 void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng )
+#endif
 {
   if( frac == 0 )
   {
@@ -452,6 +478,13 @@ void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, i
   else if( isLuma( compID ) )
   {
     CHECK( frac < 0 || frac >= ( LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE ), "Invalid fraction" );
+#if JVET_L0256_BIO
+    if( nFilterIdx == 1 )
+    {
+      filterHor<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilter[frac]);
+    }
+    else
+#endif
     {
       filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac] );
     }
@@ -481,7 +514,11 @@ void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, i
  * \param  fmt        Chroma format
  * \param  bitDepth   Bit depth
  */
+#if JVET_L0256_BIO
+void InterpolationFilter::filterVer( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx)
+#else
 void InterpolationFilter::filterVer( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng )
+#endif
 {
   if( frac == 0 )
   {
@@ -490,6 +527,13 @@ void InterpolationFilter::filterVer( const ComponentID compID, Pel const *src, i
   else if( isLuma( compID ) )
   {
     CHECK( frac < 0 || frac >= ( LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE ), "Invalid fraction" );
+#if JVET_L0256_BIO
+    if (nFilterIdx == 1)
+    {
+      filterVer<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilter[frac]);
+    }
+    else
+#endif
     {
       filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac] );
     }
diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h
index 4535b6bc5..4f246d9be 100644
--- a/source/Lib/CommonLib/InterpolationFilter.h
+++ b/source/Lib/CommonLib/InterpolationFilter.h
@@ -56,6 +56,9 @@ class InterpolationFilter
 {
   static const TFilterCoeff m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE][NTAPS_LUMA]; ///< Luma filter taps
   static const TFilterCoeff m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE][NTAPS_CHROMA]; ///< Chroma filter taps
+#if JVET_L0256_BIO
+  static const TFilterCoeff m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE][NTAPS_BILINEAR]; ///< bilinear filter taps
+#endif
 public:
   template<bool isFirst, bool isLast>
   static void filterCopy( const ClpRng& clpRng, const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height );
@@ -87,8 +90,13 @@ public:
   void _initInterpolationFilterX86();
 #endif
 
+#if JVET_L0256_BIO
+  void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac,               bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0);
+  void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0);
+#else
   void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac,               bool isLast, const ChromaFormat fmt, const ClpRng& clpRng );
   void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng );
+#endif
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
   void cacheAssign( CacheModel *cache ) { m_cacheModel = cache; }
 #endif
diff --git a/source/Lib/CommonLib/RdCost.cpp b/source/Lib/CommonLib/RdCost.cpp
index 20119d9a8..85306088a 100644
--- a/source/Lib/CommonLib/RdCost.cpp
+++ b/source/Lib/CommonLib/RdCost.cpp
@@ -164,6 +164,10 @@ void RdCost::init()
   m_afpDistortFunc[DF_SSE16N_WTD] = RdCost::xGetSSE16N_WTD;
 #endif
 
+#if JVET_L0256_BIO
+  m_afpDistortFunc[DF_SAD_INTERMEDIATE_BITDEPTH] = RdCost::xGetSAD;
+#endif
+
 #if ENABLE_SIMD_OPT_DIST
 #ifdef TARGET_SIMD_X86
   initRdCostX86();
@@ -318,7 +322,11 @@ void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &c
   rcDP.maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max();
 }
 
+#if JVET_L0256_BIO
+void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode, int step, bool useHadamard, bool bBIOApplied )
+#else
 void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode, int step, bool useHadamard )
+#endif
 {
   rcDP.bitDepth   = bitDepth;
   rcDP.compID     = compID;
@@ -339,6 +347,14 @@ void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY,
 
   CHECK( useHadamard || rcDP.useMR || subShiftMode > 0, "only used in xDirectMCCost with these default parameters (so far...)" );
 
+#if JVET_L0256_BIO
+  if ( bBIOApplied )
+  {
+    rcDP.distFunc = m_afpDistortFunc[ DF_SAD_INTERMEDIATE_BITDEPTH ];
+    return;
+  }
+#endif
+
   if( width == 12 )
   {
     rcDP.distFunc = m_afpDistortFunc[ DF_SAD12 ];
diff --git a/source/Lib/CommonLib/RdCost.h b/source/Lib/CommonLib/RdCost.h
index 4e79040c2..07b9f6939 100644
--- a/source/Lib/CommonLib/RdCost.h
+++ b/source/Lib/CommonLib/RdCost.h
@@ -102,6 +102,9 @@ private:
   // for distortion
 
   static FpDistFunc       m_afpDistortFunc[DF_TOTAL_FUNCTIONS]; // [eDFunc]
+#if JVET_L0256_BIO
+
+#endif
   CostMode                m_costMode;
   double                  m_distortionWeight[MAX_NUM_COMPONENT]; // only chroma values are used.
   double                  m_dLambda;
@@ -154,7 +157,11 @@ public:
 
   void           setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRefY , int iRefStride, int bitDepth, ComponentID compID, int subShiftMode = 0, int step = 1, bool useHadamard = false );
   void           setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, ComponentID compID, bool useHadamard = false );
+#if JVET_L0256_BIO
+  void           setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false, bool bBIOApplied = false );
+#else
   void           setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false );
+#endif
 
   double         getMotionLambda          ( bool bIsTransquantBypass ) { return m_dLambdaMotionSAD[(bIsTransquantBypass && m_costMode==COST_MIXED_LOSSLESS_LOSSY_CODING)?1:0]; }
   void           selectMotionLambda       ( bool bIsTransquantBypass ) { m_motionLambda = getMotionLambda( bIsTransquantBypass ); }
@@ -266,6 +273,10 @@ private:
   static Distortion xGetSAD_SIMD    ( const DistParam& pcDtParam );
   template< int iWidth, X86_VEXT vext >
   static Distortion xGetSAD_NxN_SIMD( const DistParam& pcDtParam );
+#if JVET_L0256_BIO
+  template< X86_VEXT vext >
+  static Distortion xGetSAD_IBD_SIMD(const DistParam& pcDtParam);
+#endif
 
   template< typename Torg, typename Tcur, X86_VEXT vext >
   static Distortion xGetHADs_SIMD   ( const DistParam& pcDtParam );
diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp
index 6023e01bc..4bd5fc727 100644
--- a/source/Lib/CommonLib/Slice.cpp
+++ b/source/Lib/CommonLib/Slice.cpp
@@ -1632,6 +1632,9 @@ SPSNext::SPSNext( SPS& sps )
   , m_IMV                       ( false )
 #if !REMOVE_MV_ADAPT_PREC
   , m_highPrecMv                ( false )
+#endif
+#if JVET_L0256_BIO
+  , m_BIO                       ( false )
 #endif
   , m_DisableMotionCompression  ( false )
   , m_LMChroma                  ( false )
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index d6ef610c7..f122ab69d 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -803,6 +803,9 @@ private:
   bool              m_IMV;                        // 9
 #if !REMOVE_MV_ADAPT_PREC
   bool              m_highPrecMv;
+#endif
+#if JVET_L0256_BIO
+  bool              m_BIO;
 #endif
   bool              m_DisableMotionCompression;   // 13
   bool              m_LMChroma;                   // 17
@@ -865,6 +868,10 @@ public:
 #if !REMOVE_MV_ADAPT_PREC
   void      setUseHighPrecMv(bool b) { m_highPrecMv = b; }
   bool      getUseHighPrecMv()                                      const { return m_highPrecMv; }
+#endif
+#if JVET_L0256_BIO
+  void      setUseBIO(bool b)                                                       { m_BIO = b; }
+  bool      getUseBIO()                                                   const     { return m_BIO; }
 #endif
   void      setDisableMotCompress ( bool b )                                        { m_DisableMotionCompression = b; }
   bool      getDisableMotCompress ()                                      const     { return m_DisableMotionCompression; }
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index d8e34d3bb..afc72b3b0 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -69,16 +69,10 @@
 #define JVET_L0274                                        1
 #define JVET_L0274_ENCODER_SPEED_UP                     ( 1 && JVET_L0274 ) // encoder speed-up by pre-calculating position dependent parameters
 
-
-
-
-
-
-
-
-
-
-
+#define JVET_L0256_BIO                                    1
+#if JVET_L0256_BIO
+#define JVET_L0256_BIO_EXTEND_SIZE                        1
+#endif
 
 #define JVET_L0646_GBI                                    1 // Generalized bi-prediction (GBi)
 
@@ -549,7 +543,13 @@ enum DFunc
   DF_DEFAULT_ORI      = DF_SSE_WTD+8,
 #endif
 
+#if JVET_L0256_BIO
+  DF_SAD_INTERMEDIATE_BITDEPTH = 63,
+
+  DF_TOTAL_FUNCTIONS = 64
+#else
   DF_TOTAL_FUNCTIONS = 63
+#endif
 };
 
 /// motion vector predictor direction used in AMVP
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
index 34d2cb7b4..1add0996f 100644
--- a/source/Lib/CommonLib/x86/BufferX86.h
+++ b/source/Lib/CommonLib/x86/BufferX86.h
@@ -128,6 +128,308 @@ void addAvg_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int s
   }
 }
 
+#if JVET_L0256_BIO
+template< X86_VEXT vext >
+void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *pGradX0, const Pel *pGradX1, const Pel *pGradY0, const Pel*pGradY1, int iGradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
+{ // Per sample: dst = clamp( ( src0 + src1 + offset + ( ( tmpx*(gradX0 - gradX1) + tmpy*(gradY0 - gradY1) + 1 ) >> 1 ) ) >> shift ); four samples per step.
+  __m128i mm_tmpx = _mm_unpacklo_epi64(_mm_set1_epi16(tmpx), _mm_set1_epi16(tmpy)); // low four 16-bit lanes = tmpx, high four = tmpy (both narrowed to 16 bits)
+  __m128i mm_boffset = _mm_set1_epi32(1);
+  __m128i mm_offset = _mm_set1_epi32(offset);
+  __m128i vibdimin = _mm_set1_epi16(clpRng.min);
+  __m128i vibdimax = _mm_set1_epi16(clpRng.max);
+
+  for (int y = 0; y < height; y++)
+  {
+    for (int x = 0; x < width; x += 4)
+    {
+      __m128i mm_a = _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)(pGradX0 + x)), _mm_loadl_epi64((const __m128i *)(pGradY0 + x))); // low half: four X gradients, high half: four Y gradients (list 0)
+      __m128i mm_b = _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)(pGradX1 + x)), _mm_loadl_epi64((const __m128i *)(pGradY1 + x))); // same layout for list 1
+      mm_a = _mm_sub_epi16(mm_a, mm_b); // 16-bit gradient differences
+      mm_b = _mm_mulhi_epi16(mm_a, mm_tmpx); // high 16 bits of each product
+      mm_a = _mm_mullo_epi16(mm_a, mm_tmpx); // low 16 bits of each product
+
+      __m128i mm_sum = _mm_add_epi32(_mm_unpacklo_epi16(mm_a, mm_b), _mm_unpackhi_epi16(mm_a, mm_b)); // rebuild 32-bit products and add the X term to the Y term
+      mm_sum = _mm_srai_epi32(_mm_add_epi32(mm_sum, mm_boffset), 1);
+      mm_a = _mm_cvtepi16_epi32(_mm_loadl_epi64((const __m128i *)(src0 + x)));
+      mm_b = _mm_cvtepi16_epi32(_mm_loadl_epi64((const __m128i *)(src1 + x)));
+      mm_sum = _mm_add_epi32(_mm_add_epi32(mm_sum, mm_a), _mm_add_epi32(mm_b, mm_offset));
+      mm_sum = _mm_packs_epi32(_mm_srai_epi32(mm_sum, shift), mm_a); // results land in the low four lanes; the high four (from mm_a) are never stored
+      mm_sum = _mm_min_epi16(vibdimax, _mm_max_epi16(vibdimin, mm_sum)); // clamp to [clpRng.min, clpRng.max]
+      _mm_storel_epi64((__m128i *)(dst + x), mm_sum); // writes the low 64 bits only, i.e. four samples
+    }
+    dst += dstStride;       src0 += src0Stride;     src1 += src1Stride;
+    pGradX0 += iGradStride; pGradX1 += iGradStride; pGradY0 += iGradStride; pGradY1 += iGradStride;
+  }
+}
+
+template< X86_VEXT vext >
+void gradFilter_SSE(Pel* piSrc, int iSrcStride, int iWidth, int iHeight, int iGradStride, Pel* piGradX, Pel* piGradY)
+{ // Fills piGradX / piGradY: interior samples get (right - left) >> 4 and (bottom - top) >> 4, then the outermost columns and rows are filled by replication.
+  __m128i vzero = _mm_setzero_si128();
+  Pel* piSrcTmp = piSrc + iSrcStride + 1; // first interior sample: one row down, one column in
+  Pel* piGradXTmp = piGradX + iGradStride + 1;
+  Pel* piGradYTmp = piGradY + iGradStride + 1;
+
+  int iWidthInside = iWidth - 2 * JVET_L0256_BIO_EXTEND_SIZE;
+  int iHeightInside = iHeight - 2 * JVET_L0256_BIO_EXTEND_SIZE;
+
+  assert((iWidthInside & 3) == 0); // the loop below has no tail handling, so the interior width must be a multiple of 4
+
+  for (int y = 0; y < iHeightInside; y++)
+  {
+    int x = 0;
+    for (; x < iWidthInside; x += 4)
+    {
+      __m128i mmPixTop = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(piSrcTmp + x - iSrcStride)));
+      __m128i mmPixBottom = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(piSrcTmp + x + iSrcStride)));
+      __m128i mmPixLeft = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(piSrcTmp + x - 1)));
+      __m128i mmPixRight = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(piSrcTmp + x + 1)));
+
+      __m128i mmGradVer = _mm_srai_epi32(_mm_sub_epi32(mmPixBottom, mmPixTop), 4);
+      __m128i mmGradHor = _mm_srai_epi32(_mm_sub_epi32(mmPixRight, mmPixLeft), 4);
+      mmGradVer = _mm_packs_epi32(mmGradVer, vzero); // narrow back to 16 bits with signed saturation
+      mmGradHor = _mm_packs_epi32(mmGradHor, vzero);
+
+      _mm_storel_epi64((__m128i *)(piGradYTmp + x), mmGradVer);
+      _mm_storel_epi64((__m128i *)(piGradXTmp + x), mmGradHor);
+    }
+
+    piGradXTmp += iGradStride;
+    piGradYTmp += iGradStride;
+    piSrcTmp += iSrcStride;
+  }
+
+  piGradXTmp = piGradX + iGradStride + 1;
+  piGradYTmp = piGradY + iGradStride + 1;
+  for (int y = 0; y < iHeightInside; y++) // replicate the first / last interior column into the column just outside it
+  {
+    piGradXTmp[-1] = piGradXTmp[0];
+    piGradXTmp[iWidthInside] = piGradXTmp[iWidthInside - 1];
+    piGradXTmp += iGradStride;
+
+    piGradYTmp[-1] = piGradYTmp[0];
+    piGradYTmp[iWidthInside] = piGradYTmp[iWidthInside - 1];
+    piGradYTmp += iGradStride;
+  }
+
+  piGradXTmp = piGradX + iGradStride; // start of the first interior row, side column included
+  piGradYTmp = piGradY + iGradStride;
+  ::memcpy(piGradXTmp - iGradStride, piGradXTmp, sizeof(Pel)*(iWidth)); // top row <- first interior row
+  ::memcpy(piGradXTmp + iHeightInside*iGradStride, piGradXTmp + (iHeightInside - 1)*iGradStride, sizeof(Pel)*(iWidth)); // bottom row <- last interior row
+  ::memcpy(piGradYTmp - iGradStride, piGradYTmp, sizeof(Pel)*(iWidth));
+  ::memcpy(piGradYTmp + iHeightInside*iGradStride, piGradYTmp + (iHeightInside - 1)*iGradStride, sizeof(Pel)*(iWidth));
+}
+
+template< X86_VEXT vext >
+void calcBIOPar_SSE(const Pel* pSrcY0Temp, const Pel* pSrcY1Temp, const Pel* pGradX0, const Pel* pGradX1, const Pel* pGradY0, const Pel* pGradY1, int* m_piDotProductTemp1, int* m_piDotProductTemp2, int* m_piDotProductTemp3, int* m_piDotProductTemp5, int* m_piDotProductTemp6, const int iSrc0Stride, const int iSrc1Stride, const int iGradStride, const int iWidthG, const int iHeightG)
+{ // Per sample, with tX = (gradX0 + gradX1) >> 3, tY = (gradY0 + gradY1) >> 3 and d = (src1 >> 6) - (src0 >> 6): writes tX*tX, tX*tY, tX*d, tY*tY, tY*d to buffers 1, 2, 3, 5, 6.
+  for (int y = 0; y < iHeightG; y++)
+  {
+    int x = 0;
+    for (; x < ((iWidthG >> 3) << 3); x += 8) // eight samples per step
+    {
+      __m128i mmSrcY0Temp = _mm_srai_epi16(_mm_loadu_si128((__m128i*)(pSrcY0Temp + x)), 6);
+      __m128i mmSrcY1Temp = _mm_srai_epi16(_mm_loadu_si128((__m128i*)(pSrcY1Temp + x)), 6);
+      __m128i mmGradX0 = _mm_loadu_si128((__m128i*)(pGradX0 + x));
+      __m128i mmGradX1 = _mm_loadu_si128((__m128i*)(pGradX1 + x));
+      __m128i mmGradY0 = _mm_loadu_si128((__m128i*)(pGradY0 + x));
+      __m128i mmGradY1 = _mm_loadu_si128((__m128i*)(pGradY1 + x));
+
+      __m128i mmTemp1 = _mm_sub_epi16(mmSrcY1Temp, mmSrcY0Temp);
+      __m128i mmTempX = _mm_srai_epi16(_mm_add_epi16(mmGradX0, mmGradX1), 3);
+      __m128i mmTempY = _mm_srai_epi16(_mm_add_epi16(mmGradY0, mmGradY1), 3);
+
+      // m_piDotProductTemp1
+      __m128i mm_b = _mm_mulhi_epi16(mmTempX, mmTempX); // high 16 bits of each product
+      __m128i mm_a = _mm_mullo_epi16(mmTempX, mmTempX); // low 16 bits of each product
+
+      __m128i mm_l = _mm_unpacklo_epi16(mm_a, mm_b); // 32-bit products of samples 0..3
+      __m128i mm_h = _mm_unpackhi_epi16(mm_a, mm_b); // 32-bit products of samples 4..7
+
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp1 + x), mm_l);
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp1 + x + 4), mm_h);
+
+      // m_piDotProductTemp2
+      mm_b = _mm_mulhi_epi16(mmTempX, mmTempY);
+      mm_a = _mm_mullo_epi16(mmTempX, mmTempY);
+
+      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
+      mm_h = _mm_unpackhi_epi16(mm_a, mm_b);
+
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp2 + x), mm_l);
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp2 + x + 4), mm_h);
+
+      // m_piDotProductTemp3
+      mm_b = _mm_mulhi_epi16(mmTempX, mmTemp1);
+      mm_a = _mm_mullo_epi16(mmTempX, mmTemp1);
+
+      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
+      mm_h = _mm_unpackhi_epi16(mm_a, mm_b);
+
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp3 + x), mm_l);
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp3 + x + 4), mm_h);
+
+      // m_piDotProductTemp5
+      mm_b = _mm_mulhi_epi16(mmTempY, mmTempY);
+      mm_a = _mm_mullo_epi16(mmTempY, mmTempY);
+
+      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
+      mm_h = _mm_unpackhi_epi16(mm_a, mm_b);
+
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp5 + x), mm_l);
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp5 + x + 4), mm_h);
+
+      // m_piDotProductTemp6
+      mm_b = _mm_mulhi_epi16(mmTempY, mmTemp1);
+      mm_a = _mm_mullo_epi16(mmTempY, mmTemp1);
+
+      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
+      mm_h = _mm_unpackhi_epi16(mm_a, mm_b);
+
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp6 + x), mm_l);
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp6 + x + 4), mm_h);
+    }
+
+    for (; x < ((iWidthG >> 2) << 2); x += 4) // same computation for a remaining group of four samples
+    {
+      __m128i mmSrcY0Temp = _mm_srai_epi16(_mm_loadl_epi64((__m128i*)(pSrcY0Temp + x)), 6);
+      __m128i mmSrcY1Temp = _mm_srai_epi16(_mm_loadl_epi64((__m128i*)(pSrcY1Temp + x)), 6);
+      __m128i mmGradX0 = _mm_loadl_epi64((__m128i*)(pGradX0 + x));
+      __m128i mmGradX1 = _mm_loadl_epi64((__m128i*)(pGradX1 + x));
+      __m128i mmGradY0 = _mm_loadl_epi64((__m128i*)(pGradY0 + x));
+      __m128i mmGradY1 = _mm_loadl_epi64((__m128i*)(pGradY1 + x));
+
+      __m128i mmTemp1 = _mm_sub_epi16(mmSrcY1Temp, mmSrcY0Temp);
+      __m128i mmTempX = _mm_srai_epi16(_mm_add_epi16(mmGradX0, mmGradX1), 3);
+      __m128i mmTempY = _mm_srai_epi16(_mm_add_epi16(mmGradY0, mmGradY1), 3);
+
+      // m_piDotProductTemp1
+      __m128i mm_b = _mm_mulhi_epi16(mmTempX, mmTempX);
+      __m128i mm_a = _mm_mullo_epi16(mmTempX, mmTempX);
+      __m128i mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
+
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp1 + x), mm_l);
+
+      // m_piDotProductTemp2
+      mm_b = _mm_mulhi_epi16(mmTempX, mmTempY);
+      mm_a = _mm_mullo_epi16(mmTempX, mmTempY);
+      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
+
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp2 + x), mm_l);
+
+      // m_piDotProductTemp3
+      mm_b = _mm_mulhi_epi16(mmTempX, mmTemp1);
+      mm_a = _mm_mullo_epi16(mmTempX, mmTemp1);
+      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
+
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp3 + x), mm_l);
+
+      // m_piDotProductTemp5
+      mm_b = _mm_mulhi_epi16(mmTempY, mmTempY);
+      mm_a = _mm_mullo_epi16(mmTempY, mmTempY);
+      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
+
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp5 + x), mm_l);
+
+      // m_piDotProductTemp6
+      mm_b = _mm_mulhi_epi16(mmTempY, mmTemp1);
+      mm_a = _mm_mullo_epi16(mmTempY, mmTemp1);
+      mm_l = _mm_unpacklo_epi16(mm_a, mm_b);
+
+      _mm_storeu_si128((__m128i *)(m_piDotProductTemp6 + x), mm_l);
+    }
+
+    for (; x < iWidthG; x++) // scalar tail for the last (iWidthG & 3) samples
+    {
+      int temp = (pSrcY0Temp[x] >> 6) - (pSrcY1Temp[x] >> 6); // opposite sign to mmTemp1 above; the negations below compensate
+      int tempX = (pGradX0[x] + pGradX1[x]) >> 3;
+      int tempY = (pGradY0[x] + pGradY1[x]) >> 3;
+      m_piDotProductTemp1[x] = tempX * tempX;
+      m_piDotProductTemp2[x] = tempX * tempY;
+      m_piDotProductTemp3[x] = -tempX * temp;
+      m_piDotProductTemp5[x] = tempY * tempY;
+      m_piDotProductTemp6[x] = -tempY * temp;
+    }
+
+    pSrcY0Temp += iSrc0Stride;
+    pSrcY1Temp += iSrc1Stride;
+    pGradX0 += iGradStride;
+    pGradX1 += iGradStride;
+    pGradY0 += iGradStride;
+    pGradY1 += iGradStride;
+    m_piDotProductTemp1 += iWidthG; // output rows are packed back to back, iWidthG ints per row
+    m_piDotProductTemp2 += iWidthG;
+    m_piDotProductTemp3 += iWidthG;
+    m_piDotProductTemp5 += iWidthG;
+    m_piDotProductTemp6 += iWidthG;
+  }
+}
+
+template< X86_VEXT vext >
+void calcBlkGradient_SSE(int sx, int sy, int     *arraysGx2, int     *arraysGxGy, int     *arraysGxdI, int     *arraysGy2, int     *arraysGydI, int     &sGx2, int     &sGy2, int     &sGxGy, int     &sGxdI, int     &sGydI, int width, int height, int unitSize)
+{ // Sums each of the five arrays over a window of six columns (offsets -1..4) by unitSize + 2*JVET_L0256_BIO_EXTEND_SIZE rows; sx, sy and height are not read in this body.
+  int     *pGx2 = arraysGx2;
+  int     *pGy2 = arraysGy2;
+  int     *pGxGy = arraysGxGy;
+  int     *pGxdI = arraysGxdI;
+  int     *pGydI = arraysGydI;
+
+  // set to the above row due to JVET_L0256_BIO_EXTEND_SIZE
+  pGx2 -= (JVET_L0256_BIO_EXTEND_SIZE*width);
+  pGy2 -= (JVET_L0256_BIO_EXTEND_SIZE*width);
+  pGxGy -= (JVET_L0256_BIO_EXTEND_SIZE*width);
+  pGxdI -= (JVET_L0256_BIO_EXTEND_SIZE*width);
+  pGydI -= (JVET_L0256_BIO_EXTEND_SIZE*width);
+
+  __m128i vzero = _mm_setzero_si128();
+  __m128i mmGx2Total = _mm_setzero_si128();
+  __m128i mmGy2Total = _mm_setzero_si128();
+  __m128i mmGxGyTotal = _mm_setzero_si128();
+  __m128i mmGxdITotal = _mm_setzero_si128();
+  __m128i mmGydITotal = _mm_setzero_si128();
+
+  for (int y = -JVET_L0256_BIO_EXTEND_SIZE; y < unitSize + JVET_L0256_BIO_EXTEND_SIZE; y++)
+  {
+    __m128i mmsGx2 = _mm_loadu_si128((__m128i*)(pGx2 - 1));   __m128i mmsGx2Sec = _mm_loadl_epi64((__m128i*)(pGx2 + 3)); // four ints at columns -1..2, then two ints at columns 3..4 (upper lanes zero)
+    __m128i mmsGy2 = _mm_loadu_si128((__m128i*)(pGy2 - 1));   __m128i mmsGy2Sec = _mm_loadl_epi64((__m128i*)(pGy2 + 3));
+    __m128i mmsGxGy = _mm_loadu_si128((__m128i*)(pGxGy - 1));  __m128i mmsGxGySec = _mm_loadl_epi64((__m128i*)(pGxGy + 3));
+    __m128i mmsGxdI = _mm_loadu_si128((__m128i*)(pGxdI - 1));  __m128i mmsGxdISec = _mm_loadl_epi64((__m128i*)(pGxdI + 3));
+    __m128i mmsGydI = _mm_loadu_si128((__m128i*)(pGydI - 1));  __m128i mmsGydISec = _mm_loadl_epi64((__m128i*)(pGydI + 3));
+
+    mmsGx2 = _mm_add_epi32(mmsGx2, mmsGx2Sec);
+    mmsGy2 = _mm_add_epi32(mmsGy2, mmsGy2Sec);
+    mmsGxGy = _mm_add_epi32(mmsGxGy, mmsGxGySec);
+    mmsGxdI = _mm_add_epi32(mmsGxdI, mmsGxdISec);
+    mmsGydI = _mm_add_epi32(mmsGydI, mmsGydISec);
+
+
+    mmGx2Total = _mm_add_epi32(mmGx2Total, mmsGx2);
+    mmGy2Total = _mm_add_epi32(mmGy2Total, mmsGy2);
+    mmGxGyTotal = _mm_add_epi32(mmGxGyTotal, mmsGxGy);
+    mmGxdITotal = _mm_add_epi32(mmGxdITotal, mmsGxdI);
+    mmGydITotal = _mm_add_epi32(mmGydITotal, mmsGydI);
+
+    pGx2 += width; // width is used as the row stride of all five arrays
+    pGy2 += width;
+    pGxGy += width;
+    pGxdI += width;
+    pGydI += width;
+  }
+
+  mmGx2Total = _mm_hadd_epi32(_mm_hadd_epi32(mmGx2Total, vzero), vzero); // two horizontal adds fold the four lanes into lane 0
+  mmGy2Total = _mm_hadd_epi32(_mm_hadd_epi32(mmGy2Total, vzero), vzero);
+  mmGxGyTotal = _mm_hadd_epi32(_mm_hadd_epi32(mmGxGyTotal, vzero), vzero);
+  mmGxdITotal = _mm_hadd_epi32(_mm_hadd_epi32(mmGxdITotal, vzero), vzero);
+  mmGydITotal = _mm_hadd_epi32(_mm_hadd_epi32(mmGydITotal, vzero), vzero);
+
+  sGx2 = _mm_cvtsi128_si32(mmGx2Total);
+  sGy2 = _mm_cvtsi128_si32(mmGy2Total);
+  sGxGy = _mm_cvtsi128_si32(mmGxGyTotal);
+  sGxdI = _mm_cvtsi128_si32(mmGxdITotal);
+  sGydI = _mm_cvtsi128_si32(mmGydITotal);
+}
+#endif
+
 template< X86_VEXT vext, int W >
 void reco_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int src1Stride, int16_t *dst, int dstStride, int width, int height, const ClpRng& clpRng )
 {
@@ -496,6 +798,13 @@ void PelBufferOps::_initPelBufOpsX86()
   addAvg8 = addAvg_SSE<vext, 8>;
   addAvg4 = addAvg_SSE<vext, 4>;
 
+#if JVET_L0256_BIO
+  addBIOAvg4      = addBIOAvg4_SSE<vext>;
+  bioGradFilter   = gradFilter_SSE<vext>;
+  calcBIOPar      = calcBIOPar_SSE<vext>;
+  calcBlkGradient = calcBlkGradient_SSE<vext>;
+#endif
+
   reco8 = reco_SSE<vext, 8>;
   reco4 = reco_SSE<vext, 4>;
 
diff --git a/source/Lib/CommonLib/x86/RdCostX86.h b/source/Lib/CommonLib/x86/RdCostX86.h
index 95383045f..ab54737fa 100644
--- a/source/Lib/CommonLib/x86/RdCostX86.h
+++ b/source/Lib/CommonLib/x86/RdCostX86.h
@@ -297,6 +297,45 @@ Distortion RdCost::xGetSAD_SIMD( const DistParam &rcDtParam )
   return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
 }
 
+#if JVET_L0256_BIO
+// SIMD SAD between org and cur on 16-bit samples, differences taken in 32 bit; unsupported cases use scalar xGetSAD.
+template< X86_VEXT vext >
+Distortion RdCost::xGetSAD_IBD_SIMD(const DistParam &rcDtParam)
+{
+  // The kernel consumes 4 samples per step, so widths that are not a multiple of 4 take the scalar path.
+  if (rcDtParam.org.width < 4 || (rcDtParam.org.width & 3) != 0 || rcDtParam.bitDepth > 10 || rcDtParam.applyWeight)
+    return RdCost::xGetSAD(rcDtParam);
+
+  const short* src0 = (const short*)rcDtParam.org.buf;
+  const short* src1 = (const short*)rcDtParam.cur.buf;
+  const int  width = rcDtParam.org.width;    // samples per row, bounds the inner (x) loop
+  const int  height = rcDtParam.org.height;  // number of rows, bounds the outer (y) loop
+  const int  iSubShift = rcDtParam.subShift;
+  const int  iSubStep = (1 << iSubShift);    // only every iSubStep-th row is visited
+  const int src0Stride = rcDtParam.org.stride * iSubStep;
+  const int src1Stride = rcDtParam.cur.stride * iSubStep;
+
+  __m128i vtotalsum32 = _mm_setzero_si128();
+  const __m128i vzero = _mm_setzero_si128();
+  for (int y = 0; y < height; y += iSubStep)
+  {
+    for (int x = 0; x < width; x += 4)
+    {
+      // load 4 x int16 from each source and sign-extend to 4 x int32 before subtracting
+      const __m128i vsrc1 = _mm_cvtepi16_epi32(_mm_loadl_epi64((const __m128i*)(src0 + x)));
+      const __m128i vsrc2 = _mm_cvtepi16_epi32(_mm_loadl_epi64((const __m128i*)(src1 + x)));
+      vtotalsum32 = _mm_add_epi32(vtotalsum32, _mm_abs_epi32(_mm_sub_epi32(vsrc1, vsrc2)));
+    }
+    src0 += src0Stride;
+    src1 += src1Stride;
+  }
+  // two horizontal adds against zero fold the four 32-bit lanes into lane 0
+  vtotalsum32 = _mm_hadd_epi32(_mm_hadd_epi32(vtotalsum32, vzero), vzero);
+  Distortion uiSum = _mm_cvtsi128_si32(vtotalsum32);
+  uiSum <<= iSubShift; // compensates for visiting only every iSubStep-th row
+  return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
+}
+#endif
 
 template< int iWidth, X86_VEXT vext >
 Distortion RdCost::xGetSAD_NxN_SIMD( const DistParam &rcDtParam )
@@ -2422,6 +2461,10 @@ void RdCost::_initRdCostX86()
   m_afpDistortFunc[DF_HAD32]   = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
   m_afpDistortFunc[DF_HAD64]   = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
   m_afpDistortFunc[DF_HAD16N]  = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
+
+#if JVET_L0256_BIO
+  m_afpDistortFunc[DF_SAD_INTERMEDIATE_BITDEPTH] = RdCost::xGetSAD_IBD_SIMD<vext>;
+#endif
 }
 
 template void RdCost::_initRdCostX86<SIMDX86>();
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index 6411f4092..5802f9b34 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -794,6 +794,9 @@ void HLSyntaxReader::parseSPSNext( SPSNext& spsNext, const bool usePCM )
   READ_FLAG( symbol,    "imv_enable_flag" );                        spsNext.setUseIMV                 ( symbol != 0 );
 #if !REMOVE_MV_ADAPT_PREC
   READ_FLAG( symbol, "high_precision_motion_vectors" );             spsNext.setUseHighPrecMv(symbol != 0);
+#endif
+#if JVET_L0256_BIO
+  READ_FLAG( symbol, "bio_enable_flag" );                           spsNext.setUseBIO                 ( symbol != 0 );
 #endif
   READ_FLAG( symbol,    "disable_motion_compression_flag" );        spsNext.setDisableMotCompress     ( symbol != 0 );
   READ_FLAG( symbol,    "lm_chroma_enabled_flag" );                 spsNext.setUseLMChroma            ( symbol != 0 );
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index 71f50a481..cbae17fd0 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -196,6 +196,9 @@ protected:
   bool      m_AffineType;
 #if !REMOVE_MV_ADAPT_PREC
   bool      m_highPrecMv;
+#endif
+#if JVET_L0256_BIO
+  bool      m_BIO;
 #endif
   bool      m_DisableMotionCompression;
   unsigned  m_MTTMode;
@@ -619,6 +622,10 @@ public:
 #if !REMOVE_MV_ADAPT_PREC
   void      setHighPrecisionMv              ( bool b )       { m_highPrecMv = b; }
   bool      getHighPrecisionMv              ()               { return m_highPrecMv; }
+#endif
+#if JVET_L0256_BIO
+  void      setBIO(bool b)                                   { m_BIO = b; }        // stores the BIO on/off setting in m_BIO
+  bool      getBIO()                                   const { return m_BIO; }     // returns the setting stored in m_BIO
 #endif
   void      setDisableMotionCompression     ( bool b )       { m_DisableMotionCompression = b; }
   bool      getDisableMotionCompression     ()         const { return m_DisableMotionCompression; }
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index 38661820c..a261753a7 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -840,6 +840,9 @@ void EncLib::xInitSPS(SPS &sps)
   sps.getSpsNext().setUseIMV                ( m_ImvMode != IMV_OFF );
 #if !REMOVE_MV_ADAPT_PREC
   sps.getSpsNext().setUseHighPrecMv         ( m_highPrecMv );
+#endif
+#if JVET_L0256_BIO
+  sps.getSpsNext().setUseBIO                ( m_BIO );
 #endif
   sps.getSpsNext().setUseAffine             ( m_Affine );
   sps.getSpsNext().setUseAffineType         ( m_AffineType );
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index 29f4b427f..b84bbf562 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -534,6 +534,9 @@ void HLSWriter::codeSPSNext( const SPSNext& spsNext, const bool usePCM )
   WRITE_FLAG( spsNext.getUseIMV() ? 1 : 0,                                                      "imv_enable_flag" );
 #if !REMOVE_MV_ADAPT_PREC
   WRITE_FLAG( spsNext.getUseHighPrecMv() ? 1 : 0,                                               "high_precision_motion_vectors");
+#endif
+#if JVET_L0256_BIO
+  WRITE_FLAG( spsNext.getUseBIO() ? 1 : 0,                                                      "bio_enable_flag" );
 #endif
   WRITE_FLAG( spsNext.getDisableMotCompress() ? 1 : 0,                                          "disable_motion_compression_flag" );
   WRITE_FLAG( spsNext.getUseLMChroma() ? 1 : 0,                                                 "lm_chroma_enabled_flag" );
-- 
GitLab