diff --git a/cfg/encoder_intra_vtm.cfg b/cfg/encoder_intra_vtm.cfg
index a8e2850ff60432658ece74697bb70d3f1248917f..7dcb733abeea23f7547d0fb95aac52c3ba058bc9 100644
--- a/cfg/encoder_intra_vtm.cfg
+++ b/cfg/encoder_intra_vtm.cfg
@@ -104,6 +104,7 @@ MTT                          : 1
 MTS                          : 1
 MTSIntraMaxCand              : 3
 MTSInterMaxCand              : 4
+SBT                          : 1
 Affine                       : 1
 SubPuMvp                     : 1
 MaxNumMergeCand              : 6
diff --git a/cfg/encoder_lowdelay_P_vtm.cfg b/cfg/encoder_lowdelay_P_vtm.cfg
index 1ad1e8112835ddeb10e8ae61a21313de15e7a9b8..e5fa7ee25ac00d3d3e0209699f94742d5ecff438 100644
--- a/cfg/encoder_lowdelay_P_vtm.cfg
+++ b/cfg/encoder_lowdelay_P_vtm.cfg
@@ -120,6 +120,7 @@ MTT                          : 1
 MTS                          : 1
 MTSIntraMaxCand              : 3
 MTSInterMaxCand              : 4
+SBT                          : 1
 Affine                       : 1
 SubPuMvp                     : 1
 MaxNumMergeCand              : 6
diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg
index 5e09cae7dcaf24e292f2db9605f4ac4bbabd82b0..dfd581489386ce61b9c84da83bbd2f7736e37aae 100644
--- a/cfg/encoder_lowdelay_vtm.cfg
+++ b/cfg/encoder_lowdelay_vtm.cfg
@@ -120,6 +120,7 @@ MTT                          : 1
 MTS                          : 1
 MTSIntraMaxCand              : 3
 MTSInterMaxCand              : 4
+SBT                          : 1
 Affine                       : 1
 SubPuMvp                     : 1
 MaxNumMergeCand              : 6
diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg
index 2f0ff3dad0747818d5482af32a690a4d4119efea..397869adb5d938acd130640d61ec7a5cb707a7bf 100644
--- a/cfg/encoder_randomaccess_vtm.cfg
+++ b/cfg/encoder_randomaccess_vtm.cfg
@@ -134,6 +134,7 @@ MTT                          : 1
 MTS                          : 1
 MTSIntraMaxCand              : 3
 MTSInterMaxCand              : 4
+SBT                          : 1
 Affine                       : 1
 SubPuMvp                     : 1
 MaxNumMergeCand              : 6
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 9cc41985218ca698db9c6ec8b5cbe75a52ae22e9..0b844b553dddbbbba4a59cb0648f72f3ea9f12f6 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -242,6 +242,9 @@ void EncApp::xInitLibCfg()
 #endif
 #if JVET_M0303_IMPLICIT_MTS
   m_cEncLib.setImplicitMTS                                       ( m_MTSImplicit );
+#endif
+#if JVET_M0140_SBT
+  m_cEncLib.setUseSBT                                            ( m_SBT );
 #endif
   m_cEncLib.setUseCompositeRef                                   ( m_compositeRefEnabled );
   m_cEncLib.setUseGBi                                            ( m_GBi );
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index 1d6de204bbf5d06ac28d2aae75eba624d99c19a1..849461a0511b0144fd724101222b89c4bb2b2ece 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -855,6 +855,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #endif
 #if JVET_M0303_IMPLICIT_MTS
   ("MTSImplicit",                                     m_MTSImplicit,                                        0, "Enable implicit MTS (when explicit MTS is off)\n")
+#endif
+#if JVET_M0140_SBT
+  ( "SBT",                                            m_SBT,                                            false, "Enable Sub-Block Transform for inter blocks\n" )
 #endif
   ("CompositeLTReference",                            m_compositeRefEnabled,                            false, "Enable Composite Long Term Reference Frame")
   ("GBi",                                             m_GBi,                                            false, "Enable Generalized Bi-prediction(GBi)")
@@ -3197,6 +3200,9 @@ void EncAppCfg::xPrintParameter()
     msg( VERBOSE, "MTS: %1d(intra) %1d(inter) ", m_MTS & 1, ( m_MTS >> 1 ) & 1 );
 #else
     msg( VERBOSE, "EMT: %1d(intra) %1d(inter) ", m_EMT & 1, ( m_EMT >> 1 ) & 1 );
+#endif
+#if JVET_M0140_SBT
+    msg( VERBOSE, "SBT:%d ", m_SBT );
 #endif
     msg( VERBOSE, "CompositeLTReference:%d ", m_compositeRefEnabled);
     msg( VERBOSE, "GBi:%d ", m_GBi );
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index f79bcaa44fae23a16aaab54d51b0e96ca273e670..3a7b9dfbb53eae02f6c226f7b6585a1c125da2c1 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -227,6 +227,9 @@ protected:
 #if JVET_M0303_IMPLICIT_MTS
   int       m_MTSImplicit;
 #endif
+#if JVET_M0140_SBT
+  bool      m_SBT;                                            ///< Sub-Block Transform for inter blocks
+#endif
 
   bool      m_compositeRefEnabled;
   bool      m_GBi;
diff --git a/source/Lib/CommonLib/CodingStatistics.h b/source/Lib/CommonLib/CodingStatistics.h
index 22a9449c3d448cac60ed25adec4ce86f3f38190b..6f2492427618d23e4ba78a53fccecb54adc7e27a 100644
--- a/source/Lib/CommonLib/CodingStatistics.h
+++ b/source/Lib/CommonLib/CodingStatistics.h
@@ -105,6 +105,9 @@ enum CodingStatisticsType
   STATS__CABAC_BITS__GBI_IDX,
   STATS__CABAC_BITS__EMT_CU_FLAG,
   STATS__CABAC_BITS__EMT_TU_INDEX,
+#if JVET_M0140_SBT
+  STATS__CABAC_BITS__SBT_MODE,
+#endif
   STATS__CABAC_BITS__MH_INTRA_FLAG,
   STATS__CABAC_BITS__TRIANGLE_FLAG,
   STATS__CABAC_BITS__TRIANGLE_INDEX,
@@ -191,6 +194,9 @@ static inline const char* getName(CodingStatisticsType name)
     "CABAC_BITS__GBI_IDX",
     "CABAC_BITS__EMT_CU_FLAG",
     "CABAC_BITS__EMT_TU_INDX",
+#if JVET_M0140_SBT
+    "CABAC_BITS__SBT_MODE",
+#endif
     "CABAC_BITS__MH_INTRA_FLAG",
     "CABAC_BITS__TRIANGLE_FLAG",
     "CABAC_BITS__TRIANGLE_INDEX",
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index b2b7a9135047231cd348114148e5ac13b116ea1a..6e2ade64161fa45bd63ce7f49b470c93b0e9e659 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -406,6 +406,12 @@ static const int TRIANGLE_MAX_NUM_CANDS =                          40;
 static const int TRIANGLE_MAX_NUM_SATD_CANDS =                      3;
 static const int TRIANGLE_MIN_SIZE =                            8 * 8;
 
+#if JVET_M0140_SBT
+static const int SBT_MAX_SIZE =                                    64; ///< maximum CU size for using SBT
+static const int SBT_NUM_SL =                                      10; ///< maximum number of historical PU decisions saved for a CU
+static const int SBT_NUM_RDO =                                      2; ///< maximum number of SBT modes tried for a PU
+#endif
+
 static const int IBC_MAX_CAND_SIZE = 16; // max block size for ibc search
 static const int IBC_NUM_CANDIDATES = 64; ///< Maximum number of candidates to store/test
 static const int CHROMA_REFINEMENT_CANDIDATES = 8; /// 8 candidates BV to choose from
diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp
index 558720f5c5e86671fef3a7ed66b9b9d1eb50d1ea..85ad6159688dab89b2028f9924d7c0d82372a123 100644
--- a/source/Lib/CommonLib/Contexts.cpp
+++ b/source/Lib/CommonLib/Contexts.cpp
@@ -1239,6 +1239,48 @@ const CtxSet ContextSetCfg::ISPMode = ContextSetCfg::addCtxSet
 });
 #endif
 
+#if JVET_M0140_SBT
+const CtxSet ContextSetCfg::SbtFlag = ContextSetCfg::addCtxSet // SbtFlag context set: two entries per row
+( {
+  {  CNU, CNU,}, // all three value rows use CNU for every entry
+  {  CNU, CNU,},
+  {  CNU, CNU,},
+#if JVET_M0453_CABAC_ENGINE
+  {  DWS, DWS,}, // additional DWS row, only compiled in with JVET_M0453_CABAC_ENGINE
+#endif
+} );
+
+const CtxSet ContextSetCfg::SbtQuadFlag = ContextSetCfg::addCtxSet // SbtQuadFlag context set: a single entry per row
+( {
+  {  CNU,}, // all three value rows use CNU
+  {  CNU,},
+  {  CNU,},
+#if JVET_M0453_CABAC_ENGINE
+  {  DWS,}, // additional DWS row, only compiled in with JVET_M0453_CABAC_ENGINE
+#endif
+} );
+
+const CtxSet ContextSetCfg::SbtHorFlag = ContextSetCfg::addCtxSet // SbtHorFlag context set: three entries per row
+( {
+  {  CNU, CNU, CNU,}, // all three value rows use CNU for every entry
+  {  CNU, CNU, CNU,},
+  {  CNU, CNU, CNU,},
+#if JVET_M0453_CABAC_ENGINE
+  {  DWS, DWS, DWS,}, // additional DWS row, only compiled in with JVET_M0453_CABAC_ENGINE
+#endif
+} );
+
+const CtxSet ContextSetCfg::SbtPosFlag = ContextSetCfg::addCtxSet // SbtPosFlag context set: a single entry per row
+( {
+  {  CNU,}, // all three value rows use CNU
+  {  CNU,},
+  {  CNU,},
+#if JVET_M0453_CABAC_ENGINE
+  {  DWS,}, // additional DWS row, only compiled in with JVET_M0453_CABAC_ENGINE
+#endif
+} );
+#endif
+
 const CtxSet ContextSetCfg::CrossCompPred = ContextSetCfg::addCtxSet
 ({
   {  154, 154, 154, 154, 154, 154, 154, 154, 154, 154,},
diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h
index 994d44bae4ad3c85b1ab1a3f3937f06d9f70de6e..00dc798cecfbb1ba1cf5fe2822837b8240b06fd2 100644
--- a/source/Lib/CommonLib/Contexts.h
+++ b/source/Lib/CommonLib/Contexts.h
@@ -293,6 +293,12 @@ public:
 #if !JVET_M0464_UNI_MTS
   static const CtxSet   EMTTuIndex;
   static const CtxSet   EMTCuFlag;
+#endif
+#if JVET_M0140_SBT
+  static const CtxSet   SbtFlag;
+  static const CtxSet   SbtQuadFlag;
+  static const CtxSet   SbtHorFlag;
+  static const CtxSet   SbtPosFlag;
 #endif
   static const CtxSet   CrossCompPred;
   static const CtxSet   ChromaQpAdjFlag;
diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp
index 372aa0899671e6ae4f7e0c0cc042dcbaee4cda06..c14c2b68efbfe8bef9961e3c5f53b2195f64a04f 100644
--- a/source/Lib/CommonLib/Slice.cpp
+++ b/source/Lib/CommonLib/Slice.cpp
@@ -1900,6 +1900,10 @@ SPSNext::SPSNext( SPS& sps )
 #else
   , m_IntraEMT                  ( false )
   , m_InterEMT                  ( false )
+#endif
+#if JVET_M0140_SBT
+  , m_SBT                       ( false )
+  , m_MaxSbtSize                ( 0 )     // defined start value: no CU size qualifies for SBT until setMaxSbtSize() is called
 #endif
   , m_Affine                    ( false )
   , m_AffineType                ( false )
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index 6dc1c6302e68ac5879c2f183e8812ce6dbc391ea..f2688be6a1d5e5856199de7a7eb71bb344026251 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -848,6 +848,10 @@ private:
 #else
   bool              m_IntraEMT;                   // 18
   bool              m_InterEMT;                   // 19
+#endif
+#if JVET_M0140_SBT
+  bool              m_SBT;
+  uint8_t           m_MaxSbtSize;
 #endif
   bool              m_Affine;
   bool              m_AffineType;
@@ -928,6 +932,12 @@ public:
   bool      getUseIntraEMT        ()                                      const     { return m_IntraEMT; }
   void      setUseInterEMT        ( bool b )                                        { m_InterEMT = b; }
   bool      getUseInterEMT        ()                                      const     { return m_InterEMT; }
+#endif
+#if JVET_M0140_SBT
+  void      setUseSBT             ( bool b )                                        { m_SBT = b; }
+  bool      getUseSBT             ()                                      const     { return m_SBT; }
+  void      setMaxSbtSize         ( uint8_t val )                                   { m_MaxSbtSize = val; }
+  uint8_t   getMaxSbtSize         ()                                      const     { return m_MaxSbtSize; }
 #endif
   void      setUseGBi             ( bool b )                                        { m_GBi = b; }
   bool      getUseGBi             ()                                      const     { return m_GBi; }
diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp
index a756ed223e23da328717a8a134aad5401ff0c367..af5d1ce22e826e438bc4b6693a4519f9404a3d95 100644
--- a/source/Lib/CommonLib/TrQuant.cpp
+++ b/source/Lib/CommonLib/TrQuant.cpp
@@ -311,6 +311,41 @@ void TrQuant::getTrTypes ( TransformUnit tu, const ComponentID compID, int &trTy
     return;
 }
 #endif
+#if JVET_M0140_SBT
+  if( tu.cu->sbtInfo && compID == COMPONENT_Y )
+  {
+    uint8_t sbtIdx = tu.cu->getSbtIdx();
+    uint8_t sbtPos = tu.cu->getSbtPos();
+
+    if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_VER_QUAD )
+    {
+      assert( tu.lwidth() <= MTS_INTER_MAX_CU_SIZE );
+      if( tu.lheight() > MTS_INTER_MAX_CU_SIZE )
+      {
+        trTypeHor = trTypeVer = DCT2;
+      }
+      else
+      {
+        if( sbtPos == SBT_POS0 )  { trTypeHor = DCT8;  trTypeVer = DST7; }
+        else                      { trTypeHor = DST7;  trTypeVer = DST7; }
+      }
+    }
+    else
+    {
+      assert( tu.lheight() <= MTS_INTER_MAX_CU_SIZE );
+      if( tu.lwidth() > MTS_INTER_MAX_CU_SIZE )
+      {
+        trTypeHor = trTypeVer = DCT2;
+      }
+      else
+      {
+        if( sbtPos == SBT_POS0 )  { trTypeHor = DST7;  trTypeVer = DCT8; }
+        else                      { trTypeHor = DST7;  trTypeVer = DST7; }
+      }
+    }
+    return;
+  }
+#endif
   
 #if JVET_M0464_UNI_MTS
   if ( mtsActivated )
@@ -568,6 +603,15 @@ void TrQuant::transformNxN(TransformUnit &tu, const ComponentID &compID, const Q
   {
     tu.mtsIdx = it->first;
     CoeffBuf tempCoeff( m_mtsCoeffs[tu.mtsIdx], rect );
+#if JVET_M0140_SBT
+    if( tu.noResidual )
+    {
+      int sumAbs = 0;
+      trCosts.push_back( TrCost( sumAbs, pos++ ) );
+      it++;
+      continue;
+    }
+#endif
 
     if( isLuma(compID) && tu.mtsIdx == 1 )
     {
@@ -640,6 +684,15 @@ void TrQuant::transformNxN(TransformUnit &tu, const ComponentID &compID, const Q
   const CPelBuf resiBuf     = cs.getResiBuf(rect);
         CoeffBuf rpcCoeff   = tu.getCoeffs(compID);
 
+#if JVET_M0140_SBT
+  if( tu.noResidual )
+  {
+    uiAbsSum = 0;
+    TU::setCbfAtDepth( tu, compID, tu.depth, uiAbsSum > 0 );
+    return;
+  }
+#endif
+
   RDPCMMode rdpcmMode = RDPCM_OFF;
   rdpcmNxN(tu, compID, cQP, uiAbsSum, rdpcmMode);
 
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 9589707dcb7c3e3142f3e28d38e90243deee805b..0e5a45e29c681ba333720c7a0070689c98748d7c 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -115,6 +115,11 @@
 #endif
 #define JVET_M0502_PRED_MODE_CTX                          1
 
+#define JVET_M0140_SBT                                    1 // Sub-Block transform for Inter blocks
+#if JVET_M0140_SBT
+#define APPLY_SBT_SL_ON_MTS                               1 // apply save & load fast algorithm on inter MTS when SBT is on
+#endif
+
 #define JVET_M0407_IBC_RANGE                              1 // extend IBC search range to some part of left CTU
 
 #define JVET_M0464_UNI_MTS                                1
@@ -420,6 +425,39 @@ enum ISPType
 };
 #endif
 
+#if JVET_M0140_SBT
+enum SbtIdx // split type of the sub-block transform, stored in the low nibble of CodingUnit::sbtInfo
+{
+  SBT_OFF_DCT  = 0, // SBT not used; CodingUnit::initData() resets sbtInfo to this value
+  SBT_VER_HALF = 1, // CU width split into two halves
+  SBT_HOR_HALF = 2, // CU height split into two halves
+  SBT_VER_QUAD = 3, // CU width split into one quarter and three quarters
+  SBT_HOR_QUAD = 4, // CU height split into one quarter and three quarters
+  NUMBER_SBT_IDX,   // upper bound (exclusive) checked by CodingUnit::setSbtIdx()
+  SBT_OFF_MTS, //note: must come after all SBT modes; only used in the fast algorithm to mark that the best mode is inter EMT
+};
+
+enum SbtPos // selects which of the two sub-TUs of an SBT split is not flagged noResidual
+{
+  SBT_POS0 = 0, // sub-TU 1 is flagged noResidual by TransformUnit::checkTuNoResidual()
+  SBT_POS1 = 1, // sub-TU 0 is flagged noResidual by TransformUnit::checkTuNoResidual()
+  NUMBER_SBT_POS // number of positions
+};
+
+enum SbtMode // one value per (SbtIdx, SbtPos) combination, see CU::getSbtMode()
+{
+  SBT_VER_H0 = 0, // SBT_VER_HALF, SBT_POS0
+  SBT_VER_H1 = 1, // SBT_VER_HALF, SBT_POS1
+  SBT_HOR_H0 = 2, // SBT_HOR_HALF, SBT_POS0
+  SBT_HOR_H1 = 3, // SBT_HOR_HALF, SBT_POS1
+  SBT_VER_Q0 = 4, // SBT_VER_QUAD, SBT_POS0
+  SBT_VER_Q1 = 5, // SBT_VER_QUAD, SBT_POS1
+  SBT_HOR_Q0 = 6, // SBT_HOR_QUAD, SBT_POS0
+  SBT_HOR_Q1 = 7, // SBT_HOR_QUAD, SBT_POS1
+  NUMBER_SBT_MODE // number of modes
+};
+#endif
+
 enum RDPCMMode
 {
   RDPCM_OFF             = 0,
diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp
index 0ff1f5a6ab864ba27a7359503eebc6faced203e8..a2f4bc6ecd06ce6905415b7aa95fe22be017a974 100644
--- a/source/Lib/CommonLib/Unit.cpp
+++ b/source/Lib/CommonLib/Unit.cpp
@@ -271,6 +271,9 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other )
   qp                = other.qp;
   chromaQpAdj       = other.chromaQpAdj;
   rootCbf           = other.rootCbf;
+#if JVET_M0140_SBT
+  sbtInfo           = other.sbtInfo;
+#endif
 #if !JVET_M0464_UNI_MTS
   emtFlag           = other.emtFlag;
 #endif
@@ -317,6 +320,9 @@ void CodingUnit::initData()
   qp                = 0;
   chromaQpAdj       = 0;
   rootCbf           = true;
+#if JVET_M0140_SBT
+  sbtInfo           = 0;
+#endif
 #if !JVET_M0464_UNI_MTS
   emtFlag           = 0;
 #endif
@@ -344,6 +350,70 @@ void CodingUnit::initData()
 #endif
 }
 
+#if JVET_M0140_SBT
+const uint8_t CodingUnit::checkAllowedSbt() const
+{
+  // Result is a bit mask: bit n is set when the split type with SbtIdx value n may be
+  // used for this CU. A result of 0 means that SBT is not available at all for the CU,
+  // either because of the SPS setting, the prediction mode or the CU size.
+
+  // tool not enabled in the SPS
+  if( !slice->getSPS()->getSpsNext().getUseSBT() )
+  {
+    return 0;
+  }
+
+  // not for intra CUs and not when the first PU has mhIntraFlag set
+  if( predMode == MODE_INTRA || firstPU->mhIntraFlag )
+  {
+    return 0;
+  }
+
+  // size limits: upper bound comes from the SPS, lower bound from MIN_CU_LOG2
+  const int maxSide = cs->sps->getSpsNext().getMaxSbtSize();
+  const int minSide = 1 << ( MIN_CU_LOG2 + 1 );
+  const int cuW     = lwidth();
+  const int cuH     = lheight();
+
+  if( cuW > maxSide || cuH > maxSide )
+  {
+    return 0;
+  }
+
+  // half splits need at least minSide along the split direction,
+  // quarter splits need twice as much
+  const bool verHalf = cuW >= minSide;
+  const bool horHalf = cuH >= minSide;
+  const bool verQuad = cuW >= ( minSide << 1 );
+  const bool horQuad = cuH >= ( minSide << 1 );
+
+  // bit 0 (SBT_OFF_DCT) is never set
+  uint8_t sbtMask = 0;
+  if( verHalf ) sbtMask |= 1 << SBT_VER_HALF;
+  if( horHalf ) sbtMask |= 1 << SBT_HOR_HALF;
+  if( verQuad ) sbtMask |= 1 << SBT_VER_QUAD;
+  if( horQuad ) sbtMask |= 1 << SBT_HOR_QUAD;
+
+  return sbtMask;
+}
+
+uint8_t CodingUnit::getSbtTuSplit() const
+{
+  // translate the SBT index/position of the CU into the matching SBT_*_SPLIT value;
+  // the POS1 variant of a split is the POS0 variant plus one
+  const uint8_t posOffset = ( getSbtPos() == SBT_POS0 ) ? 0 : 1;
+  uint8_t       tuSplit   = 0;
+  switch( getSbtIdx() )
+  {
+  case SBT_VER_HALF: tuSplit = SBT_VER_HALF_POS0_SPLIT + posOffset; break;
+  case SBT_HOR_HALF: tuSplit = SBT_HOR_HALF_POS0_SPLIT + posOffset; break;
+  case SBT_VER_QUAD: tuSplit = SBT_VER_QUAD_POS0_SPLIT + posOffset; break;
+  case SBT_HOR_QUAD: tuSplit = SBT_HOR_QUAD_POS0_SPLIT + posOffset; break;
+  default: assert( 0 );  break;
+  }
+  assert( tuSplit >= SBT_VER_HALF_POS0_SPLIT && tuSplit <= SBT_HOR_QUAD_POS1_SPLIT );
+  return tuSplit;
+}
+#endif
 
 // ---------------------------------------------------------------------------
 // prediction unit method definitions
@@ -594,6 +664,9 @@ void TransformUnit::initData()
 #else
   emtIdx             = 0;
 #endif
+#if JVET_M0140_SBT
+  noResidual         = false;
+#endif
 #if JVET_M0427_INLOOP_RESHAPER
   m_chromaResScaleInv = 0;
 #endif
@@ -636,6 +709,9 @@ TransformUnit& TransformUnit::operator=(const TransformUnit& other)
   mtsIdx             = other.mtsIdx;
 #else
   emtIdx             = other.emtIdx;
+#endif
+#if JVET_M0140_SBT
+  noResidual         = other.noResidual;
 #endif
   return *this;
 }
@@ -667,6 +743,9 @@ void TransformUnit::copyComponentFrom(const TransformUnit& other, const Componen
     emtIdx         = other.emtIdx;
   }
 #endif
+#if JVET_M0140_SBT
+  noResidual       = other.noResidual;
+#endif
 }
 
        CoeffBuf TransformUnit::getCoeffs(const ComponentID id)       { return  CoeffBuf(m_coeffs[id], blocks[id]); }
@@ -674,6 +753,21 @@ const CCoeffBuf TransformUnit::getCoeffs(const ComponentID id) const { return CC
 
        PelBuf   TransformUnit::getPcmbuf(const ComponentID id)       { return  PelBuf  (m_pcmbuf[id], blocks[id]); }
 const CPelBuf   TransformUnit::getPcmbuf(const ComponentID id) const { return CPelBuf  (m_pcmbuf[id], blocks[id]); }
+
+#if JVET_M0140_SBT
+void TransformUnit::checkTuNoResidual( unsigned idx )
+{
+  // nothing to flag when the CU does not use SBT
+  if( CU::getSbtIdx( cu->sbtInfo ) == SBT_OFF_DCT )
+  {
+    return;
+  }
+  // idx is the sub-TU index; the sub-TU opposite to the SBT position has no residual
+  const uint8_t sbtPos    = CU::getSbtPos( cu->sbtInfo );
+  const bool    zeroedOut = ( idx == 1 && sbtPos == SBT_POS0 ) || ( idx == 0 && sbtPos == SBT_POS1 );
+  if( zeroedOut ) noResidual = true;
+}
+#endif
 #if JVET_M0427_INLOOP_RESHAPER
 int          TransformUnit::getChromaAdj()                     const { return m_chromaResScaleInv; }
 void         TransformUnit::setChromaAdj(int i)                      { m_chromaResScaleInv = i;    }
diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h
index 78624986944a2e0c34f16755c4d3daadf1ee6705..f25184cb118f0a6a7bb1df126884376dde65ca81 100644
--- a/source/Lib/CommonLib/Unit.h
+++ b/source/Lib/CommonLib/Unit.h
@@ -308,6 +308,9 @@ struct CodingUnit : public UnitArea
   bool           ipcm;
   uint8_t          imv;
   bool           rootCbf;
+#if JVET_M0140_SBT
+  uint8_t        sbtInfo;
+#endif
 #if HEVC_TILES_WPP
   uint32_t           tileIdx;
 #endif
@@ -353,6 +356,14 @@ struct CodingUnit : public UnitArea
   int64_t cacheId;
   bool    cacheUsed;
 #endif
+#if JVET_M0140_SBT
+  const uint8_t     getSbtIdx() const { assert( ( ( sbtInfo >> 0 ) & 0xf ) < NUMBER_SBT_IDX ); return ( sbtInfo >> 0 ) & 0xf; } // SBT index: low 4 bits of sbtInfo
+  const uint8_t     getSbtPos() const { return ( sbtInfo >> 4 ) & 0x3; } // SBT position: bits 4-5 of sbtInfo
+  void              setSbtIdx( uint8_t idx ) { CHECK( idx >= NUMBER_SBT_IDX, "sbt_idx wrong" ); sbtInfo = ( idx << 0 ) + ( sbtInfo & 0xf0 ); } // replaces the low 4 bits, keeps the upper 4
+  void              setSbtPos( uint8_t pos ) { CHECK( pos >= 4, "sbt_pos wrong" ); sbtInfo = ( pos << 4 ) + ( sbtInfo & 0xcf ); } // replaces bits 4-5, keeps all other bits
+  uint8_t           getSbtTuSplit() const; // SBT_*_SPLIT value matching the current SBT index and position
+  const uint8_t     checkAllowedSbt() const; // bit mask of usable SbtIdx values (bit n = SbtIdx n), 0 if SBT cannot be used
+#endif
 };
 
 // ---------------------------------------------------------------------------
@@ -459,6 +470,9 @@ struct TransformUnit : public UnitArea
   uint8_t        mtsIdx;
 #else
   uint8_t        emtIdx;
+#endif
+#if JVET_M0140_SBT
+  bool           noResidual;
 #endif
   uint8_t        cbf        [ MAX_NUM_TBLOCKS ];
   RDPCMMode    rdpcm        [ MAX_NUM_TBLOCKS ];
@@ -483,6 +497,9 @@ struct TransformUnit : public UnitArea
 
   TransformUnit& operator=(const TransformUnit& other);
   void copyComponentFrom  (const TransformUnit& other, const ComponentID compID);
+#if JVET_M0140_SBT
+  void checkTuNoResidual( unsigned idx );
+#endif
 
          CoeffBuf getCoeffs(const ComponentID id);
   const CCoeffBuf getCoeffs(const ComponentID id) const;
diff --git a/source/Lib/CommonLib/UnitPartitioner.cpp b/source/Lib/CommonLib/UnitPartitioner.cpp
index 39be3bb0cc5c48b67b059e1198dec2899d0ffd3a..35a17fc19fe3ff5550d0305858bf67a336eb0ed6 100644
--- a/source/Lib/CommonLib/UnitPartitioner.cpp
+++ b/source/Lib/CommonLib/UnitPartitioner.cpp
@@ -255,6 +255,18 @@ void QTBTPartitioner::splitCurrArea( const PartSplit split, const CodingStructur
   case TU_MAX_TR_SPLIT:
     m_partStack.push_back( PartLevel( split, PartitionerImpl::getMaxTuTiling( currArea(), cs ) ) );
     break;
+#if JVET_M0140_SBT
+  case SBT_VER_HALF_POS0_SPLIT:
+  case SBT_VER_HALF_POS1_SPLIT:
+  case SBT_HOR_HALF_POS0_SPLIT:
+  case SBT_HOR_HALF_POS1_SPLIT:
+  case SBT_VER_QUAD_POS0_SPLIT:
+  case SBT_VER_QUAD_POS1_SPLIT:
+  case SBT_HOR_QUAD_POS0_SPLIT:
+  case SBT_HOR_QUAD_POS1_SPLIT:
+    m_partStack.push_back( PartLevel( split, PartitionerImpl::getSbtTuTiling( currArea(), cs, split ) ) );
+    break;
+#endif
   default:
     THROW( "Unknown split mode" );
     break;
@@ -269,6 +281,12 @@ void QTBTPartitioner::splitCurrArea( const PartSplit split, const CodingStructur
   {
     currTrDepth++;
   }
+#if JVET_M0140_SBT
+  else if( split >= SBT_VER_HALF_POS0_SPLIT && split <= SBT_HOR_QUAD_POS1_SPLIT )
+  {
+    currTrDepth++;
+  }
+#endif
   else
   {
     currTrDepth = 0;
@@ -418,6 +436,18 @@ bool QTBTPartitioner::canSplit( const PartSplit split, const CodingStructure &cs
   case TU_MAX_TR_SPLIT:
     return area.width > maxTrSize || area.height > maxTrSize;
     break;
+#if JVET_M0140_SBT
+  case SBT_VER_HALF_POS0_SPLIT:
+  case SBT_VER_HALF_POS1_SPLIT:
+  case SBT_HOR_HALF_POS0_SPLIT:
+  case SBT_HOR_HALF_POS1_SPLIT:
+  case SBT_VER_QUAD_POS0_SPLIT:
+  case SBT_VER_QUAD_POS1_SPLIT:
+  case SBT_HOR_QUAD_POS0_SPLIT:
+  case SBT_HOR_QUAD_POS1_SPLIT:
+    return currTrDepth == 0;
+    break;
+#endif
 #if JVET_M0421_SPLIT_SIG
   case CU_QUAD_SPLIT:
     return canQt;
@@ -622,6 +652,13 @@ void QTBTPartitioner::exitCurrSplit()
     CHECK( currTrDepth == 0, "TR depth is '0', although a TU split was performed" );
     currTrDepth--;
   }
+#if JVET_M0140_SBT
+  else if( currSplit >= SBT_VER_HALF_POS0_SPLIT && currSplit <= SBT_HOR_QUAD_POS1_SPLIT )
+  {
+    CHECK( currTrDepth == 0, "TR depth is '0', although a TU split was performed" );
+    currTrDepth--;
+  }
+#endif
   else
   {
     CHECK( currTrDepth > 0, "RQT found with QTBT partitioner" );
@@ -1111,3 +1148,64 @@ Partitioning PartitionerImpl::getMaxTuTiling( const UnitArea &cuArea, const Codi
 
   return ret;
 }
+
+#if JVET_M0140_SBT
+// Tiles a CU into the two TUs of an SBT split. All factors below are expressed in quarters
+// of the CU size, i.e. result = ( size * factor ) >> 2. The cs argument is not used.
+Partitioning PartitionerImpl::getSbtTuTiling( const UnitArea& cuArea, const CodingStructure &cs, const PartSplit splitType )
+{
+  assert( splitType >= SBT_VER_HALF_POS0_SPLIT && splitType <= SBT_HOR_QUAD_POS1_SPLIT );
+
+  const int  numTiles = 2;
+  const bool isQuad   = splitType >= SBT_VER_QUAD_POS0_SPLIT;
+  const bool isHor    = isQuad ? ( splitType == SBT_HOR_QUAD_POS0_SPLIT || splitType == SBT_HOR_QUAD_POS1_SPLIT )
+                               : ( splitType == SBT_HOR_HALF_POS0_SPLIT || splitType == SBT_HOR_HALF_POS1_SPLIT );
+  // quarter-split variants belonging to the selected direction
+  const PartSplit quadPos0 = isHor ? SBT_HOR_QUAD_POS0_SPLIT : SBT_VER_QUAD_POS0_SPLIT;
+  const PartSplit quadPos1 = isHor ? SBT_HOR_QUAD_POS1_SPLIT : SBT_VER_QUAD_POS1_SPLIT;
+
+  // both tiles start out as copies of the complete CU area
+  Partitioning ret;
+  ret.resize( numTiles, cuArea );
+
+  for( int i = 0; i < numTiles; i++ )
+  {
+    // extent and start of tile i along the split direction
+    int sizeFactor, offsetFactor;
+    if( isQuad )
+    {
+      // POS0: the quarter-sized tile comes first; POS1: the quarter-sized tile comes last
+      const bool isQuarterTile = ( i == 0 && splitType == quadPos0 ) || ( i == 1 && splitType == quadPos1 );
+      sizeFactor   = isQuarterTile ? 1 : 3;
+      offsetFactor = ( i == 0 ) ? 0 : ( splitType == quadPos0 ? 1 : 3 );
+    }
+    else
+    {
+      sizeFactor   = 2;
+      offsetFactor = ( i == 0 ) ? 0 : 2;
+    }
+
+    // the direction that is not split keeps the full extent and a zero offset
+    const int widthFactor   = isHor ? 4 : sizeFactor;
+    const int heightFactor  = isHor ? sizeFactor : 4;
+    const int xOffsetFactor = isHor ? 0 : offsetFactor;
+    const int yOffsetFactor = isHor ? offsetFactor : 0;
+
+    for( CompArea &comp : ret[i].blocks )
+    {
+      if( !comp.valid() )
+      {
+        continue;
+      }
+
+      // positions first: they are derived from the not yet resized component
+      comp.x     += ( comp.width  * xOffsetFactor ) >> 2;
+      comp.y     += ( comp.height * yOffsetFactor ) >> 2;
+      comp.width  = ( comp.width  * widthFactor  ) >> 2;
+      comp.height = ( comp.height * heightFactor ) >> 2;
+    }
+  }
+
+  return ret;
+}
+#endif
diff --git a/source/Lib/CommonLib/UnitPartitioner.h b/source/Lib/CommonLib/UnitPartitioner.h
index 5cf07f530632290737dedb4072f041d1f8804bb8..4b49746d13fdf2c1097661acf7886d942fd9ae22 100644
--- a/source/Lib/CommonLib/UnitPartitioner.h
+++ b/source/Lib/CommonLib/UnitPartitioner.h
@@ -71,6 +71,16 @@ enum PartSplit
   TU_NO_ISP,
   TU_1D_HORZ_SPLIT,
   TU_1D_VERT_SPLIT,
+#endif
+#if JVET_M0140_SBT
+  SBT_VER_HALF_POS0_SPLIT,
+  SBT_VER_HALF_POS1_SPLIT,
+  SBT_HOR_HALF_POS0_SPLIT,
+  SBT_HOR_HALF_POS1_SPLIT,
+  SBT_VER_QUAD_POS0_SPLIT,
+  SBT_VER_QUAD_POS1_SPLIT,
+  SBT_HOR_QUAD_POS0_SPLIT,
+  SBT_HOR_QUAD_POS1_SPLIT,
 #endif
   NUM_PART_SPLIT,
   CU_MT_SPLIT             = 1000, ///< dummy element to indicate the MT (multi-type-tree) split
@@ -219,6 +229,9 @@ namespace PartitionerImpl
 #if JVET_M0102_INTRA_SUBPARTITIONS
   void    getTUIntraSubPartitions( Partitioning &sub, const UnitArea &tuArea, const CodingStructure &cs, const PartSplit splitType );
 #endif
+#if JVET_M0140_SBT
+  Partitioning getSbtTuTiling    ( const UnitArea& curArea, const CodingStructure &cs, const PartSplit splitType );
+#endif
 };
 
 #endif
diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp
index e54d24f0e8e1b175571ece392cbbb44e05f2a660..8768b5ade62a1a820114a34c3bfe6bd6c0c6adfc 100644
--- a/source/Lib/CommonLib/UnitTools.cpp
+++ b/source/Lib/CommonLib/UnitTools.cpp
@@ -5502,6 +5502,121 @@ int CU::getMaxNeighboriMVCandNum( const CodingStructure& cs, const Position& pos
   return maxImvNumCand;
 }
 
+#if JVET_M0140_SBT
+uint8_t CU::getSbtInfo( uint8_t idx, uint8_t pos )
+{
+  return idx + ( pos << 4 ); // packed form: index in the low bits, position shifted up by 4
+}
+
+uint8_t CU::getSbtIdx( const uint8_t sbtInfo )
+{
+  return sbtInfo & 0xf; // the index occupies the low 4 bits of the packed info
+}
+
+uint8_t CU::getSbtPos( const uint8_t sbtInfo )
+{
+  return ( sbtInfo & 0x30 ) >> 4; // the position occupies bits 4-5 of the packed info
+}
+
+uint8_t CU::getSbtMode( uint8_t sbtIdx, uint8_t sbtPos )
+{
+  // SbtMode value = first mode of the split type + position;
+  // an unknown split type leaves the result at 0 (and trips the assert when enabled)
+  uint8_t sbtMode = 0;
+
+  if     ( sbtIdx == SBT_VER_HALF ) sbtMode = SBT_VER_H0 + sbtPos;
+  else if( sbtIdx == SBT_HOR_HALF ) sbtMode = SBT_HOR_H0 + sbtPos;
+  else if( sbtIdx == SBT_VER_QUAD ) sbtMode = SBT_VER_Q0 + sbtPos;
+  else if( sbtIdx == SBT_HOR_QUAD ) sbtMode = SBT_HOR_Q0 + sbtPos;
+  else                              assert( 0 );
+
+  assert( sbtMode < NUMBER_SBT_MODE );
+  return sbtMode;
+}
+
+uint8_t CU::getSbtIdxFromSbtMode( uint8_t sbtMode )
+{
+  // SbtMode lists the two positions of every split type next to each other
+  switch( sbtMode )
+  {
+  case SBT_VER_H0:
+  case SBT_VER_H1: return SBT_VER_HALF;
+  case SBT_HOR_H0:
+  case SBT_HOR_H1: return SBT_HOR_HALF;
+  case SBT_VER_Q0:
+  case SBT_VER_Q1: return SBT_VER_QUAD;
+  case SBT_HOR_Q0:
+  case SBT_HOR_Q1: return SBT_HOR_QUAD;
+  default:         assert( 0 ); return 0;
+  }
+}
+
+uint8_t CU::getSbtPosFromSbtMode( uint8_t sbtMode )
+{
+  // the position is the distance to the first (position 0) mode of the same split type
+  switch( sbtMode )
+  {
+  case SBT_VER_H0:
+  case SBT_VER_H1: return sbtMode - SBT_VER_H0;
+  case SBT_HOR_H0:
+  case SBT_HOR_H1: return sbtMode - SBT_HOR_H0;
+  case SBT_VER_Q0:
+  case SBT_VER_Q1: return sbtMode - SBT_VER_Q0;
+  case SBT_HOR_Q0:
+  case SBT_HOR_Q1: return sbtMode - SBT_HOR_Q0;
+  default:         assert( 0 ); return 0;
+  }
+}
+
+uint8_t CU::targetSbtAllowed( uint8_t sbtIdx, uint8_t sbtAllowed )
+{
+  // sbtAllowed is a bit mask indexed by SbtIdx; only the four split types may be queried
+  const bool knownType = sbtIdx >= SBT_VER_HALF && sbtIdx <= SBT_HOR_QUAD;
+  if( !knownType )
+  {
+    CHECK( 1, "unknown SBT type" );
+    return 0;
+  }
+
+  // every split type owns the bit whose number equals its SbtIdx value
+  return ( sbtAllowed >> sbtIdx ) & 0x1;
+}
+
+uint8_t CU::numSbtModeRdo( uint8_t sbtAllowed )
+{
+  // count the allowed split types separately for the half and the quarter splits
+  const int numHalfTypes = targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) + targetSbtAllowed( SBT_HOR_HALF, sbtAllowed );
+  const int numQuadTypes = targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) + targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed );
+  // two modes per allowed split type, limited to SBT_NUM_RDO for each of the two groups
+  const int halfModes = std::min( SBT_NUM_RDO, numHalfTypes << 1 );
+  const int quadModes = std::min( SBT_NUM_RDO, numQuadTypes << 1 );
+  return halfModes + quadModes;
+}
+
+bool CU::isMtsMode( const uint8_t sbtInfo )
+{
+  return ( sbtInfo & 0xf ) == SBT_OFF_MTS; // index field (low 4 bits) holds the SBT_OFF_MTS marker
+}
+
+bool CU::isSbtMode( const uint8_t sbtInfo )
+{
+  const uint8_t idx = sbtInfo & 0xf; // index field: low 4 bits of the packed info
+  return !( idx < SBT_VER_HALF || idx > SBT_HOR_QUAD ); // true for the four split types only
+}
+
+bool CU::isSameSbtSize( const uint8_t sbtInfo1, const uint8_t sbtInfo2 )
+{
+  // NOTE(review): despite their names both arguments are decoded as SbtMode values - confirm against the callers
+  const uint8_t idxA  = getSbtIdxFromSbtMode( sbtInfo1 );
+  const uint8_t idxB  = getSbtIdxFromSbtMode( sbtInfo2 );
+  const bool    halfA = idxA == SBT_HOR_HALF || idxA == SBT_VER_HALF;
+  const bool    halfB = idxB == SBT_HOR_HALF || idxB == SBT_VER_HALF;
+  const bool    quadA = idxA == SBT_HOR_QUAD || idxA == SBT_VER_QUAD;
+  const bool    quadB = idxB == SBT_HOR_QUAD || idxB == SBT_VER_QUAD;
+  return ( halfA && halfB ) || ( quadA && quadB ); // same size class: both half or both quarter splits
+}
+#endif
+
 bool CU::isGBiIdxCoded( const CodingUnit &cu )
 {
   if( cu.cs->sps->getSpsNext().getUseGBi() == false )
@@ -5657,6 +5772,9 @@ bool TU::isTSAllowed(const TransformUnit &tu, const ComponentID compID)
 
   SizeType transformSkipMaxSize = 1 << maxSize;
   tsAllowed &= tu.lwidth() <= transformSkipMaxSize && tu.lheight() <= transformSkipMaxSize;
+#if JVET_M0140_SBT
+  tsAllowed &= !tu.cu->sbtInfo;
+#endif
 
   return tsAllowed;
 }
@@ -5670,6 +5788,9 @@ bool TU::isMTSAllowed(const TransformUnit &tu, const ComponentID compID)
   mtsAllowed &= ( tu.lwidth() <= maxSize && tu.lheight() <= maxSize );
 #if JVET_M0102_INTRA_SUBPARTITIONS
   mtsAllowed &= !tu.cu->ispMode;
+#endif
+#if JVET_M0140_SBT
+  mtsAllowed &= !tu.cu->sbtInfo;
 #endif
   return mtsAllowed;
 }
diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h
index 5cdb1fb6f7f9f9b227382a39dab373a00ecbd759..3414c2c5511d1fd7f68af90da51adc59e6aabb5a 100644
--- a/source/Lib/CommonLib/UnitTools.h
+++ b/source/Lib/CommonLib/UnitTools.h
@@ -115,7 +115,19 @@ namespace CU
   int   getMaxNeighboriMVCandNum      (const CodingStructure& cs, const Position& pos);
   void  resetMVDandMV2Int             (      CodingUnit& cu, InterPrediction *interPred );
 
-
+#if JVET_M0140_SBT
+  uint8_t getSbtInfo                  (uint8_t idx, uint8_t pos);
+  uint8_t getSbtIdx                   (const uint8_t sbtInfo);
+  uint8_t getSbtPos                   (const uint8_t sbtInfo);
+  uint8_t getSbtMode                  (const uint8_t sbtIdx, const uint8_t sbtPos);
+  uint8_t getSbtIdxFromSbtMode        (const uint8_t sbtMode);
+  uint8_t getSbtPosFromSbtMode        (const uint8_t sbtMode);
+  uint8_t targetSbtAllowed            (uint8_t idx, uint8_t sbtAllowed);
+  uint8_t numSbtModeRdo               (uint8_t sbtAllowed);
+  bool    isMtsMode                   (const uint8_t sbtInfo); // getSbtIdx( sbtInfo ) == SBT_OFF_MTS
+  bool    isSbtMode                   (const uint8_t sbtInfo); // getSbtIdx( sbtInfo ) within [SBT_VER_HALF, SBT_HOR_QUAD]
+  bool    isSameSbtSize               (const uint8_t sbtInfo1, const uint8_t sbtInfo2); // both half or both quad; arguments are decoded as SBT modes
+#endif
 }
 // PU tools
 namespace PU
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index 3ff8a9a798246a5dac50c56d1faa2795063c08fc..78321e38f975acf3910ac8556dba5c05c4f0dcd9 100644
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -1488,6 +1488,12 @@ void CABACReader::cu_residual( CodingUnit& cu, Partitioner &partitioner, CUCtx&
     {
       cu.rootCbf = true;
     }
+#if JVET_M0140_SBT
+    if( cu.rootCbf )
+    {
+      sbt_mode( cu );
+    }
+#endif
     if( !cu.rootCbf )
     {
       TransformUnit& tu = cu.cs->addTU(cu, partitioner.chType);
@@ -1528,6 +1534,64 @@ void CABACReader::rqt_root_cbf( CodingUnit& cu )
   DTRACE( g_trace_ctx, D_SYNTAX, "rqt_root_cbf() ctx=0 root_cbf=%d pos=(%d,%d)\n", cu.rootCbf ? 1 : 0, cu.lumaPos().x, cu.lumaPos().y );
 }
 
+#if JVET_M0140_SBT
+// Parses the SBT syntax of one CU: on/off bin, half/quad bin, direction bin and position bin.
+// Bins whose value follows from the set of permitted SBT types are not read but inferred.
+// The decoded type and position are stored in the CU via setSbtIdx() / setSbtPos().
+void CABACReader::sbt_mode( CodingUnit& cu )
+{
+  const uint8_t allowedSet = cu.checkAllowedSbt(); // zero: no SBT type permitted, nothing to parse
+  if( allowedSet == 0 )
+  {
+    return;
+  }
+
+  const SizeType width  = cu.lwidth();
+  const SizeType height = cu.lheight();
+
+  RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__SBT_MODE );
+  // 1st bin: SBT on/off; context 1 when lwidth * lheight <= 256, context 0 otherwise
+  const uint8_t flagCtx = ( width * height <= 256 ) ? 1 : 0;
+  if( !m_BinDecoder.decodeBin( Ctx::SbtFlag( flagCtx ) ) )
+  {
+    return; // SBT not used: neither setSbtIdx() nor setSbtPos() is called
+  }
+
+  // which of the four split types may be signalled for this CU
+  const bool verHalfOk = CU::targetSbtAllowed( SBT_VER_HALF, allowedSet ) != 0;
+  const bool horHalfOk = CU::targetSbtAllowed( SBT_HOR_HALF, allowedSet ) != 0;
+  const bool verQuadOk = CU::targetSbtAllowed( SBT_VER_QUAD, allowedSet ) != 0;
+  const bool horQuadOk = CU::targetSbtAllowed( SBT_HOR_QUAD, allowedSet ) != 0;
+
+  // 2nd bin: half vs. quad, read only if at least one type of each size is allowed
+  const bool halfOk = horHalfOk || verHalfOk;
+  const bool quadOk = horQuadOk || verQuadOk;
+  bool isQuad = false; // value used when the bin is absent
+  if( halfOk && quadOk )
+  {
+    isQuad = m_BinDecoder.decodeBin( Ctx::SbtQuadFlag( 0 ) );
+  }
+
+  // 3rd bin: direction, read only if both directions are allowed for the selected size
+  const bool verOk = isQuad ? verQuadOk : verHalfOk;
+  const bool horOk = isQuad ? horQuadOk : horHalfOk;
+  bool isHor = horOk; // value used when the bin is absent
+  if( verOk && horOk )
+  {
+    const uint8_t dirCtx = ( width == height ) ? 0 : ( width < height ? 1 : 2 );
+    isHor = m_BinDecoder.decodeBin( Ctx::SbtHorFlag( dirCtx ) );
+  }
+  cu.setSbtIdx( isHor ? ( isQuad ? SBT_HOR_QUAD : SBT_HOR_HALF )
+                      : ( isQuad ? SBT_VER_QUAD : SBT_VER_HALF ) );
+
+  // 4th bin: position, always read once SBT is on
+  const bool isPos1 = m_BinDecoder.decodeBin( Ctx::SbtPosFlag( 0 ) );
+  cu.setSbtPos( isPos1 ? SBT_POS1 : SBT_POS0 );
+
+  DTRACE( g_trace_ctx, D_SYNTAX, "sbt_mode() pos=(%d,%d) sbtInfo=%d\n", cu.lx(), cu.ly(), (int)cu.sbtInfo );
+}
+#endif
+
 
 bool CABACReader::end_of_ctu( CodingUnit& cu, CUCtx& cuCtx )
 {
@@ -2260,6 +2324,11 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner,
 void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner, CUCtx& cuCtx, ChromaCbfs& chromaCbfs )
 #endif
 {
+#if JVET_M0140_SBT
+  ChromaCbfs chromaCbfsLastDepth;
+  chromaCbfsLastDepth.Cb        = chromaCbfs.Cb;
+  chromaCbfsLastDepth.Cr        = chromaCbfs.Cr;
+#endif
   const UnitArea& area          = partitioner.currArea();
 
   CodingUnit&     cu            = *cs.getCU( area.blocks[partitioner.chType], partitioner.chType );
@@ -2272,6 +2341,12 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner,
   bool split = false;
 
   split = partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
+#if JVET_M0140_SBT
+  if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) )
+  {
+    split = true;
+  }
+#endif
 
 #if JVET_M0102_INTRA_SUBPARTITIONS
   if( !split && cu.ispMode )
@@ -2294,19 +2369,31 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner,
         const int cbfDepth = chromaCbfISP ? trDepth - 1 : trDepth;
         if( chromaCbfs.Cb )
         {
+#if JVET_M0140_SBT
+          if( !( cu.sbtInfo && trDepth == 1 ) )
+#endif
           chromaCbfs.Cb &= cbf_comp( cs, area.blocks[COMPONENT_Cb], cbfDepth );
         }
         if( chromaCbfs.Cr )
         {
+#if JVET_M0140_SBT
+          if( !( cu.sbtInfo && trDepth == 1 ) )
+#endif
           chromaCbfs.Cr &= cbf_comp( cs, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb );
         }
 #else
         if( chromaCbfs.Cb )
         {
+#if JVET_M0140_SBT
+          if( !( cu.sbtInfo && trDepth == 1 ) )
+#endif
           chromaCbfs.Cb &= cbf_comp( cs, area.blocks[COMPONENT_Cb], trDepth );
         }
         if( chromaCbfs.Cr )
         {
+#if JVET_M0140_SBT
+          if( !( cu.sbtInfo && trDepth == 1 ) )
+#endif
           chromaCbfs.Cr &= cbf_comp( cs, area.blocks[COMPONENT_Cr], trDepth, chromaCbfs.Cb );
         }
 #endif
@@ -2343,6 +2430,12 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner,
       {
         partitioner.splitCurrArea( ispType, cs );
       }
+#endif
+#if JVET_M0140_SBT
+      else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) )
+      {
+        partitioner.splitCurrArea( PartSplit( cu.getSbtTuSplit() ), cs );
+      }
 #endif
       else
         THROW( "Implicit TU split not available!" );
@@ -2403,6 +2496,11 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner,
   {
     TransformUnit &tu = cs.addTU( CS::getArea( cs, area, partitioner.chType ), partitioner.chType );
     unsigned numBlocks = ::getNumberValidTBlocks( *cs.pcv );
+#if JVET_M0140_SBT
+    tu.checkTuNoResidual( partitioner.currPartIdx() );
+    chromaCbfs.Cb &= !tu.noResidual;
+    chromaCbfs.Cr &= !tu.noResidual;
+#endif
 
     for( unsigned compID = COMPONENT_Y; compID < numBlocks; compID++ )
     {
@@ -2421,6 +2519,17 @@ void CABACReader::transform_tree( CodingStructure &cs, Partitioner &partitioner,
       {
         TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, 1 );
       }
+#if JVET_M0140_SBT
+      else if( cu.sbtInfo && tu.noResidual )
+      {
+        TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, 0 );
+      }
+      else if( cu.sbtInfo && !chromaCbfsLastDepth.sigChroma( area.chromaFormat ) )
+      {
+        assert( !tu.noResidual );
+        TU::setCbfAtDepth( tu, COMPONENT_Y, trDepth, 1 );
+      }
+#endif
       else
       {
 #if JVET_M0102_INTRA_SUBPARTITIONS
diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h
index 941be20b9b125d72668365312bc8fb603a9a2a49..031e01562be66bd5fe37d5420bd557e0013a2ba5 100644
--- a/source/Lib/DecoderLib/CABACReader.h
+++ b/source/Lib/DecoderLib/CABACReader.h
@@ -96,6 +96,9 @@ public:
   void        intra_chroma_pred_mode    ( PredictionUnit&               pu );
   void        cu_residual               ( CodingUnit&                   cu,     Partitioner&    pm,       CUCtx& cuCtx );
   void        rqt_root_cbf              ( CodingUnit&                   cu );
+#if JVET_M0140_SBT
+  void        sbt_mode                  ( CodingUnit&                   cu );
+#endif
   bool        end_of_ctu                ( CodingUnit&                   cu,     CUCtx&          cuCtx );
 
   // prediction unit (clause 7.3.8.6)
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index 742391b504a127a88f964f863a22cd36f7fe5657..a94f9710d62db7b0f034d2c77d1f5d7b15628ff0 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -815,6 +815,13 @@ void HLSyntaxReader::parseSPSNext( SPSNext& spsNext, const bool usePCM )
   }
 #endif
 
+#if JVET_M0140_SBT
+  READ_FLAG( symbol,    "sbt_enable_flag" );                        spsNext.setUseSBT                 ( symbol != 0 );
+  if( spsNext.getUseSBT() )
+  {
+    READ_FLAG( symbol,  "max_sbt_size_64_flag" );                   spsNext.setMaxSbtSize             ( symbol ? 64 : 32 );
+  }
+#endif
   READ_FLAG( symbol,    "affine_flag" );                            spsNext.setUseAffine              ( symbol != 0 );
   if ( spsNext.getUseAffine() )
   {
diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp
index 85f257b789db7f4f4a633f9b270623627202681a..84c431da75e0f0597ee3327b78f60bc1bbf71f57 100644
--- a/source/Lib/EncoderLib/CABACWriter.cpp
+++ b/source/Lib/EncoderLib/CABACWriter.cpp
@@ -1318,6 +1318,12 @@ void CABACWriter::cu_residual( const CodingUnit& cu, Partitioner& partitioner, C
     {
       rqt_root_cbf( cu );
     }
+#if JVET_M0140_SBT
+    if( cu.rootCbf )
+    {
+      sbt_mode( cu );
+    }
+#endif
 
     if( !cu.rootCbf )
     {
@@ -1349,6 +1355,64 @@ void CABACWriter::rqt_root_cbf( const CodingUnit& cu )
   DTRACE( g_trace_ctx, D_SYNTAX, "rqt_root_cbf() ctx=0 root_cbf=%d pos=(%d,%d)\n", cu.rootCbf ? 1 : 0, cu.lumaPos().x, cu.lumaPos().y );
 }
 
+#if JVET_M0140_SBT
+void CABACWriter::sbt_mode( const CodingUnit& cu )
+{
+  const uint8_t allowedSet = cu.checkAllowedSbt(); // zero: no SBT type permitted, nothing to write
+  if( allowedSet == 0 )
+  {
+    return;
+  }
+
+  const SizeType width  = cu.lwidth();
+  const SizeType height = cu.lheight();
+  const uint8_t  idx    = cu.getSbtIdx();
+  const uint8_t  pos    = cu.getSbtPos();
+
+  // 1st bin: SBT on/off; context 1 when lwidth * lheight <= 256, context 0 otherwise
+  const bool    isSbt   = ( cu.sbtInfo != 0 );
+  const uint8_t flagCtx = ( width * height <= 256 ) ? 1 : 0;
+  m_BinEncoder.encodeBin( isSbt, Ctx::SbtFlag( flagCtx ) );
+  if( !isSbt )
+  {
+    return;
+  }
+
+  const bool isQuad = ( idx == SBT_HOR_QUAD || idx == SBT_VER_QUAD );
+  const bool isHor  = ( idx == SBT_HOR_HALF || idx == SBT_HOR_QUAD );
+  const bool isPos1 = ( pos == SBT_POS1 );
+
+  const bool verHalfOk = CU::targetSbtAllowed( SBT_VER_HALF, allowedSet ) != 0;
+  const bool horHalfOk = CU::targetSbtAllowed( SBT_HOR_HALF, allowedSet ) != 0;
+  const bool verQuadOk = CU::targetSbtAllowed( SBT_VER_QUAD, allowedSet ) != 0;
+  const bool horQuadOk = CU::targetSbtAllowed( SBT_HOR_QUAD, allowedSet ) != 0;
+  // 2nd bin: half vs. quad, written only if at least one type of each size is allowed
+  if( ( horHalfOk || verHalfOk ) && ( horQuadOk || verQuadOk ) )
+  {
+    m_BinEncoder.encodeBin( isQuad, Ctx::SbtQuadFlag( 0 ) );
+  }
+  else
+  {
+    assert( !isQuad ); // no bin is written here, so a quad type must not have been selected
+  }
+
+  // 3rd bin: direction, written only if both directions are allowed for the selected size
+  if( isQuad ? ( verQuadOk && horQuadOk ) : ( verHalfOk && horHalfOk ) )
+  {
+    const uint8_t dirCtx = ( width == height ) ? 0 : ( width < height ? 1 : 2 );
+    m_BinEncoder.encodeBin( isHor, Ctx::SbtHorFlag( dirCtx ) );
+  }
+  else
+  {
+    assert( isHor == ( isQuad ? horQuadOk : horHalfOk ) ); // direction must equal the only allowed one
+  }
+
+  // 4th bin: position, always written once SBT is on
+  m_BinEncoder.encodeBin( isPos1, Ctx::SbtPosFlag( 0 ) );
+
+  DTRACE( g_trace_ctx, D_SYNTAX, "sbt_mode() pos=(%d,%d) sbtInfo=%d\n", cu.lx(), cu.ly(), (int)cu.sbtInfo );
+}
+#endif
 
 void CABACWriter::end_of_ctu( const CodingUnit& cu, CUCtx& cuCtx )
 {
@@ -2080,6 +2144,11 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
 void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partitioner, CUCtx& cuCtx, ChromaCbfs& chromaCbfs )
 #endif
 {
+#if JVET_M0140_SBT
+  ChromaCbfs chromaCbfsLastDepth;
+  chromaCbfsLastDepth.Cb              = chromaCbfs.Cb;
+  chromaCbfsLastDepth.Cr              = chromaCbfs.Cr;
+#endif
   const UnitArea&       area          = partitioner.currArea();
 #if JVET_M0102_INTRA_SUBPARTITIONS
         int             subTuCounter  = subTuIdx;
@@ -2099,6 +2168,12 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
   {
     CHECK( !split, "transform split implied" );
   }
+#if JVET_M0140_SBT
+  else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) )
+  {
+    CHECK( !split, "transform split implied - sbt" );
+  }
+#endif
   else
 #if JVET_M0102_INTRA_SUBPARTITIONS
   CHECK( split && !cu.ispMode, "transform split not allowed with QTBT" );
@@ -2120,6 +2195,9 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
       if( trDepth == 0 || chromaCbfs.Cb || chromaCbfISP )
       {
         chromaCbfs.Cb = TU::getCbfAtDepth( tu, COMPONENT_Cb, trDepth );
+#if JVET_M0140_SBT
+        if( !( cu.sbtInfo && trDepth == 1 ) )
+#endif
         cbf_comp( cs, chromaCbfs.Cb, area.blocks[COMPONENT_Cb], cbfDepth );
       }
       else
@@ -2130,6 +2208,9 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
       if( trDepth == 0 || chromaCbfs.Cr || chromaCbfISP )
       {
         chromaCbfs.Cr = TU::getCbfAtDepth( tu, COMPONENT_Cr, trDepth );
+#if JVET_M0140_SBT
+        if( !( cu.sbtInfo && trDepth == 1 ) )
+#endif
         cbf_comp( cs, chromaCbfs.Cr, area.blocks[COMPONENT_Cr], cbfDepth, chromaCbfs.Cb );
       }
       else
@@ -2140,6 +2221,9 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
       if( trDepth == 0 || chromaCbfs.Cb )
       {
         chromaCbfs.Cb = TU::getCbfAtDepth( tu, COMPONENT_Cb, trDepth );
+#if JVET_M0140_SBT
+        if( !( cu.sbtInfo && trDepth == 1 ) )
+#endif
         cbf_comp( cs, chromaCbfs.Cb, area.blocks[COMPONENT_Cb], trDepth );
       }
       else
@@ -2150,6 +2234,9 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
       if( trDepth == 0 || chromaCbfs.Cr )
       {
         chromaCbfs.Cr = TU::getCbfAtDepth( tu, COMPONENT_Cr,   trDepth );
+#if JVET_M0140_SBT
+        if( !( cu.sbtInfo && trDepth == 1 ) )
+#endif
         cbf_comp( cs, chromaCbfs.Cr, area.blocks[COMPONENT_Cr], trDepth, chromaCbfs.Cb );
       }
       else
@@ -2193,6 +2280,12 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
     {
       partitioner.splitCurrArea( ispType, cs );
     }
+#endif
+#if JVET_M0140_SBT
+    else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) )
+    {
+      partitioner.splitCurrArea( PartSplit( cu.getSbtTuSplit() ), cs );
+    }
 #endif
     else
       THROW( "Implicit TU split not available" );
@@ -2220,6 +2313,17 @@ void CABACWriter::transform_tree( const CodingStructure& cs, Partitioner& partit
       {
         CHECK( !TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), "Luma cbf must be true for inter units with no chroma coeffs" );
       }
+#if JVET_M0140_SBT
+      else if( cu.sbtInfo && tu.noResidual )
+      {
+        CHECK( TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), "Luma cbf must be false for inter sbt no-residual tu" );
+      }
+      else if( cu.sbtInfo && !chromaCbfsLastDepth.sigChroma( area.chromaFormat ) )
+      {
+        assert( !tu.noResidual );
+        CHECK( !TU::getCbfAtDepth( tu, COMPONENT_Y, trDepth ), "Luma cbf must be true for inter sbt residual tu" );
+      }
+#endif
       else
       {
 #if JVET_M0102_INTRA_SUBPARTITIONS
diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h
index 8ddd3c5fcb0eae3260426fcd6a559e17bb859de3..9f45ea0a5099366138eaf2d9dde3ef1e5755faa9 100644
--- a/source/Lib/EncoderLib/CABACWriter.h
+++ b/source/Lib/EncoderLib/CABACWriter.h
@@ -107,6 +107,9 @@ public:
   void        intra_chroma_pred_mode    ( const PredictionUnit&         pu );
   void        cu_residual               ( const CodingUnit&             cu,       Partitioner&      pm,         CUCtx& cuCtx );
   void        rqt_root_cbf              ( const CodingUnit&             cu );
+#if JVET_M0140_SBT
+  void        sbt_mode                  ( const CodingUnit&             cu );
+#endif
   void        end_of_ctu                ( const CodingUnit&             cu,       CUCtx&            cuCtx );
 
   // prediction unit (clause 7.3.8.6)
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index 5f8ff17f7bc8e33045554fcc41a39b1373baca96..c2d2a0a67b631c774f812effbd54ca93d5770c8f 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -217,6 +217,9 @@ protected:
 #endif
 #if JVET_M0303_IMPLICIT_MTS
   int       m_ImplicitMTS;
+#endif
+#if JVET_M0140_SBT
+  bool      m_SBT;                                ///< Sub-Block Transform for inter blocks
 #endif
   bool      m_LargeCTU;
   int       m_SubPuMvpMode;
@@ -753,6 +756,10 @@ public:
   void      setImplicitMTS                  ( bool b )       { m_ImplicitMTS = b; }
   bool      getImplicitMTS                  ()         const { return m_ImplicitMTS; }
 #endif
+#if JVET_M0140_SBT
+  void      setUseSBT                       ( bool b )       { m_SBT = b; }          ///< stores the SBT enable switch in m_SBT
+  bool      getUseSBT                       ()         const { return m_SBT; }       ///< returns the stored SBT enable switch
+#endif
 
   void      setUseCompositeRef              (bool b)         { m_compositeRefEnabled = b; }
   bool      getUseCompositeRef              ()         const { return m_compositeRefEnabled; }
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index 6f6d16822c359eda71c161ce7b5e59166de345e9..b93231088587e50d5fab776cd78de6e3bfdd7015 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -713,6 +713,15 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
   tempCS->chType = partitioner.chType;
   bestCS->chType = partitioner.chType;
   m_modeCtrl->initCULevel( partitioner, *tempCS );
+#if JVET_M0140_SBT
+  if( partitioner.currQtDepth == 0 && partitioner.currMtDepth == 0 && !tempCS->slice->isIntra() && ( sps.getSpsNext().getUseSBT() || sps.getSpsNext().getUseInterMTS() ) )
+  {
+    auto slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl );
+    int maxSLSize = sps.getSpsNext().getUseSBT() ? tempCS->slice->getSPS()->getSpsNext().getMaxSbtSize() : MTS_INTER_MAX_CU_SIZE;
+    slsSbt->resetSaveloadSbt( maxSLSize );
+  }
+  m_sbtCostSave[0] = m_sbtCostSave[1] = MAX_DOUBLE;
+#endif
 
   m_CurrCtx->start = m_CABACEstimator->getCtx();
 
@@ -4171,6 +4180,11 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be
   CodingUnit*            cu        = tempCS->getCU( partitioner.chType );
   double   bestCostInternal        = MAX_DOUBLE;
   double           bestCost        = bestCS->cost;
+#if JVET_M0140_SBT
+  double           bestCostBegin   = bestCS->cost;
+  CodingUnit*      prevBestCU      = bestCS->getCU( partitioner.chType );
+  uint8_t          prevBestSbt     = ( prevBestCU == nullptr ) ? 0 : prevBestCU->sbtInfo;
+#endif
 #if !JVET_M0464_UNI_MTS
   const SPS&            sps        = *tempCS->sps;
   const int      maxSizeEMT        = EMT_INTER_MAX_CU_WITH_QTBT;
@@ -4211,6 +4225,51 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be
       }
     }
   }
+#if JVET_M0140_SBT
+  const bool mtsAllowed = tempCS->sps->getSpsNext().getUseInterMTS() && partitioner.currArea().lwidth() <= MTS_INTER_MAX_CU_SIZE && partitioner.currArea().lheight() <= MTS_INTER_MAX_CU_SIZE;
+  uint8_t sbtAllowed = cu->checkAllowedSbt();
+  uint8_t numRDOTried = 0;
+  Distortion sbtOffDist = 0;
+  bool    sbtOffRootCbf = 0;
+  double  sbtOffCost      = MAX_DOUBLE;
+  double  currBestCost = MAX_DOUBLE;
+  bool    doPreAnalyzeResi = ( sbtAllowed || mtsAllowed ) && residualPass == 0;
+
+  m_pcInterSearch->initTuAnalyzer();
+  if( doPreAnalyzeResi )
+  {
+    m_pcInterSearch->calcMinDistSbt( *tempCS, *cu, sbtAllowed );
+  }
+
+  auto    slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl );
+  int     slShift = 4 + std::min( (int)gp_sizeIdxInfo->idxFrom( cu->lwidth() ) + (int)gp_sizeIdxInfo->idxFrom( cu->lheight() ), 9 );
+  Distortion curPuSse = m_pcInterSearch->getEstDistSbt( NUMBER_SBT_MODE );
+  uint8_t currBestSbt = 0;
+  uint8_t currBestTrs = MAX_UCHAR;
+  uint8_t histBestSbt = MAX_UCHAR;
+  uint8_t histBestTrs = MAX_UCHAR;
+  m_pcInterSearch->setHistBestTrs( MAX_UCHAR, MAX_UCHAR );
+  if( doPreAnalyzeResi )
+  {
+    if( m_pcInterSearch->getSkipSbtAll() && !mtsAllowed ) //emt is off
+    {
+      histBestSbt = 0; //try DCT2
+      m_pcInterSearch->setHistBestTrs( histBestSbt, histBestTrs );
+    }
+    else
+    {
+      assert( curPuSse != std::numeric_limits<uint64_t>::max() );
+      uint16_t compositeSbtTrs = slsSbt->findBestSbt( cu->cs->area, (uint32_t)( curPuSse >> slShift ) );
+      histBestSbt = ( compositeSbtTrs >> 0 ) & 0xff;
+      histBestTrs = ( compositeSbtTrs >> 8 ) & 0xff;
+      if( m_pcInterSearch->getSkipSbtAll() && CU::isSbtMode( histBestSbt ) ) //special case, skip SBT when loading SBT
+      {
+        histBestSbt = 0; //try DCT2
+      }
+      m_pcInterSearch->setHistBestTrs( histBestSbt, histBestTrs );
+    }
+  }
+#endif
 
 #if !JVET_M0464_UNI_MTS
   if( emtMode == 2 )
@@ -4254,14 +4313,24 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be
 #if !JVET_M0464_UNI_MTS
     cu->emtFlag = curEmtMode;
 #endif
+#if JVET_M0140_SBT
+    cu->sbtInfo = 0;
+#endif
 
     const bool skipResidual = residualPass == 1;
+#if JVET_M0140_SBT // skip DCT-2 and EMT if historical best transform mode is SBT
+    if( skipResidual || histBestSbt == MAX_UCHAR || !CU::isSbtMode( histBestSbt ) )
+    {
+#endif
     m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual );
-
+#if JVET_M0140_SBT
+    numRDOTried += mtsAllowed ? 2 : 1;
+#endif
     xEncodeDontSplit( *tempCS, partitioner );
 
     xCheckDQP( *tempCS, partitioner );
 
+#if !JVET_M0140_SBT //harmonize with GBI fast algorithm (move the code to the end of this function)
     if( ETM_INTER_ME == encTestMode.type )
     {
       if( equGBiCost != NULL )
@@ -4291,6 +4360,7 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be
         }
       }
     }
+#endif
 
 #if !JVET_M0464_UNI_MTS
     double emtFirstPassCost = tempCS->cost;
@@ -4318,6 +4388,18 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be
         return;
       }
     }
+#if JVET_M0140_SBT
+    currBestCost = tempCS->cost;
+    sbtOffCost = tempCS->cost;
+    sbtOffDist = tempCS->dist;
+    sbtOffRootCbf = cu->rootCbf;
+    currBestSbt = CU::getSbtInfo( cu->firstTU->mtsIdx > 1 ? SBT_OFF_MTS : SBT_OFF_DCT, 0 );
+    currBestTrs = cu->firstTU->mtsIdx;
+    if( cu->lwidth() <= MAX_TU_SIZE_FOR_PROFILE && cu->lheight() <= MAX_TU_SIZE_FOR_PROFILE )
+    {
+      CHECK( tempCS->tus.size() != 1, "tu must be only one" );
+    }
+#endif
 
 #if WCG_EXT
     DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
@@ -4340,8 +4422,200 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be
         maxEMTMode = 0; // do not test EMT
       }
     }
+#endif
+#if JVET_M0140_SBT // skip DCT-2 and EMT
+    }
+#endif
+
+#if JVET_M0140_SBT //RDO for SBT
+    uint8_t numSbtRdo = CU::numSbtModeRdo( sbtAllowed );
+    //early termination if all SBT modes are not allowed
+    //normative
+    if( !sbtAllowed || skipResidual )
+    {
+      numSbtRdo = 0;
+    }
+    //fast algorithm
+    if( ( histBestSbt != MAX_UCHAR && !CU::isSbtMode( histBestSbt ) ) || m_pcInterSearch->getSkipSbtAll() )
+    {
+      numSbtRdo = 0;
+    }
+    if( bestCost != MAX_DOUBLE && sbtOffCost != MAX_DOUBLE )
+    {
+      double th = 1.07;
+      if( !( prevBestSbt == 0 || m_sbtCostSave[0] == MAX_DOUBLE ) )
+      {
+        assert( m_sbtCostSave[1] <= m_sbtCostSave[0] );
+        th *= ( m_sbtCostSave[0] / m_sbtCostSave[1] );
+      }
+      if( sbtOffCost > bestCost * th )
+      {
+        numSbtRdo = 0;
+      }
+    }
+    if( !sbtOffRootCbf && sbtOffCost != MAX_DOUBLE )
+    {
+      double th = Clip3( 0.05, 0.55, ( 27 - cu->qp ) * 0.02 + 0.35 );
+      if( sbtOffCost < m_pcRdCost->calcRdCost( ( cu->lwidth() * cu->lheight() ) << SCALE_BITS, 0 ) * th )
+      {
+        numSbtRdo = 0;
+      }
+    }
+
+    if( histBestSbt != MAX_UCHAR && numSbtRdo != 0 )
+    {
+      numSbtRdo = 1;
+      m_pcInterSearch->initSbtRdoOrder( CU::getSbtMode( CU::getSbtIdx( histBestSbt ), CU::getSbtPos( histBestSbt ) ) );
+    }
+
+    for( int sbtModeIdx = 0; sbtModeIdx < numSbtRdo; sbtModeIdx++ )
+    {
+      uint8_t sbtMode = m_pcInterSearch->getSbtRdoOrder( sbtModeIdx );
+      uint8_t sbtIdx = CU::getSbtIdxFromSbtMode( sbtMode );
+      uint8_t sbtPos = CU::getSbtPosFromSbtMode( sbtMode );
+
+      //fast algorithm (early skip, save & load)
+      if( histBestSbt == MAX_UCHAR )
+      {
+        uint8_t skipCode = m_pcInterSearch->skipSbtByRDCost( cu->lwidth(), cu->lheight(), cu->mtDepth, sbtIdx, sbtPos, bestCS->cost, sbtOffDist, sbtOffCost, sbtOffRootCbf );
+        if( skipCode != MAX_UCHAR )
+        {
+          continue;
+        }
+
+        if( sbtModeIdx > 0 )
+        {
+          uint8_t prevSbtMode = m_pcInterSearch->getSbtRdoOrder( sbtModeIdx - 1 );
+          //make sure the prevSbtMode is the same size as the current SBT mode (otherwise the estimated dist may not be comparable)
+          if( CU::isSameSbtSize( prevSbtMode, sbtMode ) )
+          {
+            Distortion currEstDist = m_pcInterSearch->getEstDistSbt( sbtMode );
+            Distortion prevEstDist = m_pcInterSearch->getEstDistSbt( prevSbtMode );
+            if( currEstDist > prevEstDist * 1.15 )
+            {
+              continue;
+            }
+          }
+        }
+      }
+
+      //init tempCS and TU
+      if( bestCost == bestCS->cost ) //The first EMT pass didn't become the bestCS, so we clear the TUs generated
+      {
+        tempCS->clearTUs();
+      }
+      else if( false == swapped )
+      {
+        tempCS->initStructData( encTestMode.qp, encTestMode.lossless );
+        tempCS->copyStructure( *bestCS, partitioner.chType );
+        tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() );
+        bestCost = bestCS->cost;
+        cu = tempCS->getCU( partitioner.chType );
+        swapped = true;
+      }
+      else
+      {
+        tempCS->clearTUs();
+        bestCost = bestCS->cost;
+        cu = tempCS->getCU( partitioner.chType );
+      }
+
+      //we need to restart the distortion for the new tempCS, the bit count and the cost
+      tempCS->dist = 0;
+      tempCS->fracBits = 0;
+      tempCS->cost = MAX_DOUBLE;
+      cu->skip = false;
+
+      //set SBT info
+      cu->setSbtIdx( sbtIdx );
+      cu->setSbtPos( sbtPos );
+
+      //try residual coding
+      m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual );
+      numRDOTried++;
+
+      xEncodeDontSplit( *tempCS, partitioner );
+
+      xCheckDQP( *tempCS, partitioner );
+
+      if( imvCS && ( tempCS->cost < imvCS->cost ) )
+      {
+        if( imvCS->cost != MAX_DOUBLE )
+        {
+          imvCS->initStructData( encTestMode.qp, encTestMode.lossless );
+        }
+        imvCS->copyStructure( *tempCS, partitioner.chType );
+      }
+
+      if( NULL != bestHasNonResi && ( bestCostInternal > tempCS->cost ) )
+      {
+        bestCostInternal = tempCS->cost;
+        if( !( tempCS->getPU( partitioner.chType )->mhIntraFlag ) )
+          *bestHasNonResi = !cu->rootCbf;
+      }
+
+      if( tempCS->cost < currBestCost )
+      {
+        currBestSbt = cu->sbtInfo;
+        currBestTrs = tempCS->tus[cu->sbtInfo ? cu->getSbtPos() : 0]->mtsIdx;
+        assert( currBestTrs == 0 || currBestTrs == 1 );
+        currBestCost = tempCS->cost;
+      }
+
+#if WCG_EXT
+      DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
+#else
+      DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda() );
+#endif
+      xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
+    }
+
+    if( bestCostBegin != bestCS->cost )
+    {
+      m_sbtCostSave[0] = sbtOffCost;
+      m_sbtCostSave[1] = currBestCost;
+    }
 #endif
   } //end emt loop
+
+#if JVET_M0140_SBT
+  if( histBestSbt == MAX_UCHAR && doPreAnalyzeResi && numRDOTried > 1 )
+  {
+    slsSbt->saveBestSbt( cu->cs->area, (uint32_t)( curPuSse >> slShift ), currBestSbt, currBestTrs );
+  }
+#endif
+#if JVET_M0140_SBT //harmonize with GBI fast algorithm (move the code here)
+  tempCS->cost = currBestCost;
+  if( ETM_INTER_ME == encTestMode.type )
+  {
+    if( equGBiCost != NULL )
+    {
+      if( tempCS->cost < ( *equGBiCost ) && cu->GBiIdx == GBI_DEFAULT )
+      {
+        ( *equGBiCost ) = tempCS->cost;
+      }
+    }
+    else
+    {
+      CHECK( equGBiCost == NULL, "equGBiCost == NULL" );
+    }
+    if( tempCS->slice->getCheckLDC() && !cu->imv && cu->GBiIdx != GBI_DEFAULT && tempCS->cost < m_bestGbiCost[1] )
+    {
+      if( tempCS->cost < m_bestGbiCost[0] )
+      {
+        m_bestGbiCost[1] = m_bestGbiCost[0];
+        m_bestGbiCost[0] = tempCS->cost;
+        m_bestGbiIdx[1] = m_bestGbiIdx[0];
+        m_bestGbiIdx[0] = cu->GBiIdx;
+      }
+      else
+      {
+        m_bestGbiCost[1] = tempCS->cost;
+        m_bestGbiIdx[1] = cu->GBiIdx;
+      }
+    }
+  }
+#endif
 }
 
 
diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h
index 3f1bc3b90230c2f5f231da4ae78d076fcfdf524b..89760c8491b9caaee20b70da692b5d4fdea5b113 100644
--- a/source/Lib/EncoderLib/EncCu.h
+++ b/source/Lib/EncoderLib/EncCu.h
@@ -152,6 +152,9 @@ private:
 #if SHARP_LUMA_DELTA_QP
   void    updateLambda      ( Slice* slice, double dQP );
 #endif
+#if JVET_M0140_SBT
+  double                m_sbtCostSave[2];
+#endif
 
 public:
   /// copy parameters from encoder class
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index 2deda4f064adbf1df0d5e64e3452f0b86d613135..76b3aef55aec3723872343c0bdee53683daead2c 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -909,6 +909,13 @@ void EncLib::xInitSPS(SPS &sps)
 #endif
   sps.getSpsNext().setUseIntraEMT           ( m_IntraEMT );
   sps.getSpsNext().setUseInterEMT           ( m_InterEMT );
+#endif
+#if JVET_M0140_SBT
+  sps.getSpsNext().setUseSBT                ( m_SBT );
+  if( sps.getSpsNext().getUseSBT() )
+  {
+    sps.getSpsNext().setMaxSbtSize          ( m_iSourceWidth >= 1920 ? 64 : 32 );
+  }
 #endif
   sps.getSpsNext().setUseCompositeRef       ( m_compositeRefEnabled );
   sps.getSpsNext().setUseGBi                ( m_GBi );
diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp
index dd4b2282e974f1b728279275d411cf224d5b115b..cac92f3864e957079f60ad703f052f1d9dd427e0 100644
--- a/source/Lib/EncoderLib/EncModeCtrl.cpp
+++ b/source/Lib/EncoderLib/EncModeCtrl.cpp
@@ -510,6 +510,106 @@ bool CacheBlkInfoCtrl::getMv( const UnitArea& area, const RefPicList refPicList,
   return m_codedCUInfo[idx1][idx2][idx3][idx4]->validMv[refPicList][iRefIdx];
 }
 
+#if JVET_M0140_SBT
+void SaveLoadEncInfoSbt::init( const Slice &slice ) // binds the slice used by later lookups; allocates nothing
+{
+  m_sliceSbt = &slice; // non-owning pointer; findBestSbt()/saveBestSbt() read getPPS()->pcv through it
+}
+
+void SaveLoadEncInfoSbt::create() // allocates the 4-D table m_saveLoadSbt[pos][pos][size][size]; release it with destroy()
+{
+  int numSizeIdx = gp_sizeIdxInfo->idxFrom( SBT_MAX_SIZE ) - MIN_CU_LOG2 + 1; // extent of each of the two size dimensions
+  int numPosIdx = MAX_CU_SIZE >> MIN_CU_LOG2;                                  // extent of each of the two position dimensions
+
+  m_saveLoadSbt = new SaveLoadStructSbt***[numPosIdx];
+
+  for( int xIdx = 0; xIdx < numPosIdx; xIdx++ )
+  {
+    m_saveLoadSbt[xIdx] = new SaveLoadStructSbt**[numPosIdx];
+    for( int yIdx = 0; yIdx < numPosIdx; yIdx++ )
+    {
+      m_saveLoadSbt[xIdx][yIdx] = new SaveLoadStructSbt*[numSizeIdx];
+      for( int wIdx = 0; wIdx < numSizeIdx; wIdx++ )
+      {
+        m_saveLoadSbt[xIdx][yIdx][wIdx] = new SaveLoadStructSbt[numSizeIdx](); // "()" zero-initializes: resetSaveloadSbt() only clears the size range of its maxSbtSize argument, so slots must not start with an indeterminate numPuInfoStored
+      }
+    }
+  }
+}
+
+void SaveLoadEncInfoSbt::destroy() // frees the table built by create(), innermost arrays first
+{
+  const int posCount  = MAX_CU_SIZE >> MIN_CU_LOG2;
+  const int sizeCount = gp_sizeIdxInfo->idxFrom( SBT_MAX_SIZE ) - MIN_CU_LOG2 + 1;
+  for( int px = 0; px < posCount; px++ )
+  {
+    for( int py = 0; py < posCount; py++ )
+    {
+      SaveLoadStructSbt **sizeRows = m_saveLoadSbt[px][py];
+      for( int sw = 0; sw < sizeCount; sw++ )
+      {
+        delete[] sizeRows[sw];
+      }
+      delete[] sizeRows;
+    }
+    delete[] m_saveLoadSbt[px];
+  }
+  delete[] m_saveLoadSbt;
+}
+
+uint16_t SaveLoadEncInfoSbt::findBestSbt( const UnitArea& area, const uint32_t curPuSse )
+{
+  // Looks up the slot of this luma area and returns the first entry stored for curPuSse:
+  // puSbt in the low byte, puTrs in the high byte.
+  unsigned posA, posB, sizeA, sizeB;
+  getAreaIdx( area.Y(), *m_sliceSbt->getPPS()->pcv, posA, posB, sizeA, sizeB );
+  const SaveLoadStructSbt& slot = m_saveLoadSbt[posA][posB][sizeA - MIN_CU_LOG2][sizeB - MIN_CU_LOG2];
+  for( int entry = 0; entry < slot.numPuInfoStored; entry++ )
+  {
+    if( slot.puSse[entry] != curPuSse )
+    {
+      continue;
+    }
+    return slot.puSbt[entry] + ( slot.puTrs[entry] << 8 );
+  }
+  return MAX_UCHAR + ( MAX_UCHAR << 8 ); // no entry matched: both bytes are MAX_UCHAR
+}
+
+bool SaveLoadEncInfoSbt::saveBestSbt( const UnitArea& area, const uint32_t curPuSse, const uint8_t curPuSbt, const uint8_t curPuTrs )
+{
+  // Appends (curPuSse, curPuSbt, curPuTrs) to the slot of this luma area; returns false when the slot already holds SBT_NUM_SL entries.
+  unsigned posA, posB, sizeA, sizeB;
+  getAreaIdx( area.Y(), *m_sliceSbt->getPPS()->pcv, posA, posB, sizeA, sizeB );
+  SaveLoadStructSbt& slot = m_saveLoadSbt[posA][posB][sizeA - MIN_CU_LOG2][sizeB - MIN_CU_LOG2];
+  const bool hasRoom = !( slot.numPuInfoStored == SBT_NUM_SL );
+  if( hasRoom )
+  {
+    const uint8_t writeIdx = slot.numPuInfoStored;
+    slot.puSse[writeIdx] = curPuSse;
+    slot.puSbt[writeIdx] = curPuSbt;
+    slot.puTrs[writeIdx] = curPuTrs;
+    slot.numPuInfoStored++;
+  }
+  return hasRoom;
+}
+
+void SaveLoadEncInfoSbt::resetSaveloadSbt( int maxSbtSize ) // zeroes every slot whose size indices lie in the range derived from maxSbtSize
+{
+  const int sizeCount = gp_sizeIdxInfo->idxFrom( maxSbtSize ) - MIN_CU_LOG2 + 1;
+  const int posCount  = MAX_CU_SIZE >> MIN_CU_LOG2;
+  for( int px = 0; px < posCount; px++ )
+  {
+    for( int py = 0; py < posCount; py++ )
+    {
+      SaveLoadStructSbt **sizeRows = m_saveLoadSbt[px][py];
+      for( int sw = 0; sw < sizeCount; sw++ )
+      {
+        memset( sizeRows[sw], 0, sizeCount * sizeof( SaveLoadStructSbt ) );
+      }
+    }
+  }
+}
+#endif
+
 bool CacheBlkInfoCtrl::getInter(const UnitArea& area)
 {
   unsigned idx1, idx2, idx3, idx4;
@@ -952,12 +1052,18 @@ void EncModeCtrlMTnoRQT::create( const EncCfg& cfg )
 {
   CacheBlkInfoCtrl::create();
   BestEncInfoCache::create( cfg.getChromaFormatIdc() );
+#if JVET_M0140_SBT
+  SaveLoadEncInfoSbt::create();
+#endif
 }
 
 void EncModeCtrlMTnoRQT::destroy()
 {
   CacheBlkInfoCtrl::destroy();
   BestEncInfoCache::destroy();
+#if JVET_M0140_SBT
+  SaveLoadEncInfoSbt::destroy(); // frees the SBT save/load table allocated by SaveLoadEncInfoSbt::create()
+#endif
 }
 
 #endif
@@ -967,6 +1073,9 @@ void EncModeCtrlMTnoRQT::initCTUEncoding( const Slice &slice )
 #if REUSE_CU_RESULTS
   BestEncInfoCache::init( slice );
 #endif
+#if JVET_M0140_SBT
+  SaveLoadEncInfoSbt::init( slice );
+#endif
 
   CHECK( !m_ComprCUCtxList.empty(), "Mode list is not empty at the beginning of a CTU" );
 
diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h
index 145d0c4b96e5875e0c112e04617276ce2aad7993..9aa3abe477e94a5e59c25011bc7fcf0d13b0f0a5 100644
--- a/source/Lib/EncoderLib/EncModeCtrl.h
+++ b/source/Lib/EncoderLib/EncModeCtrl.h
@@ -355,6 +355,33 @@ protected:
 //////////////////////////////////////////////////////////////////////////
 // some utility interfaces that expose some functionality that can be used without concerning about which particular controller is used
 //////////////////////////////////////////////////////////////////////////
+#if JVET_M0140_SBT
+struct SaveLoadStructSbt // one slot of the SBT save/load table: up to SBT_NUM_SL remembered results
+{
+  uint8_t  numPuInfoStored;   // number of valid entries in the arrays below
+  uint32_t puSse[SBT_NUM_SL]; // lookup key: the SSE value passed to saveBestSbt()/findBestSbt()
+  uint8_t  puSbt[SBT_NUM_SL]; // value saved as curPuSbt; returned in the low byte by findBestSbt()
+  uint8_t  puTrs[SBT_NUM_SL]; // value saved as curPuTrs; returned in the high byte by findBestSbt()
+};
+
+class SaveLoadEncInfoSbt // per-block cache of SBT/transform decisions keyed by SSE; a base class of EncModeCtrlMTnoRQT
+{
+protected:
+  void init( const Slice &slice ); // stores a pointer to the slice
+  void create();                   // allocates m_saveLoadSbt
+  void destroy();                  // frees m_saveLoadSbt
+
+private:
+  SaveLoadStructSbt ****m_saveLoadSbt; // 4-D table indexed by the four values from getAreaIdx() (the last two minus MIN_CU_LOG2)
+  Slice const       *m_sliceSbt;       // not owned; set by init()
+
+public:
+  virtual  ~SaveLoadEncInfoSbt() { } // does not free the table; destroy() must be called explicitly
+  void     resetSaveloadSbt( int maxSbtSize ); // zeroes the slots for the size range derived from maxSbtSize
+  uint16_t findBestSbt( const UnitArea& area, const uint32_t curPuSse ); // puSbt + (puTrs << 8) of the matching entry, or MAX_UCHAR in both bytes
+  bool     saveBestSbt( const UnitArea& area, const uint32_t curPuSse, const uint8_t curPuSbt, const uint8_t curPuTrs ); // false if the slot already holds SBT_NUM_SL entries
+};
+#endif
 
 static const int MAX_STORED_CU_INFO_REFS = 4;
 
@@ -481,6 +508,9 @@ class EncModeCtrlMTnoRQT : public EncModeCtrl, public CacheBlkInfoCtrl
 #if REUSE_CU_RESULTS
   , public BestEncInfoCache
 #endif
+#if JVET_M0140_SBT
+  , public SaveLoadEncInfoSbt
+#endif
 {
   enum ExtraFeatures
   {
diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp
index cd19a77f979ae2ae17a601d2c6fe16b7ed72c068..22b5d5e4b828bbe9110ecab4f723a92a7f6361b6 100644
--- a/source/Lib/EncoderLib/InterSearch.cpp
+++ b/source/Lib/EncoderLib/InterSearch.cpp
@@ -113,6 +113,10 @@ InterSearch::InterSearch()
   m_affMVList = nullptr;
   m_affMVListSize = 0;
   m_affMVListIdx = 0;
+#if JVET_M0140_SBT
+  m_histBestSbt    = MAX_UCHAR;
+  m_histBestMtsIdx = MAX_UCHAR;
+#endif
 }
 
 
@@ -5928,6 +5932,12 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti
     {
       CHECK( !bSubdiv, "Not performing the implicit TU split" );
     }
+#if JVET_M0140_SBT
+    else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) )
+    {
+      CHECK( !bSubdiv, "Not performing the implicit TU split - sbt" );
+    }
+#endif
     else
     {
       CHECK( bSubdiv, "transformsplit not supported" );
@@ -5942,17 +5952,27 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti
         if( firstCbfOfCU || TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth - 1 ) )
         {
           const bool  chroma_cbf = TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth );
+#if JVET_M0140_SBT
+          if( !( cu.sbtInfo && currDepth == 1 ) )
+#endif
           m_CABACEstimator->cbf_comp( cs, chroma_cbf, currArea.blocks[COMPONENT_Cb], currDepth );
         }
         if( firstCbfOfCU || TU::getCbfAtDepth( currTU, COMPONENT_Cr, currDepth - 1 ) )
         {
           const bool  chroma_cbf = TU::getCbfAtDepth( currTU, COMPONENT_Cr, currDepth );
+#if JVET_M0140_SBT
+          if( !( cu.sbtInfo && currDepth == 1 ) )
+#endif
           m_CABACEstimator->cbf_comp( cs, chroma_cbf, currArea.blocks[COMPONENT_Cr], currDepth, TU::getCbfAtDepth( currTU, COMPONENT_Cb, currDepth ) );
         }
       }
     }
 
+#if JVET_M0140_SBT
+    if( !bSubdiv && !( cu.sbtInfo && currTU.noResidual ) )
+#else
     if( !bSubdiv )
+#endif
     {
       m_CABACEstimator->cbf_comp( cs, TU::getCbfAtDepth( currTU, COMPONENT_Y, currDepth ), currArea.Y(), currDepth );
     }
@@ -5983,6 +6003,12 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti
       {
         partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
       }
+#if JVET_M0140_SBT
+      else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) )
+      {
+        partitioner.splitCurrArea( PartSplit( cu.getSbtTuSplit() ), cs );
+      }
+#endif
       else
         THROW( "Implicit TU split not available!" );
 
@@ -5996,6 +6022,253 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti
   }
 }
 
+#if JVET_M0140_SBT
+void InterSearch::calcMinDistSbt( CodingStructure &cs, const CodingUnit& cu, const uint8_t sbtAllowed ) // fills m_estMinDistSbt[] and m_sbtRdoOrder[]; may set m_skipSbtAll
+{
+  if( !sbtAllowed ) // no SBT mode allowed: only the whole-CU SSE (index NUMBER_SBT_MODE) is computed
+  {
+    m_estMinDistSbt[NUMBER_SBT_MODE] = 0;
+    for( int comp = 0; comp < getNumberValidTBlocks( *cs.pcv ); comp++ )
+    {
+      const ComponentID compID = ComponentID( comp );
+      CPelBuf pred = cs.getPredBuf( compID );
+      CPelBuf org  = cs.getOrgBuf( compID );
+      m_estMinDistSbt[NUMBER_SBT_MODE] += m_pcRdCost->getDistPart( org, pred, cs.sps->getBitDepth( toChannelType( compID ) ), compID, DF_SSE );
+    }
+    return;
+  }
+
+  //SBT fast algorithm 2.1 : estimate a minimum RD cost of a SBT mode based on the distortion of uncoded part and coded part (assuming distortion of the coded part can be reduced by ">> shift", i.e. to 1/32);
+  //                         if this cost is larger than the best cost, no need to try a specific SBT mode
+  int cuWidth  = cu.lwidth();
+  int cuHeight = cu.lheight();
+  int numPartX = cuWidth  >= 16 ? 4 : ( cuWidth  == 4 ? 1 : 2 );
+  int numPartY = cuHeight >= 16 ? 4 : ( cuHeight == 4 ? 1 : 2 );
+  Distortion dist[4][4];
+  memset( dist, 0, sizeof( Distortion ) * 16 );
+
+  for( uint32_t c = 0; c < getNumberValidTBlocks( *cs.pcv ); c++ )
+  {
+    const ComponentID compID   = ComponentID( c );
+    const CompArea&   compArea = cu.blocks[compID];
+    const CPelBuf orgPel  = cs.getOrgBuf( compArea );
+    const CPelBuf predPel = cs.getPredBuf( compArea );
+    int lengthX = compArea.width / numPartX;
+    int lengthY = compArea.height / numPartY;
+    int strideOrg  = orgPel.stride;
+    int stridePred = predPel.stride;
+    uint32_t   uiShift = DISTORTION_PRECISION_ADJUSTMENT( ( cs.sps->getBitDepth( toChannelType( compID ) ) - 8 ) << 1 ); // cs.sps is a pointer (it is used with '->' earlier in this function), so '->' is required for the argument to be well-formed
+    Intermediate_Int iTemp;
+
+    //accumulate the SSE of up to 4x4 sub parts over all components (chroma scaled by the chroma weight)
+    for( int j = 0; j < numPartY; j++ )
+    {
+      for( int i = 0; i < numPartX; i++ )
+      {
+        int posX = i * lengthX;
+        int posY = j * lengthY;
+        const Pel* ptrOrg  = orgPel.bufAt( posX, posY );
+        const Pel* ptrPred = predPel.bufAt( posX, posY );
+        Distortion uiSum = 0;
+        for( int n = 0; n < lengthY; n++ )
+        {
+          for( int m = 0; m < lengthX; m++ )
+          {
+            iTemp = ptrOrg[m] - ptrPred[m];
+            uiSum += Distortion( ( iTemp * iTemp ) >> uiShift );
+          }
+          ptrOrg += strideOrg;
+          ptrPred += stridePred;
+        }
+        if( isChroma( compID ) )
+        {
+          uiSum = (Distortion)( uiSum * m_pcRdCost->getChromaWeight() );
+        }
+        dist[j][i] += uiSum;
+      }
+    }
+  }
+
+  //SSE of a CU
+  m_estMinDistSbt[NUMBER_SBT_MODE] = 0;
+  for( int j = 0; j < numPartY; j++ )
+  {
+    for( int i = 0; i < numPartX; i++ )
+    {
+      m_estMinDistSbt[NUMBER_SBT_MODE] += dist[j][i];
+    }
+  }
+  //init per-mode dist
+  for( int i = SBT_VER_H0; i < NUMBER_SBT_MODE; i++ )
+  {
+    m_estMinDistSbt[i] = std::numeric_limits<uint64_t>::max();
+  }
+
+  //SBT fast algorithm 1: not try SBT if the residual is too small to compensate bits for encoding residual info
+  uint64_t minNonZeroResiFracBits = 12 << SCALE_BITS;
+  if( m_pcRdCost->calcRdCost( 0, m_estMinDistSbt[NUMBER_SBT_MODE] ) < m_pcRdCost->calcRdCost( minNonZeroResiFracBits, 0 ) )
+  {
+    m_skipSbtAll = true;
+    return;
+  }
+
+  //derive estimated minDist of SBT = zero-residual part distortion + ( non-zero residual part distortion >> shift ), i.e. / 32
+  int shift = 5;
+  Distortion distResiPart = 0, distNoResiPart = 0;
+
+  if( CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) )
+  {
+    int offsetResiPart = 0;
+    int offsetNoResiPart = numPartX / 2;
+    distResiPart = distNoResiPart = 0;
+    assert( numPartX >= 2 );
+    for( int j = 0; j < numPartY; j++ )
+    {
+      for( int i = 0; i < numPartX / 2; i++ )
+      {
+        distResiPart   += dist[j][i + offsetResiPart];
+        distNoResiPart += dist[j][i + offsetNoResiPart];
+      }
+    }
+    m_estMinDistSbt[SBT_VER_H0] = ( distResiPart >> shift ) + distNoResiPart;
+    m_estMinDistSbt[SBT_VER_H1] = ( distNoResiPart >> shift ) + distResiPart;
+  }
+
+  if( CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed ) )
+  {
+    int offsetResiPart = 0;
+    int offsetNoResiPart = numPartY / 2;
+    assert( numPartY >= 2 );
+    distResiPart = distNoResiPart = 0;
+    for( int j = 0; j < numPartY / 2; j++ )
+    {
+      for( int i = 0; i < numPartX; i++ )
+      {
+        distResiPart   += dist[j + offsetResiPart][i];
+        distNoResiPart += dist[j + offsetNoResiPart][i];
+      }
+    }
+    m_estMinDistSbt[SBT_HOR_H0] = ( distResiPart >> shift ) + distNoResiPart;
+    m_estMinDistSbt[SBT_HOR_H1] = ( distNoResiPart >> shift ) + distResiPart;
+  }
+
+  if( CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) )
+  {
+    assert( numPartX == 4 );
+    m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q1] = 0;
+    for( int j = 0; j < numPartY; j++ )
+    {
+      m_estMinDistSbt[SBT_VER_Q0] += dist[j][0] + ( ( dist[j][1] + dist[j][2] + dist[j][3] ) << shift );
+      m_estMinDistSbt[SBT_VER_Q1] += dist[j][3] + ( ( dist[j][0] + dist[j][1] + dist[j][2] ) << shift );
+    }
+    m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q0] >> shift;
+    m_estMinDistSbt[SBT_VER_Q1] = m_estMinDistSbt[SBT_VER_Q1] >> shift;
+  }
+
+  if( CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed ) )
+  {
+    assert( numPartY == 4 );
+    m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q1] = 0;
+    for( int i = 0; i < numPartX; i++ )
+    {
+      m_estMinDistSbt[SBT_HOR_Q0] += dist[0][i] + ( ( dist[1][i] + dist[2][i] + dist[3][i] ) << shift );
+      m_estMinDistSbt[SBT_HOR_Q1] += dist[3][i] + ( ( dist[0][i] + dist[1][i] + dist[2][i] ) << shift );
+    }
+    m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q0] >> shift;
+    m_estMinDistSbt[SBT_HOR_Q1] = m_estMinDistSbt[SBT_HOR_Q1] >> shift;
+  }
+
+  //SBT fast algorithm 5: try N SBT modes with the lowest distortion
+  Distortion temp[NUMBER_SBT_MODE];
+  memcpy( temp, m_estMinDistSbt, sizeof( Distortion ) * NUMBER_SBT_MODE );
+  memset( m_sbtRdoOrder, 255, NUMBER_SBT_MODE );
+  int startIdx = 0, numRDO;
+  numRDO = CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed );
+  numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO );
+  for( int i = startIdx; i < startIdx + numRDO; i++ )
+  {
+    Distortion minDist = std::numeric_limits<uint64_t>::max();
+    for( int n = SBT_VER_H0; n <= SBT_HOR_H1; n++ )
+    {
+      if( temp[n] < minDist )
+      {
+        minDist = temp[n];
+        m_sbtRdoOrder[i] = n;
+      }
+    }
+    temp[m_sbtRdoOrder[i]] = std::numeric_limits<uint64_t>::max();
+  }
+
+  startIdx += numRDO;
+  numRDO = CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed );
+  numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO );
+  for( int i = startIdx; i < startIdx + numRDO; i++ )
+  {
+    Distortion minDist = std::numeric_limits<uint64_t>::max();
+    for( int n = SBT_VER_Q0; n <= SBT_HOR_Q1; n++ )
+    {
+      if( temp[n] < minDist )
+      {
+        minDist = temp[n];
+        m_sbtRdoOrder[i] = n;
+      }
+    }
+    temp[m_sbtRdoOrder[i]] = std::numeric_limits<uint64_t>::max();
+  }
+}
+
+uint8_t InterSearch::skipSbtByRDCost( int width, int height, int mtDepth, uint8_t sbtIdx, uint8_t sbtPos, double bestCost, Distortion distSbtOff, double costSbtOff, bool rootCbfSbtOff )
+{
+  // Result: 0..3 identifies the fast rule that rejects this SBT mode; MAX_UCHAR means no rule fired.
+  // width, height and mtDepth are not read by any of the rules below.
+  const int mode = CU::getSbtMode( sbtIdx, sbtPos );
+
+  // SBT fast algorithm 2.2: the RD cost of the estimated minimum distortion of this mode (m_estMinDistSbt)
+  // together with a fixed bit term (11 << SCALE_BITS) is already larger than the best cost.
+  if( m_pcRdCost->calcRdCost( 11 << SCALE_BITS, m_estMinDistSbt[mode] ) > bestCost )
+  {
+    return 0;
+  }
+
+  // The remaining rules compare against the SBT-off result, so they are skipped when costSbtOff is MAX_DOUBLE.
+  if( !( costSbtOff != MAX_DOUBLE ) )
+  {
+    return MAX_UCHAR;
+  }
+
+  if( rootCbfSbtOff )
+  {
+    // SBT fast algorithm 4: weight the SBT-off cost minus the zero-bit cost of distSbtOff, then add the
+    // zero-bit cost of the estimated minimum distortion; skip when that exceeds the best cost.
+    const double scale    = mode > SBT_HOR_H1 ? 0.4 : 0.6;
+    const double estimate = ( ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) * scale ) + m_pcRdCost->calcRdCost( 0 << SCALE_BITS, m_estMinDistSbt[mode] );
+    if( estimate > bestCost )
+    {
+      return 3;
+    }
+    return MAX_UCHAR;
+  }
+
+  // SBT fast algorithm 3 (rootCbfSbtOff is false): skip SBT when the residual is too small. The estimate
+  // keeps the non-distortion part of the SBT-off cost and adds a minimum bit term for a non-zero residual.
+  const uint64_t   minResiFracBits = 10 << SCALE_BITS;
+  const bool       isHalfSplit     = sbtIdx == SBT_VER_HALF || sbtIdx == SBT_HOR_HALF;
+  const Distortion distGap         = m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[mode];
+  const Distortion distResiPart    = isHalfSplit ? (Distortion)( ( distGap * 9 ) >> 4 ) : (Distortion)( ( distGap * 3 ) >> 3 );
+  const double     estimate        = ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) + m_pcRdCost->calcRdCost( minResiFracBits, m_estMinDistSbt[mode] + distResiPart );
+
+  if( estimate > costSbtOff )
+  {
+    return 1;
+  }
+  if( estimate > bestCost )
+  {
+    return 2;
+  }
+  return MAX_UCHAR;
+}
+#endif
+
 void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist /*= NULL*/
   , const bool luma, const bool chroma
 )
@@ -6011,6 +6284,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
   const unsigned currDepth = partitioner.currTrDepth;
 
   bool bCheckFull  = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
+#if JVET_M0140_SBT
+  if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) )
+  {
+    bCheckFull = false;
+  }
+#endif
   bool bCheckSplit = !bCheckFull;
 
   // get temporary data
@@ -6041,6 +6320,9 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 #else
     tu.emtIdx         = 0;
 #endif
+#if JVET_M0140_SBT
+    tu.checkTuNoResidual( partitioner.currPartIdx() );
+#endif
 
 #if JVET_M0427_INLOOP_RESHAPER
     const Slice           &slice = *cs.slice;
@@ -6127,15 +6409,41 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
       uint8_t nNumTransformCands = 1 + ( tsAllowed ? 1 : 0 ) + ( mtsAllowed ? 4 : 0 ); // DCT + TS + 4 MTS = 6 tests
       std::vector<TrMode> trModes;
       trModes.push_back( TrMode( 0, true ) ); //DCT2
+#if JVET_M0140_SBT
+      nNumTransformCands = 1;
+      //for a SBT-no-residual TU, the RDO process should be called once, in order to get the RD cost
+      if( tsAllowed && !tu.noResidual )
+#else
       if( tsAllowed )
+#endif
       {
         trModes.push_back( TrMode( 1, true ) );
+#if JVET_M0140_SBT
+        nNumTransformCands++;
+#endif
       }
+
+#if APPLY_SBT_SL_ON_MTS
+      //skip MTS if DCT2 is the best
+      if( mtsAllowed && ( !tu.cu->slice->getSPS()->getSpsNext().getUseSBT() || CU::getSbtIdx( m_histBestSbt ) != SBT_OFF_DCT ) )
+#else
       if( mtsAllowed )
+#endif
       {
         for( int i = 2; i < 6; i++ )
         {
+#if APPLY_SBT_SL_ON_MTS
+          //skip the non-best Mts mode
+          if( !tu.cu->slice->getSPS()->getSpsNext().getUseSBT() || ( m_histBestMtsIdx == MAX_UCHAR || m_histBestMtsIdx == i ) )
+          {
+#endif
           trModes.push_back( TrMode( i, true ) );
+#if JVET_M0140_SBT
+          nNumTransformCands++;
+#endif
+#if APPLY_SBT_SL_ON_MTS
+          }
+#endif
         }
       }
 #endif
@@ -6249,6 +6557,10 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
               nonCoeffDist = m_pcRdCost->getDistPart( zeroBuf, orgResi, channelBitDepth, compID, DF_SSE ); // initialized with zero residual distortion
             }
 
+#if JVET_M0140_SBT
+            if( !tu.noResidual )
+            {
+#endif
             const bool prevCbf = ( compID == COMPONENT_Cr ? tu.cbf[COMPONENT_Cb] : false );
             m_CABACEstimator->cbf_comp( *csFull, false, compArea, currDepth, prevCbf );
 
@@ -6256,6 +6568,9 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
             {
               m_CABACEstimator->cross_comp_pred( tu, compID );
             }
+#if JVET_M0140_SBT
+            }
+#endif
 
             nonCoeffFracBits = m_CABACEstimator->getEstFracBits();
 #if WCG_EXT
@@ -6381,6 +6696,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 
             isLastBest = isLastMode;
           }
+#if JVET_M0140_SBT
+          if( tu.noResidual )
+          {
+            CHECK( currCompFracBits > 0 || currAbsSum, "currCompFracBits > 0 when tu noResidual" );
+          }
+#endif
         }
       }
 
@@ -6394,7 +6715,10 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
 
     m_CABACEstimator->getCtx() = ctxStart;
     m_CABACEstimator->resetBits();
-
+#if JVET_M0140_SBT
+    if( !tu.noResidual )
+    {
+#endif
     static const ComponentID cbf_getComp[3] = { COMPONENT_Cb, COMPONENT_Cr, COMPONENT_Y };
     for( unsigned c = 0; c < numTBlocks; c++)
     {
@@ -6409,6 +6733,9 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
         m_CABACEstimator->cbf_comp( *csFull, TU::getCbfAtDepth( tu, compID, currDepth ), tu.blocks[compID], currDepth, prevCbf );
       }
     }
+#if JVET_M0140_SBT
+    }
+#endif
 
     for (uint32_t ch = 0; ch < numValidComp; ch++)
     {
@@ -6430,6 +6757,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
         uiSingleDist += uiSingleDistComp[compID];
       }
     }
+#if JVET_M0140_SBT
+    if( tu.noResidual )
+    {
+      CHECK( m_CABACEstimator->getEstFracBits() > 0, "no residual TU's bits shall be 0" );
+    }
+#endif
 
     csFull->fracBits += m_CABACEstimator->getEstFracBits();
     csFull->dist     += uiSingleDist;
@@ -6455,6 +6788,12 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par
     {
       partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
     }
+#if JVET_M0140_SBT
+    else if( cu.sbtInfo && partitioner.canSplit( PartSplit( cu.getSbtTuSplit() ), cs ) )
+    {
+      partitioner.splitCurrArea( PartSplit( cu.getSbtTuSplit() ), cs );
+    }
+#endif
     else
       THROW( "Implicit TU split not available!" );
 
@@ -6574,6 +6913,9 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
   {
     cu.skip    = true;
     cu.rootCbf = false;
+#if JVET_M0140_SBT
+    CHECK( cu.sbtInfo != 0, "sbtInfo shall be 0 if CU has no residual" );
+#endif
     cs.getResiBuf().fill(0);
     {
       cs.getRecoBuf().copyFrom(cs.getPredBuf() );
@@ -6744,6 +7086,9 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
 
   if (zeroCost < cs.cost || !cu.rootCbf)
   {
+#if JVET_M0140_SBT
+    cu.sbtInfo = 0;
+#endif
     cu.rootCbf = false;
 
     cs.clearTUs();
diff --git a/source/Lib/EncoderLib/InterSearch.h b/source/Lib/EncoderLib/InterSearch.h
index 178905b3bd93ca8832092b664ffe33844f0acf89..a77004bf25f7e8f9b7a089715edfee1c2a62cb95 100644
--- a/source/Lib/EncoderLib/InterSearch.h
+++ b/source/Lib/EncoderLib/InterSearch.h
@@ -164,6 +164,14 @@ protected:
   bool            m_isInitialized;
   unsigned int    m_numBVs, m_numBV16s;
   Mv              m_acBVs[IBC_NUM_CANDIDATES];
+#if JVET_M0140_SBT
+  Distortion      m_estMinDistSbt[NUMBER_SBT_MODE + 1]; // estimated minimum SSE value of the PU if using a SBT mode
+  uint8_t         m_sbtRdoOrder[NUMBER_SBT_MODE];       // order of SBT mode in RDO
+  bool            m_skipSbtAll;                         // to skip all SBT modes for the current PU
+  uint8_t         m_histBestSbt;                        // historical best SBT mode for PU of certain SSE values
+  uint8_t         m_histBestMtsIdx;                     // historical best MTS idx  for PU of certain SSE values
+#endif
+
 public:
   InterSearch();
   virtual ~InterSearch();
@@ -186,6 +194,18 @@ public:
 
   void destroy                      ();
 
+#if JVET_M0140_SBT
+  void       calcMinDistSbt         ( CodingStructure &cs, const CodingUnit& cu, const uint8_t sbtAllowed );
+  uint8_t    skipSbtByRDCost        ( int width, int height, int mtDepth, uint8_t sbtIdx, uint8_t sbtPos, double bestCost, Distortion distSbtOff, double costSbtOff, bool rootCbfSbtOff );
+  bool       getSkipSbtAll          ()                 { return m_skipSbtAll; } // true when all SBT modes are to be skipped for the current PU
+  void       setSkipSbtAll          ( bool skipAll )   { m_skipSbtAll = skipAll; }
+  uint8_t    getSbtRdoOrder         ( uint8_t idx )    { assert( m_sbtRdoOrder[idx] < NUMBER_SBT_MODE ); assert( (uint32_t)( m_estMinDistSbt[m_sbtRdoOrder[idx]] >> 2 ) < ( MAX_UINT >> 1 ) ); return m_sbtRdoOrder[idx]; } // idx-th SBT mode in RDO order; asserts that the entry is a valid mode with a bounded estimate
+  Distortion getEstDistSbt          ( uint8_t sbtMode) { return m_estMinDistSbt[sbtMode]; } // index NUMBER_SBT_MODE holds the whole-CU SSE
+  void       initTuAnalyzer         ()                 { m_estMinDistSbt[NUMBER_SBT_MODE] = std::numeric_limits<uint64_t>::max(); m_skipSbtAll = false; } // resets the whole-CU SSE to max and clears the skip-all flag
+  void       setHistBestTrs         ( uint8_t sbtInfo, uint8_t mtsIdx ) { m_histBestSbt = sbtInfo; m_histBestMtsIdx = mtsIdx; } // records the historical best SBT info and MTS index
+  void       initSbtRdoOrder        ( uint8_t sbtMode ) { m_sbtRdoOrder[0] = sbtMode; m_estMinDistSbt[0] = m_estMinDistSbt[sbtMode]; } // makes sbtMode the first RDO candidate and copies its estimate into slot 0
+#endif
+
   void setTempBuffers               (CodingStructure ****pSlitCS, CodingStructure ****pFullCS, CodingStructure **pSaveCS );
   void resetCtuRecord               ()             { m_ctuRecord.clear(); }
 #if ENABLE_SPLIT_PARALLELISM
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index 248ee6db4253939623ff5f674a122e333e233592..823751dc792402248f445100f41d940d689eaec5 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -555,6 +555,13 @@ void HLSWriter::codeSPSNext( const SPSNext& spsNext, const bool usePCM )
   }
 #endif
 
+#if JVET_M0140_SBT
+  WRITE_FLAG( spsNext.getUseSBT() ? 1 : 0,                                                      "sbt_enable_flag" );
+  if( spsNext.getUseSBT() )
+  {
+    WRITE_FLAG( spsNext.getMaxSbtSize() == 64 ? 1 : 0,                                          "max_sbt_size_64_flag" );
+  }
+#endif
   WRITE_FLAG( spsNext.getUseAffine() ? 1 : 0,                                                   "affine_flag" );
   if ( spsNext.getUseAffine() )
   {