diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 378561a7a65918bc4935c41dab9ab3711c3d642f..b7f45d32aae6c42cb38ab92c06ae4207a3b5534c 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -403,6 +403,10 @@ void EncApp::xInitLibCfg()
     m_cEncLib.setNoDimdConstraintFlag(m_noDimdConstraintFlag);
     CHECK(m_noDimdConstraintFlag && m_dimd, "DIMD shall be deactivated when m_noDimdConstraintFlag is equal to 1");
 #endif
+#if JVET_W0123_TIMD_FUSION
+    m_cEncLib.setNoTimdConstraintFlag(m_noTimdConstraintFlag);
+    CHECK(m_noTimdConstraintFlag && m_timd, "TIMD shall be deactivated when m_noTimdConstraintFlag is equal to 1");
+#endif
 #if ENABLE_OBMC
     m_cEncLib.setNoObmcConstraintFlag(m_noObmcConstraintFlag);
     CHECK(m_noObmcConstraintFlag && m_OBMC, "OBMC shall be deactivated when m_noObmcConstraintFlag is equal to 1");
@@ -534,6 +538,9 @@ void EncApp::xInitLibCfg()
 #if ENABLE_DIMD
     m_cEncLib.setNoDimdConstraintFlag(false);
 #endif
+#if JVET_W0123_TIMD_FUSION
+    m_cEncLib.setNoTimdConstraintFlag(false);
+#endif
 #if ENABLE_OBMC
     m_cEncLib.setNoObmcConstraintFlag(false);
 #endif
@@ -761,6 +768,9 @@ void EncApp::xInitLibCfg()
 #if ENABLE_DIMD
   m_cEncLib.setUseDimd                                           ( m_dimd );
 #endif
+#if JVET_W0123_TIMD_FUSION
+  m_cEncLib.setUseTimd                                           ( m_timd );
+#endif
 #if ENABLE_OBMC
   m_cEncLib.setUseObmc                                           ( m_OBMC );
 #endif
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index 15e65b15408c26752f056c2500942d84478eb132..9d312e8c8656ce6e11391a3800eaa870fd622000 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -888,6 +888,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #if ENABLE_DIMD
   ("NoDimdConstraintFlag",                             m_noDimdConstraintFlag,                            false, "Indicate that DIMD is deactivated")
 #endif
+#if JVET_W0123_TIMD_FUSION
+  ("NoTimdConstraintFlag",                             m_noTimdConstraintFlag,                          false, "Indicate that TIMD is deactivated")
+#endif
 #if ENABLE_OBMC
   ("NoObmcConstraintFlag",                             m_noObmcConstraintFlag,                            false, "Indicate that OBMC is deactivated")
 #endif
@@ -1012,6 +1015,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #if ENABLE_DIMD
   ( "DIMD",                                           m_dimd,                                            true, "Enable decoder side intra mode derivation\n" )
 #endif
+#if JVET_W0123_TIMD_FUSION
+  ( "TIMD",                                           m_timd,                                            true,  "Enable template based intra mode derivation\n" )
+#endif
 #if ENABLE_OBMC
   ("OBMC",                                            m_OBMC,                                           true, "Overlapping Block Motion Compensation")
 #endif
@@ -4276,6 +4282,9 @@ void EncAppCfg::xPrintParameter()
 #if ENABLE_DIMD
     msg(VERBOSE, "DIMD:%d ", m_dimd);
 #endif
+#if JVET_W0123_TIMD_FUSION
+    msg(VERBOSE, "TIMD:%d ", m_timd);
+#endif
 #if ENABLE_OBMC
     msg(VERBOSE, "OBMC:%d ", m_OBMC);
 #endif
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index 83b5967d6cfb4d41bb9ed786457ce56db66ab722..965161ce8943a1f2b4d519332fb100ed409cd8c6 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -187,6 +187,9 @@ protected:
 #if ENABLE_DIMD
   bool      m_noDimdConstraintFlag;
 #endif
+#if JVET_W0123_TIMD_FUSION
+  bool      m_noTimdConstraintFlag;
+#endif
 #if ENABLE_OBMC
   bool      m_noObmcConstraintFlag;
 #endif
@@ -377,6 +380,9 @@ protected:
 #if ENABLE_DIMD
   bool      m_dimd;
 #endif
+#if JVET_W0123_TIMD_FUSION
+  bool      m_timd;
+#endif
 #if ENABLE_OBMC
   bool      m_OBMC;
 #endif
diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h
index f6aa142477825ace6caeea4e5e0c6f3e62d6f9d5..36f47f6942e943622ee5fc94353b5e4a9e85eecd 100644
--- a/source/Lib/CommonLib/Buffer.h
+++ b/source/Lib/CommonLib/Buffer.h
@@ -190,6 +190,10 @@ typedef AreaBuf<const TCoeff> CCoeffBuf;
 
 typedef AreaBuf<      MotionInfo>  MotionBuf;
 typedef AreaBuf<const MotionInfo> CMotionBuf;
+#if JVET_W0123_TIMD_FUSION
+typedef AreaBuf<      uint8_t> IpmBuf; ///< 2D view of uint8_t entries; CodingStructure uses it for its intra prediction mode buffer
+typedef AreaBuf<const uint8_t> CIpmBuf; ///< read-only counterpart of IpmBuf
+#endif
 
 typedef AreaBuf<      TCoeff>  PLTescapeBuf;
 typedef AreaBuf<const TCoeff> CPLTescapeBuf;
diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp
index 7c7628c628b57dc30f726e09fba3bc19c5859793..80ad1a359a832ad0a78923801dcd9486555658e0 100644
--- a/source/Lib/CommonLib/CodingStructure.cpp
+++ b/source/Lib/CommonLib/CodingStructure.cpp
@@ -101,6 +101,9 @@ CodingStructure::CodingStructure(CUCache& cuCache, PUCache& puCache, TUCache& tu
   }
 
   m_motionBuf     = nullptr;
+#if JVET_W0123_TIMD_FUSION
+  m_ipmBuf        = nullptr;
+#endif
   features.resize( NUM_ENC_FEATURES );
 #if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
   treeType = TREE_D;
@@ -140,6 +143,10 @@ void CodingStructure::destroy()
 
   delete[] m_motionBuf;
   m_motionBuf = nullptr;
+#if JVET_W0123_TIMD_FUSION
+  delete[] m_ipmBuf;
+  m_ipmBuf = nullptr;
+#endif
 
 
   m_tuCache.cache( tus );
@@ -1007,6 +1014,9 @@ void CodingStructure::createInternals(const UnitArea& _unit, const bool isTopLay
 
   unsigned _lumaAreaScaled = g_miScaling.scale( area.lumaSize() ).area();
   m_motionBuf       = new MotionInfo[_lumaAreaScaled];
+#if JVET_W0123_TIMD_FUSION
+  m_ipmBuf          = new uint8_t[_lumaAreaScaled];
+#endif
   initStructData();
 }
 
@@ -1345,6 +1355,14 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C
 
     motionLut = subStruct.motionLut;
   }
+#if JVET_W0123_TIMD_FUSION
+  if (!subStruct.m_isTuEnc && chType != CHANNEL_TYPE_CHROMA)
+  {
+    IpmBuf ownIB  = getIpmBuf          ( clippedArea );
+    CIpmBuf subIB = subStruct.getIpmBuf( clippedArea );
+    ownIB.copyFrom( subIB );
+  }
+#endif
   prevPLT = subStruct.prevPLT;
 
 
@@ -1469,6 +1487,11 @@ void CodingStructure::copyStructure( const CodingStructure& other, const Channel
 
     motionLut = other.motionLut;
   }
+#if JVET_W0123_TIMD_FUSION
+  IpmBuf  ownIB = getIpmBuf();
+  CIpmBuf subIB = other.getIpmBuf();
+  ownIB.copyFrom( subIB );
+#endif
   prevPLT = other.prevPLT;
 
   if( copyTUs )
@@ -1536,6 +1559,9 @@ void CodingStructure::initStructData( const int &QP, const bool &skipMotBuf )
   {
     getMotionBuf().memset(0);
   }
+#if JVET_W0123_TIMD_FUSION
+  getIpmBuf().memset(0);
+#endif
 
   fracBits = 0;
   dist     = 0;
@@ -1653,6 +1679,56 @@ const MotionInfo& CodingStructure::getMotionInfo( const Position& pos ) const
   return *( m_motionBuf + miPos.y * stride + miPos.x );
 }
 
+#if JVET_W0123_TIMD_FUSION
+IpmBuf CodingStructure::getIpmBuf( const Area& _area )
+{
+  // Writable view onto the part of m_ipmBuf that corresponds to _area; _area must lie inside this structure's luma area.
+  const CompArea& _luma = area.Y();
+  CHECKD( !_luma.contains( _area ), "Trying to access intra prediction mode information outside of this coding structure" );
+  // The requested area and the own luma area are both converted with g_miScaling before the buffer is addressed.
+  const Area miArea   = g_miScaling.scale( _area );
+  const Area selfArea = g_miScaling.scale( _luma );
+  // The view starts at the rsAddr() offset of miArea relative to selfArea and uses selfArea.width as its stride.
+  return IpmBuf( m_ipmBuf + rsAddr( miArea.pos(), selfArea.pos(), selfArea.width ), selfArea.width, miArea.size() );
+}
+
+const CIpmBuf CodingStructure::getIpmBuf( const Area& _area ) const
+{
+  // Read-only view onto the part of m_ipmBuf that corresponds to _area; _area must lie inside this structure's luma area.
+  const CompArea& _luma = area.Y();
+  CHECKD( !_luma.contains( _area ), "Trying to access intra prediction mode information outside of this coding structure" );
+  // The requested area and the own luma area are both converted with g_miScaling before the buffer is addressed.
+  const Area miArea   = g_miScaling.scale( _area );
+  const Area selfArea = g_miScaling.scale( _luma );
+  // Built directly as a const view so that no mutable IpmBuf is created inside a const accessor.
+  return CIpmBuf( m_ipmBuf + rsAddr( miArea.pos(), selfArea.pos(), selfArea.width ), selfArea.width, miArea.size() );
+}
+
+uint8_t& CodingStructure::getIpmInfo( const Position& pos )
+{
+  // Mutable reference to the m_ipmBuf entry that covers luma position pos; pos must lie inside this structure's luma area.
+  CHECKD( !area.Y().contains( pos ), "Trying to access intra prediction mode information outside of this coding structure" );
+  // Intended as a shortcut for getIpmBuf().at( g_miScaling.scale( pos - area.lumaPos() ) ):
+  // the entry is addressed directly so that no buffer view has to be built for a single lookup.
+  const unsigned stride = g_miScaling.scaleHor( area.lumaSize().width );
+  const Position miPos  = g_miScaling.scale( pos - area.lumaPos() );
+  // Row-major addressing: miPos.y rows of 'stride' entries, then miPos.x entries into that row.
+  return *( m_ipmBuf + miPos.y * stride + miPos.x );
+}
+
+const uint8_t& CodingStructure::getIpmInfo( const Position& pos ) const
+{
+  // Read-only reference to the m_ipmBuf entry that covers luma position pos; pos must lie inside this structure's luma area.
+  CHECKD( !area.Y().contains( pos ), "Trying to access intra prediction mode information outside of this coding structure" );
+  // Intended as a shortcut for getIpmBuf().at( g_miScaling.scale( pos - area.lumaPos() ) ):
+  // the entry is addressed directly so that no buffer view has to be built for a single lookup.
+  const unsigned stride = g_miScaling.scaleHor( area.lumaSize().width );
+  const Position miPos  = g_miScaling.scale( pos - area.lumaPos() );
+  // Row-major addressing: miPos.y rows of 'stride' entries, then miPos.x entries into that row.
+  return *( m_ipmBuf + miPos.y * stride + miPos.x );
+}
+#endif
+
 
 // data accessors
        PelBuf     CodingStructure::getPredBuf(const CompArea &blk)           { return getBuf(blk,  PIC_PREDICTION); }
diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h
index 097a9e2a025cfad77840236141dc3128625609be..aa745489463cbe3c7e2db87da607efe9de2dda6e 100644
--- a/source/Lib/CommonLib/CodingStructure.h
+++ b/source/Lib/CommonLib/CodingStructure.h
@@ -252,6 +252,9 @@ private:
   int     m_offsets[ MAX_NUM_COMPONENT ];
 
   MotionInfo *m_motionBuf;
+#if JVET_W0123_TIMD_FUSION
+  uint8_t *m_ipmBuf;
+#endif
 
 public:
   CodingStructure *bestParent;
@@ -272,6 +275,19 @@ public:
   MotionInfo& getMotionInfo( const Position& pos );
   const MotionInfo& getMotionInfo( const Position& pos ) const;
 
+#if JVET_W0123_TIMD_FUSION
+  IpmBuf getIpmBuf( const     Area& _area );                                    // writable view restricted to _area
+  IpmBuf getIpmBuf( const UnitArea& _area ) { return getIpmBuf( _area.Y() ); }  // same, for the luma area of _area
+  IpmBuf getIpmBuf()                        { return getIpmBuf(  area.Y() ); }  // same, for the whole luma area of this structure
+  // read-only counterparts of the three accessors above
+  const CIpmBuf getIpmBuf( const     Area& _area ) const;
+  const CIpmBuf getIpmBuf( const UnitArea& _area ) const { return getIpmBuf( _area.Y() ); }
+  const CIpmBuf getIpmBuf()                        const { return getIpmBuf(  area.Y() ); }
+  // direct access to the single buffer entry that covers luma position pos
+  uint8_t& getIpmInfo( const Position& pos );
+  const uint8_t& getIpmInfo( const Position& pos ) const;
+#endif
+
 
 public:
   // ---------------------------------------------------------------------------
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index 7a26296aafdd575c98bcc7e5a92fffb67e651a08..510b0f039f154372866fc88ca2fac83780dc0699 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -300,13 +300,27 @@ static const int HOR_IDX =                    (1 * (NUM_DIR - 1) + 2); ///< inde
 static const int DIA_IDX =                    (2 * (NUM_DIR - 1) + 2); ///< index for intra DIAGONAL   mode
 static const int VER_IDX =                    (3 * (NUM_DIR - 1) + 2); ///< index for intra VERTICAL   mode
 static const int VDIA_IDX =                   (4 * (NUM_DIR - 1) + 2); ///< index for intra VDIAGONAL  mode
+#if JVET_W0123_TIMD_FUSION
+static const int BDPCM_IDX =                                      162; ///< index for intra BDPCM mode; NOTE(review): presumably chosen above the largest value getWideAngleExt() can return (30 + EXT_VDIA_IDX - 1 = 159) - confirm
+#else
 static const int BDPCM_IDX =                  (5 * (NUM_DIR - 1) + 2); ///< index for intra VDIAGONAL  mode
+#endif
 static const int NOMODE_IDX =                               MAX_UCHAR; ///< indicating uninitialized elements
 static const int NUM_CHROMA_MODE = (5 + NUM_LMC_MODE); ///< total number of chroma modes
 static const int LM_CHROMA_IDX = NUM_LUMA_MODE; ///< chroma mode index for derived from LM mode
 #if ENABLE_DIMD
 static const int DIMD_IDX =                                        99; ///< index for intra DIMD mode
 #endif
+#if JVET_W0123_TIMD_FUSION
+static const int TIMD_IDX =                                       199; ///< index for intra TIMD mode
+static const int DIMD_MAX_TEMP_SIZE =                               4; ///< NOTE(review): presumably the maximum template size in samples - confirm at the use sites
+static const int EXT_HOR_IDX =                                     34; ///< NOTE(review): presumably MAP67TO131( HOR_IDX ) - confirm
+static const int EXT_DIA_IDX =                                     66; ///< NOTE(review): presumably MAP67TO131( DIA_IDX ) - confirm
+static const int EXT_VER_IDX =                                     98; ///< NOTE(review): presumably MAP67TO131( VER_IDX ) - confirm
+static const int EXT_VDIA_IDX =                                   130; ///< NOTE(review): presumably MAP67TO131( VDIA_IDX ) - confirm
+#define MAP131TO67( mode )                 ((mode)<2?(mode):(((mode)>>1)+1)) // values below 2 map to themselves; the argument is evaluated twice
+#define MAP67TO131( mode )                 ((mode)<2?(mode):(((mode)<<1)-2)) // values below 2 map to themselves; the argument is evaluated twice
+#endif
 #if MMLM
 static const int MMLM_CHROMA_IDX = LM_CHROMA_IDX + 1; ///< MDLM_L
 static const int MDLM_L_IDX = LM_CHROMA_IDX + 2; ///< MDLM_L
diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp
index 895e739d4da85b93e85c63642f9cebff728cca06..e6db8469e2e0819aa65227a22515c948bdabdf74 100644
--- a/source/Lib/CommonLib/ContextModelling.cpp
+++ b/source/Lib/CommonLib/ContextModelling.cpp
@@ -357,6 +357,19 @@ unsigned DeriveCtx::CtxDIMDFlag(const CodingUnit& cu)
 }
 #endif
 
+#if JVET_W0123_TIMD_FUSION
+unsigned DeriveCtx::CtxTimdFlag(const CodingUnit& cu)
+{
+  // Context index = how many of the left / above luma neighbours returned by getCURestricted() have timd set (0, 1 or 2).
+  const auto              curPos    = cu.lumaPos();
+  const CodingUnit* const leftCu    = cu.cs->getCURestricted( curPos.offset( -1, 0 ), cu, CH_L );
+  const CodingUnit* const aboveCu   = cu.cs->getCURestricted( curPos.offset( 0, -1 ), cu, CH_L );
+  const bool              leftTimd  = leftCu  != nullptr && leftCu->timd;
+  const bool              aboveTimd = aboveCu != nullptr && aboveCu->timd;
+  return ( leftTimd ? 1u : 0u ) + ( aboveTimd ? 1u : 0u );
+}
+#endif
+
 unsigned DeriveCtx::CtxPredModeFlag( const CodingUnit& cu )
 {
   const CodingUnit *cuLeft  = cu.cs->getCURestricted(cu.lumaPos().offset(-1, 0), cu, CH_L);
diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h
index 834e03af34a3c377352cdbcc5d6afa0f90870959..dd33025f3b5b0e6faa8424b0022261c99671eee5 100644
--- a/source/Lib/CommonLib/ContextModelling.h
+++ b/source/Lib/CommonLib/ContextModelling.h
@@ -618,6 +618,9 @@ unsigned CtxPltCopyFlag( const unsigned prevRunType, const unsigned dist );
 #if ENABLE_DIMD
 unsigned CtxDIMDFlag(const CodingUnit& cu);
 #endif
+#if JVET_W0123_TIMD_FUSION
+unsigned CtxTimdFlag( const CodingUnit& cu );
+#endif
 }
 
 #endif // __CONTEXTMODELLING__
diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp
index dfce0eb5fa9ac8c61ce60203397cadb392f9f0dd..1bd41905edc89da556daaa387d9d01f3b84b1857 100644
--- a/source/Lib/CommonLib/Contexts.cpp
+++ b/source/Lib/CommonLib/Contexts.cpp
@@ -909,12 +909,21 @@ const CtxSet ContextSetCfg::PredMode = ContextSetCfg::addCtxSet
 
 const CtxSet ContextSetCfg::MultiRefLineIdx = ContextSetCfg::addCtxSet
 ({
+#if JVET_W0123_TIMD_FUSION
+  { 25, 59, 25, 59 },
+  { 25, 58, 25, 58 },
+  { 25, 60, 25, 60 },
+  { 6,  5,  6,  5 },
+  { 6,  5,  6,  5 },
+  { 6,  8,  6,  8 }
+#else
 	{ 25, 59 },
 	{ 25, 58 },
 	{ 25, 60 },
 	{ 6,  5 },
 	{ 6,  5 },
 	{ 6,  8 }
+#endif
 });
 
 const CtxSet ContextSetCfg::IntraLumaMpmFlag = ContextSetCfg::addCtxSet
@@ -1575,12 +1584,21 @@ const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet
 
 const CtxSet ContextSetCfg::ISPMode = ContextSetCfg::addCtxSet
 ({
+#if JVET_W0123_TIMD_FUSION
+  { 33, 43, 33 },
+  { 33, 43, 33 },
+  { 33, 43, 33 },
+  { 9,  2,  9 },
+  { 9,  3,  9 },
+  { 9,  2,  9 }
+#else
 	{ 33, 43 },
 	{ 33, 43 },
 	{ 33, 43 },
 	{ 9,  2 },
 	{ 9,  3 },
 	{ 9,  2 }
+#endif
 });
 
 const CtxSet ContextSetCfg::SbtFlag = ContextSetCfg::addCtxSet
@@ -1645,6 +1663,18 @@ const CtxSet ContextSetCfg::DimdFlag = ContextSetCfg::addCtxSet
 });
 #endif
 
+#if JVET_W0123_TIMD_FUSION
+const CtxSet ContextSetCfg::TimdFlag = ContextSetCfg::addCtxSet // NOTE(review): three values per row - presumably one per DeriveCtx::CtxTimdFlag() result (0..2); confirm at the CABAC call site
+({
+  { 48, 56, 56 },
+  { 41, 49, 49 },
+  { 33, 49, 49 },
+  { 5,  1,  1 },
+  { 5,  1,  1 },
+  { 2,  1,  1 }
+});
+#endif
+
 #if ENABLE_OBMC 
 const CtxSet ContextSetCfg::ObmcFlag = ContextSetCfg::addCtxSet
 ({
@@ -2016,10 +2046,17 @@ const CtxSet ContextSetCfg::PredMode = ContextSetCfg::addCtxSet
 
 const CtxSet ContextSetCfg::MultiRefLineIdx = ContextSetCfg::addCtxSet
 ({
+#if JVET_W0123_TIMD_FUSION
+  {  25,  59,  25,  59, },
+  {  25,  58,  25,  58, },
+  {  25,  60,  25,  60, },
+  {   5,   8,  5,   8, },
+#else
   {  25,  59, },
   {  25,  58, },
   {  25,  60, },
   {   5,   8, },
+#endif
 });
 
 const CtxSet ContextSetCfg::IntraLumaMpmFlag = ContextSetCfg::addCtxSet
@@ -2553,10 +2590,17 @@ const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet
 
 const CtxSet ContextSetCfg::ISPMode = ContextSetCfg::addCtxSet
 ({
+#if JVET_W0123_TIMD_FUSION
+  {  33,  43,  33, },
+  {  33,  36,  33, },
+  {  33,  43,  33, },
+  {   9,   2,   9, },
+#else
   {  33,  43, },
   {  33,  36, },
   {  33,  43, },
   {   9,   2, },
+#endif
 });
 
 const CtxSet ContextSetCfg::SbtFlag = ContextSetCfg::addCtxSet
@@ -2607,6 +2651,17 @@ const CtxSet ContextSetCfg::DimdFlag = ContextSetCfg::addCtxSet
   {  5,  1,  1 }
   });
 #endif
+
+#if JVET_W0123_TIMD_FUSION
+const CtxSet ContextSetCfg::TimdFlag = ContextSetCfg::addCtxSet // NOTE(review): three values per row - presumably one per DeriveCtx::CtxTimdFlag() result (0..2); confirm at the CABAC call site
+({
+  { 48, 56, 56 },
+  { 41, 49, 49 },
+  { 33, 49, 49 },
+  {  5,  1,  1 }
+});
+#endif
+
 #if ENABLE_OBMC
 const CtxSet ContextSetCfg::ObmcFlag = ContextSetCfg::addCtxSet
 ({
diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h
index 8993e11bbff8dccf4930e823bf6bb63ff935030c..a49689489b0a084f862925857c22cfed25f8c04c 100644
--- a/source/Lib/CommonLib/Contexts.h
+++ b/source/Lib/CommonLib/Contexts.h
@@ -315,6 +315,9 @@ public:
 #if ENABLE_DIMD
   static const CtxSet   DimdFlag;
 #endif
+#if JVET_W0123_TIMD_FUSION
+  static const CtxSet   TimdFlag;
+#endif
 #if ENABLE_OBMC
   static const CtxSet   ObmcFlag;
 #endif 
diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp
index d8b3d01ba7fa6cde2971eb894786f9372592fee1..b2a4f35265b1c6c9062483ffff23bac04ebe269f 100644
--- a/source/Lib/CommonLib/InterpolationFilter.cpp
+++ b/source/Lib/CommonLib/InterpolationFilter.cpp
@@ -490,6 +490,211 @@ const TFilterCoeff InterpolationFilter::m_lumaIntraFilter[CHROMA_INTERPOLATION_F
   {   0,  -4,  17, 249,  -7,   1 },  // 30/32 position
   {   0, - 2,   9, 253,  -4,   0 },  // 31/32 position
 };
+
+#if JVET_W0123_TIMD_FUSION
+const TFilterCoeff InterpolationFilter::m_lumaIntraFilterExt[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS<<1][6] = // 6 taps per row, 64 rows; every row must sum to 256
+{
+  {   0,   0, 256,   0,   0,   0 },
+  {   0,  -2, 255,   4,  -1,   0 },
+  {   0,  -4, 253,   9,  -2,   0 },
+  {   0,  -5, 251,  13,  -3,   0 },
+  {   1,  -7, 249,  17,  -4,   0 },
+  {   1,  -9, 247,  21,  -5,   1 },
+  {   1, -10, 245,  25,  -6,   1 },
+  {   1, -12, 243,  30,  -7,   1 },
+  {   1, -13, 241,  34,  -8,   1 },
+  {   2, -15, 238,  39,  -9,   1 },
+  {   2, -16, 235,  44, -10,   1 },
+  {   2, -17, 232,  49, -11,   1 },
+  {   2, -18, 229,  53, -12,   2 },
+  {   2, -19, 226,  58, -13,   2 },
+  {   2, -20, 223,  63, -14,   2 },
+  {   2, -21, 220,  68, -15,   2 },
+  {   2, -22, 217,  72, -15,   2 },
+  {   2, -23, 213,  78, -16,   2 },
+  {   3, -23, 209,  82, -17,   2 },
+  {   3, -24, 205,  88, -18,   2 },
+  {   3, -24, 202,  92, -19,   2 },
+  {   3, -24, 198,  97, -20,   2 },
+  {   3, -25, 194, 101, -20,   3 },
+  {   3, -25, 189, 106, -20,   3 },
+  {   3, -25, 185, 111, -21,   3 },
+  {   3, -25, 181, 116, -22,   3 },
+  {   3, -26, 178, 121, -23,   3 },
+  {   3, -26, 173, 126, -23,   3 },
+  {   3, -25, 168, 131, -24,   3 },
+  {   3, -25, 163, 137, -25,   3 },
+  {   3, -25, 159, 141, -25,   3 },
+  {   3, -25, 155, 145, -25,   3 },
+  {   3, -25, 150, 150, -25,   3 },
+  {   3, -25, 145, 155, -25,   3 },
+  {   3, -25, 141, 159, -25,   3 },
+  {   3, -25, 137, 163, -25,   3 },
+  {   3, -24, 131, 168, -25,   3 },
+  {   3, -24, 126, 173, -25,   3 },
+  {   3, -23, 121, 178, -26,   3 },
+  {   3, -22, 116, 181, -25,   3 },
+  {   3, -21, 111, 185, -25,   3 },
+  {   3, -21, 106, 190, -25,   3 }, // was 180, which made this row sum to 246 instead of 256; NOTE(review): confirm against the reference table
+  {   3, -20, 101, 194, -25,   3 },
+  {   2, -20,  97, 198, -24,   3 },
+  {   2, -19,  92, 202, -24,   3 },
+  {   2, -18,  86, 206, -23,   3 },
+  {   2, -17,  82, 209, -23,   3 },
+  {   2, -16,  77, 213, -23,   3 },
+  {   2, -15,  72, 217, -22,   2 },
+  {   2, -15,  68, 220, -21,   2 },
+  {   2, -14,  63, 223, -20,   2 },
+  {   2, -13,  58, 226, -19,   2 },
+  {   2, -12,  53, 229, -18,   2 },
+  {   2, -11,  48, 232, -17,   2 },
+  {   1, -10,  44, 235, -16,   2 },
+  {   1,  -9,  39, 238, -15,   2 },
+  {   1,  -8,  34, 241, -13,   1 },
+  {   1,  -7,  29, 243, -11,   1 },
+  {   1,  -6,  25, 245, -10,   1 },
+  {   0,  -5,  21, 247,  -8,   1 },
+  {   0,  -4,  17, 249,  -7,   1 },
+  {   0,  -3,  13, 251,  -5,   0 },
+  {   0,  -2,   9, 253,  -4,   0 },
+  {   0,  -1,   5, 255,  -3,   0 },
+};
+#endif
+#endif
+
+#if JVET_W0123_TIMD_FUSION
+const TFilterCoeff InterpolationFilter::g_aiExtIntraCubicFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS<<1][NTAPS_CHROMA] = {
+  {   0, 256,   0,   0 },
+  {  -1, 254,   4,  -1 },
+  {  -3, 252,   8,  -1 },
+  {  -4, 250,  12,  -2 },
+  {  -5, 247,  17,  -3 },
+  {  -6, 244,  21,  -3 },
+  {  -7, 242,  25,  -4 },
+  {  -8, 239,  29,  -4 },
+  {  -9, 236,  34,  -5 },
+  {  -9, 233,  38,  -6 },
+  { -10, 230,  43,  -7 },
+  { -11, 227,  47,  -7 },
+  { -12, 224,  52,  -8 },
+  { -12, 220,  56,  -8 },
+  { -13, 217,  61,  -9 },
+  { -14, 214,  65,  -9 },
+  { -14, 210,  70, -10 },
+  { -14, 206,  75, -11 },
+  { -15, 203,  79, -11 },
+  { -15, 199,  84, -12 },
+  { -16, 195,  89, -12 },
+  { -16, 191,  93, -12 },
+  { -16, 187,  98, -13 },
+  { -16, 183, 102, -13 },
+  { -16, 179, 107, -14 },
+  { -16, 174, 112, -14 },
+  { -16, 170, 116, -14 },
+  { -16, 166, 121, -15 },
+  { -17, 162, 126, -15 },
+  { -16, 157, 130, -15 },
+  { -16, 153, 135, -16 },
+  { -16, 148, 140, -16 },
+  { -16, 144, 144, -16 },
+  { -16, 140, 148, -16},
+  { -16, 135, 153, -16},
+  { -15, 130, 157, -16},
+  { -15, 126, 162, -17},
+  { -15, 121, 166, -16},
+  { -14, 116, 170, -16},
+  { -14, 112, 174, -16},
+  { -14, 107, 179, -16},
+  { -13, 102, 183, -16},
+  { -13,  98, 187, -16},
+  { -12,  93, 191, -16},
+  { -12,  89, 195, -16},
+  { -12,  84, 199, -15},
+  { -11,  79, 203, -15},
+  { -11,  75, 206, -14},
+  { -10,  70, 210, -14},
+  {  -9,  65, 214, -14},
+  {  -9,  61, 217, -13},
+  {  -8,  56, 220, -12},
+  {  -8,  52, 224, -12},
+  {  -7,  47, 227, -11},
+  {  -7,  43, 230, -10},
+  {  -6,  38, 233,  -9},
+  {  -5,  34, 236,  -9},
+  {  -4,  29, 239,  -8},
+  {  -4,  25, 242,  -7},
+  {  -3,  21, 244,  -6},
+  {  -3,  17, 247,  -5},
+  {  -2,  12, 250,  -4},
+  {  -1,   8, 252,  -3},
+  {  -1,   4, 254,  -1},
+};
+const TFilterCoeff InterpolationFilter::g_aiExtIntraGaussFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS<<1][NTAPS_CHROMA] = {
+  {  47, 161,  47,   1 },
+  {  45, 161,  49,   1 },
+  {  43, 161,  51,   1 },
+  {  42, 160,  52,   2 },
+  {  40, 160,  54,   2 },
+  {  38, 160,  56,   2 },
+  {  37, 159,  58,   2 },
+  {  35, 158,  61,   2 },
+  {  34, 158,  62,   2 },
+  {  32, 157,  65,   2 },
+  {  31, 156,  67,   2 },
+  {  29, 155,  69,   3 },
+  {  28, 154,  71,   3 },
+  {  27, 153,  73,   3 },
+  {  26, 151,  76,   3 },
+  {  25, 150,  78,   3 },
+  {  23, 149,  80,   4 },
+  {  22, 147,  83,   4 },
+  {  21, 146,  85,   4 },
+  {  20, 144,  87,   5 },
+  {  19, 142,  90,   5 },
+  {  18, 141,  92,   5 },
+  {  17, 139,  94,   6 },
+  {  16, 137,  97,   6 },
+  {  16, 135,  99,   6 },
+  {  15, 133, 101,   7 },
+  {  14, 131, 104,   7 },
+  {  13, 129, 106,   8 },
+  {  13, 127, 108,   8 },
+  {  12, 125, 111,   8 },
+  {  11, 123, 113,   9 },
+  {  11, 120, 116,   9 },
+  {  10, 118, 118,  10 },
+  {   9, 116, 120,  11},
+  {   9, 113, 123,  11},
+  {   8, 111, 125,  12},
+  {   8, 108, 127,  13},
+  {   8, 106, 129,  13},
+  {   7, 104, 131,  14},
+  {   7, 101, 133,  15},
+  {   6,  99, 135,  16},
+  {   6,  97, 137,  16},
+  {   6,  94, 139,  17},
+  {   5,  92, 141,  18},
+  {   5,  90, 142,  19},
+  {   5,  87, 144,  20},
+  {   4,  85, 146,  21},
+  {   4,  83, 147,  22},
+  {   4,  80, 149,  23},
+  {   3,  78, 150,  25},
+  {   3,  76, 151,  26},
+  {   3,  73, 153,  27},
+  {   3,  71, 154,  28},
+  {   3,  69, 155,  29},
+  {   2,  67, 156,  31},
+  {   2,  65, 157,  32},
+  {   2,  62, 158,  34},
+  {   2,  61, 158,  35},
+  {   2,  58, 159,  37},
+  {   2,  56, 160,  38},
+  {   2,  54, 160,  40},
+  {   2,  52, 160,  42},
+  {   1,  51, 161,  43},
+  {   1,  49, 161,  45},
+};
 #endif
 
 //1.5x
diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h
index cb41d2d4643251f5316091e9dd427e420af403b9..f1bb512b013566d6478908b5b3054e115bfb26b8 100644
--- a/source/Lib/CommonLib/InterpolationFilter.h
+++ b/source/Lib/CommonLib/InterpolationFilter.h
@@ -75,6 +75,9 @@ public:
 #if INTRA_6TAP
   static const TFilterCoeff m_lumaIntraFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][6]; ///< Chroma filter 6 taps
   static const TFilterCoeff m_weak4TapFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_CHROMA]; ///< Weak filter 4 taps
+#if JVET_W0123_TIMD_FUSION
+  static const TFilterCoeff m_lumaIntraFilterExt[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << 1][6]; ///< Extended luma intra filter, 6 taps, twice as many rows as m_lumaIntraFilter
+#endif
 #endif
   static const TFilterCoeff m_lumaFilterRPR1[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][8]; ///< Luma filter taps 1.5x
   static const TFilterCoeff m_lumaFilterRPR2[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][8]; ///< Luma filter taps 2x
@@ -101,6 +104,10 @@ private:
   static const TFilterCoeff m_affineLumaFilterRPR2[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_LUMA]; ///< Luma filter taps 2x
 private:
   static const TFilterCoeff m_lumaAltHpelIFilter[NTAPS_LUMA]; ///< Luma filter taps
+#endif
+#if JVET_W0123_TIMD_FUSION
+  static const TFilterCoeff g_aiExtIntraCubicFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS<<1][NTAPS_CHROMA]; ///< Extended intra cubic filter, NTAPS_CHROMA taps per row
+  static const TFilterCoeff g_aiExtIntraGaussFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS<<1][NTAPS_CHROMA]; ///< Extended intra Gauss filter, NTAPS_CHROMA taps per row
 #endif
   static const TFilterCoeff m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps
   static const TFilterCoeff m_bilinearFilterPrec4[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS][NTAPS_BILINEAR]; ///< bilinear filter taps
@@ -157,8 +164,15 @@ public:
 #if INTRA_6TAP
   static TFilterCoeff const * const getIntraLumaFilterTable(const int deltaFract) { return m_lumaIntraFilter[deltaFract]; };
   static TFilterCoeff const * const getWeak4TapFilterTable(const int deltaFract) { return m_weak4TapFilter[deltaFract]; };
+#if JVET_W0123_TIMD_FUSION
+  static TFilterCoeff const * const getIntraLumaFilterTableExt(const int deltaFract) { return m_lumaIntraFilterExt[deltaFract]; }; // no range check: deltaFract must be in [0, CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << 1)
+#endif
 #endif
   static TFilterCoeff const * const getChromaFilterTable(const int deltaFract) { return m_chromaFilter[deltaFract]; };
+#if JVET_W0123_TIMD_FUSION
+  static TFilterCoeff const * const getExtIntraCubicFilter(const int deltaFract) { return g_aiExtIntraCubicFilter[deltaFract]; }; // no range check: deltaFract must be in [0, CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << 1)
+  static TFilterCoeff const * const getExtIntraGaussFilter(const int deltaFract) { return g_aiExtIntraGaussFilter[deltaFract]; }; // no range check: deltaFract must be in [0, CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << 1)
+#endif
 };
 
 //! \}
diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp
index bc5c898ed0d09a8e270f0db2f8d5176041b8e4dc..b74d3927f1a5b6f7c37d39f1ff25477cd53517e4 100644
--- a/source/Lib/CommonLib/IntraPrediction.cpp
+++ b/source/Lib/CommonLib/IntraPrediction.cpp
@@ -67,6 +67,19 @@ const uint8_t IntraPrediction::m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS] =
   0   // 128xn
 };
 
+#if JVET_W0123_TIMD_FUSION
+const uint8_t IntraPrediction::m_aucIntraFilterExt[MAX_INTRA_FILTER_DEPTHS] = // NOTE(review): presumably the extended-mode counterpart of m_aucIntraFilter, one entry per block size - confirm at the use site
+{
+  48, //   1xn
+  48, //   2xn
+  48, //   4xn
+  28, //   8xn
+  4,  //  16xn
+  0,  //  32xn
+  0,  //  64xn
+  0   // 128xn
+};
+#endif
 
 // ====================================================================================================================
 // Constructor / destructor / initialize
@@ -86,6 +99,9 @@ IntraPrediction::IntraPrediction()
   }
 #endif
 
+#if JVET_W0123_TIMD_FUSION
+  m_timdSatdCost = nullptr;
+#endif
   m_piTemp = nullptr;
   m_pMdlmTemp = nullptr;
 #if MMLM
@@ -111,6 +127,10 @@ void IntraPrediction::destroy()
   }
 #endif
 
+#if JVET_W0123_TIMD_FUSION
+  delete m_timdSatdCost;
+  m_timdSatdCost = nullptr; // reset like the buffers below: init() only re-allocates when this is nullptr, and a second destroy() must not delete twice
+#endif
   delete[] m_piTemp;
   m_piTemp = nullptr;
   delete[] m_pMdlmTemp;
@@ -163,6 +182,12 @@ void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepth
   }
 #endif
 
+#if JVET_W0123_TIMD_FUSION
+  if (m_timdSatdCost == nullptr)
+  {
+    m_timdSatdCost = new RdCost;
+  }
+#endif
   if (m_piTemp == nullptr)
   {
     m_piTemp = new Pel[(MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1)];
@@ -187,6 +212,175 @@ void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepth
   }
 }
 
+#if JVET_W0123_TIMD_FUSION
+void IntraPrediction::xIntraPredTimdAngPdpc(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height, int xOffset, int yOffset, int scale,int invAngle)
+{
+  // Blends already predicted samples with side reference samples, in place.
+  //   pDsty, dstStride : first destination row to process and the distance between rows; the pointer is
+  //                      advanced once per processed row and is NOT offset by yOffset inside this function
+  //   refSide          : side reference array, read at index y + (invAngleSum >> 9) + 1
+  //   width, height    : block dimensions; rows yOffset .. height - 1 are processed
+  //   xOffset          : first column that is modified
+  //   scale            : controls the weight decay, wL = 32 >> ((2 * x) >> scale), and the column limit
+  //   invAngle         : increment applied to invAngleSum for every processed column
+  const int xlim = std::min(3 << scale, width);
+  // Blocks narrower than 4 samples stop at column 2, all others at min(3 << scale, width).
+  const int xEnd = (width < 4) ? 2 : xlim;
+  Pel* dstRow = pDsty;
+  for (int y = yOffset; y < height; y++, dstRow += dstStride)
+  {
+    // The accumulator restarts at 256 on every row and is stepped once per processed column, so the
+    // first processed column (x == xOffset) always uses 256 + invAngle.
+    int invAngleSum = 256;
+    for (int x = xOffset; x < xEnd; x++)
+    {
+      invAngleSum += invAngle;
+      const int wL   = 32 >> ((2 * x) >> scale);
+      const Pel left = refSide[y + (invAngleSum >> 9) + 1];
+      const Pel cur  = dstRow[x];
+      dstRow[x] = cur + ((wL * (left - cur) + 32) >> 6);
+    }
+  }
+}
+
+#if GRAD_PDPC
+void IntraPrediction::xIntraPredTimdAngGradPdpc(Pel* pDsty, const int dstStride, Pel* refMain, Pel* refSide, const int width, const int height, int xOffset, int yOffset, int scale, int deltaPos, int intraPredAngle, const ClpRng& clpRng)
+{
+  // Adds the weighted difference between the side sample and an interpolated refMain sample to the
+  // columns xOffset .. min(3 << scale, width) - 1 of every processed row, clipping the result to clpRng.
+  const int xEnd = std::min(3 << scale, width);
+  for (int y = yOffset; y < height; y++, pDsty += dstStride, deltaPos += intraPredAngle)
+  {
+    const int intPos  = deltaPos >> 6; // integer offset into refMain
+    const int fracPos = deltaPos & 63; // low 6 bits: interpolation weight out of 64
+    const Pel topLeft = refMain[intPos] + ((fracPos * (refMain[intPos + 1] - refMain[intPos]) + 32) >> 6);
+    const int grad    = refSide[1 + y] - topLeft;
+    for (int x = xOffset; x < xEnd; x++)
+    {
+      pDsty[x] = ClipPel(pDsty[x] + (((32 >> (2 * (x - xOffset) >> scale)) * grad + 32) >> 6), clpRng);
+    }
+  }
+}
+#endif
+
+void IntraPrediction::xIntraPredTimdHorVerPdpc(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height, int xOffset, int yOffset, int scale,const Pel* refMain, const ClpRng& clpRng)
+{
+  // Every processed row is first overwritten with refMain[1 .. width]; columns xOffset .. min(3 << scale, width) - 1
+  // then receive the weighted gradient (refSide[1 + y] - refMain[0]) and are clipped to clpRng.
+  const Pel corner = refMain[0];
+  const int xEnd   = std::min(3 << scale, width);
+  Pel* dstRow = pDsty;
+  for( int y = yOffset; y < height; y++, dstRow += dstStride )
+  {
+    memcpy(dstRow, &refMain[1], width * sizeof(Pel));
+    const int grad = refSide[1 + y] - corner;
+    for (int x = xOffset; x < xEnd; x++)
+    {
+      dstRow[x] = ClipPel(dstRow[x] + (((32 >> (2 * x >> scale)) * grad + 32) >> 6), clpRng);
+    }
+  }
+}
+
+void IntraPrediction::xIntraPredTimdPlanarDcPdpc(const CPelBuf &pSrc, Pel* pDst, int iDstStride, int width, int height, TEMPLATE_TYPE eTempType, int iTemplateWidth, int iTemplateHeight)
+{
+  // Position-dependent blend of already predicted template samples with their reference samples.
+  //   pSrc             : reference samples; pSrc.at(x + 1, 0) is used as "top" for column x and
+  //                      pSrc.at(y + 1, 1) as "left" for row y
+  //   pDst, iDstStride : prediction buffer that is modified in place, and its row pitch
+  //   width, height    : extent of the buffer region; they also determine the weight decay (scale)
+  //   eTempType        : selects which part of the region is processed (see the branches below)
+  //   iTemplateWidth   : number of columns that form the left template
+  //   iTemplateHeight  : number of rows that form the above template
+  // Every processed sample becomes val + ((wL * (left - val) + wT * (top - val) + 32) >> 6) with
+  //   wT = 32 >> min(31, (2 * y) >> scale) and wL = 32 >> min(31, (2 * x) >> scale).
+  // For LEFT_ABOVE_NEIGHBOR the corner [0, iTemplateWidth) x [0, iTemplateHeight) is left untouched.
+  // No clipping is applied to the blended value.
+  const int scale = ((floorLog2(width) - 2 + floorLog2(height) - 2 + 2) >> 2);
+  if (eTempType == LEFT_ABOVE_NEIGHBOR)
+  {
+    // Above template: rows [0, iTemplateHeight), columns [iTemplateWidth, width).
+    for (int y = 0; y < iTemplateHeight; y++)
+    {
+      const int  wT   = 32 >> std::min(31, ((y << 1) >> scale));
+      const Pel  left = pSrc.at(y + 1, 1);
+      Pel* const row  = pDst + y * iDstStride;
+      for (int x = iTemplateWidth; x < width; x++)
+      {
+        const int wL  = 32 >> std::min(31, ((x << 1) >> scale));
+        const Pel top = pSrc.at(x + 1, 0);
+        const Pel val = row[x];
+        row[x] = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6);
+      }
+    }
+    // Left template: rows [iTemplateHeight, height), columns [0, iTemplateWidth).
+    for (int y = iTemplateHeight; y < height; y++)
+    {
+      const int  wT   = 32 >> std::min(31, ((y << 1) >> scale));
+      const Pel  left = pSrc.at(y + 1, 1);
+      Pel* const row  = pDst + y * iDstStride;
+      for (int x = 0; x < iTemplateWidth; x++)
+      {
+        const int wL  = 32 >> std::min(31, ((x << 1) >> scale));
+        const Pel top = pSrc.at(x + 1, 0);
+        const Pel val = row[x];
+        row[x] = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6);
+      }
+    }
+  }
+  else if (eTempType == LEFT_NEIGHBOR)
+  {
+    // Left template only: rows [0, height), columns [0, iTemplateWidth).
+    for (int y = 0; y < height; y++)
+    {
+      const int  wT   = 32 >> std::min(31, ((y << 1) >> scale));
+      const Pel  left = pSrc.at(y + 1, 1);
+      Pel* const row  = pDst + y * iDstStride;
+      for (int x = 0; x < iTemplateWidth; x++)
+      {
+        const int wL  = 32 >> std::min(31, ((x << 1) >> scale));
+        const Pel top = pSrc.at(x + 1, 0);
+        const Pel val = row[x];
+        row[x] = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6);
+      }
+    }
+  }
+  else // eTempType == ABOVE_NEIGHBOR
+  {
+    // Above template only: rows [0, iTemplateHeight), columns [0, width).
+    for (int y = 0; y < iTemplateHeight; y++)
+    {
+      const int  wT   = 32 >> std::min(31, ((y << 1) >> scale));
+      const Pel  left = pSrc.at(y + 1, 1);
+      Pel* const row  = pDst + y * iDstStride;
+      for (int x = 0; x < width; x++)
+      {
+        const int wL  = 32 >> std::min(31, ((x << 1) >> scale));
+        const Pel top = pSrc.at(x + 1, 0);
+        const Pel val = row[x];
+        row[x] = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6);
+      }
+    }
+  }
+}
+
+void IntraPrediction::xIntraPredTimdAngLuma(Pel* pDstBuf, const ptrdiff_t dstStride, Pel* refMain, int width, int height, int deltaPos, int intraPredAngle, const ClpRng& clpRng, int xOffset, int yOffset)
+{
+  // 4-tap interpolation of refMain with the extended cubic filter. The filter phase (deltaPos & 63) and the
+  // integer reference offset (deltaPos >> 6) are fixed within a row; deltaPos advances by intraPredAngle per row.
+  for (int y = yOffset; y < height; y++, deltaPos += intraPredAngle)
+  {
+    const TFilterCoeff* const taps = InterpolationFilter::getExtIntraCubicFilter(deltaPos & 63);
+    const Pel* src    = refMain + (deltaPos >> 6) + xOffset; // src[0] is the first of the four samples used for column x
+    Pel* const dstRow = pDstBuf + y * dstStride;
+    for (int x = xOffset; x < width; x++, src++)
+    {
+      const Pel filtered = (taps[0] * src[0] + taps[1] * src[1] + taps[2] * src[2] + taps[3] * src[3] + 128) >> 8;
+      dstRow[x] = ClipPel(filtered, clpRng); // always clip even though not always needed
+    }
+  }
+}
+#endif
+
 // ====================================================================================================================
 // Public member functions
 // ====================================================================================================================
@@ -249,6 +443,26 @@ int IntraPrediction::getModifiedWideAngle( int width, int height, int predMode )
   return predMode;
 }
 
+#if JVET_W0123_TIMD_FUSION
+int IntraPrediction::getWideAngleExt( int width, int height, int predMode )
+{
+  // Modes at or below DC_IDX and modes above EXT_VDIA_IDX are returned unchanged.
+  if ( predMode <= DC_IDX || predMode > EXT_VDIA_IDX )
+  {
+    return predMode;
+  }
+  // Remapping threshold, indexed by |log2(width) - log2(height)|; entry 0 (square block) remaps nothing.
+  static const int numRemapped[] = { 0, 11, 19, 23, 27, 29 };
+  const int shift = numRemapped[abs(floorLog2(width) - floorLog2(height))];
+  if ( width > height )
+  {
+    return ( predMode < 2 + shift ) ? predMode + (EXT_VDIA_IDX - 1) : predMode;
+  }
+  const bool foldDown = height > width && predMode > EXT_VDIA_IDX - shift;
+  return foldDown ? predMode - (EXT_VDIA_IDX - 1) : predMode;
+}
+#endif
+
 void IntraPrediction::setReferenceArrayLengths( const CompArea &area )
 {
   // set Top and Left reference samples length
@@ -274,6 +488,9 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co
 
   CHECK(PU::isMIP(pu, toChannelType(compId)), "We should not get here for MIP.");
   const uint32_t       uiDirMode    = isLuma( compId ) && pu.cu->bdpcmMode ? BDPCM_IDX : !isLuma(compId) && pu.cu->bdpcmModeChroma ? BDPCM_IDX : PU::getFinalIntraMode(pu, channelType);
+#if JVET_W0123_TIMD_FUSION
+  bool bExtIntraDir = pu.cu->timd && isLuma( compId );
+#endif
 
   CHECK( floorLog2(iWidth) < 2 && pu.cs->pcv->noChroma2x2, "Size not allowed" );
   CHECK( floorLog2(iWidth) > 7, "Size not allowed" );
@@ -292,7 +509,11 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co
     case(PLANAR_IDX): xPredIntraPlanar(srcBuf, piPred); break;
     case(DC_IDX):     xPredIntraDc(srcBuf, piPred, channelType, false); break;
     case(BDPCM_IDX):  xPredIntraBDPCM(srcBuf, piPred, isLuma(compID) ? pu.cu->bdpcmMode : pu.cu->bdpcmModeChroma, clpRng); break;
+#if JVET_W0123_TIMD_FUSION
+    default:          xPredIntraAng(srcBuf, piPred, channelType, clpRng, bExtIntraDir); break;
+#else
     default:          xPredIntraAng(srcBuf, piPred, channelType, clpRng); break;
+#endif
     }
 #if CIIP_PDPC
   }
@@ -316,7 +537,11 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co
     pu2.intraDir[0] = pu.cu->dimdBlendMode[0];
     initPredIntraParams(pu2, pu.Y(), *(pu.cs->sps));
 
+#if JVET_W0123_TIMD_FUSION
+    xPredIntraAng(srcBuf, predAng, channelType, clpRng, false);
+#else
     xPredIntraAng(srcBuf, predAng, channelType, clpRng);
+#endif
 #else
     const bool   useISP = NOT_INTRA_SUBPARTITIONS != pu.cu->ispMode && isLuma( CHANNEL_TYPE_LUMA );//ok
     const Size   cuSize = Size( pu.cu->blocks[compId].width, pu.cu->blocks[compId].height ); //ok
@@ -395,7 +620,11 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co
       }
     }
 
+#if JVET_W0123_TIMD_FUSION
+    xPredIntraAng( srcBuf, predAng, channelType, clpRng, false );
+#else
     xPredIntraAng( srcBuf, predAng, channelType, clpRng );
+#endif
 #endif
     m_ipaParam.applyPDPC = applyPdpc;
 
@@ -423,6 +652,50 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co
   }
 #endif
 
+#if JVET_W0123_TIMD_FUSION
+  if (pu.cu->timd && pu.cu->timdIsBlended && isLuma(compID))
+  {
+    int width = piPred.width;
+    int height = piPred.height;
+    const UnitArea localUnitArea( pu.chromaFormat, Area( 0, 0, width, height ) );
+
+    PelBuf predFusion = m_tempBuffer[1].getBuf( localUnitArea.Y() );
+
+    const bool applyPdpc = m_ipaParam.applyPDPC;
+    PredictionUnit pu2 = pu;
+    pu2.intraDir[0] = pu.cu->timdModeSecondary;
+    initPredIntraParams(pu2, pu.Y(), *(pu.cs->sps));
+
+    switch (pu.cu->timdModeSecondary)
+    {
+    case(PLANAR_IDX): xPredIntraPlanar(srcBuf, predFusion); break;
+    case(DC_IDX):     xPredIntraDc(srcBuf, predFusion, channelType, false); break;
+    default:          xPredIntraAng(srcBuf, predFusion, channelType, clpRng, bExtIntraDir); break;
+    }
+
+    m_ipaParam.applyPDPC = applyPdpc;
+
+    // do blending
+    const int log2WeightSum = 6;
+    Pel *pelPred = piPred.buf;
+    Pel *pelPredFusion = predFusion.buf;
+    int  w0 = pu.cu->timdFusionWeight[0], w1 = pu.cu->timdFusionWeight[1];
+
+    for( int y = 0; y < height; y++ )
+    {
+      for( int x = 0; x < width; x++ )
+      {
+        int blend = pelPred[x] * w0;
+        blend += pelPredFusion[x] * w1;
+        pelPred[x] = (Pel)(blend >> log2WeightSum);
+      }
+
+      pelPred += piPred.stride;
+      pelPredFusion += predFusion.stride;
+    }
+  }
+#endif
+
 #if CIIP_PDPC
   if (m_ipaParam.applyPDPC || pu.ciipPDPC)
 #else
@@ -625,6 +898,9 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA
 {
   const ComponentID compId = area.compID;
   const ChannelType chType = toChannelType(compId);
+#if JVET_W0123_TIMD_FUSION
+  bool bExtIntraDir = pu.cu->timd && isLuma( chType );
+#endif
 
   const bool        useISP = NOT_INTRA_SUBPARTITIONS != pu.cu->ispMode && isLuma( chType );
 
@@ -632,31 +908,58 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA
   const Size   puSize    = Size( area.width, area.height );
   const Size&  blockSize = useISP ? cuSize : puSize;
   const int      dirMode = PU::getFinalIntraMode(pu, chType);
+#if JVET_W0123_TIMD_FUSION
+  const int     predMode = bExtIntraDir ? getWideAngleExt( blockSize.width, blockSize.height, dirMode ) : getModifiedWideAngle( blockSize.width, blockSize.height, dirMode );
+#else
   const int     predMode = getModifiedWideAngle( blockSize.width, blockSize.height, dirMode );
+#endif
 
+#if JVET_W0123_TIMD_FUSION
+  m_ipaParam.isModeVer            = bExtIntraDir ? (predMode >= EXT_DIA_IDX) : (predMode >= DIA_IDX);
+#else
   m_ipaParam.isModeVer            = predMode >= DIA_IDX;
+#endif
   m_ipaParam.multiRefIndex        = isLuma (chType) ? pu.multiRefIdx : 0 ;
   m_ipaParam.refFilterFlag        = false;
   m_ipaParam.interpolationFlag    = false;
   m_ipaParam.applyPDPC            = (puSize.width >= MIN_TB_SIZEY && puSize.height >= MIN_TB_SIZEY) && m_ipaParam.multiRefIndex == 0;
 
+#if JVET_W0123_TIMD_FUSION
+  const int    intraPredAngleMode = (m_ipaParam.isModeVer) ? (predMode - (bExtIntraDir? EXT_VER_IDX : VER_IDX)) : (-(predMode - (bExtIntraDir ? EXT_HOR_IDX : HOR_IDX)));
+#else
   const int    intraPredAngleMode = (m_ipaParam.isModeVer) ? predMode - VER_IDX : -(predMode - HOR_IDX);
+#endif
 
 
   int absAng = 0;
+#if JVET_W0123_TIMD_FUSION
+  if (dirMode > DC_IDX && dirMode < (bExtIntraDir ? EXT_VDIA_IDX + 1 : NUM_LUMA_MODE)) // intraPredAngle for directional modes
+#else
   if (dirMode > DC_IDX && dirMode < NUM_LUMA_MODE) // intraPredAngle for directional modes
+#endif
   {
     static const int angTable[32]    = { 0,    1,    2,    3,    4,    6,     8,   10,   12,   14,   16,   18,   20,   23,   26,   29,   32,   35,   39,  45,  51,  57,  64,  73,  86, 102, 128, 171, 256, 341, 512, 1024 };
     static const int invAngTable[32] = {
       0,   16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565,
       512, 468,   420,  364,  321,  287,  256,  224,  191,  161,  128,  96,  64,  48,  32,  16
     };   // (512 * 32) / Angle
+#if JVET_W0123_TIMD_FUSION
+    static const int extAngTable[64]    = { 0, 1, 2, 3, 4, 5, 6,7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 43, 46, 49, 52, 55, 58, 61, 64, 67, 70, 74, 78, 84, 90, 96, 102, 108, 114, 121, 128, 137, 146, 159, 172, 188, 204, 230, 256, 299, 342, 427, 512, 597, 682, 853, 1024, 1536, 2048, 3072 };
+    static const int extInvAngTable[64] = {
+        0, 32768, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3277, 2731, 2341, 2048, 1820, 1638, 1489, 1365, 1260, 1170, 1092, 1024, 964, 910, 862, 819, 762, 712, 669, 630, 596, 565, 537, 512, 489, 468, 443, 420, 390, 364, 341, 321, 303, 287, 271, 256, 239, 224, 206, 191, 174, 161, 142, 128, 110, 96, 77, 64, 55, 48, 38, 32, 21, 16, 11
+    };   // (512 * 64) / Angle
+#endif
 
     const int     absAngMode         = abs(intraPredAngleMode);
     const int     signAng            = intraPredAngleMode < 0 ? -1 : 1;
+#if JVET_W0123_TIMD_FUSION
+                  absAng             = bExtIntraDir ? extAngTable[absAngMode] : angTable[absAngMode];
+    m_ipaParam.absInvAngle              = bExtIntraDir ? extInvAngTable[absAngMode] : invAngTable[absAngMode];
+#else
                   absAng             = angTable  [absAngMode];
 
     m_ipaParam.absInvAngle           = invAngTable[absAngMode];
+#endif
     m_ipaParam.intraPredAngle        = signAng * absAng;
     if (intraPredAngleMode < 0)
     {
@@ -707,16 +1010,28 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA
   {
     bool filterFlag = false;
     {
+#if JVET_W0123_TIMD_FUSION
+      const int diff = std::min<int>( abs( predMode - (bExtIntraDir ? EXT_HOR_IDX : HOR_IDX) ), abs( predMode - (bExtIntraDir ? EXT_VER_IDX : VER_IDX) ) );
+#else
       const int diff = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) );
+#endif
       const int log2Size = ((floorLog2(puSize.width) + floorLog2(puSize.height)) >> 1);
       CHECK( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" );
+#if JVET_W0123_TIMD_FUSION
+      filterFlag = (diff > (bExtIntraDir ? m_aucIntraFilterExt[log2Size] : m_aucIntraFilter[log2Size]));
+#else
       filterFlag = (diff > m_aucIntraFilter[log2Size]);
+#endif
     }
 
     // Selelection of either ([1 2 1] / 4 ) refrence filter OR Gaussian 4-tap interpolation filter
     if (filterFlag)
     {
+#if JVET_W0123_TIMD_FUSION
+      const bool isRefFilter       =  bExtIntraDir ? isIntegerSlopeExt(absAng) : isIntegerSlope(absAng);
+#else
       const bool isRefFilter       =  isIntegerSlope(absAng);
+#endif
       CHECK( puSize.width * puSize.height <= 32, "DCT-IF interpolation filter is always used for 4x4, 4x8, and 8x4 luma CB" );
       m_ipaParam.refFilterFlag     =  isRefFilter;
       m_ipaParam.interpolationFlag = !isRefFilter;
@@ -737,7 +1052,11 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA
 */
 //NOTE: Bit-Limit - 25-bit source
 
+#if JVET_W0123_TIMD_FUSION
+void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng, const bool bExtIntraDir)
+#else
 void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng)
+#endif
 {
   int width =int(pDst.width);
   int height=int(pDst.height);
@@ -783,7 +1102,11 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch
     // left extend by 1
     for (int k = -(sizeSide + 1); k <= -1; k++)
     {
+#if JVET_W0123_TIMD_FUSION
+      int frac32precision = bExtIntraDir ? ((-k * absInvAngle + 16) >> 5) : ((-k * absInvAngle + 8) >> 4);
+#else
       int frac32precision = (-k * absInvAngle + 8) >> 4;
+#endif
       int intpel = frac32precision >> 5;
       int fracpel = frac32precision & 31;
       //std::cout << " fracPel: " << fracpel << std::endl;
@@ -836,7 +1159,11 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch
     // Extend main reference to right using replication
     const int log2Ratio = floorLog2(width) - floorLog2(height);
     const int s = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio);
+#if JVET_W0123_TIMD_FUSION
+    const int maxIndex  = (multiRefIdx << s) + 6;
+#else
     const int maxIndex = (multiRefIdx << s) + 2;
+#endif
     const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength;
     const Pel val = refMain[refLength + multiRefIdx];
     // right extended by 1 (z range)
@@ -860,7 +1187,11 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch
     // Extend main reference to right using replication
     const int log2Ratio = floorLog2(width) - floorLog2(height);
     const int s         = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio);
+#if JVET_W0123_TIMD_FUSION
+    const int maxIndex  = (multiRefIdx << s) + 6;
+#else
     const int maxIndex  = (multiRefIdx << s) + 2;
+#endif
     const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength;
     const Pel val       = refMain[refLength + multiRefIdx];
     for (int z = 1; z <= maxIndex; z++)
@@ -911,10 +1242,20 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch
   {
     for (int y = 0, deltaPos = intraPredAngle * (1 + multiRefIdx); y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride)
     {
+#if JVET_W0123_TIMD_FUSION
+      const int deltaInt   = bExtIntraDir ? deltaPos >> 6 : deltaPos >> 5;
+      const int deltaFract = bExtIntraDir ? deltaPos & 63 : deltaPos & 31;
+#else
       const int deltaInt   = deltaPos >> 5;
       const int deltaFract = deltaPos & 31;
+#endif
 
+#if JVET_W0123_TIMD_FUSION
+      bool bIntSlope = bExtIntraDir ? isIntegerSlopeExt( abs(intraPredAngle) ) : isIntegerSlope( abs(intraPredAngle) );
+      if ( !bIntSlope )
+#else
       if ( !isIntegerSlope( abs(intraPredAngle) ) )
+#endif
       {
         if( isLuma(channelType) )
         {
@@ -924,14 +1265,29 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch
           const TFilterCoeff        intraSmoothingFilter[6] = { TFilterCoeff(0), TFilterCoeff(64 - (deltaFract << 1)), TFilterCoeff(128 - (deltaFract << 1)), TFilterCoeff(64 + (deltaFract << 1)), TFilterCoeff(deltaFract << 1), TFilterCoeff(0) };
           const TFilterCoeff        intraSmoothingFilter2[6] = { TFilterCoeff(16 - (deltaFract >> 1)), TFilterCoeff(64 - 3*(deltaFract >> 1)), TFilterCoeff(96 - (deltaFract)), TFilterCoeff(64 + (deltaFract)),
             TFilterCoeff(16 + 3*(deltaFract >> 1)), TFilterCoeff((deltaFract >> 1)) };
+#if JVET_W0123_TIMD_FUSION
+          const TFilterCoeff        intraSmoothingFilterExt[6] = { TFilterCoeff(0), TFilterCoeff(64 - (deltaFract)), TFilterCoeff(128 - (deltaFract)), TFilterCoeff(64 + (deltaFract)), TFilterCoeff(deltaFract), TFilterCoeff(0) };
+          const TFilterCoeff        intraSmoothingFilter2Ext[6] = { TFilterCoeff(16 - (deltaFract >> 2)), TFilterCoeff(64 - 3*(deltaFract >> 2)), TFilterCoeff(96 - (deltaFract >> 1)), TFilterCoeff(64 + (deltaFract >> 1)),
+            TFilterCoeff(16 + 3*(deltaFract >> 2)), TFilterCoeff((deltaFract >> 2)) };
+          const TFilterCoeff* const f = (useCubicFilter) ? ( bExtIntraDir ? InterpolationFilter::getIntraLumaFilterTableExt(deltaFract) : InterpolationFilter::getIntraLumaFilterTable(deltaFract)) : (width >=32 && height >=32)? (bExtIntraDir ? intraSmoothingFilter2Ext : intraSmoothingFilter2) : (bExtIntraDir ? intraSmoothingFilterExt : intraSmoothingFilter);
+#else
           const TFilterCoeff* const f = (useCubicFilter) ? InterpolationFilter::getIntraLumaFilterTable(deltaFract) : (width >=32 && height >=32)? intraSmoothingFilter2 : intraSmoothingFilter;
+#endif
 #else
 #if IF_12TAP
           const TFilterCoeff        intraSmoothingFilter[4] = { TFilterCoeff(64 - (deltaFract << 1)), TFilterCoeff(128 - (deltaFract << 1)), TFilterCoeff(64 + (deltaFract << 1)), TFilterCoeff(deltaFract << 1) };   
+#if JVET_W0123_TIMD_FUSION
+          const TFilterCoeff        intraSmoothingFilterExt[4] = { TFilterCoeff(64 - (deltaFract)), TFilterCoeff(128 - (deltaFract)), TFilterCoeff(64 + (deltaFract)), TFilterCoeff(deltaFract) };
+#endif
 #else
           const TFilterCoeff        intraSmoothingFilter[4] = {TFilterCoeff(16 - (deltaFract >> 1)), TFilterCoeff(32 - (deltaFract >> 1)), TFilterCoeff(16 + (deltaFract >> 1)), TFilterCoeff(deltaFract >> 1)};
-#endif          
+#endif  
+
+#if JVET_W0123_TIMD_FUSION
+          const TFilterCoeff* const f                       = (useCubicFilter) ? (bExtIntraDir ? InterpolationFilter::getExtIntraCubicFilter(deltaFract) : InterpolationFilter::getChromaFilterTable(deltaFract)) : (bExtIntraDir ? InterpolationFilter::getExtIntraGaussFilter(deltaFract) : intraSmoothingFilter);
+#else
           const TFilterCoeff* const f                       = (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(deltaFract) : intraSmoothingFilter;
+#endif
 #endif
 
           for (int x = 0; x < width; x++)
@@ -950,9 +1306,20 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch
 
 #if IF_12TAP
             Pel val = ( f[0] * p[0] + f[1] * p[1] + f[2] * p[2] + f[3] * p[3] + 128 ) >> 8;
+#else
+#if JVET_W0123_TIMD_FUSION
+            int tOffset = 32;
+            int tShift = 6;
+            if (bExtIntraDir)
+            {
+              tOffset = 128;
+              tShift = 8;
+            }
+            Pel val = (f[0] * p[0] + f[1] * p[1] + f[2] * p[2] + f[3] * p[3] + tOffset) >> tShift;
 #else
             Pel val = (f[0] * p[0] + f[1] * p[1] + f[2] * p[2] + f[3] * p[3] + 32) >> 6;
 #endif
+#endif
 #endif
 
             pDsty[x] = ClipPel(val, clpRng);   // always clip even though not always needed
@@ -978,7 +1345,18 @@ void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const Ch
       {
         const int scale = m_ipaParam.angularScale;
         const Pel left = refSide[1 + y];
+#if JVET_W0123_TIMD_FUSION
+        int gradOffset = 16;
+        int gradShift = 5;
+        if (bExtIntraDir)
+        {
+          gradOffset = 32;
+          gradShift = 6;
+        }
+        const Pel topLeft = refMain[deltaInt] + ((deltaFract * (refMain[deltaInt + 1] - refMain[deltaInt]) + gradOffset) >> gradShift);
+#else
         const Pel topLeft = refMain[deltaInt] + ((deltaFract * (refMain[deltaInt + 1] - refMain[deltaInt]) + 16) >> 5);
+#endif
 
         for (int x = 0; x < std::min(3 << scale, width); x++)
         {
@@ -1724,6 +2102,1339 @@ void IntraPrediction::xFilterReferenceSamples(const Pel *refBufUnfiltered, Pel *
   }
   refBufFiltered[predHSize] = refBufUnfiltered[predHSize];
 }
+
+#if JVET_W0123_TIMD_FUSION
+Pel IntraPrediction::xGetPredTimdValDc( const CPelBuf &pSrc, const Size &dstSize, TEMPLATE_TYPE eTempType, int iTempHeight, int iTempWidth )
+{
+  // DC value for TIMD template prediction: the rounded mean of reference samples read from pSrc,
+  // where line 0 of pSrc is read along the width and line 1 along the height.
+  //   LEFT_NEIGHBOR        : mean of dstSize.height samples of line 1, starting at index 1
+  //   ABOVE_NEIGHBOR       : mean of dstSize.width samples of line 0, starting at index 1
+  //   any other eTempType  : mean over the longer side (both sides when square), starting at
+  //                          index iTempWidth + 1 on line 0 and iTempHeight + 1 on line 1
+  const int blkW = dstSize.width;
+  const int blkH = dstSize.height;
+
+  // Adds up `count` consecutive samples of reference line `line`, beginning at position `first`.
+  auto sumRun = [&pSrc]( int first, int count, int line )
+  {
+    int acc = 0;
+    for( int i = 0; i < count; i++ )
+    {
+      acc += pSrc.at( first + i, line );
+    }
+    return acc;
+  };
+  // Rounded division by `count` done as a shift by floorLog2(count).
+  auto roundedMean = []( int total, int count ) -> Pel
+  {
+    return Pel( ( total + ( count >> 1 ) ) >> floorLog2( count ) );
+  };
+
+  if( eTempType == LEFT_NEIGHBOR )
+  {
+    return roundedMean( sumRun( 1, blkH, 1 ), blkH );
+  }
+  if( eTempType == ABOVE_NEIGHBOR )
+  {
+    return roundedMean( sumRun( 1, blkW, 0 ), blkW );
+  }
+
+  int total = 0;
+  if( blkW >= blkH )
+  {
+    total += sumRun( iTempWidth + 1, blkW, 0 );
+  }
+  if( blkW <= blkH )
+  {
+    total += sumRun( iTempHeight + 1, blkH, 1 );
+  }
+  const int samples = ( blkW == blkH ) ? ( blkW << 1 ) : std::max( blkW, blkH );
+  return roundedMean( total, samples );
+}
+
+void IntraPrediction::predTimdIntraAng( const ComponentID compId, const PredictionUnit &pu, uint32_t uiDirMode, Pel* pPred, uint32_t uiStride, uint32_t iWidth, uint32_t iHeight, TEMPLATE_TYPE eTempType, int32_t iTemplateWidth, int32_t iTemplateHeight)
+{
+  // Predicts the TIMD template samples into pPred with intra mode uiDirMode. Reference samples are
+  // read through getPredictorPtr() as a two-line buffer; Planar and DC are dispatched to their own
+  // routines, every other mode to the angular routine.
+  const ComponentID mappedComp = MAP_CHROMA( compId );
+  const int         refStride  = m_refBufferStride[mappedComp];
+  const CPelBuf     refBuf     = CPelBuf( getPredictorPtr( mappedComp ), refStride, 2 );
+  const ClpRng&     clpRng     = pu.cu->cs->slice->clpRng( mappedComp );
+  const bool        isPlanar   = ( uiDirMode == PLANAR_IDX );
+  const bool        isDc       = ( uiDirMode == DC_IDX );
+
+  if( isPlanar )  { xPredTimdIntraPlanar( refBuf, pPred, uiStride, iWidth, iHeight, eTempType, iTemplateWidth, iTemplateHeight ); }
+  else if( isDc ) { xPredTimdIntraDc( pu, refBuf, pPred, uiStride, iWidth, iHeight, eTempType, iTemplateWidth, iTemplateHeight ); }
+  else            { xPredTimdIntraAng( refBuf, clpRng, pPred, uiStride, iWidth, iHeight, eTempType, iTemplateWidth, iTemplateHeight, uiDirMode ); }
+
+  // PDPC post-processing is applied here only for the two non-angular modes.
+  if( ( isPlanar || isDc ) && m_ipaParam.applyPDPC )
+  {
+    xIntraPredTimdPlanarDcPdpc( refBuf, pPred, uiStride, iWidth, iHeight, eTempType, iTemplateWidth, iTemplateHeight );
+  }
+}
+
+void IntraPrediction::xPredTimdIntraPlanar( const CPelBuf &pSrc, Pel* rpDst, int iDstStride, int width, int height, TEMPLATE_TYPE eTempType, int iTemplateWidth, int iTemplateHeight )
+{
+  // Planar prediction of the TIMD template samples into rpDst. For LEFT_ABOVE_NEIGHBOR, width/height include
+  // iTemplateWidth/iTemplateHeight and the above and left strips are predicted separately. The scratch arrays are static, i.e. shared by all calls.
+  static int leftColumn[MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE+1] = {0}, topRow[MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE+1] ={0}, bottomRow[MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE] = {0}, rightColumn[MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE]={0};
+  if(eTempType == LEFT_ABOVE_NEIGHBOR)
+  {
+    { // predict above template: rows [0, iTemplateHeight), columns [iTemplateWidth, width)
+      uint32_t w = width - iTemplateWidth;
+      const uint32_t log2W = floorLog2( w );
+      const uint32_t log2H = floorLog2( iTemplateHeight );
+      const uint32_t offset = 1 << (log2W + log2H);
+      for(int k = 0; k < w + 1; k++)
+      {
+        topRow[k] = pSrc.at( k + iTemplateWidth + 1, 0 );
+      }
+      for (int k=0; k < iTemplateHeight + 1; k++)
+      {
+        leftColumn[k] = pSrc.at( k + 1, 1 );
+      }
+      // the reference samples just past the strip serve as bottom-left / top-right anchors
+      int bottomLeft = leftColumn[iTemplateHeight];
+      int topRight = topRow[w];
+      for(int k = 0; k < w; k++)
+      {
+        bottomRow[k]  = bottomLeft - topRow[k];
+        topRow[k]     = topRow[k] << log2H;
+      }
+      for(int k = 0; k < iTemplateHeight; k++)
+      {
+        rightColumn[k]  = topRight - leftColumn[k];
+        leftColumn[k]   = leftColumn[k] << log2W;
+      }
+      // horPred / topRow are advanced by one increment per sample instead of multiplying by the position
+      const uint32_t finalShift = 1 + log2W + log2H;
+      for (int y = 0; y < iTemplateHeight; y++)
+      {
+        int horPred = leftColumn[y];
+        for (int x = 0; x < w; x++)
+        {
+          horPred   += rightColumn[y];
+          topRow[x] += bottomRow[x];
+          int vertPred = topRow[x];
+          rpDst[y*iDstStride+x + iTemplateWidth] = ( ( horPred << log2H ) + ( vertPred << log2W ) + offset ) >> finalShift;
+        }
+      }
+    }
+
+    { // predict left template: rows [iTemplateHeight, height), columns [0, iTemplateWidth)
+      uint32_t h = height - iTemplateHeight;
+      const uint32_t log2W = floorLog2( iTemplateWidth );
+      const uint32_t log2H = floorLog2( h );
+      const uint32_t offset = 1 << (log2W + log2H);
+      for (int k = 0; k < h + 1; k++)
+      {
+        leftColumn[k] = pSrc.at( k + iTemplateHeight + 1, 1 );
+      }
+      for(int k = 0; k < iTemplateWidth + 1; k++)
+      {
+        topRow[k] = pSrc.at( k + 1, 0 );
+      }
+      int bottomLeft = leftColumn[h];
+      int topRight = topRow[iTemplateWidth];
+      for(int k = 0; k < iTemplateWidth; k++)
+      {
+        bottomRow[k]  = bottomLeft - topRow[k];
+        topRow[k]     = topRow[k] << log2H;
+      }
+      for(int k = 0; k < h; k++)
+      {
+        rightColumn[k]  = topRight - leftColumn[k];
+        leftColumn[k]   = leftColumn[k] << log2W;
+      }
+      const uint32_t finalShift = 1 + log2W + log2H;
+      for (int y = 0; y < h; y++) // h rows, not height: leftColumn/rightColumn are prepared for h rows only and row y lands at y + iTemplateHeight
+      {
+        int horPred = leftColumn[y];
+        for (int x = 0; x < iTemplateWidth; x++)
+        {
+          horPred   += rightColumn[y];
+          topRow[x] += bottomRow[x];
+          int vertPred = topRow[x];
+          rpDst[(y + iTemplateHeight)*iDstStride+x] = ( ( horPred << log2H ) + ( vertPred << log2W ) + offset ) >> finalShift;
+        }
+      }
+    }
+  }
+  else if(eTempType == LEFT_NEIGHBOR) // left strip only: iTemplateWidth columns, height rows
+  {
+    const uint32_t log2W = floorLog2( iTemplateWidth );
+    const uint32_t log2H = floorLog2( height );
+    const uint32_t offset = 1 << (log2W + log2H);
+    for (int k = 0; k < height + 1; k++)
+    {
+      leftColumn[k] = pSrc.at( k + iTemplateHeight + 1, 1 );
+    }
+    for(int k = 0; k < iTemplateWidth + 1; k++)
+    {
+      topRow[k] = pSrc.at( k + 1, 0 );
+    }
+
+    int bottomLeft = leftColumn[height];
+    int topRight = topRow[iTemplateWidth];
+    for(int k = 0; k < iTemplateWidth; k++)
+    {
+      bottomRow[k]  = bottomLeft - topRow[k];
+      topRow[k]     = topRow[k] << log2H;
+    }
+    for(int k = 0; k < height; k++)
+    {
+      rightColumn[k]  = topRight - leftColumn[k];
+      leftColumn[k]   = leftColumn[k] << log2W;
+    }
+
+    const uint32_t finalShift = 1 + log2W + log2H;
+    for (int y = 0; y < height; y++)
+    {
+      int horPred = leftColumn[y];
+      for (int x = 0; x < iTemplateWidth; x++)
+      {
+        horPred   += rightColumn[y];
+        topRow[x] += bottomRow[x];
+        int vertPred = topRow[x];
+        rpDst[y*iDstStride+x] = ( ( horPred << log2H ) + ( vertPred << log2W ) + offset ) >> finalShift;
+      }
+    }
+  }
+  else if(eTempType == ABOVE_NEIGHBOR) // above strip only: width columns, iTemplateHeight rows
+  {
+    const uint32_t log2W = floorLog2( width );
+    const uint32_t log2H = floorLog2( iTemplateHeight );
+    const uint32_t offset = 1 << (log2W + log2H);
+    for(int k = 0; k < width + 1; k++)
+    {
+      topRow[k] = pSrc.at( k + iTemplateWidth + 1, 0 );
+    }
+    for (int k=0; k < iTemplateHeight + 1; k++)
+    {
+      leftColumn[k] = pSrc.at( k + 1, 1 );
+    }
+
+    int bottomLeft = leftColumn[iTemplateHeight];
+    int topRight = topRow[width];
+    for(int k=0;k<width;k++)
+    {
+      bottomRow[k]  = bottomLeft - topRow[k];
+      topRow[k]     = topRow[k] << log2H;
+    }
+    for(int k = 0; k < iTemplateHeight; k++)
+    {
+      rightColumn[k]  = topRight - leftColumn[k];
+      leftColumn[k]   = leftColumn[k] << log2W;
+    }
+
+    const uint32_t finalShift = 1 + log2W + log2H;
+    for (int y = 0; y < iTemplateHeight; y++)
+    {
+      int horPred = leftColumn[y];
+      for (int x = 0; x < width; x++)
+      {
+        horPred   += rightColumn[y];
+        topRow[x] += bottomRow[x];
+        int vertPred = topRow[x];
+        rpDst[y*iDstStride+x] = ( ( horPred << log2H ) + ( vertPred << log2W ) + offset ) >> finalShift;
+      }
+    }
+  }
+  else
+  {
+    assert(0); // any other template type is not handled by this routine
+  }
+}
+
+void IntraPrediction::xPredTimdIntraDc( const PredictionUnit &pu, const CPelBuf &pSrc, Pel* pDst, int iDstStride, int iWidth, int iHeight, TEMPLATE_TYPE eTempType, int iTemplateWidth, int iTemplateHeight )
+{
+  // Fills the template samples of pDst with one DC value obtained from xGetPredTimdValDc().
+  //   LEFT_ABOVE_NEIGHBOR : columns [iTemplateWidth, iWidth) in the first iTemplateHeight rows and
+  //                         columns [0, iTemplateWidth) in the remaining rows up to iHeight
+  //   LEFT_NEIGHBOR       : columns [0, iTemplateWidth) in rows [0, iHeight)
+  //   ABOVE_NEIGHBOR      : columns [0, iWidth) in rows [0, iTemplateHeight)
+  const Size blkSize( pu.lwidth(), pu.lheight() );
+  const Pel  dc = xGetPredTimdValDc( pSrc, blkSize, eTempType, iTemplateHeight, iTemplateWidth );
+
+  // Writes dc into columns [colBegin, colEnd) of one row; an empty range writes nothing.
+  auto fillSpan = [dc]( Pel* row, int colBegin, int colEnd )
+  {
+    for( int col = colBegin; col < colEnd; col++ )
+    {
+      row[col] = dc;
+    }
+  };
+  switch( eTempType )
+  {
+  case LEFT_ABOVE_NEIGHBOR:
+    for( int y = 0; y < iHeight; y++, pDst += iDstStride )
+    {
+      if( y < iTemplateHeight )
+      {
+        fillSpan( pDst, iTemplateWidth, iWidth );
+      }
+      else
+      {
+        fillSpan( pDst, 0, iTemplateWidth );
+      }
+    }
+    break;
+  case LEFT_NEIGHBOR:
+    for( int y = 0; y < iHeight; y++, pDst += iDstStride )
+    {
+      fillSpan( pDst, 0, iTemplateWidth );
+    }
+    break;
+  case ABOVE_NEIGHBOR:
+    for( int y = 0; y < iTemplateHeight; y++, pDst += iDstStride )
+    {
+      fillSpan( pDst, 0, iWidth );
+    }
+    break;
+  default:
+    assert(0);
+  }
+}
+
+void IntraPrediction::initPredTimdIntraParams(const PredictionUnit & pu, const CompArea area, int dirMode)
+{
+  // Derives the angular parameters stored in m_ipaParam (direction class, angle, inverse angle, PDPC
+  // settings) for mode dirMode on a block of size area.width x area.height; pu is not consulted.
+  // Both tables are indexed by the mode distance from EXT_VER_IDX / EXT_HOR_IDX.
+  static const int extAngTable[64]    = { 0, 1, 2, 3, 4, 5, 6,7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 43, 46, 49, 52, 55, 58, 61, 64, 67, 70, 74, 78, 84, 90, 96, 102, 108, 114, 121, 128, 137, 146, 159, 172, 188, 204, 230, 256, 299, 342, 427, 512, 597, 682, 853, 1024, 1536, 2048, 3072 };
+  static const int extInvAngTable[64] = { 0, 32768, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3277, 2731, 2341, 2048, 1820, 1638, 1489, 1365, 1260, 1170, 1092, 1024, 964, 910, 862, 819, 762, 712, 669, 630, 596, 565, 537, 512, 489, 468, 443, 420, 390, 364, 341, 321, 303, 287, 271, 256, 239, 224, 206, 191, 174, 161, 142, 128, 110, 96, 77, 64, 55, 48, 38, 32, 21, 16, 11 }; // (512 * 64) / Angle
+
+  const Size blk( area.width, area.height );
+  const int  wideMode  = getWideAngleExt( blk.width, blk.height, dirMode );
+  const bool vertical  = wideMode >= EXT_DIA_IDX;
+  const int  angleMode = vertical ? wideMode - EXT_VER_IDX : EXT_HOR_IDX - wideMode;   // signed distance from pure vertical / horizontal
+  const int  angleIdx  = abs( angleMode );
+  const int  angleMag  = extAngTable[angleIdx];
+
+  m_ipaParam.isModeVer         = vertical;
+  m_ipaParam.refFilterFlag     = false;
+  m_ipaParam.interpolationFlag = false;
+  m_ipaParam.absInvAngle       = extInvAngTable[angleIdx];
+  m_ipaParam.intraPredAngle    = ( angleMode < 0 ) ? -angleMag : angleMag;
+  m_ipaParam.applyPDPC         = blk.width >= MIN_TB_SIZEY && blk.height >= MIN_TB_SIZEY;
+
+  // Modes 0 and 1 and the zero-angle modes keep the purely size-based PDPC decision made above.
+  if( dirMode <= 1 || angleMode == 0 )
+  {
+    return;
+  }
+  if( angleMode < 0 )
+  {
+    m_ipaParam.applyPDPC = false;   // negative angles switch PDPC off
+    return;
+  }
+
+  // Positive angles: PDPC stays enabled only if the derived angularScale is non-negative.
+  const int sideSize = vertical ? blk.height : blk.width;
+#if GRAD_PDPC
+  m_ipaParam.useGradPDPC = false;
+#endif
+  m_ipaParam.angularScale = std::min( 2, floorLog2( sideSize ) - ( floorLog2( 3 * m_ipaParam.absInvAngle - 2 ) - 8 ) );
+#if GRAD_PDPC
+  if( m_ipaParam.angularScale < 0 )
+  {
+    m_ipaParam.angularScale = ( floorLog2( blk.width ) + floorLog2( blk.height ) - 2 ) >> 2;
+    m_ipaParam.useGradPDPC  = true;
+  }
+#endif
+  m_ipaParam.applyPDPC = m_ipaParam.applyPDPC && ( m_ipaParam.angularScale >= 0 );
+}
+
+void IntraPrediction::xPredTimdIntraAng( const CPelBuf &pSrc, const ClpRng& clpRng, Pel* pTrueDst, int iDstStride, int iWidth, int iHeight, TEMPLATE_TYPE eTempType, int iTemplateWidth , int iTemplateHeight, uint32_t dirMode)
+{
+  int width = iWidth;
+  int height = iHeight;
+  const bool bIsModeVer     = m_ipaParam.isModeVer;
+  const int  intraPredAngle = m_ipaParam.intraPredAngle;
+  const int  invAngle       = m_ipaParam.absInvAngle;
+  Pel* refMain;
+  Pel* refSide;
+  static Pel  refAbove[2 * MAX_CU_SIZE + 5 + 33 * MAX_REF_LINE_IDX];
+  static Pel  refLeft[2 * MAX_CU_SIZE + 5 + 33 * MAX_REF_LINE_IDX];
+
+  // Initialize the Main and Left reference array.
+  if (intraPredAngle < 0)
+  {
+    for (int x = 0; x <= width + 1; x++)
+    {
+      refAbove[x + height] = pSrc.at(x, 0);
+    }
+    for (int y = 0; y <= height + 1; y++)
+    {
+      refLeft[y + width] = pSrc.at(y, 1);
+    }
+    refMain = bIsModeVer ? refAbove + height : refLeft + width;
+    refSide = bIsModeVer ? refLeft + width : refAbove + height;
+    // Extend the Main reference to the left.
+    int sizeSide = bIsModeVer ? height : width;
+    for (int k = -sizeSide; k <= -1; k++)
+    {
+      refMain[k] = refSide[std::min((-k * invAngle + 256) >> 9, sizeSide)];
+    }
+  }
+  else
+  {
+    for (int x = 0; x <= m_topRefLength; x++)
+    {
+      refAbove[x] = pSrc.at(x, 0);
+    }
+    for (int y = 0; y <= m_leftRefLength; y++)
+    {
+      refLeft[y] = pSrc.at(y, 1);
+    }
+    refMain = bIsModeVer ? refAbove : refLeft;
+    refSide = bIsModeVer ? refLeft : refAbove;
+    // Extend main reference to right using replication
+    const int log2Ratio = floorLog2(width - iTemplateWidth) - floorLog2(height - iTemplateHeight);
+    const int s         = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio);
+    const int maxIndex  = (std::max(iTemplateWidth, iTemplateHeight) << s) + 2 + std::max(iTemplateWidth, iTemplateHeight);
+    const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength;
+    const Pel val       = refMain[refLength];
+    for (int z = 1; z <= maxIndex; z++)
+    {
+      refMain[refLength + z] = val;
+    }
+  }
+
+  // swap width/height if we are doing a horizontal mode:
+  static Pel tempArray[(MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE)*(MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE)];  ///< buffer size may not be big enough
+  const int dstStride = bIsModeVer ? iDstStride : (MAX_CU_SIZE+DIMD_MAX_TEMP_SIZE);
+  Pel *pDst = bIsModeVer ? pTrueDst : tempArray;
+  if (!bIsModeVer)
+  {
+    std::swap(width, height);
+    std::swap(iTemplateWidth, iTemplateHeight);
+  }
+
+  if( intraPredAngle == 0 )  // pure vertical or pure horizontal
+  {
+    if(eTempType == LEFT_ABOVE_NEIGHBOR)
+    {
+      if (m_ipaParam.applyPDPC)
+      {
+        int scale = (floorLog2(width) + floorLog2(height) - 2) >> 2;
+        xIntraPredTimdHorVerPdpc(pDst, dstStride, refSide, width, iTemplateHeight, iTemplateWidth, 0, scale, refMain, clpRng);
+        xIntraPredTimdHorVerPdpc(pDst+iTemplateHeight*iDstStride, dstStride, refSide, iTemplateWidth, height, 0, iTemplateHeight, scale, refMain, clpRng);
+      }
+      else
+      {
+        for (int y = 0; y < iTemplateHeight; y++)
+        {
+          memcpy(pDst + y * dstStride + iTemplateWidth, &refMain[iTemplateWidth + 1], (width - iTemplateWidth) * sizeof(Pel));
+        }
+        for (int y = iTemplateHeight; y < height; y++)
+        {
+          memcpy(pDst + y * dstStride, &refMain[1], iTemplateWidth * sizeof(Pel));
+        }
+      }
+    }
+    else if(eTempType == LEFT_NEIGHBOR || eTempType == ABOVE_NEIGHBOR)
+    {
+      if((eTempType == LEFT_NEIGHBOR && bIsModeVer)||(eTempType == ABOVE_NEIGHBOR && !bIsModeVer))
+      {
+        if (m_ipaParam.applyPDPC)
+        {
+          const int scale   = (floorLog2(width) + floorLog2(height) - 2) >> 2;
+          xIntraPredTimdHorVerPdpc(pDst, dstStride, refSide, iTemplateWidth, height, 0, 0, scale, refMain, clpRng);
+        }
+        else
+        {
+          for (int y = 0; y < height; y++)
+          {
+            for (int x = 0; x < iTemplateWidth; x++)
+            {
+              pDst[y * dstStride+x] = refMain[x + 1];
+            }
+          }
+        }
+      }
+      else
+      {
+        if (m_ipaParam.applyPDPC)
+        {
+          const int scale   = (floorLog2(width) + floorLog2(height) - 2) >> 2;
+          xIntraPredTimdHorVerPdpc(pDst, dstStride, refSide, width, iTemplateHeight, 0, 0, scale, refMain, clpRng);
+        }
+        else
+        {
+          for (int y = 0; y < iTemplateHeight; y++)
+          {
+            memcpy(pDst + y * dstStride, &refMain[1], width * sizeof(Pel));
+          }
+        }
+      }
+    }
+    else
+    {
+      assert(0);
+    }
+  }
+  else
+  {
+    Pel *pDsty=pDst;
+    if ( !isIntegerSlopeExt( abs(intraPredAngle) ) )
+    {
+      int deltaPos = intraPredAngle;
+      if (eTempType == LEFT_ABOVE_NEIGHBOR)
+      {
+        Pel *pDsty=pDst;
+        // Above template
+        xIntraPredTimdAngLuma(pDsty, dstStride, refMain, width, iTemplateHeight, deltaPos, intraPredAngle, clpRng, iTemplateWidth, 0);
+        // Left template
+        for (int y = 0; y < iTemplateHeight; y++)
+          deltaPos += intraPredAngle;
+        xIntraPredTimdAngLuma(pDsty, dstStride, refMain, iTemplateWidth, height, deltaPos, intraPredAngle, clpRng, 0, iTemplateHeight);
+#if GRAD_PDPC
+        if (m_ipaParam.applyPDPC && m_ipaParam.useGradPDPC)
+        {
+          int deltaPos2 = intraPredAngle;
+          const int scale = m_ipaParam.angularScale;
+          xIntraPredTimdAngGradPdpc(pDst, dstStride, refMain, refSide, width, iTemplateHeight, iTemplateWidth, 0, scale, deltaPos2, intraPredAngle, clpRng);
+          for (int y = 0; y < iTemplateHeight; y++)
+            deltaPos2 += intraPredAngle;
+          xIntraPredTimdAngGradPdpc(pDst+iTemplateHeight*dstStride, dstStride, refMain, refSide, iTemplateWidth, height, 0, iTemplateHeight, scale, deltaPos2, intraPredAngle, clpRng);
+        }
+        else
+#endif
+        if (m_ipaParam.applyPDPC)
+        {
+          const int scale = m_ipaParam.angularScale;
+          xIntraPredTimdAngPdpc(pDst, dstStride, refSide, width, iTemplateHeight, iTemplateWidth, 0, scale, invAngle);
+          xIntraPredTimdAngPdpc(pDst+iTemplateHeight*dstStride, dstStride, refSide, iTemplateWidth, height, 0, iTemplateHeight, scale, invAngle);
+        }
+      }
+      else if (eTempType == LEFT_NEIGHBOR || eTempType == ABOVE_NEIGHBOR)
+      {
+        int iRegionWidth, iRegionHeight;
+        if((eTempType == LEFT_NEIGHBOR && bIsModeVer)||(eTempType == ABOVE_NEIGHBOR && !bIsModeVer))
+        {
+          iRegionWidth  = iTemplateWidth;
+          iRegionHeight = height;
+        }
+        else
+        {
+          iRegionWidth  = width;
+          iRegionHeight = iTemplateHeight;
+        }
+        xIntraPredTimdAngLuma(pDsty, dstStride, refMain, iRegionWidth, iRegionHeight, deltaPos, intraPredAngle, clpRng, 0, 0);
+#if GRAD_PDPC
+        if (m_ipaParam.applyPDPC && m_ipaParam.useGradPDPC)
+        {
+          int deltaPos2 = intraPredAngle;
+          const int scale = m_ipaParam.angularScale;
+          xIntraPredTimdAngGradPdpc(pDst, dstStride, refMain, refSide, iRegionWidth, iRegionHeight, 0, 0, scale, deltaPos2, intraPredAngle, clpRng);
+        }
+        else
+#endif
+        if (m_ipaParam.applyPDPC)
+        {
+          const int scale = m_ipaParam.angularScale;
+          xIntraPredTimdAngPdpc(pDst, dstStride, refSide, iRegionWidth, iRegionHeight, 0, 0, scale, invAngle);
+        }
+      }
+    }
+    else
+    {
+      if(eTempType == LEFT_ABOVE_NEIGHBOR)
+      {
+        Pel *pDsty=pDst;
+        for (int y = 0, deltaPos = intraPredAngle; y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride)
+        {
+          const int deltaInt   = deltaPos >> 6;
+          int iStartIdx, iEndIdx;
+          if(y < iTemplateHeight)
+          {
+            iStartIdx = iTemplateWidth;
+            iEndIdx   = width - 1;
+          }
+          else
+          {
+            iStartIdx = 0;
+            iEndIdx   = iTemplateWidth - 1;
+          }
+          memcpy(pDsty + iStartIdx, &refMain[iStartIdx + deltaInt + 1], (iEndIdx - iStartIdx + 1) * sizeof(Pel));
+        }
+#if GRAD_PDPC
+        if (m_ipaParam.applyPDPC && m_ipaParam.useGradPDPC)
+        {
+          int deltaPos2 = intraPredAngle;
+          const int scale = m_ipaParam.angularScale;
+          xIntraPredTimdAngGradPdpc(pDst, dstStride, refMain, refSide, width, iTemplateHeight, iTemplateWidth, 0, scale, deltaPos2, intraPredAngle, clpRng);
+          for (int y = 0; y < iTemplateHeight; y++)
+            deltaPos2 += intraPredAngle;
+          xIntraPredTimdAngGradPdpc(pDst+iTemplateHeight*dstStride, dstStride, refMain, refSide, iTemplateWidth, height, 0, iTemplateHeight, scale, deltaPos2, intraPredAngle, clpRng);
+        }
+        else
+#endif
+        if (m_ipaParam.applyPDPC)
+        {
+          const int scale = m_ipaParam.angularScale;
+          xIntraPredTimdAngPdpc(pDst, dstStride, refSide, width, iTemplateHeight, iTemplateWidth, 0, scale, invAngle);
+          xIntraPredTimdAngPdpc(pDst+iTemplateHeight*dstStride, dstStride, refSide, iTemplateWidth, height, 0, iTemplateHeight, scale, invAngle);
+        }
+      }
+      else // if (eTempType == LEFT_NEIGHBOR || eTempType == ABOVE_NEIGHBOR)
+      {
+        Pel *pDsty=pDst;
+        assert(eTempType == LEFT_NEIGHBOR || eTempType == ABOVE_NEIGHBOR);
+        int iRegionWidth, iRegionHeight;
+        if((eTempType == LEFT_NEIGHBOR && bIsModeVer)||(eTempType == ABOVE_NEIGHBOR && !bIsModeVer))
+        {
+          iRegionWidth  = iTemplateWidth;
+          iRegionHeight = height;
+        }
+        else
+        {
+          iRegionWidth  = width;
+          iRegionHeight = iTemplateHeight;
+        }
+        for (int y = 0, deltaPos = intraPredAngle; y<iRegionHeight; y++, deltaPos += intraPredAngle, pDsty += dstStride)
+        {
+          const int deltaInt   = deltaPos >> 6;
+          memcpy(pDsty, &refMain[deltaInt + 1], iRegionWidth * sizeof(Pel));
+        }
+#if GRAD_PDPC
+        if (m_ipaParam.applyPDPC && m_ipaParam.useGradPDPC)
+        {
+          int deltaPos2 = intraPredAngle;
+          const int scale = m_ipaParam.angularScale;
+          xIntraPredTimdAngGradPdpc(pDst, dstStride, refMain, refSide, iRegionWidth, iRegionHeight, 0, 0, scale, deltaPos2, intraPredAngle, clpRng);
+        }
+        else
+#endif
+        if (m_ipaParam.applyPDPC)
+        {
+          const int scale   = m_ipaParam.angularScale;
+          xIntraPredTimdAngPdpc(pDst, dstStride, refSide, iRegionWidth, iRegionHeight, 0, 0, scale, invAngle);
+        }
+      }
+    }
+  }
+
+  // Flip the block if this is the horizontal mode
+  if (!bIsModeVer)
+  {
+    if(eTempType == LEFT_ABOVE_NEIGHBOR)
+    {
+      for (int y = 0; y < height; y++)
+      {
+        int iStartIdx, iEndIdx;
+        if(y < iTemplateHeight)
+        {
+          iStartIdx = iTemplateWidth;
+          iEndIdx   = width - 1;
+        }
+        else
+        {
+          iStartIdx = 0;
+          iEndIdx   = iTemplateWidth - 1;
+        }
+        for (int x = iStartIdx; x <= iEndIdx; x++)
+        {
+          pTrueDst[x*iDstStride+y] = pDst[y*dstStride+x];
+        }
+      }
+    }
+    else if(eTempType == LEFT_NEIGHBOR)
+    {
+      for (int y = 0; y < iTemplateHeight; y++)
+      {
+        for (int x = 0; x < width; x++)
+        {
+          pTrueDst[x*iDstStride+y] = pDst[y*dstStride+x];
+        }
+      }
+    }
+    else if(eTempType == ABOVE_NEIGHBOR)
+    {
+      for (int y = 0; y < height; y++)
+      {
+        for (int x = 0; x < iTemplateWidth; x++)
+        {
+          pTrueDst[x*iDstStride+y] = pDst[y*dstStride+x];
+        }
+      }
+    }
+    else
+    {
+      assert(0);
+    }
+  }
+}
+
+void IntraPrediction::initTimdIntraPatternLuma(const CodingUnit &cu, const CompArea &area, int iTemplateWidth, int iTemplateHeight, uint32_t uiRefWidth, uint32_t uiRefHeight)
+{ // Sets m_leftRefLength / m_topRefLength for the TIMD template and then fills the unfiltered reference buffer of area.compID.
+  const CodingStructure& cs   = *cu.cs;
+  Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED]; // destination handed to xFillTimdReferenceSamples below
+  bool bLeftAbove = iTemplateHeight > 0 && iTemplateWidth > 0; // true only when both template dimensions are non-zero
+  m_leftRefLength     = bLeftAbove ? (uiRefHeight << 1) : ((uiRefHeight + iTemplateHeight) << 1); // 2 * uiRefHeight, or 2 * (uiRefHeight + iTemplateHeight) when one template dimension is 0
+  m_topRefLength      = bLeftAbove ? (uiRefWidth << 1) : ((uiRefWidth + iTemplateWidth) << 1); // same rule horizontally
+  xFillTimdReferenceSamples(cs.picture->getRecoBuf(area), refBufUnfiltered, area, cu, iTemplateWidth, iTemplateHeight); // reads the two lengths set above
+}
+
+void IntraPrediction::xFillTimdReferenceSamples(const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu, int iTemplateWidth, int iTemplateHeight)
+{ // Builds the TIMD reference line in refBufUnfiltered: top row at [0..m_topRefLength], left column at [predStride..predStride+m_leftRefLength]; samples are read from recoBuf displaced by the template size and unavailable units are padded.
+  const ChannelType      chType = toChannelType( area.compID );
+  const CodingStructure &cs     = *cu.cs;
+  const SPS             &sps    = *cs.sps;
+  const PreCalcValues   &pcv    = *cs.pcv;
+
+  const int  tuWidth            = area.width;
+  const int  tuHeight           = area.height;
+  const int  predSize           = m_topRefLength; // top-row length; the row holds predSize + 1 entries including the corner
+  const int  predHSize          = m_leftRefLength; // left-column length; the column holds predHSize + 1 entries including the corner
+  const int predStride = predSize + 1; // offset of the left column inside the destination buffer
+  m_refBufferStride[area.compID] = predStride; // recorded per component
+
+  const bool noShift            = pcv.noChroma2x2 && area.width == 4; // don't shift on the lowest level (chroma not-split)
+  const int  unitWidth          = tuWidth  <= 2 && cu.ispMode && isLuma(area.compID) ? tuWidth  : pcv.minCUWidth  >> (noShift ? 0 : getComponentScaleX(area.compID, sps.getChromaFormatIdc()));
+  const int  unitHeight         = tuHeight <= 2 && cu.ispMode && isLuma(area.compID) ? tuHeight : pcv.minCUHeight >> (noShift ? 0 : getComponentScaleY(area.compID, sps.getChromaFormatIdc()));
+  int leftTempUnitNum = 0; // availability units covered by the left template; stays 0 unless iTemplateHeight >= 4
+  int aboveTempUnitNum = 0; // availability units covered by the above template; stays 0 unless iTemplateWidth >= 4
+  if (iTemplateHeight >= 4)
+  {
+    leftTempUnitNum = iTemplateHeight / unitHeight;
+  }
+  if (iTemplateWidth >= 4)
+  {
+    aboveTempUnitNum = iTemplateWidth / unitWidth;
+  }
+
+  const int  totalAboveUnits = (predSize + (unitWidth - 1)) / unitWidth - aboveTempUnitNum; // ceil(predSize / unitWidth) minus the above-template units
+  const int  totalLeftUnits = (predHSize + (unitHeight - 1)) / unitHeight - leftTempUnitNum; // ceil(predHSize / unitHeight) minus the left-template units
+  const int  totalUnits = totalAboveUnits + totalLeftUnits + 1 + aboveTempUnitNum + leftTempUnitNum; //+1 for top-left
+  const int  numAboveUnits      = std::max<int>( tuWidth / unitWidth, 1 );
+  const int  numLeftUnits       = std::max<int>( tuHeight / unitHeight, 1 );
+  const int  numAboveRightUnits = totalAboveUnits - numAboveUnits; // whatever remains of the top row beyond the block width
+  const int  numLeftBelowUnits  = totalLeftUnits - numLeftUnits; // whatever remains of the left column beyond the block height
+
+  // ----- Step 1: analyze neighborhood -----
+  const Position posLT          = area;
+  const Position posRT          = area.topRight();
+  const Position posLB          = area.bottomLeft();
+
+  bool  neighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1]; // layout used below: [0, totalLeftUnits) left/below-left with index 0 bottom-most, [totalLeftUnits] corner, template slots, then above/above-right left-to-right
+  int   numIntraNeighbor = 0;
+
+  memset( neighborFlags, 0, totalUnits ); // clears totalUnits bytes (one per flag, assuming sizeof(bool) == 1)
+
+  neighborFlags[totalLeftUnits] = isAboveLeftAvailable( cu, chType, posLT.offset(-iTemplateWidth, -iTemplateHeight) ); // corner availability, probed at the top-left shifted by the template size
+  neighborFlags[totalLeftUnits + leftTempUnitNum] = neighborFlags[totalLeftUnits]; // corner flag mirrored into the template slots (same slot when the unit count is 0)
+  neighborFlags[totalLeftUnits + leftTempUnitNum + aboveTempUnitNum] = neighborFlags[totalLeftUnits];
+  numIntraNeighbor += neighborFlags[totalLeftUnits] ? 1 : 0; // corner counted once ...
+  numIntraNeighbor += leftTempUnitNum > 0 && neighborFlags[totalLeftUnits] ? 1 : 0; // ... plus once more if left-template units exist
+  numIntraNeighbor += aboveTempUnitNum > 0 && neighborFlags[totalLeftUnits] ? 1 : 0; // ... and once more if above-template units exist
+  numIntraNeighbor += isAboveAvailable     ( cu, chType, posLT.offset(0, -iTemplateHeight), numAboveUnits,      unitWidth,  (neighborFlags + totalLeftUnits + 1 + leftTempUnitNum + aboveTempUnitNum) );
+  numIntraNeighbor += isAboveRightAvailable( cu, chType, posRT.offset(0, -iTemplateHeight), numAboveRightUnits, unitWidth,  (neighborFlags + totalLeftUnits + 1 + leftTempUnitNum + aboveTempUnitNum + numAboveUnits) );
+  numIntraNeighbor += isLeftAvailable      ( cu, chType, posLT.offset(-iTemplateWidth, 0), numLeftUnits,       unitHeight, (neighborFlags + totalLeftUnits - 1) ); // flag pointer starts at the top-most left unit; presumably the callee walks towards index 0 - confirm
+  numIntraNeighbor += isBelowLeftAvailable ( cu, chType, posLB.offset(-iTemplateWidth, 0), numLeftBelowUnits,  unitHeight, (neighborFlags + totalLeftUnits - 1 - numLeftUnits) );
+
+  // ----- Step 2: fill reference samples (depending on neighborhood) -----
+
+  const Pel*  srcBuf    = recoBuf.buf;
+  const int   srcStride = recoBuf.stride;
+        Pel*  ptrDst    = refBufUnfiltered;
+  const Pel*  ptrSrc;
+  const Pel   valueDC   = 1 << (sps.getBitDepth( chType ) - 1); // 1 << (bitDepth - 1), used when nothing is available
+
+
+  if( numIntraNeighbor == 0 )
+  {
+    // Fill border with DC value
+    for (int j = 0; j <= predSize; j++) { ptrDst[j] = valueDC; }
+    for (int i = 0; i <= predHSize; i++)
+    {
+      ptrDst[i + predStride] = valueDC;
+    }
+  }
+  else if( numIntraNeighbor == totalUnits )
+  {
+    // Fill top-left border and top and top right with rec. samples
+    ptrSrc = srcBuf - (1 + iTemplateHeight) * srcStride - (1 + iTemplateWidth); // corner sample: (1 + template height) rows above and (1 + template width) columns left of recoBuf's origin
+    for (int j = 0; j <= predSize; j++)
+    {
+      ptrDst[j] = ptrSrc[j];
+    }
+    for (int i = 0; i <= predHSize; i++)
+    {
+      ptrDst[i + predStride] = ptrSrc[i * srcStride];
+    }
+  }
+  else // reference samples are partially available
+  {
+    // Fill top-left sample(s) if available
+    ptrSrc = srcBuf - (1 + iTemplateHeight) * srcStride - (1 + iTemplateWidth); // same corner position as in the fully-available case
+    ptrDst = refBufUnfiltered;
+    if (neighborFlags[totalLeftUnits])
+    {
+      for (int i = 0; i <= iTemplateWidth; i++)
+        ptrDst[i] = ptrSrc[i];
+      for (int i = 0; i <= iTemplateHeight; i++)
+        ptrDst[i + predStride] = ptrSrc[i * srcStride];
+    }
+
+    // Fill left & below-left samples if available (downwards)
+    ptrSrc += (1 + iTemplateHeight) * srcStride; // now on recoBuf's row 0, still in the reference column
+    ptrDst += (1 + iTemplateHeight) + predStride; // left-column entry just after the corner/template part
+    for (int unitIdx = totalLeftUnits - 1; unitIdx > 0; unitIdx--)
+    {
+      if (neighborFlags[unitIdx])
+      {
+        for (int i = 0; i < unitHeight; i++)
+        {
+          ptrDst[i] = ptrSrc[i * srcStride];
+        }
+      }
+      ptrSrc += unitHeight * srcStride;
+      ptrDst += unitHeight;
+    }
+    // Fill last below-left sample(s)
+    if (neighborFlags[0])
+    {
+      int lastSample = ((predHSize - iTemplateHeight) % unitHeight == 0) ? unitHeight : (predHSize - iTemplateHeight) % unitHeight; // the last unit may be partial
+      for (int i = 0; i < lastSample; i++)
+      {
+        ptrDst[i] = ptrSrc[i * srcStride];
+      }
+    }
+
+    // Fill above & above-right samples if available (left-to-right)
+    ptrSrc = srcBuf - srcStride * (1 + iTemplateHeight); // reference row, at recoBuf's column 0
+    ptrDst = refBufUnfiltered + 1 + iTemplateWidth; // top-row entry just after the corner/template part
+    for (int unitIdx = totalLeftUnits + 1 + leftTempUnitNum + aboveTempUnitNum; unitIdx < totalUnits - 1; unitIdx++)
+    {
+      if (neighborFlags[unitIdx])
+      {
+        for (int j = 0; j < unitWidth; j++)
+        {
+          ptrDst[j] = ptrSrc[j];
+        }
+      }
+      ptrSrc += unitWidth;
+      ptrDst += unitWidth;
+    }
+    // Fill last above-right sample(s)
+    if (neighborFlags[totalUnits - 1])
+    {
+      int lastSample = ((predSize - iTemplateWidth) % unitWidth == 0) ? unitWidth : (predSize - iTemplateWidth) % unitWidth; // the last unit may be partial
+      for (int j = 0; j < lastSample; j++)
+      {
+        ptrDst[j] = ptrSrc[j];
+      }
+    }
+
+    // pad from first available down to the last below-left
+    ptrDst = refBufUnfiltered;
+    int lastAvailUnit = 0;
+    if (!neighborFlags[0])
+    {
+      int firstAvailUnit = 1;
+      while (firstAvailUnit < totalUnits && !neighborFlags[firstAvailUnit]) // scan from the bottom-left towards the top-right for the first available unit
+      {
+        firstAvailUnit++;
+      }
+
+      // first available sample
+      int firstAvailRow = -1; // -1 means the sample lies in the top row (see firstAvailCol)
+      int firstAvailCol = 0;
+      if (firstAvailUnit < totalLeftUnits)
+      {
+        firstAvailRow = (totalLeftUnits - firstAvailUnit) * unitHeight + iTemplateHeight;
+      }
+      else if (firstAvailUnit == totalLeftUnits)
+      {
+        firstAvailRow = iTemplateHeight;
+      }
+      else
+      {
+        firstAvailCol = (firstAvailUnit - (totalLeftUnits + leftTempUnitNum + aboveTempUnitNum) - 1) * unitWidth + 1 + iTemplateWidth;
+      }
+      const Pel firstAvailSample = ptrDst[firstAvailRow < 0 ? firstAvailCol : firstAvailRow + predStride]; // read from the top row or the left column accordingly
+
+      // last sample below-left (n.a.)
+      int lastRow = predHSize;
+
+      // fill left column
+      for (int i = lastRow; i > firstAvailRow; i--)
+      {
+        ptrDst[i + predStride] = firstAvailSample;
+      }
+      // fill top row
+      if (firstAvailCol > 0)
+      {
+        for (int j = 0; j < firstAvailCol; j++)
+        {
+          ptrDst[j] = firstAvailSample;
+        }
+      }
+      lastAvailUnit = firstAvailUnit;
+    }
+
+    // pad all other reference samples.
+    int currUnit = lastAvailUnit + 1;
+    while (currUnit < totalUnits)
+    {
+      if (!neighborFlags[currUnit]) // samples not available
+      {
+        // last available sample
+        int lastAvailRow = -1; // -1 means the sample lies in the top row (see lastAvailCol)
+        int lastAvailCol = 0;
+        if (lastAvailUnit < totalLeftUnits)
+        {
+          lastAvailRow = (totalLeftUnits - lastAvailUnit - 1) * unitHeight + iTemplateHeight + 1;
+        }
+        else if (lastAvailUnit == totalLeftUnits)
+        {
+          lastAvailCol = iTemplateWidth;
+        }
+        else
+        {
+          lastAvailCol = (lastAvailUnit - (totalLeftUnits + leftTempUnitNum + aboveTempUnitNum)) * unitWidth + iTemplateWidth;
+        }
+        const Pel lastAvailSample = ptrDst[lastAvailRow < 0 ? lastAvailCol : lastAvailRow + predStride];
+
+        // fill current unit with last available sample
+        if (currUnit < totalLeftUnits)
+        {
+          for (int i = lastAvailRow - 1; i >= lastAvailRow - unitHeight; i--)
+          {
+            ptrDst[i + predStride] = lastAvailSample;
+          }
+        }
+        else if (currUnit == totalLeftUnits)
+        {
+          for (int i = 0; i < iTemplateHeight + 1; i++) // corner unit: pad the corner plus template part of the left column ...
+          {
+            ptrDst[i + predStride] = lastAvailSample;
+          }
+          for (int j = 0; j < iTemplateWidth + 1; j++) // ... and of the top row
+          {
+            ptrDst[j] = lastAvailSample;
+          }
+        }
+        else
+        {
+          int numSamplesInUnit = (currUnit == totalUnits - 1) ? (((predSize - iTemplateWidth) % unitWidth == 0) ? unitWidth : (predSize - iTemplateWidth) % unitWidth) : unitWidth;
+          for (int j = lastAvailCol + 1; j <= lastAvailCol + numSamplesInUnit; j++)
+          {
+            ptrDst[j] = lastAvailSample;
+          }
+        }
+      }
+      lastAvailUnit = currUnit; // after copying or padding, currUnit holds samples and becomes the padding source for the next unit
+      currUnit++;
+    }
+  }
+}
+
+int IntraPrediction::deriveTimdMode( const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu )
+{ // Picks the TIMD mode by predicting the template for candidate modes and comparing their distFunc costs; writes cu.timdMode, cu.timdIsBlended and, when blending, cu.timdModeSecondary / cu.timdFusionWeight; returns the best mode. recoBuf is not referenced in this body.
+  int channelBitDepth = cu.slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA);
+  SizeType uiWidth = cu.lwidth();
+  SizeType uiHeight = cu.lheight();
+
+  static Pel PredLuma[(MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE) * (MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE)]; // function-local static, so the same buffer is reused by every call
+  memset(PredLuma, 0, (MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE) * (MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE) * sizeof(Pel)); // zeroed on each call
+  Pel* piPred = PredLuma;
+  uint32_t uiPredStride = MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE;
+
+  int  iCurX  = cu.lx();
+  int  iCurY  = cu.ly();
+  int  iRefX  = -1, iRefY = -1;
+  uint32_t uiRefWidth = 0, uiRefHeight = 0;
+
+  int iTempWidth = 4, iTempHeight = 4; // template thickness: 4 by default, 2 when the matching CU dimension is <= 8
+  if(uiWidth <= 8)
+  {
+    iTempWidth = 2;
+  }
+  if(uiHeight <= 8)
+  {
+    iTempHeight = 2;
+  }
+
+  TEMPLATE_TYPE eTempType = CU::deriveTimdRefType(iCurX, iCurY, uiWidth, uiHeight, iTempWidth, iTempHeight, iRefX, iRefY, uiRefWidth, uiRefHeight); // presumably fills iRefX/iRefY/uiRefWidth/uiRefHeight - confirm against CU::deriveTimdRefType
+
+  if (eTempType != NO_NEIGHBOR)
+  {
+    const CodingStructure& cs   = *cu.cs;
+    m_ipaParam.multiRefIndex        = iTempWidth; // the template width is stored as the multi-ref index
+    Pel* piOrg = cs.picture->getRecoBuf( area ).buf;
+    int iOrgStride = cs.picture->getRecoBuf( area ).stride;
+    piOrg += (iRefY - iCurY) * iOrgStride + (iRefX - iCurX); // shift by the offset between (iCurX, iCurY) and (iRefX, iRefY)
+    DistParam distParamSad[2]; // above, left
+    distParamSad[0].applyWeight = false;
+    distParamSad[0].useMR = false;
+    distParamSad[1].applyWeight = false;
+    distParamSad[1].useMR = false;
+    if(eTempType == LEFT_ABOVE_NEIGHBOR)
+    {
+      m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg + iTempWidth, piPred + iTempWidth, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 0, 1, true); // Use HAD (SATD) cost
+      m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg + iTempHeight * iOrgStride, piPred + iTempHeight * uiPredStride, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true); // Use HAD (SATD) cost
+    }
+    else if(eTempType == LEFT_NEIGHBOR)
+    {
+      m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true);
+    }
+    else if(eTempType == ABOVE_NEIGHBOR)
+    {
+      m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 0, 1, true);
+    }
+    initTimdIntraPatternLuma(cu, area, eTempType != ABOVE_NEIGHBOR ? iTempWidth : 0, eTempType != LEFT_NEIGHBOR ? iTempHeight : 0, uiRefWidth, uiRefHeight); // template width is 0 for ABOVE_NEIGHBOR, template height is 0 for LEFT_NEIGHBOR
+
+    uint32_t uiIntraDirNeighbor[5] = {0}, modeIdx = 0; // distinct modes of up to five neighbours; modeIdx counts the entries kept
+    bool includedMode[EXT_VDIA_IDX + 1]; // marks modes already stored in uiIntraDirNeighbor
+    memset(includedMode, false, (EXT_VDIA_IDX + 1) * sizeof(bool));
+    auto &pu = *cu.firstPU;
+    uint32_t uiRealW = uiRefWidth + (eTempType == LEFT_NEIGHBOR? iTempWidth : 0);
+    uint32_t uiRealH = uiRefHeight + (eTempType == ABOVE_NEIGHBOR? iTempHeight : 0);
+    uint64_t maxCost = (uint64_t)(iTempWidth * cu.lheight() + iTempHeight * cu.lwidth()); // threshold for the early-termination checks below
+
+    uint64_t uiBestCost = MAX_UINT64;
+    int iBestMode = PLANAR_IDX;
+    uint64_t uiSecondaryCost = MAX_UINT64;
+    int iSecondaryMode = PLANAR_IDX;
+
+    const Position posLTx = pu.Y().topLeft();
+    const Position posRTx = pu.Y().topRight();
+    const Position posLBx = pu.Y().bottomLeft();
+
+    // left
+    const PredictionUnit *puLeftx = pu.cs->getPURestricted(posLBx.offset(-1, 0), pu, pu.chType);
+    if (puLeftx && CU::isIntra(*puLeftx->cu))
+    {
+      uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma( *puLeftx );
+      if (!puLeftx->cu->timd) // modes of non-TIMD neighbours are passed through MAP67TO131
+      {
+        uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]);
+      }
+      if( !includedMode[uiIntraDirNeighbor[modeIdx]] ) // keep the entry only if this mode was not seen before
+      {
+        includedMode[uiIntraDirNeighbor[modeIdx]] = true;
+        modeIdx++;
+      }
+    }
+    // above
+    const PredictionUnit *puAbovex = pu.cs->getPURestricted(posRTx.offset(0, -1), pu, pu.chType);
+    if (puAbovex && CU::isIntra(*puAbovex->cu) && CU::isSameCtu(*pu.cu, *puAbovex->cu)) // only this neighbour is additionally required to be in the same CTU
+    {
+      uiIntraDirNeighbor[modeIdx] =PU::getIntraDirLuma( *puAbovex );
+      if (!puAbovex->cu->timd)
+      {
+        uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]);
+      }
+      if( !includedMode[uiIntraDirNeighbor[modeIdx]] )
+      {
+        includedMode[uiIntraDirNeighbor[modeIdx]] = true;
+        modeIdx++;
+      }
+    }
+    // below left
+    const PredictionUnit *puLeftBottomx = cs.getPURestricted( posLBx.offset( -1, 1 ), pu, pu.chType );
+    if (puLeftBottomx && CU::isIntra(*puLeftBottomx->cu))
+    {
+      uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma( *puLeftBottomx );
+      if (!puLeftBottomx->cu->timd)
+      {
+        uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]);
+      }
+      if( !includedMode[uiIntraDirNeighbor[modeIdx]] )
+      {
+        includedMode[uiIntraDirNeighbor[modeIdx]] = true;
+        modeIdx++;
+      }
+    }
+    // above right
+    const PredictionUnit *puAboveRightx = cs.getPURestricted( posRTx.offset( 1, -1 ), pu, pu.chType );
+    if (puAboveRightx && CU::isIntra(*puAboveRightx->cu))
+    {
+      uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma( *puAboveRightx );
+      if (!puAboveRightx->cu->timd)
+      {
+        uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]);
+      }
+      if( !includedMode[uiIntraDirNeighbor[modeIdx]] )
+      {
+        includedMode[uiIntraDirNeighbor[modeIdx]] = true;
+        modeIdx++;
+      }
+    }
+    //above left
+    const PredictionUnit *puAboveLeftx = cs.getPURestricted( posLTx.offset( -1, -1 ), pu, pu.chType );
+    if( puAboveLeftx && CU::isIntra(*puAboveLeftx->cu) )
+    {
+      uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma( *puAboveLeftx );
+      if (!puAboveLeftx->cu->timd)
+      {
+        uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]);
+      }
+      if( !includedMode[uiIntraDirNeighbor[modeIdx]] )
+      {
+        includedMode[uiIntraDirNeighbor[modeIdx]] = true;
+        modeIdx++;
+      }
+    }
+    bool bNoAngular = false; // becomes true when at least two distinct neighbour modes were collected and none exceeds DC_IDX
+    if(modeIdx >= 2)
+    {
+      bNoAngular = true;
+      for(uint32_t i = 0; i < modeIdx; i++)
+      {
+        if(uiIntraDirNeighbor[i] > DC_IDX)
+        {
+          bNoAngular = false;
+          break;
+        }
+      }
+    }
+
+    if (bNoAngular)
+    {
+      for(int iMode = 0; iMode <= 1; iMode ++) // only modes 0 and 1 are evaluated on this path
+      {
+        uint64_t uiCost = 0;
+        initPredTimdIntraParams(pu, area, iMode);
+        predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, (eTempType == ABOVE_NEIGHBOR)? 0: iTempWidth, (eTempType == LEFT_NEIGHBOR)? 0: iTempHeight);
+        if(eTempType == LEFT_ABOVE_NEIGHBOR)
+        {
+          uiCost += distParamSad[0].distFunc(distParamSad[0]);
+          uiCost += distParamSad[1].distFunc(distParamSad[1]);
+        }
+        else if(eTempType == LEFT_NEIGHBOR)
+        {
+          uiCost = distParamSad[1].distFunc(distParamSad[1]);
+        }
+        else if(eTempType == ABOVE_NEIGHBOR)
+        {
+          uiCost += distParamSad[0].distFunc(distParamSad[0]);
+        }
+        else
+        {
+          assert(0);
+        }
+
+        if(uiCost < uiBestCost)
+        {
+          uiBestCost = uiCost;
+          iBestMode = iMode;
+        }
+        if(uiBestCost <= maxCost) // early termination once the best cost is at or below the threshold
+        {
+          break;
+        }
+      }
+      cu.timdMode = iBestMode;
+      cu.timdIsBlended = false; // no blending on this path; cu.timdModeSecondary is left untouched
+
+      return iBestMode;
+    }
+#if SECONDARY_MPM
+    uint8_t mpmList[NUM_MOST_PROBABLE_MODES];
+    uint8_t intraNonMPM[NUM_NON_MPM_MODES];
+    PU::getIntraMPMs(pu, mpmList, intraNonMPM);
+#else
+    unsigned mpmList[NUM_MOST_PROBABLE_MODES];
+    PU::getIntraMPMs(pu, mpmList);
+#endif
+    unsigned mpmExtraList[NUM_MOST_PROBABLE_MODES + 3]; // +DC/VER/HOR
+    int maxModeNum = NUM_MOST_PROBABLE_MODES; // grows by one for each of DC/HOR/VER missing from mpmList
+    unsigned modeCandList[3] = {DC_IDX, HOR_IDX, VER_IDX};
+    bool bNotExist[3] = {true, true, true}; // parallel to modeCandList: true while that mode has not been seen in mpmList
+    for (int i = 0; i < NUM_MOST_PROBABLE_MODES; i++)
+    {
+      mpmExtraList[i] = mpmList[i];
+      if (bNotExist[0] && mpmList[i] == DC_IDX)
+      {
+        bNotExist[0] = false;
+      }
+      if (bNotExist[1] && mpmList[i] == HOR_IDX)
+      {
+        bNotExist[1] = false;
+      }
+      if (bNotExist[2] && mpmList[i] == VER_IDX)
+      {
+        bNotExist[2] = false;
+      }
+    }
+    for (int i = 0; i < 3; i++)
+    {
+      if (bNotExist[i])
+      {
+        mpmExtraList[maxModeNum++] = modeCandList[i];
+      }
+    }
+    for(int i = 0; i < maxModeNum; i ++) // evaluate every candidate, tracking the best and the second-best cost
+    {
+      uint64_t uiCost = 0;
+      int iMode = mpmExtraList[i];
+      if (iMode > DC_IDX) // list entries above DC_IDX are passed through MAP67TO131 before prediction
+      {
+        iMode = MAP67TO131(iMode);
+      }
+      initPredTimdIntraParams(pu, area, iMode);
+      predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, (eTempType == ABOVE_NEIGHBOR)? 0: iTempWidth, (eTempType == LEFT_NEIGHBOR)? 0: iTempHeight);
+      if(eTempType == LEFT_ABOVE_NEIGHBOR)
+      {
+        uiCost += distParamSad[0].distFunc(distParamSad[0]);
+        uiCost += distParamSad[1].distFunc(distParamSad[1]);
+      }
+      else if(eTempType == LEFT_NEIGHBOR)
+      {
+        uiCost = distParamSad[1].distFunc(distParamSad[1]);
+      }
+      else if(eTempType == ABOVE_NEIGHBOR)
+      {
+        uiCost += distParamSad[0].distFunc(distParamSad[0]);
+      }
+      else
+      {
+        assert(0);
+      }
+
+      if( uiCost < uiBestCost )
+      {
+        uiSecondaryCost = uiBestCost; // the previous best is demoted to second best
+        iSecondaryMode  = iBestMode;
+        uiBestCost  = uiCost;
+        iBestMode = iMode;
+      }
+      else if (uiCost < uiSecondaryCost)
+      {
+        uiSecondaryCost = uiCost;
+        iSecondaryMode  = iMode;
+      }
+      if (uiSecondaryCost <= maxCost) // stop once even the second-best cost is at or below the threshold
+      {
+        break;
+      }
+    }
+
+    int midMode = iBestMode;
+    if (midMode > DC_IDX && uiBestCost > maxCost) // refine the best mode by testing its two adjacent modes
+    {
+      for (int i = -1; i <= 1; i+=2)
+      {
+        int iMode = midMode + i;
+        if (iMode <= DC_IDX || iMode > EXT_VDIA_IDX) // skip candidates outside (DC_IDX, EXT_VDIA_IDX]
+        {
+          continue;
+        }
+        initPredTimdIntraParams(pu, area, iMode);
+        predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, (eTempType == ABOVE_NEIGHBOR)? 0: iTempWidth, (eTempType == LEFT_NEIGHBOR)? 0: iTempHeight);
+        uint64_t uiCost = 0;
+        if(eTempType == LEFT_ABOVE_NEIGHBOR)
+        {
+          uiCost += distParamSad[0].distFunc(distParamSad[0]);
+          uiCost += distParamSad[1].distFunc(distParamSad[1]);
+        }
+        else if(eTempType == LEFT_NEIGHBOR)
+        {
+          uiCost = distParamSad[1].distFunc(distParamSad[1]);
+        }
+        else if(eTempType == ABOVE_NEIGHBOR)
+        {
+          uiCost += distParamSad[0].distFunc(distParamSad[0]);
+        }
+        else
+        {
+          assert(0);
+        }
+
+        if(uiCost < uiBestCost)
+        {
+          uiBestCost  = uiCost;
+          iBestMode = iMode;
+        }
+        if(uiBestCost <= maxCost)
+        {
+          break;
+        }
+      }
+    }
+
+    midMode = iSecondaryMode;
+    if (midMode > DC_IDX && uiSecondaryCost > maxCost) // same refinement for the second-best mode; it is compared against uiSecondaryCost only
+    {
+      for (int i = -1; i <= 1; i+=2)
+      {
+        int iMode = midMode + i;
+        if (iMode <= DC_IDX || iMode > EXT_VDIA_IDX)
+        {
+          continue;
+        }
+        initPredTimdIntraParams(pu, area, iMode);
+        predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType, (eTempType == ABOVE_NEIGHBOR)? 0: iTempWidth, (eTempType == LEFT_NEIGHBOR)? 0: iTempHeight);
+        uint64_t uiCost = 0;
+        if(eTempType == LEFT_ABOVE_NEIGHBOR)
+        {
+          uiCost += distParamSad[0].distFunc(distParamSad[0]);
+          uiCost += distParamSad[1].distFunc(distParamSad[1]);
+        }
+        else if(eTempType == LEFT_NEIGHBOR)
+        {
+          uiCost = distParamSad[1].distFunc(distParamSad[1]);
+        }
+        else if(eTempType == ABOVE_NEIGHBOR)
+        {
+          uiCost += distParamSad[0].distFunc(distParamSad[0]);
+        }
+        else
+        {
+          assert(0);
+        }
+
+        if(uiCost < uiSecondaryCost)
+        {
+          uiSecondaryCost  = uiCost;
+          iSecondaryMode = iMode;
+        }
+        if(uiSecondaryCost <= maxCost)
+        {
+          break;
+        }
+      }
+    }
+
+    if ((uiSecondaryCost - uiBestCost) < uiBestCost) // blend when secondary < 2 * best; NOTE(review): unsigned subtraction wraps if refinement left uiSecondaryCost below uiBestCost, making this false - confirm intended
+  {
+    cu.timdMode         = iBestMode;
+    cu.timdIsBlended    = true;
+    cu.timdModeSecondary = iSecondaryMode;
+
+    const int blend_sum_weight = 6;
+    int       sum_weight       = 1 << blend_sum_weight; // the two fusion weights add up to 64
+
+    double dRatio       = 0.0;
+    dRatio              = (double) uiSecondaryCost / (double) (uiBestCost + uiSecondaryCost); // share of the secondary cost in the total
+    int iRatio          = static_cast<int>(dRatio * sum_weight + 0.5); // rounded to the nearest integer weight
+    cu.timdFusionWeight[0] = iRatio;
+    cu.timdFusionWeight[1] = sum_weight - iRatio;
+  }
+  else
+  {
+    cu.timdMode      = iBestMode;
+    cu.timdIsBlended = false;
+  }
+
+    return iBestMode;
+  }
+  else
+  {
+    cu.timdMode = PLANAR_IDX; // NO_NEIGHBOR: fall back to planar without blending
+    cu.timdIsBlended = false;
+
+    return PLANAR_IDX;
+  }
+}
+#endif
 #if ENABLE_DIMD
 void IntraPrediction::deriveDimdMode(const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu)
 {
diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h
index 95a98729e8b67700dfec4de8d5f5f4ad4614de51..83612248be99bd78020ffd203b73dcb169023b66 100644
--- a/source/Lib/CommonLib/IntraPrediction.h
+++ b/source/Lib/CommonLib/IntraPrediction.h
@@ -43,6 +43,9 @@
 #include "Unit.h"
 #include "Buffer.h"
 #include "Picture.h"
+#if JVET_W0123_TIMD_FUSION
+#include "RdCost.h"
+#endif
 
 #include "MatrixIntraPrediction.h"
 
@@ -81,6 +84,10 @@ private:
 #endif
 
   static const uint8_t m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS];
+#if JVET_W0123_TIMD_FUSION
+  static const uint8_t m_aucIntraFilterExt[MAX_INTRA_FILTER_DEPTHS];
+  RdCost* m_timdSatdCost;
+#endif
 #if LMS_LINEAR_MODEL
   unsigned m_auShiftLM[32]; // Table for substituting division operation by multiplication
 #endif
@@ -136,11 +143,18 @@ protected:
   // prediction
   void xPredIntraPlanar           ( const CPelBuf &pSrc, PelBuf &pDst );
   void xPredIntraDc               ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const bool enableBoundaryFilter = true );
+#if JVET_W0123_TIMD_FUSION
+  void xPredIntraAng              ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng, const bool bExtIntraDir);
+#else
   void xPredIntraAng              ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng);
+#endif
 
   void initPredIntraParams        ( const PredictionUnit & pu,  const CompArea compArea, const SPS& sps );
 
   static bool isIntegerSlope(const int absAng) { return (0 == (absAng & 0x1F)); }
+#if JVET_W0123_TIMD_FUSION
+  static bool isIntegerSlopeExt(const int absAng) { return (0 == (absAng & 0x3F)); } // true when the six low bits of absAng are zero (mask 0x3F; isIntegerSlope uses 0x1F)
+#endif
 
   void xPredIntraBDPCM            ( const CPelBuf &pSrc, PelBuf &pDst, const uint32_t dirMode, const ClpRng& clpRng );
   Pel  xGetPredValDc              ( const CPelBuf &pSrc, const Size &dstSize );
@@ -154,6 +168,9 @@ protected:
   );
 
   static int getModifiedWideAngle         ( int width, int height, int predMode );
+#if JVET_W0123_TIMD_FUSION
+  static int getWideAngleExt      ( int width, int height, int predMode );
+#endif
   void setReferenceArrayLengths   ( const CompArea &area );
 
   void destroy                    ();
@@ -189,6 +206,24 @@ public:
   void init                       (ChromaFormat chromaFormatIDC, const unsigned bitDepthY);
 #if ENABLE_DIMD
   static void deriveDimdMode      (const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu);
+#endif
+#if JVET_W0123_TIMD_FUSION
+  void xIntraPredTimdHorVerPdpc   (Pel* pDsty,const int dstStride, Pel* refSide, const int width, const int height, int xOffset, int yOffset, int scale, const Pel* refMain, const ClpRng& clpRng);
+  void xPredTimdIntraPlanar       (const CPelBuf &pSrc, Pel* pDst, int iDstStride, int width, int height, TEMPLATE_TYPE eTempType, int iTemplateWidth , int iTemplateHeight);
+  void xPredTimdIntraDc           ( const PredictionUnit &pu, const CPelBuf &pSrc, Pel* pDst, int iDstStride, int iWidth, int iHeight, TEMPLATE_TYPE eTempType, int iTemplateWidth , int iTemplateHeight);
+  void xPredTimdIntraAng          ( const CPelBuf &pSrc, const ClpRng& clpRng, Pel* pTrueDst, int iDstStride, int iWidth, int iHeight, TEMPLATE_TYPE eTempType, int iTemplateWidth , int iTemplateHeight, uint32_t dirMode);
+  void xIntraPredTimdAngLuma(Pel* pDstBuf, const ptrdiff_t dstStride, Pel* refMain, int width, int height, int deltaPos, int intraPredAngle, const ClpRng& clpRng, int xOffset, int yOffset);
+  void xIntraPredTimdPlanarDcPdpc (const CPelBuf &pSrc, Pel* pDst, int iDstStride, int width, int height, TEMPLATE_TYPE eTempType, int iTemplateWidth , int iTemplateHeight);
+  void xIntraPredTimdAngPdpc(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height, int xOffset, int yOffset, int scale, int invAngle);
+  void xFillTimdReferenceSamples  ( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu, int iTemplateWidth, int iTemplateHeight );
+  Pel  xGetPredTimdValDc          ( const CPelBuf &pSrc, const Size &dstSize, TEMPLATE_TYPE eTempType, int iTempHeight, int iTempWidth );
+  void initPredTimdIntraParams    (const PredictionUnit & pu, const CompArea area, int dirMode);
+  void predTimdIntraAng           ( const ComponentID compId, const PredictionUnit &pu, uint32_t uiDirMode, Pel* pPred, uint32_t uiStride, uint32_t iWidth, uint32_t iHeight, TEMPLATE_TYPE eTempType, int32_t iTemplateWidth, int32_t iTemplateHeight);
+  int deriveTimdMode       ( const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu );
+  void initTimdIntraPatternLuma   (const CodingUnit &cu, const CompArea &area, int iTemplateWidth, int iTemplateHeight, uint32_t uiRefWidth, uint32_t uiRefHeight);
+#if GRAD_PDPC
+  void xIntraPredTimdAngGradPdpc  (Pel* pDsty, const int dstStride, Pel* refMain, Pel* refSide, const int width, const int height, int xOffset, int yOffset, int scale, int deltaPos, int intraPredAngle, const ClpRng& clpRng);
+#endif
 #endif
   // Angular Intra
   void predIntraAng               ( const ComponentID compId, PelBuf &piPred, const PredictionUnit &pu);
diff --git a/source/Lib/CommonLib/RdCost.cpp b/source/Lib/CommonLib/RdCost.cpp
index 7470c66ce645ad92378171c3d6f5860233e6906d..b5018fb3c399eee6730efe4260d6a786d19717ad 100644
--- a/source/Lib/CommonLib/RdCost.cpp
+++ b/source/Lib/CommonLib/RdCost.cpp
@@ -446,6 +446,64 @@ void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY,
   }
 }
 
+#if JVET_W0123_TIMD_FUSION
+void RdCost::setTimdDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode, int step, bool useHadamard )
+{
+  // Populate rcDP for comparing two width x height sample blocks (pOrg against piRefY)
+  // and select the matching entry of m_afpDistortFunc. rcDP.useMR is read here, never written.
+  rcDP.org.buf    = pOrg;
+  rcDP.org.stride = iOrgStride;
+  rcDP.org.width  = width;
+  rcDP.org.height = height;
+
+  rcDP.cur.buf    = piRefY;
+  rcDP.cur.stride = iRefStride;
+  rcDP.cur.width  = width;
+  rcDP.cur.height = height;
+
+  rcDP.bitDepth   = bitDepth;
+  rcDP.compID     = compID;
+  rcDP.subShift   = subShiftMode;
+  rcDP.step       = step;
+  rcDP.maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max();
+
+  // When useMR is set, every table lookup below is shifted by the DF_MRSAD - DF_SAD distance.
+  const int mrShift = rcDP.useMR ? ( DF_MRSAD - DF_SAD ) : 0;
+  if( useHadamard )
+  {
+    // Hadamard path: the entry is chosen from floorLog2( width ) alone.
+#if CTU_256
+    rcDP.distFunc = m_afpDistortFunc[ DF_HAD + mrShift + std::min<int>( 7, floorLog2( width ) ) ];
+#else
+    rcDP.distFunc = m_afpDistortFunc[ DF_HAD + mrShift + floorLog2( width ) ];
+#endif
+    return;
+  }
+
+  switch( width )
+  {
+  // These three non-power-of-two widths have their own SAD table entries.
+  case 12: rcDP.distFunc = m_afpDistortFunc[ DF_SAD12 + mrShift ]; return;
+  case 24: rcDP.distFunc = m_afpDistortFunc[ DF_SAD24 + mrShift ]; return;
+  case 48: rcDP.distFunc = m_afpDistortFunc[ DF_SAD48 + mrShift ]; return;
+  default: break;
+  }
+
+  if( !isPowerOf2( width ) )
+  {
+    // Any other non-power-of-two width uses the base SAD entry.
+    rcDP.distFunc = m_afpDistortFunc[ DF_SAD + mrShift ];
+    return;
+  }
+
+#if CTU_256
+  rcDP.distFunc = m_afpDistortFunc[ DF_SAD + mrShift + std::min<int>( 7, floorLog2( width ) ) ]; // log2 index capped at 7
+#else
+  rcDP.distFunc = m_afpDistortFunc[ DF_SAD + mrShift + floorLog2( width ) ];
+#endif
+}
+#endif
+
 #if TM_AMVP || TM_MRG
 void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, bool TrueA_FalseL, int wIdx, int subShift, ComponentID compID )
 {
diff --git a/source/Lib/CommonLib/RdCost.h b/source/Lib/CommonLib/RdCost.h
index 8e194a9e30f78d50aa01a1782cf8981d2bf41084..4cbe0cf75c868a115683c7b17e71f30bd3534bcc 100644
--- a/source/Lib/CommonLib/RdCost.h
+++ b/source/Lib/CommonLib/RdCost.h
@@ -185,6 +185,9 @@ public:
   void           setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRefY , int iRefStride, int bitDepth, ComponentID compID, int subShiftMode = 0, int step = 1, bool useHadamard = false );
   void           setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, ComponentID compID, bool useHadamard = false );
   void           setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false, bool bioApplied = false );
+#if JVET_W0123_TIMD_FUSION
+  void           setTimdDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false );
+#endif
   void           setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRefY, int iRefStride, const Pel* mask, int iMaskStride, int stepX, int iMaskStride2, int bitDepth,  ComponentID compID);
 #if TM_AMVP || TM_MRG
   void           setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, bool TrueA_FalseL, int wIdx, int subShift, ComponentID compID );
diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp
index 934d14c39ac50bc84650f081b54be54510cfe0e7..d82e0bd2969d87b2dc6110911f75e9d22e0df70e 100644
--- a/source/Lib/CommonLib/Slice.cpp
+++ b/source/Lib/CommonLib/Slice.cpp
@@ -3113,6 +3113,9 @@ SPS::SPS()
 #if ENABLE_DIMD
 , m_dimd                      ( false )
 #endif
+#if JVET_W0123_TIMD_FUSION
+, m_timd                      ( false )
+#endif
 #if JVET_V0130_INTRA_TMP
 , m_intraTMP                  ( false )
 , m_intraTmpMaxSize           ( 64 )                             
@@ -4768,6 +4771,9 @@ bool             operator == (const ConstraintInfo& op1, const ConstraintInfo& o
 #if ENABLE_DIMD
   if( op1.m_noDimdConstraintFlag                         != op2.m_noDimdConstraintFlag                           ) return false;
 #endif
+#if JVET_W0123_TIMD_FUSION
+  if( op1.m_noTimdConstraintFlag                         != op2.m_noTimdConstraintFlag                           ) return false;
+#endif
 #if ENABLE_OBMC
   if( op1.m_noObmcConstraintFlag                         != op2.m_noObmcConstraintFlag                           ) return false;
 #endif
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index f53244b1dbbedad876acfc6f5385cf719faa4399..70d7b8bc6b0106e57954db4266f2769999574c9c 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -319,6 +319,9 @@ class ConstraintInfo
 #if ENABLE_DIMD
   bool              m_noDimdConstraintFlag;
 #endif
+#if JVET_W0123_TIMD_FUSION
+  bool              m_noTimdConstraintFlag;
+#endif
 #if ENABLE_OBMC
   bool              m_noObmcConstraintFlag;
 #endif
@@ -434,6 +437,9 @@ public:
 #if ENABLE_DIMD
     , m_noDimdConstraintFlag     (false)
 #endif
+#if JVET_W0123_TIMD_FUSION
+    , m_noTimdConstraintFlag     (false)
+#endif
 #if ENABLE_OBMC
     , m_noObmcConstraintFlag     (false)
 #endif
@@ -615,6 +621,10 @@ public:
   bool          getNoDimdConstraintFlag() const { return m_noDimdConstraintFlag; }
   void          setNoDimdConstraintFlag(bool bVal) { m_noDimdConstraintFlag = bVal; }
 #endif
+#if JVET_W0123_TIMD_FUSION
+  bool          getNoTimdConstraintFlag() const { return m_noTimdConstraintFlag; }
+  void          setNoTimdConstraintFlag(bool bVal) { m_noTimdConstraintFlag = bVal; }
+#endif
 #if ENABLE_OBMC
   bool          getNoObmcConstraintFlag() const { return m_noObmcConstraintFlag; }
   void          setNoObmcConstraintFlag(bool bVal) { m_noObmcConstraintFlag = bVal; }
@@ -1651,6 +1661,9 @@ private:
 #if ENABLE_DIMD
   bool              m_dimd;
 #endif
+#if JVET_W0123_TIMD_FUSION
+  bool              m_timd;
+#endif
 #if JVET_V0130_INTRA_TMP
   bool              m_intraTMP;                                       ///< intra Template Matching 
   unsigned          m_intraTmpMaxSize;                               ///< max CU size for which intra TMP is allowed
@@ -2083,6 +2096,10 @@ void                    setCCALFEnabledFlag( bool b )
   void      setIntraTMPMaxSize (unsigned n)                                      { m_intraTmpMaxSize = n; }
   unsigned  getIntraTMPMaxSize ()                                      const     { return m_intraTmpMaxSize; }
 #endif
+#if JVET_W0123_TIMD_FUSION
+  void      setUseTimd         ( bool b )                                        { m_timd = b; }
+  bool      getUseTimd         ()                                      const     { return m_timd; }
+#endif
 #if ENABLE_OBMC
   void      setUseOBMC         ( bool b )                                        { m_OBMC = b; }
   bool      getUseOBMC         ()                                      const     { return m_OBMC; }
diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp
index a6252225bc409c79d9f3b79d81fea4f4c714b783..26fbda3252bb36edb6394600b284812be9f215d9 100644
--- a/source/Lib/CommonLib/TrQuant.cpp
+++ b/source/Lib/CommonLib/TrQuant.cpp
@@ -831,7 +831,11 @@ void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID )
     const ScanElement * scan = whge3 ? g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ];
     uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) );
 
+#if JVET_W0123_TIMD_FUSION
+    if( compID != COMPONENT_Y && PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) )
+#else
     if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) )
+#endif
     {
       intraMode = PU::getCoLocatedIntraLumaMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ) );
     }
@@ -844,6 +848,12 @@ void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID )
     {
       intraMode = PLANAR_IDX;
     }
+#endif
+#if JVET_W0123_TIMD_FUSION
+    if (tu.cu->timd && compID == COMPONENT_Y)
+    {
+      intraMode = MAP131TO67(intraMode);
+    }
 #endif
     CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" );
 
@@ -977,7 +987,11 @@ void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, cons
     const ScanElement * scan = whge3 ? g_coefTopLeftDiagScan8x8[ gp_sizeIdxInfo->idxFrom( width ) ] : g_scanOrder[ SCAN_GROUPED_4x4 ][ SCAN_DIAG ][ gp_sizeIdxInfo->idxFrom( width ) ][ gp_sizeIdxInfo->idxFrom( height ) ];
     uint32_t intraMode = PU::getFinalIntraMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) );
 
+#if JVET_W0123_TIMD_FUSION
+    if( compID != COMPONENT_Y && PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) )
+#else
     if( PU::isLMCMode( tu.cs->getPU( area.pos(), toChannelType( compID ) )->intraDir[ toChannelType( compID ) ] ) )
+#endif
     {
       intraMode = PU::getCoLocatedIntraLumaMode( *tu.cs->getPU( area.pos(), toChannelType( compID ) ) );
     }
@@ -990,6 +1004,12 @@ void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, cons
     {
       intraMode = PLANAR_IDX;
     }
+#endif
+#if JVET_W0123_TIMD_FUSION
+    if (tu.cu->timd && compID == COMPONENT_Y)
+    {
+      intraMode = MAP131TO67(intraMode);
+    }
 #endif
     CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" );
 
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 9cb01a37576d9d26be8fbad57ed503f2b3f6a32d..63d251c2662434aba131f547bb649856a53767d5 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -112,6 +112,8 @@
 #define JVET_V0087_DIMD_NO_ISP                            ENABLE_DIMD // disallow combination of DIMD and ISP
 #define JVET_V0130_INTRA_TMP                              1 // JVET-V0130: template matching prediction
 
+#define JVET_W0123_TIMD_FUSION                            1 // Template based intra mode derivation and fusion
+
 // Inter
 #define CIIP_PDPC                                         1 // apply pdpc to megre prediction as a new CIIP mode (CIIP_PDPC) additional to CIIP mode
 #define SAMPLE_BASED_BDOF                                 1 // Sample based BDOF
@@ -565,6 +567,16 @@ enum ISPType
   INTRA_SUBPARTITIONS_RESERVED  = 4
 };
 
+#if JVET_W0123_TIMD_FUSION
+enum TEMPLATE_TYPE // result type of CU::deriveTimdRefType(); comments below refer to its (iCurX, iCurY) inputs
+{
+  NO_NEIGHBOR         = 0, // returned when iCurX == 0 and iCurY == 0
+  LEFT_NEIGHBOR       = 1, // returned when iCurX > 0 and iCurY == 0
+  ABOVE_NEIGHBOR      = 2, // returned when iCurX == 0 and iCurY > 0
+  LEFT_ABOVE_NEIGHBOR = 3  // returned when iCurX > 0 and iCurY > 0; numerically LEFT_NEIGHBOR | ABOVE_NEIGHBOR
+};
+#endif
+
 enum SbtIdx
 {
   SBT_OFF_DCT  = 0,
diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp
index 0d5f7f2a3ca47b9c20ed0a550dc52c9a4388ad19..4e28e52a2aab1fa207cd8164b43ee596e90fc97f 100644
--- a/source/Lib/CommonLib/Unit.cpp
+++ b/source/Lib/CommonLib/Unit.cpp
@@ -290,6 +290,14 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other )
     dimdRelWeight[i] = other.dimdRelWeight[i];
   }
 #endif
+#if JVET_W0123_TIMD_FUSION
+  timd              = other.timd;
+  timdMode          = other.timdMode;
+  timdModeSecondary = other.timdModeSecondary;
+  timdIsBlended     = other.timdIsBlended;
+  timdFusionWeight[0] = other.timdFusionWeight[0];
+  timdFusionWeight[1] = other.timdFusionWeight[1];
+#endif
 #if ENABLE_OBMC
   obmcFlag          = other.obmcFlag;
   isobmcMC          = other.isobmcMC;
@@ -375,6 +383,14 @@ void CodingUnit::initData()
     dimdRelWeight[i] = -1;
   }
 #endif
+#if JVET_W0123_TIMD_FUSION
+  timd              = false;
+  timdMode          = -1;
+  timdModeSecondary = -1;
+  timdIsBlended     = false;
+  timdFusionWeight[0] = -1;
+  timdFusionWeight[1] = -1;
+#endif
 #if ENABLE_OBMC
   obmcFlag          = true;
   isobmcMC          = false;
@@ -607,7 +623,7 @@ void PredictionUnit::initData()
   intraDir[1] = PLANAR_IDX;
   mipTransposedFlag = false;
   multiRefIdx = 0;
-#if ENABLE_DIMD
+#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION
   parseLumaMode = false;
   candId = -1;
   parseChromaMode = false;
@@ -683,7 +699,7 @@ PredictionUnit& PredictionUnit::operator=(const IntraPredictionData& predData)
   }
   mipTransposedFlag = predData.mipTransposedFlag;
   multiRefIdx = predData.multiRefIdx;
-#if ENABLE_DIMD
+#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION
   parseLumaMode = predData.parseLumaMode;
   candId = predData.candId;
   parseChromaMode = predData.parseChromaMode;
@@ -767,7 +783,7 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other )
   mergeFlag   = other.mergeFlag;
   regularMergeFlag = other.regularMergeFlag;
   mergeIdx    = other.mergeIdx;
-#if ENABLE_DIMD
+#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION
   parseLumaMode = other.parseLumaMode;
   candId = other.candId;
   parseChromaMode = other.parseChromaMode;
@@ -864,6 +880,29 @@ CMotionBuf PredictionUnit::getMotionBuf() const
   return cs->getMotionBuf( *this );
 }
 
+#if JVET_W0123_TIMD_FUSION
+const uint8_t& PredictionUnit::getIpmInfo() const
+{
+  return cs->getIpmInfo( lumaPos() ); // forwards to cs->getIpmInfo at this PU's lumaPos()
+}
+
+const uint8_t& PredictionUnit::getIpmInfo( const Position& pos ) const // pos is expected to lie inside this PU's Y() area
+{
+  CHECKD( !Y().contains( pos ), "Trying to access IPM info outside of PU" );
+  return cs->getIpmInfo( pos ); // the lookup itself is forwarded to cs
+}
+
+IpmBuf PredictionUnit::getIpmBuf()
+{
+  return cs->getIpmBuf( *this ); // forwards to cs->getIpmBuf with this PU as the area argument
+}
+
+CIpmBuf PredictionUnit::getIpmBuf() const
+{
+  return cs->getIpmBuf( *this ); // const overload: same forwarding, returns a CIpmBuf
+}
+#endif
+
 
 // ---------------------------------------------------------------------------
 // transform unit method definitions
diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h
index 31fd09ac0aff9313a9fc4a67fee1363dc61d2db2..1bd52574d6a60507ba79d8616fac45eacfb3b573 100644
--- a/source/Lib/CommonLib/Unit.h
+++ b/source/Lib/CommonLib/Unit.h
@@ -325,6 +325,13 @@ struct CodingUnit : public UnitArea
   int8_t         dimdBlendMode[2]; // max number of blend modes (the main mode is not counter) --> incoherent with dimdRelWeight
   int8_t         dimdRelWeight[3]; // max number of predictions to blend
 #endif
+#if JVET_W0123_TIMD_FUSION
+  bool           timd;
+  int            timdMode;
+  int timdModeSecondary;
+  bool timdIsBlended;
+  int8_t timdFusionWeight[2];
+#endif
 #if ENABLE_OBMC
   bool           obmcFlag;
   bool           isobmcMC;
@@ -399,7 +406,7 @@ struct CodingUnit : public UnitArea
 
 struct IntraPredictionData
 {
-#if ENABLE_DIMD
+#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION
   bool      parseLumaMode = false;
   int8_t    candId = -1;
   bool      parseChromaMode = false;
@@ -496,6 +503,13 @@ struct PredictionUnit : public UnitArea, public IntraPredictionData, public Inte
   MotionBuf         getMotionBuf();
   CMotionBuf        getMotionBuf() const;
 
+#if JVET_W0123_TIMD_FUSION
+  const uint8_t& getIpmInfo() const;
+  const uint8_t& getIpmInfo( const Position& pos ) const;
+  IpmBuf         getIpmBuf();
+  CIpmBuf        getIpmBuf() const;
+#endif
+
 #if ENABLE_SPLIT_PARALLELISM
 
   int64_t cacheId;
diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp
index 3616f90b49ea1c4d5c4ed31583bb6d69bb9131c8..d8528c0f71ab01fd68b6164c9f585e4146f5cf5e 100644
--- a/source/Lib/CommonLib/UnitTools.cpp
+++ b/source/Lib/CommonLib/UnitTools.cpp
@@ -94,7 +94,11 @@ void CS::setRefinedMotionField(CodingStructure &cs)
             subPu.mv[REF_PIC_LIST_1].clipToStorageBitDepth();
             pu.mvdL0SubPu[num].setZero();
             num++;
+#if JVET_W0123_TIMD_FUSION
+            PU::spanMotionInfo2(subPu);
+#else
             PU::spanMotionInfo(subPu);
+#endif
           }
         }
       }
@@ -581,6 +585,48 @@ bool CU::allLumaCBFsAreZero(const CodingUnit& cu)
   }
 }
 
+#if JVET_W0123_TIMD_FUSION
+TEMPLATE_TYPE CU::deriveTimdRefType( int iCurX, int iCurY, uint32_t uiCurWidth, uint32_t uiCurHeight, int iTemplateWidth, int iTemplateHeight, int& iRefX, int& iRefY, uint32_t& uiRefWidth, uint32_t& uiRefHeight )
+{
+  // Position (0,0): no template side is reported and none of the outputs is written.
+  if( iCurX == 0 && iCurY == 0 )
+  {
+    return NO_NEIGHBOR;
+  }
+
+  iRefX = -1;
+  iRefY = -1;
+
+  if( iCurX > 0 && iCurY > 0 )
+  {
+    // Left and above: the origin moves back by the template, the size grows by the template.
+    iRefX       = iCurX - iTemplateWidth;
+    iRefY       = iCurY - iTemplateHeight;
+    uiRefWidth  = uiCurWidth + iTemplateWidth;
+    uiRefHeight = uiCurHeight + iTemplateHeight;
+    return LEFT_ABOVE_NEIGHBOR;
+  }
+  if( iCurX == 0 && iCurY > 0 )
+  {
+    iRefX       = iCurX;                     // above only: just the row origin moves
+    iRefY       = iCurY - iTemplateHeight;
+    uiRefWidth  = uiCurWidth;
+    uiRefHeight = uiCurHeight;
+    return ABOVE_NEIGHBOR;
+  }
+  if( iCurX > 0 && iCurY == 0 )
+  {
+    iRefX       = iCurX - iTemplateWidth;    // left only: just the column origin moves
+    iRefY       = iCurY;
+    uiRefWidth  = uiCurWidth;
+    uiRefHeight = uiCurHeight;
+    return LEFT_NEIGHBOR;
+  }
+  assert( 0 );   // only reachable with a negative coordinate; with NDEBUG falls through, iRefX/iRefY stay -1
+  return NO_NEIGHBOR;
+}
+#endif
+
 
 PUTraverser CU::traversePUs( CodingUnit& cu )
 {
@@ -642,15 +688,31 @@ int PU::getIntraMPMs(const PredictionUnit &pu, unsigned* mpm, const ChannelType
     if (puLeft && CU::isIntra(*puLeft->cu))
     {
 #if SECONDARY_MPM
+#if JVET_W0123_TIMD_FUSION
+      mpm[numValidMPM] = puLeft->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puLeft)) : PU::getIntraDirLuma(*puLeft);
+#else
       mpm[numValidMPM] = PU::getIntraDirLuma(*puLeft);
+#endif
       if( !includedMode[mpm[numValidMPM]] )
       {
         includedMode[mpm[numValidMPM++]] = true;
       }
 #else
       leftIntraDir = PU::getIntraDirLuma( *puLeft );
+#if JVET_W0123_TIMD_FUSION
+      if (puLeft->cu->timd)
+      {
+        leftIntraDir = MAP131TO67(leftIntraDir);
+      }
+#endif
 #endif
     }
+#if JVET_W0123_TIMD_FUSION && !SECONDARY_MPM
+    if (puLeft && CU::isInter(*puLeft->cu))
+    {
+      leftIntraDir = puLeft->getIpmInfo(posLB.offset(-1, 0));
+    }
+#endif
 
     // Get intra direction of above PU
 #if SECONDARY_MPM
@@ -663,22 +725,60 @@ int PU::getIntraMPMs(const PredictionUnit &pu, unsigned* mpm, const ChannelType
     if (puAbove && CU::isIntra(*puAbove->cu) && CU::isSameCtu(*pu.cu, *puAbove->cu))
     {
 #if SECONDARY_MPM
+#if JVET_W0123_TIMD_FUSION
+      mpm[numValidMPM] = puAbove->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puAbove)) : PU::getIntraDirLuma(*puAbove);
+#else
       mpm[numValidMPM] = PU::getIntraDirLuma(*puAbove);
-      if( !includedMode[mpm[numValidMPM]] )
+#endif
+      if (!includedMode[mpm[numValidMPM]])
       {
         includedMode[mpm[numValidMPM++]] = true;
       }
 #else
-      aboveIntraDir = PU::getIntraDirLuma( *puAbove );
+      aboveIntraDir = PU::getIntraDirLuma(*puAbove);
+#if JVET_W0123_TIMD_FUSION
+      if (puAbove->cu->timd)
+      {
+        aboveIntraDir = MAP131TO67(aboveIntraDir);
+      }
+#endif
 #endif
     }
+#if JVET_W0123_TIMD_FUSION && !SECONDARY_MPM
+    if (puAbove && CU::isInter(*puAbove->cu))
+    {
+      aboveIntraDir = puAbove->getIpmInfo(posRT.offset(0, -1));
+    }
+#endif
 
 #if SECONDARY_MPM
+#if JVET_W0123_TIMD_FUSION
+    if (puLeft && CU::isInter(*puLeft->cu))
+    {
+      mpm[numValidMPM] = puLeft->getIpmInfo(pu.lheight() >= pu.lwidth() ? posRT.offset(0, -1) : posLB.offset(-1, 0));
+      if( !includedMode[mpm[numValidMPM]] )
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+      }
+    }
+    if (puAbove && CU::isInter(*puAbove->cu))
+    {
+      mpm[numValidMPM] = puAbove->getIpmInfo(pu.lheight() >= pu.lwidth() ? posLB.offset(-1, 0) : posRT.offset(0, -1));
+      if( !includedMode[mpm[numValidMPM]] )
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+      }
+    }
+#endif
     // Get intra direction of below-left PU
     const PredictionUnit *puBelowLeft = pu.cs->getPURestricted(posLB.offset(-1, 1), pu, channelType);
     if (puBelowLeft && CU::isIntra(*puBelowLeft->cu))
     {
+#if JVET_W0123_TIMD_FUSION
+      mpm[numValidMPM] = puBelowLeft->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puBelowLeft)) : PU::getIntraDirLuma(*puBelowLeft);
+#else
       mpm[numValidMPM] = PU::getIntraDirLuma(*puBelowLeft);
+#endif
       if( !includedMode[mpm[numValidMPM]] )
       {
         includedMode[mpm[numValidMPM++]] = true;
@@ -689,7 +789,11 @@ int PU::getIntraMPMs(const PredictionUnit &pu, unsigned* mpm, const ChannelType
     const PredictionUnit *puAboveRight = pu.cs->getPURestricted(posRT.offset(1, -1), pu, channelType);
     if (puAboveRight && CU::isIntra(*puAboveRight->cu) && CU::isSameCtu(*pu.cu, *puAboveRight->cu))
     {
+#if JVET_W0123_TIMD_FUSION
+      mpm[numValidMPM] = puAboveRight->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puAboveRight)) : PU::getIntraDirLuma(*puAboveRight);
+#else
       mpm[numValidMPM] = PU::getIntraDirLuma(*puAboveRight);
+#endif
       if( !includedMode[mpm[numValidMPM]] )
       {
         includedMode[mpm[numValidMPM++]] = true;
@@ -701,12 +805,42 @@ int PU::getIntraMPMs(const PredictionUnit &pu, unsigned* mpm, const ChannelType
     const PredictionUnit *puAboveLeft = pu.cs->getPURestricted(posTL.offset(-1, -1), pu, channelType);
     if (puAboveLeft && CU::isIntra(*puAboveLeft->cu) && CU::isSameCtu(*pu.cu, *puAboveLeft->cu))
     {
+#if JVET_W0123_TIMD_FUSION
+      mpm[numValidMPM] = puAboveLeft->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puAboveLeft)) : PU::getIntraDirLuma(*puAboveLeft);
+#else
       mpm[numValidMPM] = PU::getIntraDirLuma(*puAboveLeft);
+#endif
+      if( !includedMode[mpm[numValidMPM]] )
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+      }
+    }
+#if JVET_W0123_TIMD_FUSION
+    if (puBelowLeft && CU::isInter(*puBelowLeft->cu))
+    {
+      mpm[numValidMPM] = puBelowLeft->getIpmInfo(posLB.offset(-1, 1));
+      if( !includedMode[mpm[numValidMPM]] )
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+      }
+    }
+    if (puAboveRight && CU::isInter(*puAboveRight->cu))
+    {
+      mpm[numValidMPM] = puAboveRight->getIpmInfo(posRT.offset(1, -1));
+      if( !includedMode[mpm[numValidMPM]] )
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+      }
+    }
+    if (puAboveLeft && CU::isInter(*puAboveLeft->cu))
+    {
+      mpm[numValidMPM] = puAboveLeft->getIpmInfo(posTL.offset(-1, -1));
       if( !includedMode[mpm[numValidMPM]] )
       {
         includedMode[mpm[numValidMPM++]] = true;
       }
     }
+#endif
 #endif
 
     CHECK(2 >= numMPMs, "Invalid number of most probable modes");
@@ -1093,6 +1227,12 @@ const PredictionUnit &PU::getCoLocatedLumaPU(const PredictionUnit &pu)
 
 uint32_t PU::getCoLocatedIntraLumaMode(const PredictionUnit &pu)
 {
+#if JVET_W0123_TIMD_FUSION
+  if (PU::getCoLocatedLumaPU(pu).cu->timd) // the co-located luma PU's CU has its timd flag set
+  {
+    return MAP131TO67(PU::getIntraDirLuma(PU::getCoLocatedLumaPU(pu))); // its luma mode goes through MAP131TO67 before being returned
+  }
+#endif
   return PU::getIntraDirLuma(PU::getCoLocatedLumaPU(pu));
 }
 
@@ -4213,6 +4353,9 @@ void PU::spanMotionInfo( PredictionUnit &pu, const MergeCtx &mrgCtx )
 #if !MULTI_PASS_DMVR
   MotionBuf mb = pu.getMotionBuf();
 #endif
+#if JVET_W0123_TIMD_FUSION
+  IpmBuf ib = pu.getIpmBuf();
+#endif
 
   if (!pu.mergeFlag || pu.mergeType == MRG_TYPE_DEFAULT_N || pu.mergeType == MRG_TYPE_IBC)
   {
@@ -4280,10 +4423,23 @@ void PU::spanMotionInfo( PredictionUnit &pu, const MergeCtx &mrgCtx )
       {
         PU::setAllAffineMv(pu, pu.mvAffi[1][0], pu.mvAffi[1][1], pu.mvAffi[1][2], REF_PIC_LIST_1);
       }
+#if JVET_W0123_TIMD_FUSION
+      spanIpmInfoInter(pu, mb, ib);
+#endif
     }
     else
     {
       mb.fill(mi);
+#if JVET_W0123_TIMD_FUSION
+      if (mi.isIBCmot)
+      {
+        ib.fill(PLANAR_IDX);
+      }
+      else
+      {
+        spanIpmInfoInter(pu, mb, ib);
+      }
+#endif
     }
   }
   else if (pu.mergeType == MRG_TYPE_SUBPU_ATMVP)
@@ -4293,9 +4449,237 @@ void PU::spanMotionInfo( PredictionUnit &pu, const MergeCtx &mrgCtx )
     MotionBuf mb = pu.getMotionBuf();
 #endif
     mb.copyFrom(mrgCtx.subPuMvpMiBuf);
+#if JVET_W0123_TIMD_FUSION
+    spanIpmInfoInter(pu, mb, ib);
+#endif
   }
 }
 
+#if JVET_W0123_TIMD_FUSION
+#if MULTI_PASS_DMVR
+void PU::spanMotionInfo2( PredictionUnit &pu, const MergeCtx &mrgCtx, Mv* bdmvrSubPuMv0, Mv* bdmvrSubPuMv1, Mv* bdofSubPuMvOffset) // Writes pu's motion into its motion buffer only (no IPM-buffer access in this function). The three Mv arrays are read only when pu.bdmvrRefine is set; mrgCtx only in the MRG_TYPE_SUBPU_ATMVP branch.
+#else
+void PU::spanMotionInfo2( PredictionUnit &pu, const MergeCtx &mrgCtx ) // Writes pu's motion into its motion buffer only (no IPM-buffer access in this function). mrgCtx is read only in the MRG_TYPE_SUBPU_ATMVP branch.
+#endif
+{
+#if !MULTI_PASS_DMVR
+  MotionBuf mb = pu.getMotionBuf();
+#endif
+
+  if (!pu.mergeFlag || pu.mergeType == MRG_TYPE_DEFAULT_N || pu.mergeType == MRG_TYPE_IBC) // non-merge, default merge or IBC merge: build a single MotionInfo from the PU fields
+  {
+    MotionInfo mi;
+
+    mi.isInter = !CU::isIntra(*pu.cu);
+    mi.isIBCmot = CU::isIBC(*pu.cu);
+    mi.sliceIdx = pu.cu->slice->getIndependentSliceIdx();
+#if INTER_LIC
+    mi.usesLIC = pu.cu->LICFlag;
+#endif
+
+    if( mi.isInter ) // MVs, reference indices and direction are copied only for non-intra CUs
+    {
+      mi.interDir = pu.interDir;
+      mi.useAltHpelIf = pu.cu->imv == IMV_HPEL;
+      for( int i = 0; i < NUM_REF_PIC_LIST_01; i++ )
+      {
+        mi.mv[i]     = pu.mv[i];
+        mi.refIdx[i] = pu.refIdx[i];
+      }
+      if (mi.isIBCmot)
+      {
+        mi.bv = pu.bv;
+      }
+    }
+
+#if MULTI_PASS_DMVR
+    if (pu.bdmvrRefine) // per-sub-PU path: every dx x dy sub-block gets its own pair of MVs
+    {
+      CHECK(bdmvrSubPuMv0 == nullptr, "this is not possible"); // NOTE(review): bdmvrSubPuMv1 and bdofSubPuMvOffset are dereferenced below without an equivalent check
+      const int dx = std::min<int>(pu.lwidth (), BDOF_SUBPU_DIM);
+      const int dy = std::min<int>(pu.lheight(), BDOF_SUBPU_DIM);
+      int subPuIdx = 0; // index into bdofSubPuMvOffset
+      const int bioSubPuIdxStrideIncr = BDOF_SUBPU_STRIDE - std::max(1, (int)(pu.lwidth() >> BDOF_SUBPU_DIM_LOG2)); // added after each row; presumably makes subPuIdx advance by BDOF_SUBPU_STRIDE per row (assumes BDOF_SUBPU_DIM == 1 << BDOF_SUBPU_DIM_LOG2 - TODO confirm)
+
+      for (int yStart = 0; yStart < pu.lheight(); yStart += dy)
+      {
+        for (int xStart = 0; xStart < pu.lwidth(); xStart += dx)
+        {
+          const int bdmvrSubPuIdx = (yStart >> DMVR_SUBCU_HEIGHT_LOG2) * DMVR_SUBPU_STRIDE + (xStart >> DMVR_SUBCU_WIDTH_LOG2); // index into the bdmvrSubPuMv0/1 arrays for this sub-block
+          mi.mv[0] = bdmvrSubPuMv0[bdmvrSubPuIdx] + bdofSubPuMvOffset[subPuIdx]; // the offset is added for list 0 ...
+          mi.mv[1] = bdmvrSubPuMv1[bdmvrSubPuIdx] - bdofSubPuMvOffset[subPuIdx]; // ... and subtracted for list 1
+
+          subPuIdx++;
+          MotionBuf mb = pu.cs->getMotionBuf(Area(pu.lx() + xStart, pu.ly() + yStart, dx, dy)); // motion buffer restricted to this sub-block
+          mb.fill(mi);
+        }
+        subPuIdx += bioSubPuIdxStrideIncr;
+      }
+      return; // refined PUs are complete here; the affine / whole-PU fill below is skipped
+    }
+    MotionBuf mb = pu.getMotionBuf();
+#endif
+    if (pu.cu->affine)
+    {
+      mi.mv[0].setZero(); // to make sure filling of MV in unused reference list
+      mi.mv[1].setZero();
+      mb.fill(mi);
+      if (pu.refIdx[0] >= 0) // affine MVs are then set per list, only for lists with a valid reference index
+      {
+        PU::setAllAffineMv(pu, pu.mvAffi[0][0], pu.mvAffi[0][1], pu.mvAffi[0][2], REF_PIC_LIST_0);
+      }
+      if (pu.refIdx[1] >= 0)
+      {
+        PU::setAllAffineMv(pu, pu.mvAffi[1][0], pu.mvAffi[1][1], pu.mvAffi[1][2], REF_PIC_LIST_1);
+      }
+    }
+    else
+    {
+      mb.fill(mi); // same MotionInfo for the whole PU
+    }
+  }
+  else if (pu.mergeType == MRG_TYPE_SUBPU_ATMVP)
+  {
+    CHECK(mrgCtx.subPuMvpMiBuf.area() == 0 || !mrgCtx.subPuMvpMiBuf.buf, "Buffer not initialized");
+#if MULTI_PASS_DMVR
+    MotionBuf mb = pu.getMotionBuf();
+#endif
+    mb.copyFrom(mrgCtx.subPuMvpMiBuf); // motion is taken as-is from the merge context's sub-PU buffer
+  }
+}
+
+void PU::spanIpmInfoIntra( PredictionUnit &pu) // Fills pu's whole IPM buffer with the PU's luma intra mode.
+{
+  int ipm = PU::getIntraDirLuma(pu);
+  if (pu.cu->timd) // for TIMD CUs the mode goes through MAP131TO67 before being stored
+  {
+    ipm = MAP131TO67(ipm);
+  }
+  IpmBuf ib = pu.getIpmBuf();
+  ib.fill(ipm); // one value for every entry of the buffer
+}
+
+void PU::spanIpmInfoInter( PredictionUnit &pu, MotionBuf &mb, IpmBuf &ib) // For each entry of ib covered by pu.Y(), stores an intra mode fetched from the reference picture(s) that the motion in mb at that entry points to (PLANAR_IDX when nothing is fetched).
+{
+  const unsigned scale = 4 * std::max<int>(1, 4 * AMVP_DECIMATION_FACTOR / 4); // granularity that looked-up positions are snapped to (see mask)
+  const unsigned mask = ~(scale - 1); // clears the low bits, i.e. rounds a position down to a multiple of scale
+  Mv cMv;
+  RefPicList refList;
+  int refIdx;
+  Position PosY;
+  MotionInfo tempMi;
+  MotionInfo mi0;
+  MotionInfo mi1;
+  Position PosY0;
+  Position PosY1;
+  Mv cMv0;
+  Mv cMv1;
+  Picture* pRefPic0;
+  Picture* pRefPic1;
+  uint8_t* ii = ib.buf; // row pointer into ib, advanced by ib.stride after each y
+  int ibH = pu.Y().height >> MIN_CU_LOG2; // rows and columns are counted in units of (1 << MIN_CU_LOG2) luma samples
+  int ibW = pu.Y().width >> MIN_CU_LOG2;
+  for (int y = 0; y < ibH; y++)
+  {
+    for (int x = 0; x < ibW; x++)
+    {
+      uint8_t ipm = PLANAR_IDX;
+      tempMi = mb.at(x, y);
+      if (tempMi.interDir != 3) // single-list path; interDir == 3 reads both lists in the else branch below
+      {
+        if (tempMi.interDir != 2) // list 1 is used only when interDir == 2, list 0 otherwise
+        {
+          cMv = tempMi.mv[0];
+          refList = REF_PIC_LIST_0;
+          refIdx = tempMi.refIdx[0];
+        }
+        else
+        {
+          cMv = tempMi.mv[1];
+          refList = REF_PIC_LIST_1;
+          refIdx = tempMi.refIdx[1];
+        }
+        if (refList < 0 || refIdx < 0) // no usable reference: keep planar. NOTE(review): refList is only assigned REF_PIC_LIST_0/1 above, so the refList test looks redundant - confirm against the enum values
+        {
+          ipm = PLANAR_IDX;
+        }
+        else
+        {
+          cMv.changePrecision(MV_PRECISION_SIXTEENTH, MV_PRECISION_INT); // MV converted to integer precision so it can be added to sample positions
+          PosY.x = pu.Y().x + (x << MIN_CU_LOG2) + cMv.getHor(); // position the MV points to, in luma samples
+          PosY.y = pu.Y().y + (y << MIN_CU_LOG2) + cMv.getVer();
+          clipColPos(PosY.x, PosY.y, pu); // NOTE(review): helper not visible here; presumably constrains the position to a valid area
+          PosY.x = (PosY.x & mask);
+          PosY.y = (PosY.y & mask);
+          ipm = pu.cu->slice->getRefPic(refList, refIdx)->cs->getIpmInfo(PosY); // mode stored at that position in the reference picture's coding structure
+        }
+      }
+      else // both lists: fetch one candidate mode per list, then pick one of them
+      {
+        pRefPic0 = pu.cu->slice->getRefPic(REF_PIC_LIST_0, tempMi.refIdx[0]);
+        cMv0 = tempMi.mv[0];
+        cMv0.changePrecision(MV_PRECISION_SIXTEENTH, MV_PRECISION_INT);
+        PosY0.x = pu.Y().x + (x << MIN_CU_LOG2) + cMv0.getHor();
+        PosY0.y = pu.Y().y + (y << MIN_CU_LOG2) + cMv0.getVer();
+        clipColPos(PosY0.x, PosY0.y, pu);
+        PosY0.x = (PosY0.x & mask);
+        PosY0.y = (PosY0.y & mask);
+        mi0 = pRefPic0->cs->getMotionInfo(PosY0); // motion info of the pointed-to block, used below only for its isInter flag
+        int ipm0 = pRefPic0->cs->getIpmInfo(PosY0);
+        int pocDiff0 = abs(pRefPic0->getPOC() - pu.cu->slice->getPOC()); // absolute POC distance between reference 0 and the current slice
+
+        pRefPic1 = pu.cu->slice->getRefPic(REF_PIC_LIST_1, tempMi.refIdx[1]);
+        cMv1 = tempMi.mv[1];
+        cMv1.changePrecision(MV_PRECISION_SIXTEENTH, MV_PRECISION_INT);
+        PosY1.x = pu.Y().x + (x << MIN_CU_LOG2) + cMv1.getHor();
+        PosY1.y = pu.Y().y + (y << MIN_CU_LOG2) + cMv1.getVer();
+        clipColPos(PosY1.x, PosY1.y, pu);
+        PosY1.x = (PosY1.x & mask);
+        PosY1.y = (PosY1.y & mask);
+        mi1 = pRefPic1->cs->getMotionInfo(PosY1);
+        int ipm1 = pRefPic1->cs->getIpmInfo(PosY1);
+        int pocDiff1 = abs(pRefPic1->getPOC() - pu.cu->slice->getPOC());
+
+        if (!mi0.isInter && mi1.isInter) // selection priority, first match wins: (1) only the list-0 block is non-inter
+        {
+          ipm = ipm0;
+        }
+        else if (!mi1.isInter && mi0.isInter) // (2) only the list-1 block is non-inter
+        {
+          ipm = ipm1;
+        }
+        else if (ipm0 > DC_IDX && ipm1 <= DC_IDX) // (3) only ipm0 is above DC_IDX
+        {
+          ipm = ipm0;
+        }
+        else if (ipm0 <= DC_IDX && ipm1 > DC_IDX) // (4) only ipm1 is above DC_IDX
+        {
+          ipm = ipm1;
+        }
+        else if (pocDiff0 < pocDiff1) // (5) reference 0 is closer in POC
+        {
+          ipm = ipm0;
+        }
+        else if (pocDiff1 < pocDiff0) // (6) reference 1 is closer in POC
+        {
+          ipm = ipm1;
+        }
+        else if (pRefPic0->m_prevQP[0] > pRefPic1->m_prevQP[0]) // (7) equal POC distance: reference 1 wins if its m_prevQP[0] is lower
+        {
+          ipm = ipm1;
+        }
+        else // (8) everything tied: reference 0
+        {
+          ipm = ipm0;
+        }
+      }
+      ii[x] = ipm;
+    }
+    ii += ib.stride; // next row of ib
+  }
+}
+#endif
+
 void PU::applyImv( PredictionUnit& pu, MergeCtx &mrgCtx, InterPrediction *interPred )
 {
   if( !pu.mergeFlag )
@@ -4456,6 +4840,9 @@ void PU::spanGeoMotionInfo( PredictionUnit &pu, MergeCtx &geoMrgCtx, const uint8
   pu.geoMergeIdx0 = candIdx0;
   pu.geoMergeIdx1 = candIdx1;
   MotionBuf mb = pu.getMotionBuf();
+#if JVET_W0123_TIMD_FUSION
+  IpmBuf ib = pu.getIpmBuf();
+#endif
 
   MotionInfo biMv;
   biMv.isInter  = true;
@@ -4558,6 +4945,9 @@ void PU::spanGeoMotionInfo( PredictionUnit &pu, MergeCtx &geoMrgCtx, const uint8
     }
     motionInfo += mb.stride;
   }
+#if JVET_W0123_TIMD_FUSION
+  spanIpmInfoInter(pu, mb, ib);
+#endif
 }
 
 bool CU::hasSubCUNonZeroMVd( const CodingUnit& cu )
@@ -4987,6 +5377,9 @@ bool CU::isMTSAllowed(const CodingUnit &cu, const ComponentID compID)
   mtsAllowed &= !cu.sbtInfo;
 #if JVET_V0130_INTRA_TMP
   mtsAllowed &= !cu.tmpFlag;
+#endif
+#if JVET_W0123_TIMD_FUSION
+  mtsAllowed &= !(cu.timd && cu.firstPU->multiRefIdx);
 #endif
   mtsAllowed &= !(cu.bdpcmMode && cuWidth <= tsMaxSize && cuHeight <= tsMaxSize);
   return mtsAllowed;
diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h
index ab2c96231fb0bfa826aa208253437a3eac1656c5..0f1fed6c9838dd5f7e74da88f7ca2a25aa53f411 100644
--- a/source/Lib/CommonLib/UnitTools.h
+++ b/source/Lib/CommonLib/UnitTools.h
@@ -109,6 +109,9 @@ namespace CU
   bool      canUseLfnstWithISP        ( const CodingUnit& cu, const ChannelType chType );
   uint32_t  getISPSplitDim            ( const int width, const int height, const PartSplit ispType );
   bool      allLumaCBFsAreZero        ( const CodingUnit& cu );
+#if JVET_W0123_TIMD_FUSION
+  TEMPLATE_TYPE deriveTimdRefType     ( int iCurX, int iCurY, uint32_t uiCurWidth, uint32_t uiCurHeight, int iTemplateWidth, int iTemplateHeight, int& iRefX, int& iRefY, uint32_t& uiRefWidth, uint32_t& uiRefHeight );
+#endif
 
   PUTraverser traversePUs             (      CodingUnit& cu);
   TUTraverser traverseTUs             (      CodingUnit& cu);
@@ -191,6 +194,15 @@ namespace PU
   void spanMotionInfo                 (      PredictionUnit &pu, const MergeCtx &mrgCtx = MergeCtx(), Mv* bdmvrSubPuMv0 = nullptr, Mv* bdmvrSubPuMv1 = nullptr, Mv* bdofSubPuMvOffset = nullptr );
 #else
   void spanMotionInfo                 (      PredictionUnit &pu, const MergeCtx &mrgCtx = MergeCtx() );
+#endif
+#if JVET_W0123_TIMD_FUSION
+#if MULTI_PASS_DMVR
+  void spanMotionInfo2                (      PredictionUnit &pu, const MergeCtx &mrgCtx = MergeCtx(), Mv* bdmvrSubPuMv0 = nullptr, Mv* bdmvrSubPuMv1 = nullptr, Mv* bdofSubPuMvOffset = nullptr );
+#else
+  void spanMotionInfo2                (      PredictionUnit &pu, const MergeCtx &mrgCtx = MergeCtx() );
+#endif
+  void spanIpmInfoIntra               (      PredictionUnit &pu );
+  void spanIpmInfoInter               (      PredictionUnit &pu, MotionBuf &mb, IpmBuf &ib );
 #endif
   void applyImv                       (      PredictionUnit &pu, MergeCtx &mrgCtx, InterPrediction *interPred = NULL );
   void getAffineControlPointCand(const PredictionUnit &pu, MotionInfo mi[4], bool isAvailable[4], int verIdx[4], int8_t bcwIdx, int modelIdx, int verNum, AffineMergeCtx& affMrgCtx);
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index 0539150a948c6a7aab9dad25b7c846d7787f8e4a..a0ced2ca8a07d59063fdaf837ccf7d5bb7354ec7 100644
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -1582,10 +1582,18 @@ void CABACReader::extend_ref_line(CodingUnit& cu)
 
     if (MRL_NUM_REF_LINES > 1)
     {
+#if JVET_W0123_TIMD_FUSION
+      multiRefIdx = m_BinDecoder.decodeBin(cu.timd ? Ctx::MultiRefLineIdx(2) : Ctx::MultiRefLineIdx(0)) == 1 ? MULTI_REF_LINE_IDX[1] : MULTI_REF_LINE_IDX[0];
+#else
       multiRefIdx = m_BinDecoder.decodeBin(Ctx::MultiRefLineIdx(0)) == 1 ? MULTI_REF_LINE_IDX[1] : MULTI_REF_LINE_IDX[0];
+#endif
       if (MRL_NUM_REF_LINES > 2 && multiRefIdx != MULTI_REF_LINE_IDX[0])
       {
+#if JVET_W0123_TIMD_FUSION
+        multiRefIdx = m_BinDecoder.decodeBin(cu.timd ? Ctx::MultiRefLineIdx(3) : Ctx::MultiRefLineIdx(1)) == 1 ? MULTI_REF_LINE_IDX[2] : MULTI_REF_LINE_IDX[1];
+#else
         multiRefIdx = m_BinDecoder.decodeBin(Ctx::MultiRefLineIdx(1)) == 1 ? MULTI_REF_LINE_IDX[2] : MULTI_REF_LINE_IDX[1];
+#endif
       }
 
     }
@@ -1627,6 +1635,9 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
     mip_pred_modes(cu);
     return;
   }
+#if JVET_W0123_TIMD_FUSION
+  cu_timd_flag(cu);
+#endif
   extend_ref_line( cu );
   isp_mode( cu );
 #if ENABLE_DIMD
@@ -1634,6 +1645,12 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
   {
     return;
   }
+#endif
+#if JVET_W0123_TIMD_FUSION
+  if (cu.timd)
+  {
+    return;
+  }
 #endif
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__INTRA_DIR_ANG, cu.lumaSize(), CHANNEL_TYPE_LUMA );
 
@@ -1669,7 +1686,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
 #else
     PU::getIntraMPMs(*pu, mpm_pred);
 #endif
-#if ENABLE_DIMD
+#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION
     pu->parseLumaMode = true;
     pu->mpmFlag = mpmFlag[k];
 #endif
@@ -1711,7 +1728,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
           ipred_idx += m_BinDecoder.decodeBinEP();
         }
       }
-#if ENABLE_DIMD
+#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION
       pu->ipred_idx = ipred_idx;
 #endif
       pu->intraDir[0] = mpm_pred[ipred_idx];
@@ -1724,7 +1741,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
 #if SECONDARY_MPM
         if (m_BinDecoder.decodeBin(Ctx::IntraLumaSecondMpmFlag()))
         {
-#if ENABLE_DIMD
+#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION
           int idx = m_BinDecoder.decodeBinsEP(4) + NUM_PRIMARY_MOST_PROBABLE_MODES;
           ipred_mode = mpm_pred[idx];
           pu->secondMpmFlag = true;
@@ -1736,7 +1753,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
         else
         {
           xReadTruncBinCode(ipred_mode, NUM_LUMA_MODE - NUM_MOST_PROBABLE_MODES);
-#if ENABLE_DIMD
+#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION
           pu->secondMpmFlag = false;
           pu->ipred_idx = ipred_mode;
 #endif
@@ -1744,7 +1761,7 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
         }
 #else
         xReadTruncBinCode(ipred_mode, NUM_LUMA_MODE - NUM_MOST_PROBABLE_MODES);
-#if ENABLE_DIMD
+#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION
         pu->ipred_idx = ipred_mode;
 #endif
 #endif
@@ -1779,6 +1796,38 @@ void CABACReader::cu_dimd_flag(CodingUnit& cu)
   cu.dimd = m_BinDecoder.decodeBin(Ctx::DimdFlag(ctxId));
 }
 #endif
+
+#if JVET_W0123_TIMD_FUSION
+void CABACReader::cu_timd_flag( CodingUnit& cu ) // Parses the TIMD flag into cu.timd; when any guard below applies, no bin is read and cu.timd is set to false.
+{
+  if (!cu.cs->sps->getUseTimd()) // TIMD disabled in the SPS
+  {
+    cu.timd = false;
+    return;
+  }
+  if (cu.lwidth() * cu.lheight() > 1024 && cu.slice->getSliceType() == I_SLICE) // luma area above 1024 samples in an I slice: flag not coded
+  {
+    cu.timd = false;
+    return;
+  }
+#if ENABLE_DIMD
+  if (cu.dimd) // DIMD CU: flag not coded
+  {
+    cu.timd = false;
+    return;
+  }
+#endif
+  if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType)) // needs a valid luma block, intra prediction mode and a luma channel type
+  {
+    cu.timd = false;
+    return;
+  }
+
+  unsigned ctxId = DeriveCtx::CtxTimdFlag( cu ); // context index derived from the CU
+  cu.timd = m_BinDecoder.decodeBin( Ctx::TimdFlag(ctxId) ); // a single context-coded bin
+}
+#endif
+
 void CABACReader::intra_chroma_pred_modes( CodingUnit& cu )
 {
 #if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
@@ -1875,7 +1924,7 @@ void CABACReader::intra_chroma_pred_mode(PredictionUnit& pu)
     pu.intraDir[1] = DM_CHROMA_IDX;
     return;
   }
-#if ENABLE_DIMD
+#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION
   pu.parseChromaMode = true;
 #endif
   unsigned candId = m_BinDecoder.decodeBinsEP(2);
@@ -1888,7 +1937,7 @@ void CABACReader::intra_chroma_pred_mode(PredictionUnit& pu)
   CHECK(chromaCandModes[candId] == DM_CHROMA_IDX, "The intra dir cannot be DM_CHROMA for this path");
 
   pu.intraDir[1] = chromaCandModes[candId];
-#if ENABLE_DIMD
+#if ENABLE_DIMD || JVET_W0123_TIMD_FUSION
   pu.candId = candId;
 #endif
 }
@@ -3848,7 +3897,11 @@ void CABACReader::isp_mode( CodingUnit& cu )
 
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__ISP_MODE_FLAG);
 
+#if JVET_W0123_TIMD_FUSION
+  int symbol = m_BinDecoder.decodeBin(cu.timd ? Ctx::ISPMode(2) : Ctx::ISPMode(0));
+#else
   int symbol = m_BinDecoder.decodeBin(Ctx::ISPMode(0));
+#endif
 
   if( symbol )
   {
@@ -3881,6 +3934,13 @@ void CABACReader::residual_lfnst_mode( CodingUnit& cu,  CUCtx& cuCtx  )
   {
     return;
   }
+#if JVET_W0123_TIMD_FUSION
+  if (cu.timd && (cu.ispMode || cu.firstPU->multiRefIdx))
+  {
+    cu.lfnstIdx = 0;
+    return;
+  }
+#endif
 
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__LFNST );
 
diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h
index 59607c833b83baa691603e354f34bc03f1ea175e..4a8b362b9e25f8f294bdae22d20cfb62495cd478 100644
--- a/source/Lib/DecoderLib/CABACReader.h
+++ b/source/Lib/DecoderLib/CABACReader.h
@@ -96,6 +96,9 @@ public:
   void        cu_bcw_flag               ( CodingUnit&                   cu );
   void        extend_ref_line           (CodingUnit&                     cu);
   void        intra_luma_pred_modes     ( CodingUnit&                   cu );
+#if JVET_W0123_TIMD_FUSION
+  void        cu_timd_flag              ( CodingUnit&                   cu );
+#endif
   void        intra_chroma_pred_modes   ( CodingUnit&                   cu );
   bool        intra_chroma_lmc_mode     ( PredictionUnit&               pu );
   void        intra_chroma_pred_mode    ( PredictionUnit&               pu );
diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp
index 19f5affab59e9e932206a0c7ec8a21a8304935e8..f3c5ec18b4cb47fa5a83083a94c4056233f287b7 100644
--- a/source/Lib/DecoderLib/DecCu.cpp
+++ b/source/Lib/DecoderLib/DecCu.cpp
@@ -161,12 +161,84 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea )
           IntraPrediction::deriveDimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU);
           pu->intraDir[0] = currCU.dimdMode;
         }
+#if JVET_W0123_TIMD_FUSION
+        else if (currCU.timd) // TIMD CU: the luma mode is derived at the decoder rather than taken from a parsed mode index
+        {
+          PredictionUnit *pu = currCU.firstPU;
+          const CompArea &area = currCU.Y();
+#if SECONDARY_MPM
+          IntraPrediction::deriveDimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU); // DIMD derivation is also run for TIMD CUs in this configuration - NOTE(review): presumably its results feed the TIMD/MPM derivation; confirm
+#endif
+          currCU.timdMode = m_pcIntraPred->deriveTimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU); // derived from the picture's reconstruction buffer for the luma area
+          pu->intraDir[0] = currCU.timdMode; // the derived mode becomes the PU's luma intra direction
+        }
+#endif // JVET_W0123_TIMD_FUSION
         else if (currCU.firstPU->parseLumaMode)
         {
           const CompArea &area = currCU.Y();
           IntraPrediction::deriveDimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU);
         }
 
+        //redo prediction dir derivation
+        if (currCU.firstPU->parseLumaMode)
+        {
+#if SECONDARY_MPM
+          uint8_t* mpm_pred = currCU.firstPU->intraMPM;  // mpm_idx / rem_intra_luma_pred_mode
+          uint8_t* non_mpm_pred = currCU.firstPU->intraNonMPM;
+          PU::getIntraMPMs( *currCU.firstPU, mpm_pred, non_mpm_pred );
+#else
+          unsigned int mpm_pred[NUM_MOST_PROBABLE_MODES];  // mpm_idx / rem_intra_luma_pred_mode
+          PU::getIntraMPMs(*currCU.firstPU, mpm_pred);
+#endif
+          if (currCU.firstPU->mpmFlag)
+          {
+            currCU.firstPU->intraDir[0] = mpm_pred[currCU.firstPU->ipred_idx];
+          }
+          else
+          {
+#if SECONDARY_MPM
+            if (currCU.firstPU->secondMpmFlag)
+            {
+              currCU.firstPU->intraDir[0] = mpm_pred[currCU.firstPU->ipred_idx];
+            }
+            else
+            {
+              currCU.firstPU->intraDir[0] = non_mpm_pred[currCU.firstPU->ipred_idx];
+            }
+#else
+            //postponed sorting of MPMs (only in remaining branch)
+            std::sort(mpm_pred, mpm_pred + NUM_MOST_PROBABLE_MODES);
+            unsigned ipred_mode = currCU.firstPU->ipred_idx;
+
+            for (uint32_t i = 0; i < NUM_MOST_PROBABLE_MODES; i++)
+            {
+              ipred_mode += (ipred_mode >= mpm_pred[i]);
+            }
+            currCU.firstPU->intraDir[0] = ipred_mode;
+#endif
+          }
+        }
+        if (currCU.firstPU->parseChromaMode)
+        {
+          unsigned chromaCandModes[NUM_CHROMA_MODE];
+          PU::getIntraChromaCandModes(*currCU.firstPU, chromaCandModes);
+
+          CHECK(currCU.firstPU->candId >= NUM_CHROMA_MODE, "Chroma prediction mode index out of bounds");
+          CHECK(PU::isLMCMode(chromaCandModes[currCU.firstPU->candId]), "The intra dir cannot be LM_CHROMA for this path");
+          CHECK(chromaCandModes[currCU.firstPU->candId] == DM_CHROMA_IDX, "The intra dir cannot be DM_CHROMA for this path");
+
+          currCU.firstPU->intraDir[1] = chromaCandModes[currCU.firstPU->candId];
+        }
+#else
+#if JVET_W0123_TIMD_FUSION
+        if (currCU.timd)
+        {
+          PredictionUnit *pu = currCU.firstPU;
+          const CompArea &area = currCU.Y();
+          currCU.timdMode = m_pcIntraPred->deriveTimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU);
+          pu->intraDir[0] = currCU.timdMode;
+        }
+
         //redo prediction dir derivation
         if (currCU.firstPU->parseLumaMode)
         {
@@ -217,6 +289,7 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea )
 
           currCU.firstPU->intraDir[1] = chromaCandModes[currCU.firstPU->candId];
         }
+#endif
 #endif
         xReconIntraQT( currCU );
         break;
@@ -670,6 +743,12 @@ void DecCu::xReconIntraQT( CodingUnit &cu )
     }
   }
   }
+#if JVET_W0123_TIMD_FUSION
+  if (cu.blocks[CHANNEL_TYPE_LUMA].valid())
+  {
+    PU::spanIpmInfoIntra(*cu.firstPU);
+  }
+#endif
 }
 
 void DecCu::xReconPLT(CodingUnit &cu, ComponentID compBegin, uint32_t numComp)
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index b41bffd482821ebc8174ffe69f8abc181fe6215b..96d62b5f8372b48ce07d3d99461577d8834ff1fd 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -2266,6 +2266,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   {
     READ_UVLC(uiCode, "sps_log2_intra_tmp_max_size");                 pcSPS->setIntraTMPMaxSize(1 << uiCode);
   }
+#endif
+#if JVET_W0123_TIMD_FUSION
+  READ_FLAG(uiCode, "sps_timd_enabled_flag");                        pcSPS->setUseTimd( uiCode != 0 );
 #endif
   if( pcSPS->getChromaFormatIdc() != CHROMA_400)
   {
@@ -5006,6 +5009,9 @@ void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo)
     READ_FLAG(symbol, "gci_no_cclm_constraint_flag");                    cinfo->setNoCclmConstraintFlag(symbol > 0 ? true : false);
 #if ENABLE_DIMD
     READ_FLAG(symbol, "gci_no_dimd_constraint_flag");                    cinfo->setNoDimdConstraintFlag(symbol > 0 ? true : false);
+#endif
+#if JVET_W0123_TIMD_FUSION
+    READ_FLAG(symbol, "gci_no_timd_constraint_flag");                    cinfo->setNoTimdConstraintFlag(symbol > 0 ? true : false);
 #endif
     /* inter */
     READ_FLAG(symbol, "gci_no_ref_pic_resampling_constraint_flag");      cinfo->setNoRprConstraintFlag(symbol > 0 ? true : false);
diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp
index 391f28a2fbac914005f1b5bf19360e80dc8806a9..4904308cf850d1f40e3d256220d054e1791b8dc5 100644
--- a/source/Lib/EncoderLib/CABACWriter.cpp
+++ b/source/Lib/EncoderLib/CABACWriter.cpp
@@ -1140,10 +1140,18 @@ void CABACWriter::extend_ref_line(const PredictionUnit& pu)
   int multiRefIdx = pu.multiRefIdx;
   if (MRL_NUM_REF_LINES > 1)
   {
+#if JVET_W0123_TIMD_FUSION
+    m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[0], cu.timd ? Ctx::MultiRefLineIdx(2) : Ctx::MultiRefLineIdx(0));
+#else
     m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[0], Ctx::MultiRefLineIdx(0));
+#endif
     if (MRL_NUM_REF_LINES > 2 && multiRefIdx != MULTI_REF_LINE_IDX[0])
     {
+#if JVET_W0123_TIMD_FUSION
+      m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], cu.timd ? Ctx::MultiRefLineIdx(3) : Ctx::MultiRefLineIdx(1));
+#else
       m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], Ctx::MultiRefLineIdx(1));
+#endif
     }
   }
 }
@@ -1177,10 +1185,18 @@ void CABACWriter::extend_ref_line(const CodingUnit& cu)
     int multiRefIdx = pu->multiRefIdx;
     if (MRL_NUM_REF_LINES > 1)
     {
+#if JVET_W0123_TIMD_FUSION
+      m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[0], cu.timd ? Ctx::MultiRefLineIdx(2) : Ctx::MultiRefLineIdx(0));
+#else
       m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[0], Ctx::MultiRefLineIdx(0));
+#endif
       if (MRL_NUM_REF_LINES > 2 && multiRefIdx != MULTI_REF_LINE_IDX[0])
       {
+#if JVET_W0123_TIMD_FUSION
+        m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], cu.timd ? Ctx::MultiRefLineIdx(3) : Ctx::MultiRefLineIdx(1));
+#else
         m_BinEncoder.encodeBin(multiRefIdx != MULTI_REF_LINE_IDX[1], Ctx::MultiRefLineIdx(1));
+#endif
       }
 
     }
@@ -1217,6 +1233,9 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu )
     mip_pred_modes(cu);
     return;
   }
+#if JVET_W0123_TIMD_FUSION
+  cu_timd_flag(cu);
+#endif
   extend_ref_line( cu );
 
   isp_mode( cu );
@@ -1226,6 +1245,12 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu )
     return;
   }
 #endif
+#if JVET_W0123_TIMD_FUSION
+  if (cu.timd)
+  {
+    return;
+  }
+#endif
 #if SECONDARY_MPM
   const int numMPMs = NUM_PRIMARY_MOST_PROBABLE_MODES;
 #else
@@ -1410,6 +1435,9 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu )
     mip_pred_mode(pu);
     return;
   }
+#if JVET_W0123_TIMD_FUSION
+  cu_timd_flag(*pu.cu);
+#endif
   extend_ref_line( pu );
   isp_mode( *pu.cu );
 #if ENABLE_DIMD
@@ -1417,6 +1445,12 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu )
   {
     return;
   }
+#endif
+#if JVET_W0123_TIMD_FUSION
+  if (pu.cu->timd)
+  {
+    return;
+  }
 #endif
   // prev_intra_luma_pred_flag
 #if SECONDARY_MPM
@@ -1540,6 +1574,33 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu )
   }
 }
 
+#if JVET_W0123_TIMD_FUSION
+void CABACWriter::cu_timd_flag( const CodingUnit& cu ) // Encodes cu.timd as one context-coded bin; nothing is written when any guard below applies (same conditions as in CABACReader::cu_timd_flag).
+{
+  if (!cu.cs->sps->getUseTimd()) // TIMD disabled in the SPS
+  {
+    return;
+  }
+  if (cu.lwidth() * cu.lheight() > 1024 && cu.slice->getSliceType() == I_SLICE) // luma area above 1024 samples in an I slice: flag not coded
+  {
+    return;
+  }
+#if ENABLE_DIMD
+  if (cu.dimd) // DIMD CU: flag not coded
+  {
+    return;
+  }
+#endif
+  if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType)) // needs a valid luma block, intra prediction mode and a luma channel type
+  {
+    return;
+  }
+
+  unsigned ctxId = DeriveCtx::CtxTimdFlag(cu); // context index derived from the CU
+  m_BinEncoder.encodeBin(cu.timd, Ctx::TimdFlag(ctxId));
+}
+#endif
+
 #if ENABLE_DIMD
 void CABACWriter::cu_dimd_flag(const CodingUnit& cu)
 {
@@ -3578,11 +3639,19 @@ void CABACWriter::isp_mode( const CodingUnit& cu )
   }
   if ( cu.ispMode == NOT_INTRA_SUBPARTITIONS )
   {
+#if JVET_W0123_TIMD_FUSION
+    m_BinEncoder.encodeBin( 0, cu.timd ? Ctx::ISPMode( 2 ) : Ctx::ISPMode( 0 ) );
+#else
     m_BinEncoder.encodeBin( 0, Ctx::ISPMode( 0 ) );
+#endif
   }
   else
   {
+#if JVET_W0123_TIMD_FUSION
+    m_BinEncoder.encodeBin( 1, cu.timd ? Ctx::ISPMode( 2 ) : Ctx::ISPMode( 0 ) );
+#else
     m_BinEncoder.encodeBin( 1, Ctx::ISPMode( 0 ) );
+#endif
     m_BinEncoder.encodeBin( cu.ispMode - 1, Ctx::ISPMode( 1 ) );
   }
   DTRACE( g_trace_ctx, D_SYNTAX, "intra_subPartitions() etype=%d pos=(%d,%d) ispIdx=%d\n", cu.chType, cu.blocks[cu.chType].x, cu.blocks[cu.chType].y, (int)cu.ispMode );
@@ -3611,6 +3680,12 @@ void CABACWriter::residual_lfnst_mode( const CodingUnit& cu, CUCtx& cuCtx )
   {
     return;
   }
+#if JVET_W0123_TIMD_FUSION
+  if (cu.timd && (cu.ispMode || cu.firstPU->multiRefIdx))
+  {
+    return;
+  }
+#endif
 
   if( cu.cs->sps->getUseLFNST() && CU::isIntra( cu ) )
   {
diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h
index afbf6ed7c1a280ea67cfcaadaabca11236e8cdb6..bae249e5075fba6e03134e6374d1d8a0345d2c1b 100644
--- a/source/Lib/EncoderLib/CABACWriter.h
+++ b/source/Lib/EncoderLib/CABACWriter.h
@@ -107,6 +107,9 @@ public:
   void        intra_luma_pred_mode      ( const PredictionUnit&         pu );
 #if ENABLE_DIMD
   void        cu_dimd_flag              ( const CodingUnit&             cu );
+#endif
+#if JVET_W0123_TIMD_FUSION
+  void        cu_timd_flag              ( const CodingUnit&             cu );
 #endif
   void        intra_chroma_pred_modes   ( const CodingUnit&             cu );
   void        intra_chroma_lmc_mode     ( const PredictionUnit&         pu );
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index a0740d6c0da4a2a4250e539130baa9fa9b919772..14ecf41a012ef787653f69c1d7c6ab738dd6b4ae 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -232,6 +232,9 @@ protected:
 #if ENABLE_DIMD
   bool      m_noDimdConstraintFlag;
 #endif
+#if JVET_W0123_TIMD_FUSION
+  bool      m_noTimdConstraintFlag;
+#endif
 #if ENABLE_OBMC
   bool      m_noObmcConstraintFlag;
 #endif
@@ -384,6 +387,9 @@ protected:
 #if ENABLE_DIMD
   bool      m_dimd;
 #endif
+#if JVET_W0123_TIMD_FUSION
+  bool      m_timd;
+#endif
 #if ENABLE_OBMC
   bool      m_OBMC;
 #endif
@@ -976,6 +982,10 @@ public:
   bool      getNoDimdConstraintFlag() const { return m_noDimdConstraintFlag; }
   void      setNoDimdConstraintFlag(bool val) { m_noDimdConstraintFlag = val; }
 #endif
+#if JVET_W0123_TIMD_FUSION
+  bool      getNoTimdConstraintFlag() const { return m_noTimdConstraintFlag; } // value later copied into the ConstraintInfo by EncLib::xInitSPS
+  void      setNoTimdConstraintFlag(bool val) { m_noTimdConstraintFlag = val; }
+#endif // JVET_W0123_TIMD_FUSION
 #if ENABLE_OBMC
   bool      getNoObmcConstraintFlag() const { return m_noObmcConstraintFlag; }
   void      setNoObmcConstraintFlag(bool bVal) { m_noObmcConstraintFlag = bVal; }
@@ -1238,6 +1248,10 @@ public:
   void      setUseDimd                   ( bool b )       { m_dimd = b; }
   bool      getUseDimd                   ()         const { return m_dimd; }
 #endif
+#if JVET_W0123_TIMD_FUSION
+  void      setUseTimd                   ( bool b )       { m_timd = b; } // encoder-side TIMD switch; EncLib::xInitSPS forwards m_timd to sps.setUseTimd
+  bool      getUseTimd                   () const         { return m_timd; }
+#endif // JVET_W0123_TIMD_FUSION
 #if ENABLE_OBMC
   void      setUseObmc                   ( bool b )       { m_OBMC = b; }
   bool      getUseObmc                   ()         const { return m_OBMC; }
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index 59dbb6c05e0ec253279e2b27f0db3d0cb8cbef52..2142260474e0634d82ada0799cc17cc8bad0f82a 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -1816,6 +1816,12 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
   double maxCostAllowedForChroma = MAX_DOUBLE;
   const  CodingUnit *bestCU      = bestCS->getCU( partitioner.chType );
   Distortion interHad = m_modeCtrl->getInterHad();
+#if JVET_W0123_TIMD_FUSION
+  int timdMode = 0;
+  int timdModeSecondary = 0;
+  bool timdIsBlended = false;
+  int  timdFusionWeight[2] = { 0 };
+#endif
 
 
   double dct2Cost                =   MAX_DOUBLE;
@@ -1923,6 +1929,9 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
   }
 #endif
 
+#if JVET_W0123_TIMD_FUSION
+  bool timdDerived = false;
+#endif
   for( int trGrpIdx = 0; trGrpIdx < grpNumMax; trGrpIdx++ )
   {
     const uint8_t startMtsFlag = trGrpIdx > 0;
@@ -1979,6 +1988,35 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
           cu.colorTransform = adaptiveColorTrans;
 
           CU::addPUs( cu );
+#if JVET_W0123_TIMD_FUSION
+          cu.timd = false; // freshly set-up CU starts with the flag off
+          if (isLuma(partitioner.chType) && cu.slice->getSPS()->getUseTimd())
+          {
+            if (cu.lwidth() * cu.lheight() > 1024 && cu.slice->getSliceType() == I_SLICE) // same size/slice condition under which cu_timd_flag is not coded
+            {
+              timdDerived = true; // marks derivation as done so deriveTimdMode is never called here; the else branch below then copies the locals' initial values
+            }
+            if (!timdDerived) // first pass: derive once and cache the results in the locals declared before the trGrpIdx loop
+            {
+              const CompArea &area = cu.Y();
+              cu.timdMode = m_pcIntraSearch->deriveTimdMode(bestCS->picture->getRecoBuf(area), area, cu);
+              timdMode = cu.timdMode;
+              timdDerived = true;
+              timdModeSecondary = cu.timdModeSecondary; // NOTE(review): these cu.timd* fields are presumably filled in by deriveTimdMode - confirm
+              timdIsBlended     = cu.timdIsBlended;
+              timdFusionWeight[0] = cu.timdFusionWeight[0];
+              timdFusionWeight[1] = cu.timdFusionWeight[1];
+            }
+            else // later passes: restore the cached results into this CU instead of deriving again
+            {
+              cu.timdMode = timdMode;
+              cu.timdModeSecondary = timdModeSecondary;
+              cu.timdIsBlended     = timdIsBlended;
+              cu.timdFusionWeight[0] = timdFusionWeight[0];
+              cu.timdFusionWeight[1] = timdFusionWeight[1];
+            }
+          }
+#endif // JVET_W0123_TIMD_FUSION
 
           tempCS->interHad    = interHad;
 
@@ -2003,7 +2041,14 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
             {
               continue;
             }
+#if JVET_W0123_TIMD_FUSION
+            PU::spanIpmInfoIntra(*cu.firstPU);  // NOTE(review): helper not visible here; presumably spreads the PU's intra mode info - confirm
+#endif
+#if JVET_W0123_TIMD_FUSION
+            if (m_pcEncCfg->getUseFastISP() && validCandRet && !mtsFlag && !lfnstIdx && !cu.colorTransform && !cu.timd)  // same test as the #else branch, plus: not entered when cu.timd is set
+#else
             if (m_pcEncCfg->getUseFastISP() && validCandRet && !mtsFlag && !lfnstIdx && !cu.colorTransform)
+#endif
             {
               m_modeCtrl->setISPMode(cu.ispMode);
               m_modeCtrl->setISPLfnstIdx(cu.lfnstIdx);
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index b76d81cf34849a886353165abf4603baf1914d66..16965b1ac1592b814f890798740006b84e007d93 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -1288,6 +1288,9 @@ void EncLib::xInitSPS( SPS& sps )
 #if ENABLE_DIMD
   cinfo->setNoDimdConstraintFlag(m_noDimdConstraintFlag);
 #endif
+#if JVET_W0123_TIMD_FUSION
+  cinfo->setNoTimdConstraintFlag(m_noTimdConstraintFlag);
+#endif
 #if ENABLE_OBMC
   cinfo->setNoObmcConstraintFlag(m_noObmcConstraintFlag);
 #endif
@@ -1408,6 +1411,9 @@ void EncLib::xInitSPS( SPS& sps )
 #if ENABLE_DIMD
   sps.setUseDimd            ( m_dimd );
 #endif
+#if JVET_W0123_TIMD_FUSION
+  sps.setUseTimd            ( m_timd );
+#endif
 #if ENABLE_OBMC
   sps.setUseOBMC            ( m_OBMC );
 #endif
diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp
index 46474299c2704bb447f3af87fb6398c124173acd..aa634aa9dcd750952c764c140dd7b54ea5af8cd0 100644
--- a/source/Lib/EncoderLib/IntraSearch.cpp
+++ b/source/Lib/EncoderLib/IntraSearch.cpp
@@ -396,6 +396,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
   const TempCtx ctxStartMipFlag    ( m_CtxCache, SubCtx( Ctx::MipFlag,          m_CABACEstimator->getCtx() ) );
 #if JVET_V0130_INTRA_TMP
   const TempCtx ctxStartTpmFlag(m_CtxCache, SubCtx(Ctx::TmpFlag, m_CABACEstimator->getCtx()));
+#endif
+#if JVET_W0123_TIMD_FUSION
+  const TempCtx ctxStartTimdFlag   ( m_CtxCache, SubCtx( Ctx::TimdFlag,      m_CABACEstimator->getCtx() ) );
 #endif
   const TempCtx ctxStartIspMode    ( m_CtxCache, SubCtx( Ctx::ISPMode,          m_CABACEstimator->getCtx() ) );
 #if SECONDARY_MPM
@@ -509,6 +512,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 #endif
 #if ENABLE_DIMD
     bool bestDimdMode = false;
+#endif
+#if JVET_W0123_TIMD_FUSION
+    bool bestTimdMode = false;
 #endif
     if (isSecondColorSpace)
     {
@@ -621,6 +627,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
               m_CABACEstimator->getCtx() = SubCtx( Ctx::TmpFlag, ctxStartTpmFlag );
 #endif
               m_CABACEstimator->getCtx() = SubCtx( Ctx::MipFlag, ctxStartMipFlag );
+#if JVET_W0123_TIMD_FUSION
+              m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag );
+#endif
               m_CABACEstimator->getCtx() = SubCtx( Ctx::ISPMode, ctxStartIspMode );
 #if SECONDARY_MPM
               m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMPMIdx, ctxStartMPMIdxFlag);
@@ -698,6 +707,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
                     m_CABACEstimator->getCtx() = SubCtx( Ctx::TmpFlag, ctxStartTpmFlag );
 #endif
                     m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
+#if JVET_W0123_TIMD_FUSION
+                    m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag );
+#endif
                     m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
 #if SECONDARY_MPM
                     m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMPMIdx, ctxStartMPMIdxFlag);
@@ -766,6 +778,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
                   m_CABACEstimator->getCtx() = SubCtx( Ctx::TmpFlag, ctxStartTpmFlag );
 #endif
                   m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
+#if JVET_W0123_TIMD_FUSION
+                  m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag );
+#endif
                   m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
 #if SECONDARY_MPM
                   m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMPMIdx, ctxStartMPMIdxFlag);
@@ -1096,6 +1111,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
           m_CABACEstimator->getCtx() = SubCtx( Ctx::TmpFlag, ctxStartTpmFlag );
 #endif
           m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
+#if JVET_W0123_TIMD_FUSION
+          m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag );
+#endif
           m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
 #if SECONDARY_MPM
           m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMPMIdx, ctxStartMPMIdxFlag);
@@ -1113,6 +1131,30 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
     }
 
     int numNonISPModes = (int)uiRdModeList.size();
+#if JVET_W0123_TIMD_FUSION
+    bool isTimdValid = cu.slice->getSPS()->getUseTimd();  // TIMD candidates require the SPS flag
+    if (cu.lwidth() * cu.lheight() > 1024 && cu.slice->getSliceType() == I_SLICE)
+    {
+      isTimdValid = false;  // no TIMD candidates for I-slice CUs with lwidth*lheight > 1024
+    }
+    if (isTimdValid)
+    {
+      cu.timd = false;
+      uiRdModeList.push_back( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, TIMD_IDX ) );  // TIMD_IDX placeholder candidate, no ISP, third argument 0
+      numNonISPModes++;  // the appended candidate is counted as a non-ISP mode
+      if (lfnstIdx == 0 && !cu.mtsFlag)  // additional variants only when lfnstIdx is 0 and the MTS flag is off
+      {
+        bool isFirstLineOfCtu     = (((pu.block(COMPONENT_Y).y) & ((pu.cs->sps)->getMaxCUWidth() - 1)) == 0);  // luma y has no bits set below getMaxCUWidth() (mask assumes a power of two - TODO confirm)
+        int  numOfPassesExtendRef = ((!sps.getUseMRL() || isFirstLineOfCtu) ? 1 : MRL_NUM_REF_LINES);  // 1 makes the loop below a no-op
+        for (int mRefNum = 1; mRefNum < numOfPassesExtendRef; mRefNum++)  // starts at 1: index 0 is not revisited here
+        {
+          int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum];
+          uiRdModeList.push_back( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, TIMD_IDX ) );  // TIMD_IDX placeholder with this multiRefIdx as third argument
+          numNonISPModes++;
+        }
+      }
+    }
+#endif
 
     if ( testISP )
     {
@@ -1124,6 +1166,13 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
         uiRdModeList.push_back( ModeInfo( false, false, 0, INTRA_SUBPARTITIONS_RESERVED, 0 ) );
       }
     }
+#if JVET_W0123_TIMD_FUSION
+    if (isTimdValid && sps.getUseISP() && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize()) && lfnstIdx == 0)  // TIMD + ISP candidates only for lfnstIdx 0
+    {
+      uiRdModeList.push_back( ModeInfo( false, false, 0, HOR_INTRA_SUBPARTITIONS, TIMD_IDX ) );  // horizontal split first; numNonISPModes is not incremented for these
+      uiRdModeList.push_back( ModeInfo( false, false, 0, VER_INTRA_SUBPARTITIONS, TIMD_IDX ) );  // then vertical split
+    }
+#endif
 
     //===== check modes (using r-d costs) =====
     ModeInfo       uiBestPUMode;
@@ -1176,9 +1225,29 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
         {
           if (m_pcEncCfg->getUseFastISP())
           {
+#if JVET_W0123_TIMD_FUSION
+            if (bestTimdMode)  // best candidate so far is TIMD: its mode id goes through MAP131TO67 first
+            {
+              m_modeCtrl->setBestPredModeDCT2(MAP131TO67(uiBestPUMode.modeId));
+            }
+            else  // non-TIMD best: mode id is passed on unchanged, as in the #else build
+            {
+              m_modeCtrl->setBestPredModeDCT2(uiBestPUMode.modeId);
+            }
+#else
             m_modeCtrl->setBestPredModeDCT2(uiBestPUMode.modeId);
+#endif
           }
+#if JVET_W0123_TIMD_FUSION
+          ModeInfo tempBestPUMode = uiBestPUMode;  // work on a copy; uiBestPUMode itself is not modified here
+          if (bestTimdMode)
+          {
+            tempBestPUMode.modeId = MAP131TO67(tempBestPUMode.modeId);  // same mapping as above, applied to the copy
+          }
+          if (!xSortISPCandList(bestCurrentCost, csBest->cost, tempBestPUMode))  // sorter receives the (possibly mapped) copy
+#else
           if (!xSortISPCandList(bestCurrentCost, csBest->cost, uiBestPUMode))
+#endif
           {
             break;
           }
@@ -1207,9 +1276,43 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
       cu.ispMode                     = uiOrgMode.ispMod;
       pu.multiRefIdx                 = uiOrgMode.mRefId;
       pu.intraDir[CHANNEL_TYPE_LUMA] = uiOrgMode.modeId;
+#if JVET_W0123_TIMD_FUSION
+      cu.timd = false;  // cleared for every candidate; set again below only for TIMD_IDX placeholders
+      int modeDiff = uiOrgMode.modeId - MAP131TO67(cu.dimdMode);  // NOTE(review): uses cu.dimdMode (not cu.timdMode) under a TIMD-only guard - confirm this is intended and builds with ENABLE_DIMD off
+      if (isTimdValid && lfnstIdx == 0 && uiOrgMode.ispMod > 0 && modeDiff == 0)  // ISP candidate whose mode id equals the mapped reference mode
+      {
+        continue;  // candidate is not tested
+      }
+      if (isTimdValid && uiOrgMode.mRefId > 0 && lfnstIdx == 0 && cu.mtsFlag == 0 && modeDiff == 0)  // same equality test for candidates with mRefId > 0
+      {
+        continue;  // candidate is not tested
+      }
+      if (mode >= 0 && uiOrgMode.modeId == TIMD_IDX)  // TIMD placeholder taken from the candidate list
+      {
+        if (cu.ispMode)
+        {
+          cu.lfnstIdx = lfnstIdx;
+          if (cu.ispMode == VER_INTRA_SUBPARTITIONS && uiBestPUMode.ispMod == 0 && !bestTimdMode)  // vertical-ISP TIMD while the current best is neither ISP nor TIMD
+          {
+            continue;  // candidate is not tested
+          }
+        }
+        uiOrgMode.modeId = cu.timdMode;  // replace the placeholder id with the mode stored in cu.timdMode
+        pu.intraDir[CHANNEL_TYPE_LUMA] = uiOrgMode.modeId;  // overrides the TIMD_IDX value assigned just before this region
+        cu.timd = true;
+      }
+#endif
 
       CHECK(cu.mipFlag && pu.multiRefIdx, "Error: combination of MIP and MRL not supported");
-      CHECK(pu.multiRefIdx && (pu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported");
+#if JVET_W0123_TIMD_FUSION
+      if (!cu.timd)  // the MRL + Planar check below is only evaluated for non-TIMD candidates
+      {
+#endif
+        CHECK(pu.multiRefIdx && (pu.intraDir[0] == PLANAR_IDX),
+              "Error: combination of MRL and Planar mode not supported");
+#if JVET_W0123_TIMD_FUSION
+      }
+#endif
       CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported");
       CHECK(cu.ispMode && pu.multiRefIdx, "Error: combination of ISP and MRL not supported");
       CHECK(cu.ispMode&& cu.colorTransform, "Error: combination of ISP and ACT not supported");
@@ -1244,7 +1347,14 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
           continue;
         }
         // we save the data for future tests
+#if JVET_W0123_TIMD_FUSION
+        if (!cu.timd)  // results of TIMD candidates are not stored in m_ispTestedModes
+        {
+#endif
         m_ispTestedModes[m_curIspLfnstIdx].setModeResults((ISPType)cu.ispMode, (int)uiOrgMode.modeId, (int)csTemp->tus.size(), csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] ? csTemp->cost : MAX_DOUBLE, csBest->cost);
+#if JVET_W0123_TIMD_FUSION
+        }
+#endif
         csTemp->cost = !tmpValidReturn ? MAX_DOUBLE : csTemp->cost;
       }
       else
@@ -1261,9 +1371,17 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
         }
       }
 #if JVET_V0130_INTRA_TMP
+#if JVET_W0123_TIMD_FUSION
+      if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && !cu.tmpFlag && testISP && !cu.timd)
+#else
       if( !cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && !cu.tmpFlag && testISP )
+#endif
+#else
+#if JVET_W0123_TIMD_FUSION
+      if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && testISP && !cu.timd)
 #else
       if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && testISP)
+#endif
 #endif
       {
 #if JVET_V0130_INTRA_TMP
@@ -1281,7 +1399,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
       }
       validReturn |= tmpValidReturn;
 
+#if JVET_W0123_TIMD_FUSION
+      if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode && mode >= 0 && !cu.timd )
+#else
       if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode && mode >= 0 )
+#endif
       {
         m_modeCostStore[lfnstIdx][mode] = tmpValidReturn ? csTemp->cost : (MAX_DOUBLE / 2.0); //(MAX_DOUBLE / 2.0) ??
       }
@@ -1313,6 +1435,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
           bestBDPCMMode = cu.bdpcmMode;
 #if ENABLE_DIMD
           bestDimdMode = cu.dimd;
+#endif
+#if JVET_W0123_TIMD_FUSION
+          bestTimdMode = cu.timd;
 #endif
           if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode )
           {
@@ -1405,6 +1530,13 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
       }
 #endif
       cu.bdpcmMode = bestBDPCMMode;
+#if JVET_W0123_TIMD_FUSION
+      cu.timd = bestTimdMode;  // restore the TIMD flag recorded when the best candidate was last updated
+      if (cu.timd)
+      {
+        pu.intraDir[ CHANNEL_TYPE_LUMA ] = cu.timdMode;  // TIMD was best: luma intra direction is taken from cu.timdMode
+      }
+#endif
       if (cu.colorTransform)
       {
         CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform");
@@ -6217,6 +6349,9 @@ void IntraSearch::xGetNextISPMode(ModeInfo& modeInfo, const ModeInfo* lastMode,
     if (
 #if ENABLE_DIMD && !JVET_V0087_DIMD_NO_ISP
       candidate.modeId != DIMD_IDX &&
+#endif
+#if JVET_W0123_TIMD_FUSION
+      candidate.modeId != TIMD_IDX &&
 #endif
       maxNumSubPartitions > 2 && (curIspLfnstIdx > 0 || (candidate.modeId >= DC_IDX && ispTestedModes.numTestedModes[nextISPcandSplitType - 1] >= 2)))
     {
@@ -6433,6 +6568,9 @@ bool IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost,
     if (
 #if ENABLE_DIMD && !JVET_V0087_DIMD_NO_ISP
       origHadList.at(k).modeId == DIMD_IDX ||
+#endif
+#if JVET_W0123_TIMD_FUSION
+      origHadList.at(k).modeId == TIMD_IDX ||
 #endif
 	!modeIsInList[origHadList.at(k).modeId])
     {
@@ -6484,6 +6622,12 @@ void IntraSearch::xSortISPCandListLFNST()
         {
           continue;
         }
+#endif
+#if JVET_W0123_TIMD_FUSION
+        if( candList[i].modeId == TIMD_IDX )
+        {
+          continue;
+        }
 #endif
         const int candSubParts = ispTestedModesRef.getNumCompletedSubParts(ispMode, candList[i].modeId);
         const double candCost = ispTestedModesRef.getRDCost(ispMode, candList[i].modeId);
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index cec7da12760889e3721451dbd17fe3b50b56380e..19aeb75d35f7e95bb82c2507245036e6afbf7d34 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -1392,6 +1392,9 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
   }
 #endif
 
+#if JVET_W0123_TIMD_FUSION
+  WRITE_FLAG( pcSPS->getUseTimd() ? 1 : 0,                                          "sps_timd_enabled_flag");
+#endif
   if( pcSPS->getChromaFormatIdc() != CHROMA_400)
   {
     WRITE_FLAG( pcSPS->getUseLMChroma() ? 1 : 0,                                      "sps_cclm_enabled_flag");
@@ -2979,6 +2982,9 @@ void  HLSWriter::codeConstraintInfo  ( const ConstraintInfo* cinfo )
 #if ENABLE_DIMD
     WRITE_FLAG(cinfo->getNoDimdConstraintFlag() ? 1 : 0, "gci_no_dimd_constraint_flag");
 #endif
+#if JVET_W0123_TIMD_FUSION
+    WRITE_FLAG(cinfo->getNoTimdConstraintFlag() ? 1 : 0, "gci_no_timd_constraint_flag" );
+#endif
 
     /* inter */
     WRITE_FLAG(cinfo->getNoRprConstraintFlag() ? 1 : 0, "gci_no_ref_pic_resampling_constraint_flag");