diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp
index 1bd41905edc89da556daaa387d9d01f3b84b1857..edf20f570ce22ede7d9d7afe8e6678a3f709711e 100644
--- a/source/Lib/CommonLib/Contexts.cpp
+++ b/source/Lib/CommonLib/Contexts.cpp
@@ -1571,7 +1571,17 @@ const CtxSet ContextSetCfg::TransformSkipFlag = ContextSetCfg::addCtxSet
 	{ 1,  5 },
 	{ 2,  5 }
 });
-
+#if JVET_W0103_INTRA_MTS
+const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet // MTSIdx context-set values used when JVET_W0103_INTRA_MTS is enabled; NOTE(review): the meaning of each row is defined by addCtxSet (not visible here) - confirm
+({
+  { 45, 35, 20, 45, },
+  { 38, 35, 35, 38, },
+  { 37, 28, 28, 37, },
+  { 8,  10, 10, 8,  },
+  { 8,  10, 10, 8,  },
+  { 9,  10, 10, 8,  }
+  });
+#else
 const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet
 ({
 	{ 37, 25, 34, 40 },
@@ -1581,7 +1591,7 @@ const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet
 	{ 9,  0, 10,  8 },
 	{ 9,  1,  9,  0 }
 });
-
+#endif
 const CtxSet ContextSetCfg::ISPMode = ContextSetCfg::addCtxSet
 ({
 #if JVET_W0123_TIMD_FUSION
@@ -2579,7 +2589,15 @@ const CtxSet ContextSetCfg::TransformSkipFlag = ContextSetCfg::addCtxSet
   {  25,   9, },
   {   1,   1, },
 });
-
+#if JVET_W0103_INTRA_MTS
+const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet // MTSIdx context-set values used when JVET_W0103_INTRA_MTS is enabled (4-row variant); NOTE(review): row meaning is defined by addCtxSet (not visible here) - confirm
+({
+  { 45, 35, 20, 45, },
+  { 38, 35, 35, 38, },
+  { 37, 28, 28, 37, },
+  { 8,   9,  9, 8,  },
+  });
+#else
 const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet
 ({
   {  45,  25,  27,   0, },
@@ -2587,7 +2605,7 @@ const CtxSet ContextSetCfg::MTSIdx = ContextSetCfg::addCtxSet
   {  29,   0,  28,   0, },
   {   8,   0,   9,   0, },
 });
-
+#endif
 const CtxSet ContextSetCfg::ISPMode = ContextSetCfg::addCtxSet
 ({
 #if JVET_W0123_TIMD_FUSION
diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp
index fb4317e06b126a8385b25016ce3e79ed56104f23..1bf0b8ef990831d35f8f10115c7b540ef8313c31 100644
--- a/source/Lib/CommonLib/Rom.cpp
+++ b/source/Lib/CommonLib/Rom.cpp
@@ -3843,7 +3843,213 @@ TMatrixCoeff g_trCoreDCT2P256[256][256];
 TMatrixCoeff g_trCoreDCT8P256[256][256];
 TMatrixCoeff g_trCoreDST7P256[256][256];
 #endif
+#if JVET_W0103_INTRA_MTS
+TMatrixCoeff g_aiTr2[NUM_TRANS_TYPE][2][2]; // g_aiTr<N>[type][k][n]: N-point transform matrix per transform type; filled at runtime by initROM()
+TMatrixCoeff g_aiTr4[NUM_TRANS_TYPE][4][4];
+TMatrixCoeff g_aiTr8[NUM_TRANS_TYPE][8][8];
+TMatrixCoeff g_aiTr16[NUM_TRANS_TYPE][16][16];
+TMatrixCoeff g_aiTr32[NUM_TRANS_TYPE][32][32];
+TMatrixCoeff g_aiTr64[NUM_TRANS_TYPE][64][64];
+TMatrixCoeff g_aiTr128[NUM_TRANS_TYPE][128][128];
+TMatrixCoeff g_aiTr256[NUM_TRANS_TYPE][256][256];
 
+const uint8_t g_aucIpmToTrSet[16][36] = // [size index][mode index] -> row of g_aucTrSet; TrQuant::getTrTypes() reads it as g_aucIpmToTrSet[nSzIdx][nMdIdx], where mode index 35 is used for MIP blocks
+{
+  //0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 MIP
+  { 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4 }, //4x4
+  { 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9 }, //4x8
+  {10,10,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,14 }, //4x16
+  {15,15,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,18,18,18,18,18,18,18,18,18,18,18,19 }, //4x32
+  {20,20,21,21,21,21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23,23,23,23,24 }, //8x4
+  {25,25,26,26,26,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,29 }, //8x8
+  {30,30,31,31,31,31,31,31,31,31,31,31,31,32,32,32,32,32,32,32,32,32,32,32,33,33,33,33,33,33,33,33,33,33,33,34 }, //8x16
+  {35,35,36,36,36,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,37,37,37,38,38,38,38,38,38,38,38,38,38,38,39 }, //8x32
+  {40,40,41,41,41,41,41,41,41,41,41,41,41,42,42,42,42,42,42,42,42,42,42,42,43,43,43,43,43,43,43,43,43,43,43,44 }, //16x4
+  {45,45,46,46,46,46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47,47,47,47,48,48,48,48,48,48,48,48,48,48,48,49 }, //16x8
+  {50,50,51,51,51,51,51,51,51,51,51,51,51,52,52,52,52,52,52,52,52,52,52,52,53,53,53,53,53,53,53,53,53,53,53,54 }, //16x16
+  {55,55,56,56,56,56,56,56,56,56,56,56,56,57,57,57,57,57,57,57,57,57,57,57,58,58,58,58,58,58,58,58,58,58,58,59 }, //16x32
+  {60,60,61,61,61,61,61,61,61,61,61,61,61,62,62,62,62,62,62,62,62,62,62,62,63,63,63,63,63,63,63,63,63,63,63,64 }, //32x4
+  {65,65,66,66,66,66,66,66,66,66,66,66,66,67,67,67,67,67,67,67,67,67,67,67,68,68,68,68,68,68,68,68,68,68,68,69 }, //32x8
+  {70,70,71,71,71,71,71,71,71,71,71,71,71,72,72,72,72,72,72,72,72,72,72,72,73,73,73,73,73,73,73,73,73,73,73,74 }, //32x16
+  {75,75,76,76,76,76,76,76,76,76,76,76,76,77,77,77,77,77,77,77,77,77,77,77,78,78,78,78,78,78,78,78,78,78,78,79 }, //32x32
+};
+const int8_t g_aiIdLut[3][3] = // read as [floorLog2(width)-2][floorLog2(height)-2] in TrQuant::getTrTypes(): largest |mode - HOR_IDX| / |mode - VER_IDX| for which IDTR is substituted
+{
+  { 8, 6, 4 },{ 8, 8, 6 },{ 4, 2, -1 } // -1 can never match because the compared distance is abs(...) >= 0
+};
+const uint8_t g_aucTrIdxToTr[25][2] = // transform-pair index (the values stored in g_aucTrSet) -> two 1-D transform types; TrQuant::getTrTypes() assigns them to vertical/horizontal depending on predMode > DIA_IDX
+{
+    { DCT8, DCT8 },{ DCT8, DST7 },{ DCT8, DCT5 },{ DCT8, DST4 }, {DCT8, DST1},
+    { DST7, DCT8 },{ DST7, DST7 },{ DST7, DCT5 },{ DST7, DST4 }, {DST7, DST1},
+    { DCT5, DCT8 },{ DCT5, DST7 },{ DCT5, DCT5 },{ DCT5, DST4 }, {DCT5, DST1},
+    { DST4, DCT8 },{ DST4, DST7 },{ DST4, DCT5 },{ DST4, DST4 }, {DST4, DST1},
+    { DST1, DCT8 },{ DST1, DST7 },{ DST1, DCT5 },{ DST1, DST4 }, {DST1, DST1},
+};
+
+const uint8_t g_aucTrSet[80][4] = // 80 transform sets (row chosen via g_aucIpmToTrSet); each row lists 4 indices into g_aucTrIdxToTr, the column being mtsIdx - MTS_DST7_DST7 in TrQuant::getTrTypes()
+{
+//T0:0,  1,  2,  3,
+{ 17, 18, 23, 24},
+//T1:0,  1,  2,  3,
+{  3,  7, 18, 22},
+//T2:0,  1,  2,  3,
+{  2, 17, 18, 22},
+//T3:0,  1,  2,  3,
+{  3, 15, 17, 18},
+//T4:0,  1,  2,  3,
+{  3, 12, 18, 19},
+//T5:0,  1,  2,  3,
+{ 12, 18, 19, 23},
+//T6:0,  1,  2,  3,
+{  2, 12, 17, 18},
+//T7:0,  1,  2,  3,
+{  2, 17, 18, 22},
+//T8:0,  1,  2,  3,
+{  2, 11, 17, 18},
+//T9:0,  1,  2,  3,
+{ 12, 18, 19, 23},
+//T10:0,  1, 2,   3,
+{ 12, 13, 16, 24},
+//T11:0,  1,  2,  3,
+{  2, 11, 16, 23},
+//T12:0,  1,  2,  3,
+{  2, 13, 17, 22},
+//T13:0,  1,  2,  3,
+{  2, 11, 17, 21},
+//T14:0,  1,  2,  3,
+{ 13, 16, 19, 22},
+//T15:0,  1,  2,  3,
+{  7, 12, 13, 18},
+//T16:0,  1,  2,  3,
+{  1, 11, 12, 16},
+//T17:0,  1,  2,  3,
+{  3, 13, 17, 22},
+//T18:0,  1,  2,  3,
+{  1,  6, 12, 22},
+//T19:0,  1,  2,  3,
+{ 12, 13, 15, 16},
+//T20:0,  1,  2,  3,
+{ 18, 19, 23, 24},
+//T21:0,  1,  2,  3,
+{  2, 17, 18, 24},
+//T22:0,  1,  2,  3,
+{  3,  4, 17, 22},
+//T23:0,  1,  2,  3,
+{ 12, 18, 19, 23},
+//T24:0,  1,  2,  3,
+{ 12, 18, 19, 23},
+//T25:0,  1,  2,  3,
+{  6, 12, 18, 24},
+//T26:0,  1,  2,  3,
+{  2,  6, 12, 21},
+//T27:0,  1,  2,  3,
+{  1, 11, 17, 22},
+//T28:0,  1,  2,  3,
+{  3, 11, 16, 17},
+//T29:0,  1,  2,  3,
+{  8, 12, 19, 23},
+//T30:0,  1,  2,  3,
+{  7, 13, 16, 23},
+//T31:0,  1,  2,  3,
+{  1,  6, 11, 12},
+//T32:0,  1,  2,  3,
+{  1, 11, 17, 21},
+//T33:0,  1,  2,  3,
+{  6, 11, 17, 21},
+//T34:0,  1,  2,  3,
+{  8, 11, 14, 17},
+//T35:0,  1,  2,  3,
+{  6, 11, 12, 21},
+//T36:0,  1,  2,  3,
+{  1,  6, 11, 12},
+//T37:0,  1,  2,  3,
+{  2,  6, 11, 12},
+//T38:0,  1,  2,  3,
+{  1,  6, 11, 21},
+//T39:0,  1,  2,  3,
+{  7, 11, 12, 16},
+//T40:0,  1,  2,  3,
+{  8, 12, 19, 24},
+//T41:0,  1,  2,  3,
+{  1, 13, 18, 22},
+//T42:0,  1,  2,  3,
+{  2,  6, 17, 21},
+//T43:0,  1,  2,  3,
+{ 11, 12, 16, 19},
+//T44:0,  1,  2,  3,
+{  8, 12, 17, 24},
+//T45:0,  1,  2,  3,
+{  6, 12, 19, 21},
+//T46:0,  1,  2,  3,
+{  6, 12, 13, 21},
+//T47:0,  1,  2,  3,
+{  2, 16, 17, 21},
+//T48:0,  1,  2,  3,
+{  6, 17, 19, 23},
+//T49:0,  1,  2,  3,
+{  6, 12, 14, 17},
+//T50:0,  1,  2,  3,
+{  6,  7, 11, 21},
+//T51:0,  1,  2,  3,
+{  1, 11, 12, 16},
+//T52:0,  1,  2,  3,
+{  1,  6, 11, 12},
+//T53:0,  1,  2,  3,
+{  6, 11, 12, 21},
+//T54:0,  1,  2,  3,
+{  7,  8,  9, 11},
+//T55:0,  1,  2,  3,
+{  6,  7, 11, 12},
+//T56:0,  1,  2,  3,
+{  6,  7, 11, 12},
+//T57:0,  1,  2,  3,
+{  1, 11, 12, 16},
+//T58:0,  1,  2,  3,
+{  6, 11, 17, 21},
+//T59:0,  1,  2,  3,
+{  6,  7, 11, 12},
+//T60:0,  1,  2,  3,
+{ 12, 14, 18, 21},
+//T61:0,  1,  2,  3,
+{  1, 11, 16, 22},
+//T62:0,  1,  2,  3,
+{  1, 11, 16, 22},
+//T63:0,  1,  2,  3,
+{  7, 13, 15, 16},
+//T64:0,  1,  2,  3,
+{  1,  8, 12, 19},
+//T65:0,  1,  2,  3,
+{  6,  7,  9, 12},
+//T66:0,  1,  2,  3,
+{  2,  6, 12, 13},
+//T67:0,  1,  2,  3,
+{  1, 12, 16, 21},
+//T68:0,  1,  2,  3,
+{  7, 11, 16, 19},
+//T69:0,  1,  2,  3,
+{  7,  8, 11, 12},
+//T70:0,  1,  2,  3,
+{  6,  7, 11, 12},
+//T71:0,  1,  2,  3,
+{  6,  7, 11, 12},
+//T72:0,  1,  2,  3,
+{  1,  6, 11, 12},
+//T73:0,  1,  2,  3,
+{  6,  7, 11, 16},
+//T74:0,  1,  2,  3,
+{  6,  7, 11, 12},
+//T75:0,  1,  2,  3,
+{  6,  7, 11, 12},
+//T76:0,  1,  2,  3,
+{  6, 11, 12, 21},
+//T77:0,  1,  2,  3,
+{  1,  6, 11, 12},
+//T78:0,  1,  2,  3,
+{  6,  7, 11, 12},
+//T79:0,  1,  2,  3,
+{  6,  7, 11, 12},
+};
+#endif
 // initialize ROM variables
 void initROM()
 {
@@ -4041,7 +4247,7 @@ void initROM()
     g_paletteQuant[qp] = (int)(threshQP*0.16 + 0.5);
   }
 
-#if SIGN_PREDICTION
+#if SIGN_PREDICTION && !JVET_W0103_INTRA_MTS
   memset(&g_resiBorderTemplate[0][0][0], 0, sizeof(g_resiBorderTemplate));
   const int8_t *p_data = g_initRomSignPred;
   for( int log2Width = 0; log2Width < 6; ++log2Width)
@@ -4094,6 +4300,72 @@ void initROM()
     }
   }
 #endif
+#if JVET_W0103_INTRA_MTS
+#if LMS_LINEAR_MODEL || TRANSFORM_SIMD_OPT || TU_256 
+  c = 2; // NOTE(review): presumably c and PI are already declared earlier in initROM() under these macros - confirm
+#else
+  int c = 2;
+  const double PI = 3.14159265358979323846;
+#endif
+  // Build the intra-MTS transform matrices g_aiTr2 .. g_aiTr256; c is the transform size and doubles after every pass.
+  for (int i = 0; i < 8; i++)
+  {
+    const double s = sqrt((double)c) * (64 << COM16_C806_TRANS_PREC); // scale applied to every basis value before rounding
+    TMatrixCoeff *iT = NULL; // flat view of the size-c table: iT[type*c*c + k*c + n] addresses g_aiTr<c>[type][k][n]
+
+    switch (i)
+    {
+    case 0: iT = g_aiTr2[0][0]; break;
+    case 1: iT = g_aiTr4[0][0]; break;
+    case 2: iT = g_aiTr8[0][0]; break;
+    case 3: iT = g_aiTr16[0][0]; break;
+    case 4: iT = g_aiTr32[0][0]; break;
+    case 5: iT = g_aiTr64[0][0]; break;
+    case 6: iT = g_aiTr128[0][0]; break;
+    case 7: iT = g_aiTr256[0][0]; break;
+    case 8: exit(0); break; // unreachable: the loop only produces i = 0..7
+    }
+
+    for (int k = 0; k < c; k++)
+    {
+      for (int n = 0; n < c; n++)
+      {
+        double w0, w1, v;
+
+        // DCT-II (each entry below is s * v rounded to nearest, halves away from zero, via the +/-0.5 before the truncating cast)
+        w0 = k == 0 ? sqrt(0.5) : 1;
+        v = cos(PI*(n + 0.5)*k / c) * w0 * sqrt(2.0 / c);
+        iT[DCT2*c*c + k * c + n] = (short)(s * v + (v > 0 ? 0.5 : -0.5));
+
+        // DCT-V
+        w0 = (k == 0) ? sqrt(0.5) : 1.0;
+        w1 = (n == 0) ? sqrt(0.5) : 1.0;
+        v = cos(PI*n*k / (c - 0.5)) * w0 * w1 * sqrt(2.0 / (c - 0.5));
+        iT[DCT5*c*c + k * c + n] = (short)(s * v + (v > 0 ? 0.5 : -0.5));
+
+        // DCT-VIII
+        v = cos(PI*(k + 0.5)*(n + 0.5) / (c + 0.5)) * sqrt(2.0 / (c + 0.5));
+        iT[DCT8*c*c + k * c + n] = (short)(s * v + (v > 0 ? 0.5 : -0.5));
+
+        // DST-I
+        v = sin(PI*(n + 1)*(k + 1) / (c + 1)) * sqrt(2.0 / (c + 1));
+        iT[DST1*c*c + k * c + n] = (short)(s * v + (v > 0 ? 0.5 : -0.5));
+
+        // DST-VII
+        v = sin(PI*(k + 0.5)*(n + 1) / (c + 0.5)) * sqrt(2.0 / (c + 0.5));
+        iT[DST7*c*c + k * c + n] = (short)(s * v + (v > 0 ? 0.5 : -0.5));
+
+        // DST-IV
+        v = sin(PI * (k + 0.5) * (n + 0.5) / c) * sqrt(2.0 / c);
+        iT[DST4 * c * c + k * c + n] = (short)(s * v + (v > 0 ? 0.5 : -0.5));
+
+        // ID (identity): rounded scale s on the diagonal, zero elsewhere
+        iT[IDTR * c * c + k * c + n] = (k == n) ? (short)(s + (s > 0 ? 0.5 : -0.5)) : 0;
+      }
+    }
+    c <<= 1; // next transform size
+  }
+#endif
 }
 
 void destroyROM()
diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h
index b41f205ba94e38bffa8a28c550716edd2b20a4dd..b6f4d1faa7693a4397cb75fe0aa7948941d45e33 100644
--- a/source/Lib/CommonLib/Rom.h
+++ b/source/Lib/CommonLib/Rom.h
@@ -166,7 +166,21 @@ extern TMatrixCoeff g_trCoreDCT2P256[256][256];
 extern TMatrixCoeff g_trCoreDCT8P256[256][256];
 extern TMatrixCoeff g_trCoreDST7P256[256][256];
 #endif
-
+#if JVET_W0103_INTRA_MTS
+extern TMatrixCoeff g_aiTr2[NUM_TRANS_TYPE][2][2]; // g_aiTr<N>[type][k][n]: N-point transform matrices, generated in initROM()
+extern TMatrixCoeff g_aiTr4[NUM_TRANS_TYPE][4][4];
+extern TMatrixCoeff g_aiTr8[NUM_TRANS_TYPE][8][8];
+extern TMatrixCoeff g_aiTr16[NUM_TRANS_TYPE][16][16];
+extern TMatrixCoeff g_aiTr32[NUM_TRANS_TYPE][32][32];
+extern TMatrixCoeff g_aiTr64[NUM_TRANS_TYPE][64][64];
+extern TMatrixCoeff g_aiTr128[NUM_TRANS_TYPE][128][128];
+extern TMatrixCoeff g_aiTr256[NUM_TRANS_TYPE][256][256];
+// Lookup tables consulted by TrQuant::getTrTypes() to pick the intra-MTS transform pair.
+extern const uint8_t g_aucIpmToTrSet[16][36];
+extern const uint8_t g_aucTrSet[80][4];
+extern const int8_t  g_aiIdLut[3][3];
+extern const uint8_t g_aucTrIdxToTr[25][2];
+#endif
 #if EXTENDED_LFNST
 extern const     int8_t   g_lfnst8x8[ 35 ][ 3 ][ 64 ][ 64 ];
 extern const     int8_t   g_lfnst4x4[ 35 ][ 3 ][ 16 ][ 16 ];
diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp
index 26fbda3252bb36edb6394600b284812be9f215d9..4b7586705aa37c25ec7c5ae35142ce09271e4140 100644
--- a/source/Lib/CommonLib/TrQuant.cpp
+++ b/source/Lib/CommonLib/TrQuant.cpp
@@ -297,6 +297,12 @@ void TrQuant::init( const Quant* otherQuant,
     { fastForwardDCT2_B2, fastForwardDCT2_B4, fastForwardDCT2_B8, fastForwardDCT2_B16, fastForwardDCT2_B32, fastForwardDCT2_B64, fastForwardDCT2_B128, fastForwardDCT2_B256 },
     { nullptr,            fastForwardDCT8_B4, fastForwardDCT8_B8, fastForwardDCT8_B16, fastForwardDCT8_B32, fastForwardDCT8_B64, fastForwardDCT8_B128, fastForwardDCT8_B256 },
     { nullptr,            fastForwardDST7_B4, fastForwardDST7_B8, fastForwardDST7_B16, fastForwardDST7_B32, fastForwardDST7_B64, fastForwardDST7_B128, fastForwardDST7_B256 },
+#if JVET_W0103_INTRA_MTS
+    { nullptr,            fastForwardDCT5_B4, fastForwardDCT5_B8, fastForwardDCT5_B16, fastForwardDCT5_B32, fastForwardDCT5_B64, fastForwardDCT5_B128, fastForwardDCT5_B256 },
+    { nullptr,            fastForwardDST4_B4, fastForwardDST4_B8, fastForwardDST4_B16, fastForwardDST4_B32, fastForwardDST4_B64, fastForwardDST4_B128, fastForwardDST4_B256 },
+    { nullptr,            fastForwardDST1_B4, fastForwardDST1_B8, fastForwardDST1_B16, fastForwardDST1_B32, fastForwardDST1_B64, fastForwardDST1_B128, fastForwardDST1_B256 },
+    { nullptr,            fastForwardIDTR_B4, fastForwardIDTR_B8, fastForwardIDTR_B16, fastForwardIDTR_B32, fastForwardIDTR_B64, fastForwardIDTR_B128, fastForwardIDTR_B256 },
+#endif
   } };
 
   fastInvTrans =
@@ -304,6 +310,12 @@ void TrQuant::init( const Quant* otherQuant,
     { fastInverseDCT2_B2, fastInverseDCT2_B4, fastInverseDCT2_B8, fastInverseDCT2_B16, fastInverseDCT2_B32, fastInverseDCT2_B64, fastInverseDCT2_B128, fastInverseDCT2_B256 },
     { nullptr,            fastInverseDCT8_B4, fastInverseDCT8_B8, fastInverseDCT8_B16, fastInverseDCT8_B32, fastInverseDCT8_B64, fastInverseDCT8_B128, fastInverseDCT8_B256 },
     { nullptr,            fastInverseDST7_B4, fastInverseDST7_B8, fastInverseDST7_B16, fastInverseDST7_B32, fastInverseDST7_B64, fastInverseDST7_B128, fastInverseDST7_B256 },
+#if JVET_W0103_INTRA_MTS
+    { nullptr,            fastInverseDCT5_B4, fastInverseDCT5_B8, fastInverseDCT5_B16, fastInverseDCT5_B32, fastInverseDCT5_B64, fastInverseDCT5_B128, fastInverseDCT5_B256 },
+    { nullptr,            fastInverseDST4_B4, fastInverseDST4_B8, fastInverseDST4_B16, fastInverseDST4_B32, fastInverseDST4_B64, fastInverseDST4_B128, fastInverseDST4_B256 },
+    { nullptr,            fastInverseDST1_B4, fastInverseDST1_B8, fastInverseDST1_B16, fastInverseDST1_B32, fastInverseDST1_B64, fastInverseDST1_B128, fastInverseDST1_B256 },
+    { nullptr,            fastInverseIDTR_B4, fastInverseIDTR_B8, fastInverseIDTR_B16, fastInverseIDTR_B32, fastInverseIDTR_B64, fastInverseIDTR_B128, fastInverseIDTR_B256 },
+#endif
   } };
 #else
   fastFwdTrans =
@@ -311,6 +323,12 @@ void TrQuant::init( const Quant* otherQuant,
     { fastForwardDCT2_B2, fastForwardDCT2_B4, fastForwardDCT2_B8, fastForwardDCT2_B16, fastForwardDCT2_B32, fastForwardDCT2_B64 },
     { nullptr,            fastForwardDCT8_B4, fastForwardDCT8_B8, fastForwardDCT8_B16, fastForwardDCT8_B32, nullptr },
     { nullptr,            fastForwardDST7_B4, fastForwardDST7_B8, fastForwardDST7_B16, fastForwardDST7_B32, nullptr },
+#if JVET_W0103_INTRA_MTS
+    { nullptr,            fastForwardDCT5_B4, fastForwardDCT5_B8, fastForwardDCT5_B16, fastForwardDCT5_B32, nullptr },
+    { nullptr,            fastForwardDST4_B4, fastForwardDST4_B8, fastForwardDST4_B16, fastForwardDST4_B32, nullptr },
+    { nullptr,            fastForwardDST1_B4, fastForwardDST1_B8, fastForwardDST1_B16, fastForwardDST1_B32, nullptr },
+    { nullptr,            fastForwardIDTR_B4, fastForwardIDTR_B8, fastForwardIDTR_B16, fastForwardIDTR_B32, nullptr },
+#endif
   } };
 
   fastInvTrans =
@@ -318,6 +336,12 @@ void TrQuant::init( const Quant* otherQuant,
     { fastInverseDCT2_B2, fastInverseDCT2_B4, fastInverseDCT2_B8, fastInverseDCT2_B16, fastInverseDCT2_B32, fastInverseDCT2_B64 },
     { nullptr,            fastInverseDCT8_B4, fastInverseDCT8_B8, fastInverseDCT8_B16, fastInverseDCT8_B32, nullptr },
     { nullptr,            fastInverseDST7_B4, fastInverseDST7_B8, fastInverseDST7_B16, fastInverseDST7_B32, nullptr },
+#if JVET_W0103_INTRA_MTS
+    { nullptr,            fastInverseDCT5_B4, fastInverseDCT5_B8, fastInverseDCT5_B16, fastInverseDCT5_B32, nullptr },
+    { nullptr,            fastInverseDST4_B4, fastInverseDST4_B8, fastInverseDST4_B16, fastInverseDST4_B32, nullptr },
+    { nullptr,            fastInverseDST1_B4, fastInverseDST1_B8, fastInverseDST1_B16, fastInverseDST1_B32, nullptr },
+    { nullptr,            fastInverseIDTR_B4, fastInverseIDTR_B8, fastInverseIDTR_B16, fastInverseIDTR_B32, nullptr },
+#endif
   } };
 #endif
 
@@ -1323,6 +1347,64 @@ void TrQuant::getTrTypes(const TransformUnit tu, const ComponentID compID, int &
 
   if (isExplicitMTS)
   {
+#if JVET_W0103_INTRA_MTS
+    if (tu.mtsIdx[compID] > MTS_SKIP && CU::isIntra(*tu.cu)) // intra explicit MTS: the transform pair comes from the size/mode dependent lookup tables
+    {
+      CHECK(compID != COMPONENT_Y, " MTS activated for chroma");
+      uint32_t width = tu.blocks[compID].width;
+      uint32_t height = tu.blocks[compID].height;
+      int TrIdx = (tu.mtsIdx[compID] - MTS_DST7_DST7); // column index into g_aucTrSet
+      CHECK(width < 4 || height < 4, "width < 4 || height < 4 for MTS");
+      uint8_t nSzIdxW = std::min(3, (floorLog2(width) - 2)); // 4 -> 0, 8 -> 1, 16 -> 2, 32 and larger -> 3
+      uint8_t nSzIdxH = std::min(3, (floorLog2(height) - 2));
+      const CompArea& area = tu.blocks[compID];
+      int predMode = PU::getFinalIntraMode(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID));
+#if JVET_W0123_TIMD_FUSION
+      if (tu.cu->timd && compID == COMPONENT_Y)
+      {
+        predMode = MAP131TO67(predMode);
+      }
+#endif
+      int ucMode;
+      int nMdIdx;
+      bool isTrTransposed = false;
+      if (tu.cu->mipFlag) //MIP is treated as planar.
+      {
+        ucMode = 0;
+        nMdIdx = 35; // dedicated MIP column of g_aucIpmToTrSet
+        isTrTransposed = (tu.cs->getPU(area.pos(), toChannelType(compID)))->mipTransposedFlag;
+      }
+      else
+      {
+        ucMode = predMode; //"ucMode" is the signaled Mode.
+        predMode = PU::getWideAngle(tu, (uint32_t)predMode, compID);
+        CHECK(predMode < -(NUM_EXT_LUMA_MODE >> 1) || predMode >= NUM_LUMA_MODE + (NUM_EXT_LUMA_MODE >> 1), "luma mode out of range"); // must be '||': no value can violate both bounds at once
+        predMode = (predMode < 0) ? 2 : (predMode >= NUM_LUMA_MODE) ? 66 : predMode; // wide-angle modes outside [0, NUM_LUMA_MODE) are clamped for the table lookup
+        nMdIdx = predMode > DIA_IDX ? (NUM_LUMA_MODE + 1 - predMode) : predMode; // modes above DIA_IDX are mirrored and use the transposed size index
+        isTrTransposed = predMode > DIA_IDX;
+      }
+      uint8_t nSzIdx = isTrTransposed ? (nSzIdxH * 4 + nSzIdxW) : (nSzIdxW * 4 + nSzIdxH);
+      CHECK(nSzIdx >= 16, "nSzIdx >= 16");
+      CHECK(nMdIdx >= 36, "nMdIdx >= 36");
+      uint8_t nTrSet = g_aucIpmToTrSet[nSzIdx][nMdIdx];
+      CHECK(nTrSet >= 80, "nTrSet >= 80");
+      trTypeVer = g_aucTrIdxToTr[g_aucTrSet[nTrSet][TrIdx]][predMode > DIA_IDX ? 1 : 0]; // the pair is swapped between vertical and horizontal for modes above DIA_IDX
+      trTypeHor = g_aucTrIdxToTr[g_aucTrSet[nTrSet][TrIdx]][predMode > DIA_IDX ? 0 : 1];
+      predMode = ucMode; //to Check IDTR criteria, signaled mode should be used to check the difference
+      if (TrIdx == 3 && width <= 16 && height <= 16) // only the last candidate of blocks up to 16x16 may switch to the identity transform
+      {
+        if (abs(predMode - HOR_IDX) <= g_aiIdLut[floorLog2(width) - 2][floorLog2(height) - 2])
+        {
+          trTypeVer = IDTR;
+        }
+        if (abs(predMode - VER_IDX) <= g_aiIdLut[floorLog2(width) - 2][floorLog2(height) - 2])
+        {
+          trTypeHor = IDTR;
+        }
+      }
+    }
+    else
+#endif
     if (tu.mtsIdx[compID] > MTS_SKIP)
     {
       int indHor = (tu.mtsIdx[compID] - MTS_DST7_DST7) & 1;
@@ -1621,7 +1703,35 @@ void TrQuant::transformNxN( TransformUnit& tu, const ComponentID& compID, const
     itC++;
   }
 }
+#if JVET_W0103_INTRA_MTS
+// MTS candidate helper: requires tu.cu->mtsFlag, runs xT() on the luma residual into m_mtsCoeffs[tu.mtsIdx[0]] (no quantisation here) and returns the sum of absolute values of the resulting coefficients.
+uint64_t TrQuant::transformNxN(TransformUnit& tu)
+{
+  CHECK(!tu.cu->mtsFlag, "mtsFlag should be on for selection");
+  CodingStructure &cs = *tu.cs;
+  const CompArea &rect = tu.blocks[COMPONENT_Y];
+  const uint32_t uiWidth = rect.width;
+  const uint32_t uiHeight = rect.height;
+
+  const CPelBuf resiBuf = cs.getResiBuf(rect);
+  CoeffBuf tempCoeff(m_mtsCoeffs[tu.mtsIdx[0]], rect); // coefficient buffer selected by the luma MTS index
+  xT(tu, COMPONENT_Y, resiBuf, tempCoeff, uiWidth, uiHeight);
+  // Accumulate |coeff| over the whole uiWidth x uiHeight block, honouring the buffer stride.
+ 
+  const TCoeff *dstCoeffBuf = tempCoeff.buf;
+  const int  dstCoeffStride = tempCoeff.stride;
+  uint64_t coeffAbsSum = 0;
+
+  for (int y = 0; y < uiHeight; y++)
+  {
+    for (int x = 0; x < uiWidth; x++)
+    {
+      coeffAbsSum += abs(dstCoeffBuf[(y * dstCoeffStride) + x]);
+    }
+  }
+  return coeffAbsSum;
+}
+#endif
 void TrQuant::transformNxN( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, TCoeff& uiAbsSum, const Ctx& ctx, const bool loadTr )
 {
         CodingStructure &cs = *tu.cs;
@@ -1803,7 +1913,11 @@ void TrQuant::predCoeffSigns(TransformUnit &tu, const ComponentID compID, const
   PelBuf     bufResiTemplateReshape(predResiTemplateReshape, uiWidth + uiHeight - 1, 1);
   int trHor, trVer;
   getTrTypes(tu, residCompID, trHor, trVer);
+#if JVET_W0103_INTRA_MTS
+  int actualTrIdx = trHor * NUM_TRANS_TYPE + trVer;
+#else
   int actualTrIdx = trHor * 3 + trVer;
+#endif
   int log2Width = floorLog2(uiWidth);
   int log2Height = floorLog2(uiHeight);
   if(!g_resiBorderTemplate[log2Width-2][log2Height-2][actualTrIdx])
diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h
index 8556825e4e00241ab087347b8a5609876dfea999..77d99074a3565ce3cce5c314fb2b589fe8dd38fd 100644
--- a/source/Lib/CommonLib/TrQuant.h
+++ b/source/Lib/CommonLib/TrQuant.h
@@ -157,6 +157,9 @@ public:
   void invTransformNxN  (TransformUnit &tu, const ComponentID &compID, PelBuf &pResi, const QpParam &cQPs);
   void transformNxN     ( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, std::vector<TrMode>* trModes, const int maxCand );
   void transformNxN     ( TransformUnit& tu, const ComponentID& compID, const QpParam& cQP, TCoeff& uiAbsSum, const Ctx& ctx, const bool loadTr = false );
+#if JVET_W0103_INTRA_MTS
+  uint64_t transformNxN(TransformUnit& tu); // luma-only transform for MTS candidate selection; returns the sum of absolute transform coefficients
+#endif
 
   void transformSkipQuantOneSample(TransformUnit &tu, const ComponentID &compID, const TCoeff &resiDiff, TCoeff &coeff,    const uint32_t &uiPos, const QpParam &cQP, const bool bUseHalfRoundingPoint);
   void invTrSkipDeQuantOneSample  (TransformUnit &tu, const ComponentID &compID, const TCoeff &pcCoeff,  Pel &reconSample, const uint32_t &uiPos, const QpParam &cQP);
diff --git a/source/Lib/CommonLib/TrQuant_EMT.cpp b/source/Lib/CommonLib/TrQuant_EMT.cpp
index e0e8ef8ccdb917ca379a3dc5a217694a0c2cbd6a..6912cb56b000dc05c8137b546b633b1fe45cea87 100644
--- a/source/Lib/CommonLib/TrQuant_EMT.cpp
+++ b/source/Lib/CommonLib/TrQuant_EMT.cpp
@@ -1973,4 +1973,289 @@ void fastInverseDCT8_B256( const TCoeff *src, TCoeff *dst, int shift, int line,
 {
   _fastInverseMM< 256 >( src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_trCoreDCT8P256[0] );
 }
+#endif
+#if JVET_W0103_INTRA_MTS
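+// DCT5 (DCT-V): forward/inverse wrappers for sizes 4..256 that call _fastForwardMM/_fastInverseMM with the g_aiTr<N>[DCT5] matrices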
+void fastForwardDCT5_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr4[DCT5][0]);
+}
+
+void fastInverseDCT5_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr4[DCT5][0]);
+}
+
+void fastForwardDCT5_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr8[DCT5][0]);
+}
+
+void fastInverseDCT5_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr8[DCT5][0]);
+}
+
+void fastForwardDCT5_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr16[DCT5][0]);
+}
+
+void fastInverseDCT5_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr16[DCT5][0]);
+}
+
+void fastForwardDCT5_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr32[DCT5][0]);
+}
+
+void fastInverseDCT5_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr32[DCT5][0]);
+}
+
+void fastForwardDCT5_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr64[DCT5][0]);
+}
+
+void fastInverseDCT5_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr64[DCT5][0]);
+}
+
+void fastForwardDCT5_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr128[DCT5][0]);
+}
+
+void fastInverseDCT5_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr128[DCT5][0]);
+}
+
+void fastForwardDCT5_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr256[DCT5][0]);
+}
+
+void fastInverseDCT5_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr256[DCT5][0]);
+}
+
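+// DST4: forward/inverse wrappers for sizes 4..256 that call _fastForwardMM/_fastInverseMM with the g_aiTr<N>[DST4] matrices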
+void fastForwardDST4_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr4[DST4][0]);
+}
+
+void fastInverseDST4_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr4[DST4][0]);
+}
+
+void fastForwardDST4_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr8[DST4][0]);
+}
+
+void fastInverseDST4_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr8[DST4][0]);
+}
+
+void fastForwardDST4_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr16[DST4][0]);
+}
+
+void fastInverseDST4_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr16[DST4][0]);
+}
+
+void fastForwardDST4_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr32[DST4][0]);
+}
+
+void fastInverseDST4_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr32[DST4][0]);
+}
+
+void fastForwardDST4_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr64[DST4][0]);
+}
+
+void fastInverseDST4_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr64[DST4][0]);
+}
+
+void fastForwardDST4_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr128[DST4][0]);
+}
+
+void fastInverseDST4_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr128[DST4][0]);
+}
+
+void fastForwardDST4_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr256[DST4][0]);
+}
+
+void fastInverseDST4_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr256[DST4][0]);
+}
+
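+// DST1 (DST-I): forward/inverse wrappers for sizes 4..256 that call _fastForwardMM/_fastInverseMM with the g_aiTr<N>[DST1] matrices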
+void fastForwardDST1_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr4[DST1][0]);
+}
+
+void fastInverseDST1_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr4[DST1][0]);
+}
+
+void fastForwardDST1_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr8[DST1][0]);
+}
+
+void fastInverseDST1_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{
+  _fastInverseMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr8[DST1][0]);
+}
+
+void fastForwardDST1_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{
+  _fastForwardMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr16[DST1][0]);
+}
+
+void fastInverseDST1_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{ // Inverse DST1, size 16: hands all eight arguments (including outputMinimum/outputMaximum) to _fastInverseMM<16> and appends g_aiTr16[DST1][0] as the final argument.
+  _fastInverseMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr16[DST1][0]);
+}
+
+void fastForwardDST1_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{ // Forward DST1, size 32: hands all six arguments to _fastForwardMM<32> and appends g_aiTr32[DST1][0] as the final argument.
+  _fastForwardMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr32[DST1][0]);
+}
+
+void fastInverseDST1_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{ // Inverse DST1, size 32: hands all eight arguments (including outputMinimum/outputMaximum) to _fastInverseMM<32> and appends g_aiTr32[DST1][0] as the final argument.
+  _fastInverseMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr32[DST1][0]);
+}
+
+void fastForwardDST1_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{ // Forward DST1, size 64: hands all six arguments to _fastForwardMM<64> and appends g_aiTr64[DST1][0] as the final argument.
+  _fastForwardMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr64[DST1][0]);
+}
+
+void fastInverseDST1_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{ // Inverse DST1, size 64: hands all eight arguments (including outputMinimum/outputMaximum) to _fastInverseMM<64> and appends g_aiTr64[DST1][0] as the final argument.
+  _fastInverseMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr64[DST1][0]);
+}
+
+void fastForwardDST1_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{ // Forward DST1, size 128: hands all six arguments to _fastForwardMM<128> and appends g_aiTr128[DST1][0] as the final argument. NOTE(review): the prototype in TrQuant_EMT.h sits under #if TU_256 but no TU_256 guard is visible around this definition - confirm.
+  _fastForwardMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr128[DST1][0]);
+}
+
+void fastInverseDST1_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{ // Inverse DST1, size 128: hands all eight arguments (including outputMinimum/outputMaximum) to _fastInverseMM<128> and appends g_aiTr128[DST1][0] as the final argument. NOTE(review): the prototype in TrQuant_EMT.h sits under #if TU_256 but no TU_256 guard is visible around this definition - confirm.
+  _fastInverseMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr128[DST1][0]);
+}
+
+void fastForwardDST1_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{ // Forward DST1, size 256: hands all six arguments to _fastForwardMM<256> and appends g_aiTr256[DST1][0] as the final argument. NOTE(review): the prototype in TrQuant_EMT.h sits under #if TU_256 but no TU_256 guard is visible around this definition - confirm.
+  _fastForwardMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr256[DST1][0]);
+}
+
+void fastInverseDST1_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{ // Inverse DST1, size 256: hands all eight arguments (including outputMinimum/outputMaximum) to _fastInverseMM<256> and appends g_aiTr256[DST1][0] as the final argument. NOTE(review): the prototype in TrQuant_EMT.h sits under #if TU_256 but no TU_256 guard is visible around this definition - confirm.
+  _fastInverseMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr256[DST1][0]);
+}
+
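+// IDTR wrappers: one forward/inverse pair per block size, each delegating to _fastForwardMM / _fastInverseMM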
+void fastForwardIDTR_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{ // Forward IDTR, size 4: hands all six arguments to _fastForwardMM<4> and appends g_aiTr4[IDTR][0] as the final argument.
+  _fastForwardMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr4[IDTR][0]);
+}
+
+void fastInverseIDTR_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{ // Inverse IDTR, size 4: hands all eight arguments (including outputMinimum/outputMaximum) to _fastInverseMM<4> and appends g_aiTr4[IDTR][0] as the final argument.
+  _fastInverseMM< 4 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr4[IDTR][0]);
+}
+
+void fastForwardIDTR_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{ // Forward IDTR, size 8: hands all six arguments to _fastForwardMM<8> and appends g_aiTr8[IDTR][0] as the final argument.
+  _fastForwardMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr8[IDTR][0]);
+}
+
+void fastInverseIDTR_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{ // Inverse IDTR, size 8: hands all eight arguments (including outputMinimum/outputMaximum) to _fastInverseMM<8> and appends g_aiTr8[IDTR][0] as the final argument.
+  _fastInverseMM< 8 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr8[IDTR][0]);
+}
+
+void fastForwardIDTR_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{ // Forward IDTR, size 16: hands all six arguments to _fastForwardMM<16> and appends g_aiTr16[IDTR][0] as the final argument.
+  _fastForwardMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr16[IDTR][0]);
+}
+
+void fastInverseIDTR_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{ // Inverse IDTR, size 16: hands all eight arguments (including outputMinimum/outputMaximum) to _fastInverseMM<16> and appends g_aiTr16[IDTR][0] as the final argument.
+  _fastInverseMM< 16 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr16[IDTR][0]);
+}
+
+void fastForwardIDTR_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{ // Forward IDTR, size 32: hands all six arguments to _fastForwardMM<32> and appends g_aiTr32[IDTR][0] as the final argument.
+  _fastForwardMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr32[IDTR][0]);
+}
+
+void fastInverseIDTR_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{ // Inverse IDTR, size 32: hands all eight arguments (including outputMinimum/outputMaximum) to _fastInverseMM<32> and appends g_aiTr32[IDTR][0] as the final argument.
+  _fastInverseMM< 32 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr32[IDTR][0]);
+}
+
+void fastForwardIDTR_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{ // Forward IDTR, size 64: hands all six arguments to _fastForwardMM<64> and appends g_aiTr64[IDTR][0] as the final argument.
+  _fastForwardMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr64[IDTR][0]);
+}
+
+void fastInverseIDTR_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{ // Inverse IDTR, size 64: hands all eight arguments (including outputMinimum/outputMaximum) to _fastInverseMM<64> and appends g_aiTr64[IDTR][0] as the final argument.
+  _fastInverseMM< 64 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr64[IDTR][0]);
+}
+
+void fastForwardIDTR_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{ // Forward IDTR, size 128: hands all six arguments to _fastForwardMM<128> and appends g_aiTr128[IDTR][0] as the final argument. NOTE(review): the prototype in TrQuant_EMT.h sits under #if TU_256 but no TU_256 guard is visible around this definition - confirm.
+  _fastForwardMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr128[IDTR][0]);
+}
+
+void fastInverseIDTR_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{ // Inverse IDTR, size 128: hands all eight arguments (including outputMinimum/outputMaximum) to _fastInverseMM<128> and appends g_aiTr128[IDTR][0] as the final argument. NOTE(review): the prototype in TrQuant_EMT.h sits under #if TU_256 but no TU_256 guard is visible around this definition - confirm.
+  _fastInverseMM< 128 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr128[IDTR][0]);
+}
+
+void fastForwardIDTR_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
+{ // Forward IDTR, size 256: hands all six arguments to _fastForwardMM<256> and appends g_aiTr256[IDTR][0] as the final argument. NOTE(review): the prototype in TrQuant_EMT.h sits under #if TU_256 but no TU_256 guard is visible around this definition - confirm.
+  _fastForwardMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, g_aiTr256[IDTR][0]);
+}
+
+void fastInverseIDTR_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
+{ // Inverse IDTR, size 256: hands all eight arguments (including outputMinimum/outputMaximum) to _fastInverseMM<256> and appends g_aiTr256[IDTR][0] as the final argument. NOTE(review): the prototype in TrQuant_EMT.h sits under #if TU_256 but no TU_256 guard is visible around this definition - confirm.
+  _fastInverseMM< 256 >(src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, g_aiTr256[IDTR][0]);
+}
 #endif
\ No newline at end of file
diff --git a/source/Lib/CommonLib/TrQuant_EMT.h b/source/Lib/CommonLib/TrQuant_EMT.h
index 03d7491dce4ee3196c0cf5e17e339eb5cb1e6af1..2379795dacd6027157d2c3f085a199854d67b670 100644
--- a/source/Lib/CommonLib/TrQuant_EMT.h
+++ b/source/Lib/CommonLib/TrQuant_EMT.h
@@ -97,5 +97,78 @@ void fastForwardDCT8_B256( const TCoeff *src, TCoeff *dst, int shift, int line,
 void fastInverseDCT8_B256( const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum );
 #endif
 
+#if JVET_W0103_INTRA_MTS
+// DST-IV transforms: forward/inverse prototypes for sizes 4 to 64; the 128 and 256 variants are declared only when TU_256 is set. Inverse variants additionally take outputMinimum/outputMaximum.
+void fastForwardDST4_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST4_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDST4_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST4_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDST4_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST4_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDST4_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST4_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDST4_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST4_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+#if TU_256 // the 128 and 256 sizes below are declared only when TU_256 is non-zero
+void fastForwardDST4_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST4_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDST4_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST4_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+#endif // TU_256
+
+// DST-I transforms: forward/inverse prototypes for sizes 4 to 64; the 128 and 256 variants are declared only when TU_256 is set. Inverse variants additionally take outputMinimum/outputMaximum.
+void fastForwardDST1_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST1_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDST1_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST1_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDST1_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST1_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDST1_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST1_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDST1_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST1_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+#if TU_256 // NOTE(review): the matching DST1 B128/B256 definitions in this patch show no TU_256 guard around them - confirm the two sides agree
+void fastForwardDST1_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST1_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDST1_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDST1_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+#endif // TU_256
+
+// DCT-V transforms: forward/inverse prototypes for sizes 4 to 64; the 128 and 256 variants are declared only when TU_256 is set. Inverse variants additionally take outputMinimum/outputMaximum.
+void fastForwardDCT5_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDCT5_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDCT5_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDCT5_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDCT5_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDCT5_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDCT5_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDCT5_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDCT5_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDCT5_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+#if TU_256 // the 128 and 256 sizes below are declared only when TU_256 is non-zero
+void fastForwardDCT5_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDCT5_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardDCT5_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseDCT5_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+#endif // TU_256
+
+// IDTR transforms: forward/inverse prototypes for sizes 4 to 64; the 128 and 256 variants are declared only when TU_256 is set. Inverse variants additionally take outputMinimum/outputMaximum.
+void fastForwardIDTR_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseIDTR_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardIDTR_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseIDTR_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardIDTR_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseIDTR_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardIDTR_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseIDTR_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardIDTR_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseIDTR_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+#if TU_256 // NOTE(review): the matching IDTR B128/B256 definitions in this patch show no TU_256 guard around them - confirm the two sides agree
+void fastForwardIDTR_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseIDTR_B128(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+void fastForwardIDTR_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2);
+void fastInverseIDTR_B256(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum);
+#endif // TU_256
+#endif
 
 #endif // __TRQUANT__
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 63d251c2662434aba131f547bb649856a53767d5..83d7c38c4c00b07468f3815a5f00b0e51a6bad89 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -52,10 +52,13 @@
 
 
 
+
+
 #define BASE_ENCODER                                      1
 #define BASE_NORMATIVE                                    1
 #define TOOLS                                             1
 
+
 #if BASE_ENCODER
 // Lossy encoder speedups
 #define AFFINE_ENC_OPT                                    1 // Affine encoder optimization
@@ -132,6 +135,8 @@
 #define EXTENDED_LFNST                                    1 // Extended LFNST
 #define SIGN_PREDICTION                                   1 // transform coefficients sign prediction
 
+#define JVET_W0103_INTRA_MTS                              1 // JVET W0103: Extended Intra MTS
+
 // Entropy Coding
 #define EC_HIGH_PRECISION                                 1 // CABAC high precision
 #define SLICE_TYPE_WIN_SIZE                               1 // Context window initialization based on slice type
@@ -157,7 +162,6 @@
 #if JVET_V0094_BILATERAL_FILTER
 #define ENABLE_SIMD_BILATERAL_FILTER                      1
 #endif
-
 #endif // tools
 
 
@@ -541,11 +545,23 @@ enum QuantFlags
 //EMT transform tags
 enum TransType
 {
+#if JVET_W0103_INTRA_MTS // extended tag set: adds DCT5, DST4, DST1 and IDTR after the three existing tags
+  DCT2 = 0,
+  DCT8 = 1,
+  DST7 = 2,
+  DCT5 = 3, // DCT-V; used as row 3 of fastFwdTrans / fastInvTrans in TrQuantX86.h
+  DST4 = 4, // DST-IV; row 4 of the same tables
+  DST1 = 5, // DST-I; row 5 of the same tables
+  IDTR = 6, // row 6 of the same tables; presumably the identity transform - confirm
+  NUM_TRANS_TYPE = 7, // one past the last tag above (tags run 0..6)
+  DCT2_EMT = 8 // kept outside the 0..NUM_TRANS_TYPE-1 range, as in the #else branch
+#else
   DCT2 = 0,
   DCT8 = 1,
   DST7 = 2,
   NUM_TRANS_TYPE = 3,
   DCT2_EMT = 4
+#endif // JVET_W0103_INTRA_MTS
 };
 
 enum MTSIdx
diff --git a/source/Lib/CommonLib/x86/TrQuantX86.h b/source/Lib/CommonLib/x86/TrQuantX86.h
index 5f7238fc60ea6aa9385a797399af4a7ccb3b524f..97f5215e77fc940b6dee55727666c623d283dc88 100644
--- a/source/Lib/CommonLib/x86/TrQuantX86.h
+++ b/source/Lib/CommonLib/x86/TrQuantX86.h
@@ -547,6 +547,12 @@ void TrQuant::_initTrQuantX86()
     { g_trCoreDCT2P2[TRANSFORM_FORWARD][0], g_trCoreDCT2P4[TRANSFORM_FORWARD][0], g_trCoreDCT2P8[TRANSFORM_FORWARD][0], g_trCoreDCT2P16[TRANSFORM_FORWARD][0], g_trCoreDCT2P32[TRANSFORM_FORWARD][0], g_trCoreDCT2P64[TRANSFORM_FORWARD][0], g_trCoreDCT2P128[TRANSFORM_FORWARD][0], g_trCoreDCT2P256[0] },
     { nullptr,                              g_trCoreDCT8P4[TRANSFORM_FORWARD][0], g_trCoreDCT8P8[TRANSFORM_FORWARD][0], g_trCoreDCT8P16[TRANSFORM_FORWARD][0], g_trCoreDCT8P32[TRANSFORM_FORWARD][0], g_trCoreDCT8P64[TRANSFORM_FORWARD][0], g_trCoreDCT8P128[TRANSFORM_FORWARD][0], g_trCoreDCT8P256[0] },
     { nullptr,                              g_trCoreDST7P4[TRANSFORM_FORWARD][0], g_trCoreDST7P8[TRANSFORM_FORWARD][0], g_trCoreDST7P16[TRANSFORM_FORWARD][0], g_trCoreDST7P32[TRANSFORM_FORWARD][0], g_trCoreDST7P64[TRANSFORM_FORWARD][0], g_trCoreDST7P128[TRANSFORM_FORWARD][0], g_trCoreDST7P256[0] },
+#if JVET_W0103_INTRA_MTS
+    { nullptr,                              g_aiTr4[DCT5][0], g_aiTr8[DCT5][0], g_aiTr16[DCT5][0], g_aiTr32[DCT5][0], g_aiTr64[DCT5][0], g_aiTr128[DCT5][0], g_aiTr256[DCT5][0] },
+    { nullptr,                              g_aiTr4[DST4][0], g_aiTr8[DST4][0], g_aiTr16[DST4][0], g_aiTr32[DST4][0], g_aiTr64[DST4][0], g_aiTr128[DST4][0], g_aiTr256[DST4][0] },
+    { nullptr,                              g_aiTr4[DST1][0], g_aiTr8[DST1][0], g_aiTr16[DST1][0], g_aiTr32[DST1][0], g_aiTr64[DST1][0], g_aiTr128[DST1][0], g_aiTr256[DST1][0] },
+    { nullptr,                              g_aiTr4[IDTR][0], g_aiTr8[IDTR][0], g_aiTr16[IDTR][0], g_aiTr32[IDTR][0], g_aiTr64[IDTR][0], g_aiTr128[IDTR][0], g_aiTr256[IDTR][0] },
+#endif
   } };
 
   m_inverseTransformKernels =
@@ -554,6 +560,12 @@ void TrQuant::_initTrQuantX86()
     { g_trCoreDCT2P2[TRANSFORM_INVERSE][0], g_trCoreDCT2P4[TRANSFORM_INVERSE][0], g_trCoreDCT2P8[TRANSFORM_INVERSE][0], g_trCoreDCT2P16[TRANSFORM_INVERSE][0], g_trCoreDCT2P32[TRANSFORM_INVERSE][0], g_trCoreDCT2P64[TRANSFORM_INVERSE][0], g_trCoreDCT2P128[TRANSFORM_INVERSE][0], g_trCoreDCT2P256[0] },
     { nullptr,                              g_trCoreDCT8P4[TRANSFORM_INVERSE][0], g_trCoreDCT8P8[TRANSFORM_INVERSE][0], g_trCoreDCT8P16[TRANSFORM_INVERSE][0], g_trCoreDCT8P32[TRANSFORM_INVERSE][0], g_trCoreDCT8P64[TRANSFORM_INVERSE][0], g_trCoreDCT8P128[TRANSFORM_INVERSE][0], g_trCoreDCT8P256[0] },
     { nullptr,                              g_trCoreDST7P4[TRANSFORM_INVERSE][0], g_trCoreDST7P8[TRANSFORM_INVERSE][0], g_trCoreDST7P16[TRANSFORM_INVERSE][0], g_trCoreDST7P32[TRANSFORM_INVERSE][0], g_trCoreDST7P64[TRANSFORM_INVERSE][0], g_trCoreDST7P128[TRANSFORM_INVERSE][0], g_trCoreDST7P256[0] },
+#if JVET_W0103_INTRA_MTS
+    { nullptr,                              g_aiTr4[DCT5][0], g_aiTr8[DCT5][0], g_aiTr16[DCT5][0], g_aiTr32[DCT5][0], g_aiTr64[DCT5][0], g_aiTr128[DCT5][0], g_aiTr256[DCT5][0] },
+    { nullptr,                              g_aiTr4[DST4][0], g_aiTr8[DST4][0], g_aiTr16[DST4][0], g_aiTr32[DST4][0], g_aiTr64[DST4][0], g_aiTr128[DST4][0], g_aiTr256[DST4][0] },
+    { nullptr,                              g_aiTr4[DST1][0], g_aiTr8[DST1][0], g_aiTr16[DST1][0], g_aiTr32[DST1][0], g_aiTr64[DST1][0], g_aiTr128[DST1][0], g_aiTr256[DST1][0] },
+    { nullptr,                              g_aiTr4[IDTR][0], g_aiTr8[IDTR][0], g_aiTr16[IDTR][0], g_aiTr32[IDTR][0], g_aiTr64[IDTR][0], g_aiTr128[IDTR][0], g_aiTr256[IDTR][0] },
+#endif
   } };
   
   fastFwdTrans[0][0] = fastForwardTransform_SIMD<DCT2, 2>;
@@ -583,6 +595,44 @@ void TrQuant::_initTrQuantX86()
   fastFwdTrans[2][6] = fastForwardTransform_SIMD<DST7, 128>;
   fastFwdTrans[2][7] = fastForwardTransform_SIMD<DST7, 256>;
 
+#if JVET_W0103_INTRA_MTS
+  fastFwdTrans[3][0] = nullptr;
+  fastFwdTrans[3][1] = fastForwardTransform_SIMD<DCT5, 4>;
+  fastFwdTrans[3][2] = fastForwardTransform_SIMD<DCT5, 8>;
+  fastFwdTrans[3][3] = fastForwardTransform_SIMD<DCT5, 16>;
+  fastFwdTrans[3][4] = fastForwardTransform_SIMD<DCT5, 32>;
+  fastFwdTrans[3][5] = fastForwardTransform_SIMD<DCT5, 64>;
+  fastFwdTrans[3][6] = fastForwardTransform_SIMD<DCT5, 128>;
+  fastFwdTrans[3][7] = fastForwardTransform_SIMD<DCT5, 256>;
+
+  fastFwdTrans[4][0] = nullptr;
+  fastFwdTrans[4][1] = fastForwardTransform_SIMD<DST4, 4>;
+  fastFwdTrans[4][2] = fastForwardTransform_SIMD<DST4, 8>;
+  fastFwdTrans[4][3] = fastForwardTransform_SIMD<DST4, 16>;
+  fastFwdTrans[4][4] = fastForwardTransform_SIMD<DST4, 32>;
+  fastFwdTrans[4][5] = fastForwardTransform_SIMD<DST4, 64>;
+  fastFwdTrans[4][6] = fastForwardTransform_SIMD<DST4, 128>;
+  fastFwdTrans[4][7] = fastForwardTransform_SIMD<DST4, 256>;
+
+  fastFwdTrans[5][0] = nullptr;
+  fastFwdTrans[5][1] = fastForwardTransform_SIMD<DST1, 4>;
+  fastFwdTrans[5][2] = fastForwardTransform_SIMD<DST1, 8>;
+  fastFwdTrans[5][3] = fastForwardTransform_SIMD<DST1, 16>;
+  fastFwdTrans[5][4] = fastForwardTransform_SIMD<DST1, 32>;
+  fastFwdTrans[5][5] = fastForwardTransform_SIMD<DST1, 64>;
+  fastFwdTrans[5][6] = fastForwardTransform_SIMD<DST1, 128>;
+  fastFwdTrans[5][7] = fastForwardTransform_SIMD<DST1, 256>;
+
+  fastFwdTrans[6][0] = nullptr;
+  fastFwdTrans[6][1] = fastForwardTransform_SIMD<IDTR, 4>;
+  fastFwdTrans[6][2] = fastForwardTransform_SIMD<IDTR, 8>;
+  fastFwdTrans[6][3] = fastForwardTransform_SIMD<IDTR, 16>;
+  fastFwdTrans[6][4] = fastForwardTransform_SIMD<IDTR, 32>;
+  fastFwdTrans[6][5] = fastForwardTransform_SIMD<IDTR, 64>;
+  fastFwdTrans[6][6] = fastForwardTransform_SIMD<IDTR, 128>;
+  fastFwdTrans[6][7] = fastForwardTransform_SIMD<IDTR, 256>;
+#endif
+
   fastInvTrans[0][0] = fastInverseTransform_SIMD<DCT2, 2>;
   fastInvTrans[0][1] = fastInverseTransform_SIMD<DCT2, 4>;
   fastInvTrans[0][2] = fastInverseTransform_SIMD<DCT2, 8>;
@@ -609,6 +659,44 @@ void TrQuant::_initTrQuantX86()
   fastInvTrans[2][5] = fastInverseTransform_SIMD<DST7, 64>;
   fastInvTrans[2][6] = fastInverseTransform_SIMD<DST7, 128>;
   fastInvTrans[2][7] = fastInverseTransform_SIMD<DST7, 256>;
+
+#if JVET_W0103_INTRA_MTS
+  fastInvTrans[3][0] = nullptr;
+  fastInvTrans[3][1] = fastInverseTransform_SIMD<DCT5, 4>;
+  fastInvTrans[3][2] = fastInverseTransform_SIMD<DCT5, 8>;
+  fastInvTrans[3][3] = fastInverseTransform_SIMD<DCT5, 16>;
+  fastInvTrans[3][4] = fastInverseTransform_SIMD<DCT5, 32>;
+  fastInvTrans[3][5] = fastInverseTransform_SIMD<DCT5, 64>;
+  fastInvTrans[3][6] = fastInverseTransform_SIMD<DCT5, 128>;
+  fastInvTrans[3][7] = fastInverseTransform_SIMD<DCT5, 256>;
+
+  fastInvTrans[4][0] = nullptr;
+  fastInvTrans[4][1] = fastInverseTransform_SIMD<DST4, 4>;
+  fastInvTrans[4][2] = fastInverseTransform_SIMD<DST4, 8>;
+  fastInvTrans[4][3] = fastInverseTransform_SIMD<DST4, 16>;
+  fastInvTrans[4][4] = fastInverseTransform_SIMD<DST4, 32>;
+  fastInvTrans[4][5] = fastInverseTransform_SIMD<DST4, 64>;
+  fastInvTrans[4][6] = fastInverseTransform_SIMD<DST4, 128>;
+  fastInvTrans[4][7] = fastInverseTransform_SIMD<DST4, 256>;
+
+  fastInvTrans[5][0] = nullptr;
+  fastInvTrans[5][1] = fastInverseTransform_SIMD<DST1, 4>;
+  fastInvTrans[5][2] = fastInverseTransform_SIMD<DST1, 8>;
+  fastInvTrans[5][3] = fastInverseTransform_SIMD<DST1, 16>;
+  fastInvTrans[5][4] = fastInverseTransform_SIMD<DST1, 32>;
+  fastInvTrans[5][5] = fastInverseTransform_SIMD<DST1, 64>;
+  fastInvTrans[5][6] = fastInverseTransform_SIMD<DST1, 128>;
+  fastInvTrans[5][7] = fastInverseTransform_SIMD<DST1, 256>;
+
+  fastInvTrans[6][0] = nullptr;
+  fastInvTrans[6][1] = fastInverseTransform_SIMD<IDTR, 4>;
+  fastInvTrans[6][2] = fastInverseTransform_SIMD<IDTR, 8>;
+  fastInvTrans[6][3] = fastInverseTransform_SIMD<IDTR, 16>;
+  fastInvTrans[6][4] = fastInverseTransform_SIMD<IDTR, 32>;
+  fastInvTrans[6][5] = fastInverseTransform_SIMD<IDTR, 64>;
+  fastInvTrans[6][6] = fastInverseTransform_SIMD<IDTR, 128>;
+  fastInvTrans[6][7] = fastInverseTransform_SIMD<IDTR, 256>;
+#endif
 #else
   m_forwardTransformKernels =
   { {
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index a0ced2ca8a07d59063fdaf837ccf7d5bb7354ec7..2a7113eee1f8149d4147ed596a9b19ad3845ba2d 100644
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -3858,11 +3858,21 @@ void CABACReader::mts_idx( CodingUnit& cu, CUCtx& cuCtx )
       cuCtx.mtsLastScanPos && cu.lfnstIdx == 0 && mtsIdx != MTS_SKIP)
   {
     RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET_SIZE2( STATS__CABAC_BITS__MTS_FLAGS, tu.blocks[COMPONENT_Y], COMPONENT_Y );
+#if JVET_W0103_INTRA_MTS
+    int ctxIdx = (cu.mipFlag) ? 3 : 0;
+#else
     int ctxIdx = 0;
+#endif
     int symbol = m_BinDecoder.decodeBin( Ctx::MTSIdx(ctxIdx));
 
     if( symbol )
     {
+#if JVET_W0103_INTRA_MTS
+      int bins[2];
+      bins[0] = m_BinDecoder.decodeBin(Ctx::MTSIdx(1));
+      bins[1] = m_BinDecoder.decodeBin(Ctx::MTSIdx(2));
+      mtsIdx = MTS_DST7_DST7 + (bins[0] << 1) + bins[1];
+#else
       ctxIdx = 1;
       mtsIdx = MTS_DST7_DST7; // mtsIdx = 2 -- 4
       for( int i = 0; i < 3; i++, ctxIdx++ )
@@ -3875,6 +3885,7 @@ void CABACReader::mts_idx( CodingUnit& cu, CUCtx& cuCtx )
           break;
         }
       }
+#endif
     }
   }
 
diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp
index 4904308cf850d1f40e3d256220d054e1791b8dc5..01b001838402ac20b70d35d1f1d8fcb418cfdc66 100644
--- a/source/Lib/EncoderLib/CABACWriter.cpp
+++ b/source/Lib/EncoderLib/CABACWriter.cpp
@@ -3604,12 +3604,22 @@ void CABACWriter::mts_idx( const CodingUnit& cu, CUCtx* cuCtx )
       cuCtx->mtsLastScanPos && cu.lfnstIdx == 0 && mtsIdx != MTS_SKIP)
   {
     int symbol = mtsIdx != MTS_DCT2_DCT2 ? 1 : 0;
+#if JVET_W0103_INTRA_MTS
+    int ctxIdx = (cu.mipFlag) ? 3 : 0;
+#else
     int ctxIdx = 0;
+#endif
 
     m_BinEncoder.encodeBin( symbol, Ctx::MTSIdx(ctxIdx));
 
     if( symbol )
     {
+#if JVET_W0103_INTRA_MTS
+      int TrIdx = (tu.mtsIdx[COMPONENT_Y] - MTS_DST7_DST7);
+      CHECK(TrIdx < 0 || TrIdx >= 4, "TrIdx outside range");
+      m_BinEncoder.encodeBin(TrIdx >> 1, Ctx::MTSIdx(1));
+      m_BinEncoder.encodeBin(TrIdx & 1, Ctx::MTSIdx(2));
+#else
       ctxIdx = 1;
       for( int i = 0; i < 3; i++, ctxIdx++ )
       {
@@ -3621,6 +3631,7 @@ void CABACWriter::mts_idx( const CodingUnit& cu, CUCtx* cuCtx )
           break;
         }
       }
+#endif
     }
   }
   DTRACE( g_trace_ctx, D_SYNTAX, "mts_idx() etype=%d pos=(%d,%d) mtsIdx=%d\n", COMPONENT_Y, tu.cu->lx(), tu.cu->ly(), mtsIdx);
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index 2142260474e0634d82ada0799cc17cc8bad0f82a..d6dc2b317dd81897ab461b923c9eca00e19e1048 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -1832,6 +1832,9 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
   bool   trGrpCheck        [ 4 ] = { true, true, true, true };
   int    startMTSIdx       [ 4 ] = { 0, 1, 2, 3 };
   int    endMTSIdx         [ 4 ] = { 0, 1, 2, 3 };
+#if JVET_W0103_INTRA_MTS
+  endMTSIdx[0] = 3; //put all MTS candidates in "Grp 0"
+#endif
   double trGrpStopThreshold[ 3 ] = { 1.001, 1.001, 1.001 };
   int    bestMtsFlag             =   0;
   int    bestLfnstIdx            =   0;
@@ -1857,8 +1860,11 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
   bool       skipOtherLfnst      = false;
   int        startLfnstIdx       = 0;
   int        endLfnstIdx         = sps.getUseLFNST() ? maxLfnstIdx : 0;
-
+#if JVET_W0103_INTRA_MTS
+  int grpNumMax = 1;
+#else
   int grpNumMax = sps.getUseLFNST() ? m_pcEncCfg->getMTSIntraMaxCand() : 1;
+#endif
   m_modeCtrl->setISPWasTested(false);
   m_pcIntraSearch->invalidateBestModeCost();
   if (sps.getUseColorTrans() && !CS::isDualITree(*tempCS))
@@ -2222,7 +2228,6 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
           xCalDebCost( *tempCS, partitioner );
           tempCS->useDbCost = m_pcEncCfg->getUseEncDbOpt();
 
-
 #if WCG_EXT
           DTRACE_MODE_COST( *tempCS, m_pcRdCost->getLambda( true ) );
 #else
@@ -2237,6 +2242,7 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
               bestCS->tmpColorSpaceIntraCost[colorSpaceIdx] = tempCS->cost;
             }
           }
+         
           if( !sps.getUseLFNST() )
           {
             xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
@@ -2249,6 +2255,7 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
               bestSelFlag  [ trGrpIdx ] = true;
               bestMtsFlag               = mtsFlag;
               bestLfnstIdx              = lfnstIdx;
+
               if( bestCS->cus.size() == 1 )
               {
                 CodingUnit &cu = *bestCS->cus.front();
@@ -2261,7 +2268,7 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
                 }
               }
             }
-
+            
             //we decide to skip the non-DCT-II transforms and LFNST according to the ISP results
             if ((endMtsFlag > 0 || endLfnstIdx > 0) && (cu.ispMode || (bestCS && bestCS->cus[0]->ispMode)) && tempCS->slice->isIntra() && m_pcEncCfg->getUseFastISP())
             {
@@ -2269,7 +2276,7 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
               double bestIspCost       = m_modeCtrl->getIspCost();
               CHECKD( bestCostDct2NoIsp <= bestIspCost, "wrong cost!" );
               double threshold = 1.4;
-
+              
               double lfnstThreshold = 1.01 * threshold;
               if( m_modeCtrl->getStopNonDCT2Transforms() || bestCostDct2NoIsp > bestIspCost*lfnstThreshold )
               {
@@ -2294,6 +2301,16 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
                 break;
               }
             }
+#if JVET_W0103_INTRA_MTS
+            if (lfnstIdx && m_modeCtrl->getMtsFirstPassNoIspCost() != MAX_DOUBLE && isLuma(partitioner.chType))
+            {
+              double threshold = 1.5;
+              if (m_modeCtrl->getMtsFirstPassNoIspCost() > threshold * bestCS->cost)
+              {
+                endLfnstIdx = lfnstIdx;
+              }
+            }
+#endif
           }
 
         } //for emtCuFlag
diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp
index aa634aa9dcd750952c764c140dd7b54ea5af8cd0..23c47b245bcc8b23b11a4dfb0283591275026a20 100644
--- a/source/Lib/EncoderLib/IntraSearch.cpp
+++ b/source/Lib/EncoderLib/IntraSearch.cpp
@@ -380,6 +380,45 @@ double IntraSearch::findInterCUCost( CodingUnit &cu )
   return COST_UNKNOWN;
 }
 #endif
+#if JVET_W0103_INTRA_MTS
+bool IntraSearch::testISPforCurrCU(const CodingUnit &cu)
+{ // Computes the rounded mean of |iDx| + |iDy| over the interior original Y samples of the first PU and sets m_numModesISPRDO from it; the return value is always true.
+  CodingStructure       &cs = *cu.cs;
+  auto &pu = *cu.firstPU;
+  const CompArea &area = pu.Y();
+  PelBuf piOrg = cs.getOrgBuf(area); // buffer returned by getOrgBuf for the Y area of the first PU
+
+  Pel* pOrg = piOrg.buf;
+  int uiWidth = area.width;
+  int uiHeight = area.height;
+  int iStride = piOrg.stride;
+  int Gsum = 0; // running sum of |iDx| + |iDy|, later turned into a mean
+  int nPix = (uiWidth - 2) * (uiHeight - 2); // number of positions visited by the loops below (one-sample border excluded)
+  for (int y = 1; y < (uiHeight - 1); y++) // y and x run over 1..size-2 so every p[+-iStride +-1] access stays inside the block
+  {
+    for (int x = 1; x < (uiWidth - 1); x++)
+    {
+      const Pel *p = pOrg + y * iStride + x;
+
+      int iDy = p[-iStride - 1] + 2 * p[-1] + p[iStride - 1] - p[-iStride + 1] - 2 * p[+1] - p[iStride + 1]; // left neighbour column minus right neighbour column, weights 1-2-1
+      int iDx = p[iStride - 1] + 2 * p[iStride] + p[iStride + 1] - p[-iStride - 1] - 2 * p[-iStride] - p[-iStride + 1]; // row below minus row above, weights 1-2-1
+
+      if (iDy == 0 && iDx == 0)
+        continue; // would add zero to Gsum anyway
+
+      int iAmp = (int)(abs(iDx) + abs(iDy));
+      Gsum += iAmp;
+    }
+  }
+  Gsum = (Gsum + (nPix >> 1)) / nPix; // rounded mean per visited sample; NOTE(review): nPix is 0 when width or height is 2, which would divide by zero - presumably callers never pass such sizes, confirm
+
+  bool testISP = true; // never modified afterwards, so the function returns true unconditionally
+  CHECK(m_numModesISPRDO != -1, "m_numModesISPRDO!=-1"); // the visible caller (estIntraPredLumaQT) assigns -1 to the member right before calling; CHECK presumably fails when the condition holds - confirm
+
+  m_numModesISPRDO = (Gsum < 50 && uiWidth >= 16 && uiHeight >= 16) ? 1 : 2; // 1 when the mean is below 50 and both dimensions are at least 16, otherwise 2
+  return testISP;
+}
+#endif
 bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, const double bestCostSoFar, bool mtsCheckRangeFlag, int mtsFirstCheckId, int mtsLastCheckId, bool moreProbMTSIdxFirst, CodingStructure* bestCS)
 {
   CodingStructure       &cs            = *cu.cs;
@@ -441,6 +480,18 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
     mtsUsageFlag = 0;
   }
 
+#if JVET_W0103_INTRA_MTS
+  if (!cu.mtsFlag && !cu.lfnstIdx)
+  {
+    m_globalBestCostStore = MAX_DOUBLE;
+    m_globalBestCostValid = false;
+    if (bestCS->getCU(partitioner.chType) != NULL && bestCS->getCU(partitioner.chType)->predMode != MODE_INTRA && bestCostSoFar != MAX_DOUBLE)
+    {
+      m_globalBestCostStore = bestCostSoFar;
+      m_globalBestCostValid = true;
+    }
+  }
+#endif
   const bool colorTransformIsEnabled = sps.getUseColorTrans() && !CS::isDualITree(cs);
   const bool isFirstColorSpace       = colorTransformIsEnabled && ((m_pcEncCfg->getRGBFormatFlag() && cu.colorTransform) || (!m_pcEncCfg->getRGBFormatFlag() && !cu.colorTransform));
   const bool isSecondColorSpace      = colorTransformIsEnabled && ((m_pcEncCfg->getRGBFormatFlag() && !cu.colorTransform) || (!m_pcEncCfg->getRGBFormatFlag() && cu.colorTransform));
@@ -449,7 +500,13 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
   bool ispCanBeUsed   = sps.getUseISP() && cu.mtsFlag == 0 && cu.lfnstIdx == 0 && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize());
   bool saveDataForISP = ispCanBeUsed && (!colorTransformIsEnabled || isFirstColorSpace);
   bool testISP        = ispCanBeUsed && (!colorTransformIsEnabled || !cu.colorTransform);
-
+#if JVET_W0103_INTRA_MTS 
+  if (testISP && m_pcEncCfg->getUseFastISP())
+  {
+    m_numModesISPRDO = -1;
+    testISP &= testISPforCurrCU(cu);
+  }
+#endif
   if ( saveDataForISP )
   {
     //reset the intra modes lists variables
@@ -1016,7 +1073,25 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
         if ((m_pcEncCfg->getUseFastLFNST() || !cu.slice->isIntra()) && m_bestModeCostValid[lfnstIdx])
         {
           numModesForFullRD = 0;
+#if JVET_W0103_INTRA_MTS
+          double thresholdSkipMode = 1.0 + ((cu.lfnstIdx > 0) ? 0.1 : 0.8) * (1.4 / sqrt((double)(width * height)));
+          std::vector<std::pair<ModeInfo, double>> ModeInfoWithDCT2Cost(m_savedNumRdModes[0]);
+          for (int i = 0; i < m_savedNumRdModes[0]; i++)
+          {
+            ModeInfoWithDCT2Cost[i] = { m_savedRdModeList[0][i], m_modeCostStore[0][i] };
+          }
+          std::stable_sort(ModeInfoWithDCT2Cost.begin(), ModeInfoWithDCT2Cost.end(), [](const std::pair<ModeInfo, double> & l, const std::pair<ModeInfo, double> & r) {return l.second < r.second; });
 
+          // Reorder the modes by their stored DCT2 R-D cost and skip those whose cost is much larger than that of the best mode
+          for (int i = 0; i < m_savedNumRdModes[0]; i++)
+          {
+            if (ModeInfoWithDCT2Cost[i].second <= thresholdSkipMode * ModeInfoWithDCT2Cost[0].second)
+            {
+              uiRdModeList.push_back(ModeInfoWithDCT2Cost[i].first);
+              numModesForFullRD++;
+            }
+          }
+#else
           double thresholdSkipMode = 1.0 + ((cu.lfnstIdx > 0) ? 0.1 : 1.0) * (1.4 / sqrt((double) (width * height)));
 
           // Skip checking the modes with much larger R-D cost than the best mode
@@ -1028,6 +1103,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
               numModesForFullRD++;
             }
           }
+#endif
         }
         else   // this is necessary because we skip the candidates list calculation, since it was already obtained for
                // the DCT-II. Now we load it
@@ -1060,7 +1136,6 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
       CHECK(numModesForFullRD != uiRdModeList.size(), "Inconsistent state!");
 #endif
       // after this point, don't use numModesForFullRD
-
       // PBINTRA fast
       if (m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable
           && !cs.slice->getDisableSATDForRD() && (mtsUsageFlag != 2 || lfnstIdx > 0))
@@ -1270,6 +1345,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 #endif
 #if JVET_V0130_INTRA_TMP
       cu.tmpFlag = uiOrgMode.tmpFlag;
+#if JVET_W0103_INTRA_MTS
+      if (cu.tmpFlag && cu.mtsFlag) continue;
+#endif
 #endif
       cu.mipFlag                     = uiOrgMode.mipFlg;
       pu.mipTransposedFlag           = uiOrgMode.mipTrFlg;
@@ -1426,6 +1504,7 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
             sortRdModeListFirstColorSpace(uiOrgMode, csTemp->cost, cu.bdpcmMode, m_savedRdModeFirstColorSpace[m_savedRdModeIdx], m_savedRdCostFirstColorSpace[m_savedRdModeIdx], m_savedBDPCMModeFirstColorSpace[m_savedRdModeIdx], m_numSavedRdModeFirstColorSpace[m_savedRdModeIdx]);
           }
         }
+
         // check r-d cost
         if( csTemp->cost < csBest->cost )
         {
@@ -1444,6 +1523,13 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
             m_bestModeCostStore[ lfnstIdx ] = csBest->cost; //cs.cost;
             m_bestModeCostValid[ lfnstIdx ] = true;
           }
+#if JVET_W0103_INTRA_MTS
+          if (sps.getUseLFNST() && m_globalBestCostStore > csBest->cost)
+          {
+            m_globalBestCostStore = csBest->cost;
+            m_globalBestCostValid = true;
+          }
+#endif
           if( csBest->cost < bestCurrentCost )
           {
             bestCurrentCost = csBest->cost;
@@ -3411,7 +3497,98 @@ uint64_t IntraSearch::xGetIntraFracBitsQTChroma(TransformUnit& currTU, const Com
   uint64_t fracBits = m_CABACEstimator->getEstFracBits();
   return fracBits;
 }
+#if JVET_W0103_INTRA_MTS
+// Ranks the four MTS candidates (MTS_DST7_DST7 + 0..3) of a luma TU by the absolute
+// coefficient sum returned by transformNxN() and stores the order, smallest sum first, in
+// m_TestAMTForFullRD; m_numCandAMTForFullRD holds how many of them are to be tested.
+// Returns without doing anything when the TU has no valid luma block or the CU does not
+// use MTS. As side effects the prediction is copied to m_pSharedPredTransformSkip and
+// tu.mtsIdx[0] is left at the last candidate that was tried.
+void IntraSearch::xSelectAMTForFullRD(TransformUnit &tu)
+{
+  if (!tu.blocks[COMPONENT_Y].valid() || !tu.cu->mtsFlag)
+  {
+    return;
+  }
+
+  CodingStructure &cs = *tu.cs;
+  m_pcRdCost->setChromaFormat(cs.sps->getChromaFormatIdc());
+
+  const CompArea    &lumaArea = tu.blocks[COMPONENT_Y];
+  const ChannelType  chType   = toChannelType(COMPONENT_Y);
+
+  PelBuf orgBuf  = cs.getOrgBuf(lumaArea);
+  PelBuf predBuf = cs.getPredBuf(lumaArea);
+  PelBuf resiBuf = cs.getResiBuf(lumaArea);
 
+  // PU located at the luma block's position
+  const PredictionUnit &pu = *cs.getPU(lumaArea.pos(), chType);
+
+  //===== init availability pattern =====
+  PelBuf savedPred(m_pSharedPredTransformSkip[COMPONENT_Y], lumaArea);
+  initIntraPatternChType(*tu.cu, lumaArea);
+
+  //===== get prediction signal =====
+  if (PU::isMIP(pu, chType))
+  {
+    initIntraMip(pu, lumaArea);
+    predIntraMip(COMPONENT_Y, predBuf, pu);
+  }
+  else
+  {
+    predIntraAng(COMPONENT_Y, predBuf, pu);
+  }
+
+  // keep a copy of the prediction in the shared buffer
+  savedPred.copyFrom(predBuf);
+
+  //===== get residual signal =====
+  const Slice &slice = *cs.slice;
+  resiBuf.copyFrom(orgBuf);
+  if (slice.getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
+  {
+    // pass the original samples through the reshaper's forward LUT before subtracting
+    resiBuf.rspSignal(m_pcReshape->getFwdLUT());
+  }
+  resiBuf.subtract(predBuf);
+
+  // Transform the residual with every MTS candidate and record its coefficient sum.
+  const int numMtsCands = 4;
+  std::vector<std::pair<int, uint64_t>> candSums(numMtsCands);
+  for (int cand = 0; cand < numMtsCands; cand++)
+  {
+    tu.mtsIdx[0] = cand + MTS_DST7_DST7;
+    const uint64_t absSum = m_pcTrQuant->transformNxN(tu);
+    candSums[cand] = { cand, absSum };
+  }
+
+  // Sort ascending by coefficient sum; stable_sort keeps index order for equal sums.
+  std::stable_sort(candSums.begin(), candSums.end(),
+                   [](const std::pair<int, uint64_t> &a, const std::pair<int, uint64_t> &b) { return a.second < b.second; });
+
+  // Publish the ranked candidate order; by default all candidates are tested.
+  for (int rank = 0; rank < numMtsCands; rank++)
+  {
+    m_TestAMTForFullRD[rank] = candSums[rank].first;
+  }
+  m_numCandAMTForFullRD = numMtsCands;
+
+  if (m_pcEncCfg->getUseFastLFNST())
+  {
+    // Cut the list at the first candidate whose sum exceeds the best sum by the threshold.
+    const double areaThreshold = 1.0 + 1.0 / sqrt((double)(lumaArea.width*lumaArea.height));
+    const double skipThreshold = std::max(areaThreshold, 1.03);
+    for (int rank = 1; rank < m_numCandAMTForFullRD; rank++)
+    {
+      if (candSums[rank].second > skipThreshold * candSums[0].second)
+      {
+        m_numCandAMTForFullRD = rank;
+        break;
+      }
+    }
+  }
+}
+#endif
 void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &compID, Distortion& ruiDist, const int &default0Save1Load2, uint32_t* numSig, std::vector<TrMode>* trModes, const bool loadTr)
 {
   if (!tu.blocks[compID].valid())
@@ -3532,7 +3709,11 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp
 
   const Slice           &slice = *cs.slice;
   bool flag = slice.getLmcsEnabledFlag() && (slice.isIntra() || (!slice.isIntra() && m_pcReshape->getCTUFlag()));
+#if JVET_W0103_INTRA_MTS
+  if (!tu.cu->mtsFlag && isLuma(compID))
+#else
   if (isLuma(compID))
+#endif
   {
     //===== get residual signal =====
     piResi.copyFrom( piOrg  );
@@ -4254,13 +4435,21 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
     bool    cbfBestMode      = false;
     bool    cbfBestModeValid = false;
     bool    cbfDCT2  = true;
-
+#if JVET_W0103_INTRA_MTS
+    if (sps.getUseLFNST() && cu.mtsFlag) xSelectAMTForFullRD(tu);
+#endif
     double bestDCT2cost = MAX_DOUBLE;
     double threshold = m_pcEncCfg->getUseFastISP() && !cu.ispMode && ispIsCurrentWinner && nNumTransformCands > 1 ? 1 + 1.4 / sqrt( cu.lwidth() * cu.lheight() ) : 1;
     for( int modeId = firstCheckId; modeId <= ( sps.getUseLFNST() ? lastCheckId : ( nNumTransformCands - 1 ) ); modeId++ )
     {
       uint8_t transformIndex = modeId;
-
+#if JVET_W0103_INTRA_MTS
+      if (sps.getUseLFNST() && cu.mtsFlag)
+      {
+        if (modeId >= m_numCandAMTForFullRD) continue;
+        transformIndex = m_TestAMTForFullRD[modeId];
+      }
+#endif
       if( sps.getUseLFNST() )
       {
         if( ( transformIndex < lastCheckId ) || ( ( transformIndex == lastCheckId ) && !checkTransformSkip ) ) //we avoid this if the mode is transformSkip
@@ -4370,6 +4559,12 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
             }
           }
         }
+#if JVET_W0103_INTRA_MTS
+        else if (cu.mtsFlag)
+        {
+          xIntraCodingTUBlock(tu, COMPONENT_Y, singleDistTmpLuma, 2, &numSig, nullptr, true);
+        }
+#endif
         else
         {
           xIntraCodingTUBlock( tu, COMPONENT_Y, singleDistTmpLuma, default0Save1Load2, &numSig );
@@ -4443,7 +4638,21 @@ bool IntraSearch::xRecurIntraCodingLumaQT( CodingStructure &cs, Partitioner &par
       {
         bestDCT2cost = singleCostTmp;
       }
-
+#if JVET_W0103_INTRA_MTS
+      if (sps.getUseLFNST() && cu.mtsFlag)
+      {
+        if (singleCostTmp != MAX_DOUBLE)
+        {
+          const CompArea&       area = tu.blocks[COMPONENT_Y];
+          double skipThreshold = 1.0 + 1.0 / sqrt((double)(area.width*area.height));
+          skipThreshold = std::max(skipThreshold, !m_pcEncCfg->getUseFastLFNST()? 1.06: 1.03);
+          if (singleCostTmp > skipThreshold * m_globalBestCostStore)
+          {
+            m_numCandAMTForFullRD = modeId + 1;
+          }
+        }
+      }
+#endif
       if (singleCostTmp < dSingleCost)
       {
         dSingleCost       = singleCostTmp;
@@ -6512,13 +6721,18 @@ bool IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost,
       ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, bestISPModeInRelCU));
     modeIsInList[bestISPModeInRelCU] = true;
   }
-
   // Planar
+#if JVET_W0103_INTRA_MTS
+  // push planar later when FastISP is on.
+  if (!m_pcEncCfg->getUseFastISP() && !modeIsInList[mode1])
+#else
   if (!modeIsInList[mode1])
+#endif
   {
     destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, mode1));
     modeIsInList[mode1] = true;
   }
+
   // Best angle in regular intra
   if (mode2 != -1 && !modeIsInList[mode2])
   {
@@ -6543,7 +6757,14 @@ bool IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost,
       }
     }
   }
-
+#if JVET_W0103_INTRA_MTS
+  // Planar (after angular modes when FastISP is on)
+  if (!modeIsInList[mode1])
+  {
+    destListPtr->push_back(ModeInfo(refMode.mipFlg, refMode.mipTrFlg, refMode.mRefId, refMode.ispMod, mode1));
+    modeIsInList[mode1] = true;
+  }
+#endif
   // DC is added after the angles from regular intra
   if (dcModeIndex != -1 && !modeIsInList[DC_IDX])
   {
@@ -6555,6 +6776,12 @@ bool IntraSearch::xSortISPCandList(double bestCostSoFar, double bestNonISPCost,
   for (int j = 0; j < NUM_LFNST_NUM_PER_SET; j++)
   {
     m_ispTestedModes[j].numOrigModesToTest = (int)destListPtr->size();
+#if JVET_W0103_INTRA_MTS
+    if (m_pcEncCfg->getUseFastISP() && m_numModesISPRDO != -1 && destListPtr->size() > m_numModesISPRDO)
+    {
+      m_ispTestedModes[j].numOrigModesToTest = m_numModesISPRDO;
+    }
+#endif
   }
   const int addedModesFromHadList = 3;
   int       newModesAdded = 0;
diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h
index 31b813dabccf8007b977a5b4d48c8ed6f2e78a89..6afe99d63fa7dc88ec2be3722b77c29fa091e9ca 100644
--- a/source/Lib/EncoderLib/IntraSearch.h
+++ b/source/Lib/EncoderLib/IntraSearch.h
@@ -198,7 +198,10 @@ class IntraSearch : public IntraPrediction
 private:
   EncModeCtrl    *m_modeCtrl;
   Pel*            m_pSharedPredTransformSkip[MAX_NUM_TBLOCKS];
-
+#if JVET_W0103_INTRA_MTS
+  int            m_TestAMTForFullRD[4];
+  int            m_numCandAMTForFullRD;
+#endif
   XUCache         m_unitCache;
 
   CodingStructure ****m_pSplitCS;
@@ -371,7 +374,11 @@ private:
   double     m_modeCostStore[ NUM_LFNST_NUM_PER_SET ][ NUM_LUMA_MODE ];                   // RD cost of each mode for each PU using DCT2
   ModeInfo   m_savedRdModeList[ NUM_LFNST_NUM_PER_SET ][ NUM_LUMA_MODE ];
   int32_t    m_savedNumRdModes[ NUM_LFNST_NUM_PER_SET ];
-
+#if JVET_W0103_INTRA_MTS
+  double     m_globalBestCostStore;
+  bool       m_globalBestCostValid;
+  int        m_numModesISPRDO; //full modes for ISP testing.
+#endif
   ModeInfo                                           m_savedRdModeFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2][FAST_UDI_MAX_RDMODE_NUM];
   char                                               m_savedBDPCMModeFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2][FAST_UDI_MAX_RDMODE_NUM];
   double                                             m_savedRdCostFirstColorSpace[4 * NUM_LFNST_NUM_PER_SET * 2][FAST_UDI_MAX_RDMODE_NUM];
@@ -494,7 +501,10 @@ protected:
 
   void xIntraCodingTUBlock        (TransformUnit &tu, const ComponentID &compID, Distortion& ruiDist, const int &default0Save1Load2 = 0, uint32_t* numSig = nullptr, std::vector<TrMode>* trModes=nullptr, const bool loadTr=false );
   void xIntraCodingACTTUBlock(TransformUnit &tu, const ComponentID &compID, Distortion& ruiDist, std::vector<TrMode>* trModes = nullptr, const bool loadTr = false);
-
+#if JVET_W0103_INTRA_MTS
+  void xSelectAMTForFullRD(TransformUnit &tu);
+  bool testISPforCurrCU(const CodingUnit &cu);
+#endif
   ChromaCbfs xRecurIntraChromaCodingQT( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE,                          const PartSplit ispType = TU_NO_ISP );
   bool       xRecurIntraCodingLumaQT  ( CodingStructure &cs, Partitioner& pm, const double bestCostSoFar = MAX_DOUBLE, const int subTuIdx = -1, const PartSplit ispType = TU_NO_ISP, const bool ispIsCurrentWinner = false, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false );
   bool       xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner& pm, bool mtsCheckRangeFlag = false, int mtsFirstCheckId = 0, int mtsLastCheckId = 0, bool moreProbMTSIdxFirst = false);