diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 59e832da9243a6d2aaaf72adb89f13d7167b58e9..66328ec040558146ec129bca091b440e70ba6a84 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -420,6 +420,10 @@ void EncApp::xInitLibCfg()
     m_cEncLib.setNoTimdConstraintFlag(m_noTimdConstraintFlag);
     CHECK(m_noTimdConstraintFlag && m_timd, "TIMD shall be deactivated when m_noTimdConstraintFlag is equal to 1");
 #endif
+#if JVET_AB0155_SGPM
+    m_cEncLib.setNoSgpmConstraintFlag(m_noSgpmConstraintFlag);
+    CHECK(m_noSgpmConstraintFlag && m_sgpm, "SGPM shall be deactivated when m_noSgpmConstraintFlag is equal to 1");
+#endif
 #if ENABLE_OBMC
     m_cEncLib.setNoObmcConstraintFlag(m_noObmcConstraintFlag);
     CHECK(m_noObmcConstraintFlag && m_OBMC, "OBMC shall be deactivated when m_noObmcConstraintFlag is equal to 1");
@@ -557,6 +561,9 @@ void EncApp::xInitLibCfg()
 #if JVET_W0123_TIMD_FUSION
     m_cEncLib.setNoTimdConstraintFlag(false);
 #endif
+#if JVET_AB0155_SGPM
+    m_cEncLib.setNoSgpmConstraintFlag(false);
+#endif
 #if ENABLE_OBMC
     m_cEncLib.setNoObmcConstraintFlag(false);
 #endif
@@ -847,6 +854,9 @@ void EncApp::xInitLibCfg()
 #if JVET_W0123_TIMD_FUSION
   m_cEncLib.setUseTimd                                           ( m_timd );
 #endif
+#if JVET_AB0155_SGPM
+  m_cEncLib.setUseSgpm                                           ( m_sgpm );
+#endif
 #if ENABLE_OBMC
   m_cEncLib.setUseObmc                                           ( m_OBMC );
 #endif
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index ae45ca9689c7ecc05ad842e253799bfaa8fa2088..3da2ca3515bfcb168f10cb4b7a222a553dfa3703 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -920,6 +920,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #if JVET_W0123_TIMD_FUSION
   ("NoTimdConstraintFlag",                             m_noTimdConstraintFlag,                          false, "Indicate that TIMD is deactivated")
 #endif
+#if JVET_AB0155_SGPM
+  ("NoSgpmConstraintFlag",                             m_noSgpmConstraintFlag,                          false, "Indicate that SGPM is deactivated")
+#endif
 #if ENABLE_OBMC
   ("NoObmcConstraintFlag",                             m_noObmcConstraintFlag,                            false, "Indicate that OBMC is deactivated")
 #endif
@@ -1096,6 +1099,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #if JVET_W0123_TIMD_FUSION
   ( "TIMD",                                           m_timd,                                            true,  "Enable template based intra mode derivation\n" )
 #endif
+#if JVET_AB0155_SGPM
+  ( "SGPM",                                           m_sgpm,                                            true,  "Enable spatial geometric partitioning mode\n" )
+#endif
 #if ENABLE_OBMC
   ("OBMC",                                            m_OBMC,                                           true, "Overlapping Block Motion Compensation")
 #endif
@@ -5209,6 +5215,9 @@ void EncAppCfg::xPrintParameter()
 #if JVET_W0123_TIMD_FUSION
   msg( VERBOSE, "TIMD:%d ", m_timd );
 #endif
+#if JVET_AB0155_SGPM
+  msg(VERBOSE, "SGPM:%d ", m_sgpm);
+#endif
 #if JVET_V0130_INTRA_TMP
   msg( VERBOSE, "IntraTMP:%d ", m_intraTMP );
   msg( VERBOSE, "IntraTmpMaxSize:%d ", m_intraTmpMaxSize );
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index 157621de4960bb70c66e08d2ce5981fd0d512356..c679eebc751e3e50bc7a7e237467b888173b74a8 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -208,6 +208,9 @@ protected:
 #if JVET_W0123_TIMD_FUSION
   bool      m_noTimdConstraintFlag;
 #endif
+#if JVET_AB0155_SGPM
+  bool      m_noSgpmConstraintFlag;
+#endif
 #if ENABLE_OBMC
   bool      m_noObmcConstraintFlag;
 #endif
@@ -457,6 +460,9 @@ protected:
 #if JVET_W0123_TIMD_FUSION
   bool      m_timd;
 #endif
+#if JVET_AB0155_SGPM
+  bool      m_sgpm;
+#endif
 #if ENABLE_OBMC
   bool      m_OBMC;
 #endif
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index e659e43397c33bff3d1cae7bc5fa3ad3aa6c7508..8e00e1fbc7a0b32cf1937f5d40a583d8c9c24a61 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -473,6 +473,9 @@ static const int LM_CHROMA_IDX = NUM_LUMA_MODE; ///< chroma mode index for deriv
 #if ENABLE_DIMD
 static const int DIMD_IDX =                                        99; ///< index for intra DIMD mode
 #endif
+#if JVET_AB0155_SGPM
+static const int SGPM_IDX = 200;   ///< index for SGPM mode
+#endif
 #if JVET_W0123_TIMD_FUSION
 static const int TIMD_IDX =                                       199; ///< index for intra TIMD mode
 static const int DIMD_MAX_TEMP_SIZE =                               4;
@@ -548,7 +551,13 @@ static const int NUM_MOST_PROBABLE_MODES = 6;
 static const int LM_SYMBOL_NUM = (1 + NUM_LMC_MODE);
 
 static const int MAX_NUM_MIP_MODE =                                32; ///< maximum number of MIP pred. modes
+#if JVET_AB0155_SGPM
+static const int SGPM_NUM = 16;   ///< capacity of the SGPM candidate mode and cost lists (static_vector<..., SGPM_NUM> in deriveSgpmModeOrdered)
+static const int FAST_UDI_MAX_RDMODE_NUM = (NUM_LUMA_MODE + MAX_NUM_MIP_MODE + SGPM_NUM);   ///< maximum number of RD comparisons in fast-UDI estimation loop, with SGPM_NUM extra slots for SGPM
+#else
+
 static const int FAST_UDI_MAX_RDMODE_NUM = (NUM_LUMA_MODE + MAX_NUM_MIP_MODE); ///< maximum number of RD comparison in fast-UDI estimation loop
+#endif
 
 static const int MAX_LFNST_COEF_NUM =                              16;
 
@@ -1026,13 +1035,28 @@ static const int GEO_NUM_ANGLES =                                  32;
 static const int GEO_NUM_DISTANCES =                                4;
 static const int GEO_NUM_PRESTORED_MASK =                           6;
 static const int GEO_WEIGHT_MASK_SIZE = 3 * (GEO_MAX_CU_SIZE >> 3) * 2 + GEO_MAX_CU_SIZE;
-#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
+
+#if JVET_AB0155_SGPM
+static const int GEO_MIN_CU_LOG2_EX         = 2;   ///< subtracted from floorLog2(width/height) to form the size indices into g_weightOffsetEx
+static const int GEO_MAX_CU_LOG2_EX         = 6;   ///< upper log2 bound paired with GEO_MIN_CU_LOG2_EX
+static const int GEO_MIN_CU_SIZE_EX         = 1 << GEO_MIN_CU_LOG2_EX;   ///< = 4
+static const int GEO_MAX_CU_SIZE_EX         = 1 << GEO_MAX_CU_LOG2_EX;   ///< = 64
+static const int GEO_NUM_CU_SIZE_EX         = (GEO_MAX_CU_LOG2_EX - GEO_MIN_CU_LOG2_EX) + 1;   ///< = 5, number of log2 sizes from MIN to MAX inclusive
+
+static const int SGPM_MIN_PIX = 32;   ///< NOTE(review): presumably a minimum sample count for SGPM blocks - confirm at the use site
+static const int SGPM_NUM_MPM = 3;   ///< innermost dimension of ipmList in deriveSgpmModeOrdered
+static const int SGPM_TEMPLATE_SIZE = 1;   ///< template width and height used by deriveSgpmModeOrdered
+#endif
+
+#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM
 #if !JVET_W0090_ARMC_TM
 static const int AML_MERGE_TEMPLATE_SIZE =                          1;
 #endif
 static const int GEO_MODE_SEL_TM_SIZE =       AML_MERGE_TEMPLATE_SIZE;
 static const int GEO_TM_ADDED_WEIGHT_MASK_SIZE = GEO_MODE_SEL_TM_SIZE;
 static const int GEO_WEIGHT_MASK_SIZE_EXT = GEO_WEIGHT_MASK_SIZE + GEO_TM_ADDED_WEIGHT_MASK_SIZE * 2;
+#endif
+#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
 static const int GEO_SPLIT_MODE_RICE_CODE_DIVISOR =                 4;
 static const int GEO_MODE_COMPRESSION_RATIO =                       2;
 static const int GEO_NUM_SIG_PARTMODE = GEO_NUM_PARTITION_MODE / GEO_MODE_COMPRESSION_RATIO; ///< max number of splitting modes for signaling
@@ -1060,6 +1084,11 @@ static const int GEO_MAX_TRY_WEIGHTED_SATD = 8;
 #if JVET_AA0058_GPM_ADP_BLD
 static const int GEO_NUM_BLD = 5;
 #endif
+#if JVET_AB0155_SGPM
+static const int TOTAL_GEO_NUM_BLD = 6; // blending index ranges: GPM 0~4, SGPM 1~5 (SGPM index comes from GET_SGPM_BLD_IDX)
+#define GET_SGPM_BLD_IDX(a, b)                                                                                           \
+  (std::min(a, b) <= 4 ? 1 : std::min(a, b) <= 8 ? 2 : std::min(a, b) <= 16 ? 3 : std::min(a, b) <= 32 ? 4 : 5)   // yields 1..5 from min(a, b) with thresholds 4/8/16/32; a and b are evaluated repeatedly and must share one type for std::min
+#endif
 
 #if ENABLE_OBMC
 static const unsigned int defaultWeight[2][4] = { {27, 16, 6, 0}, {27, 0, 0, 0} };
diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp
index bb791a5c9f500600cfa57be82e8ca009706c8ece..132a334d42cc01769e41d7a5f43c32302e6f49a9 100644
--- a/source/Lib/CommonLib/ContextModelling.cpp
+++ b/source/Lib/CommonLib/ContextModelling.cpp
@@ -402,6 +402,19 @@ unsigned DeriveCtx::CtxTimdFlag(const CodingUnit& cu)
 }
 #endif
 
+#if JVET_AB0155_SGPM
+unsigned DeriveCtx::CtxSgpmFlag(const CodingUnit &cu)
+{
+  // Context index = how many of the left / above luma neighbours returned by
+  // getCURestricted() exist and have their sgpm flag set, i.e. a value in 0..2.
+  const CodingUnit *const leftCu  = cu.cs->getCURestricted(cu.lumaPos().offset(-1, 0), cu, CH_L);
+  const CodingUnit *const aboveCu = cu.cs->getCURestricted(cu.lumaPos().offset(0, -1), cu, CH_L);
+  const unsigned          leftOn  = (leftCu != nullptr && leftCu->sgpm) ? 1u : 0u;
+  const unsigned          aboveOn = (aboveCu != nullptr && aboveCu->sgpm) ? 1u : 0u;
+  return leftOn + aboveOn;
+}
+#endif
+
 unsigned DeriveCtx::CtxPredModeFlag( const CodingUnit& cu )
 {
   const CodingUnit *cuLeft  = cu.cs->getCURestricted(cu.lumaPos().offset(-1, 0), cu, CH_L);
diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h
index 81dd14e66c861f16e4958a857f931197404d899b..db3638d8952c44c4f1ee8cfd72bc9fd81dc72df3 100644
--- a/source/Lib/CommonLib/ContextModelling.h
+++ b/source/Lib/CommonLib/ContextModelling.h
@@ -717,6 +717,9 @@ unsigned CtxDIMDFlag(const CodingUnit& cu);
 #if JVET_W0123_TIMD_FUSION
 unsigned CtxTimdFlag( const CodingUnit& cu );
 #endif
+#if JVET_AB0155_SGPM
+unsigned CtxSgpmFlag(const CodingUnit &cu);
+#endif
 }
 
 #endif // __CONTEXTMODELLING__
diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp
index 78339e8f0548bc56d7218c4afa4a471083f3d5f9..27a6b536de8d8c239f61eb974e30e30edea53543 100644
--- a/source/Lib/CommonLib/Contexts.cpp
+++ b/source/Lib/CommonLib/Contexts.cpp
@@ -2682,6 +2682,22 @@ const CtxSet ContextSetCfg::TimdFlag = ContextSetCfg::addCtxSet
   });
 #endif
 
+#if JVET_AB0155_SGPM
+const CtxSet ContextSetCfg::SgpmFlag = ContextSetCfg::addCtxSet   // 11 rows of 3 values; NOTE(review): the 3 columns presumably correspond to the 0..2 result of DeriveCtx::CtxSgpmFlag - confirm at the use site
+({
+  {  41,  34,  42, },
+  {  34,  34,  34, },
+  {  42,  50,  58, },
+  {   6,   6,   6, },
+  {   7,   7,   5, },
+  {   6,   6,   2, },
+  {  11,  11,  18, },
+  {   4,   4,   4, },
+  {  11,  11,  11, },
+  { 124, 126, 126, },
+  { 126, 124, 117, },
+});
+#endif
 #if ENABLE_OBMC 
 const CtxSet ContextSetCfg::ObmcFlag = ContextSetCfg::addCtxSet
 ({
diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h
index 19352e5410fe0c69b4bccf976d185514aa859472..208c874fd639f755498e6523003a9bb13127894c 100644
--- a/source/Lib/CommonLib/Contexts.h
+++ b/source/Lib/CommonLib/Contexts.h
@@ -487,6 +487,9 @@ public:
 #if JVET_W0123_TIMD_FUSION
   static const CtxSet   TimdFlag;
 #endif
+#if JVET_AB0155_SGPM
+  static const CtxSet   SgpmFlag;
+#endif
 #if ENABLE_OBMC
   static const CtxSet   ObmcFlag;
 #endif 
diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp
index cb9a2cd501953ce89cf0ba652cc396fbc2c7b701..7adc95bb18781d49de392133d069eac01bad2aff 100644
--- a/source/Lib/CommonLib/InterpolationFilter.cpp
+++ b/source/Lib/CommonLib/InterpolationFilter.cpp
@@ -1283,6 +1283,11 @@ InterpolationFilter::InterpolationFilter()
 #if JVET_Y0065_GPM_INTRA
   m_weightedGeoBlkRounded = xWeightedGeoBlkRounded;
 #endif
+#if JVET_AB0155_SGPM
+  m_weightedSgpm = xWeightedSgpm;
+  m_sadTM     = xSadTM;
+  m_sgpmSadTM = xSgpmSadTM;
+#endif
 #endif
 #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
   m_weightedGeoTplA = xWeightedGeoTpl<true>;
@@ -2145,6 +2150,209 @@ void InterpolationFilter::filterVer(const ComponentID compID, Pel const *src, in
   }
 }
 
+#if JVET_AB0155_SGPM
+int InterpolationFilter::xSadTM(const PredictionUnit &pu, const int width, const int height, const int templateWidth,
+                                const int templateHeight, const ComponentID compIdx, PelBuf &predBuf, PelBuf &recBuf,
+                                PelBuf &adBuf)
+{   // Template SAD: stores |rec - pred| for every top/left template sample in adBuf and returns the sum; pu and compIdx are not read here
+  int     sad         = 0;   // running sum of the absolute differences (the return value)
+  int32_t iPredStride = predBuf.stride;
+  int32_t iRecStride  = recBuf.stride;
+  int32_t iAdStride   = adBuf.stride;
+
+  // top template: templateHeight rows of `width` samples directly above the current block
+  Pel *piPred = predBuf.buf + templateWidth;
+  // start point of predBuf is (-templateWidth, -templateHeight) of current block; adBuf is addressed with the same offsets
+  Pel *piAd  = adBuf.buf + templateWidth;
+  Pel *piRec = recBuf.buf - templateHeight * iRecStride;   // start point of recBuf is (0,0) of current block
+
+  for (int y = 0; y < templateHeight; y++)
+  {
+    for (int x = 0; x < width; x++)
+    {
+      *piAd = abs(*piRec - *piPred);   // keep the per-sample difference; presumably reused by xSgpmSadTM, which reads an adBuf with this layout - confirm at the call site
+      sad += *piAd;
+      piRec++;
+      piPred++;
+      piAd++;
+    }
+    piPred += (iPredStride - width);   // advance each pointer to the start of its next row
+    piAd += (iAdStride - width);
+    piRec += (iRecStride - width);
+  }
+  // left template: `height` rows of templateWidth samples directly left of the current block
+  piPred = predBuf.buf + templateHeight * iPredStride;
+  // start point of predBuf is (-templateWidth, -templateHeight) of current block; adBuf is addressed with the same offsets
+  piAd  = adBuf.buf + templateHeight * iAdStride;
+  piRec = recBuf.buf - templateWidth;   // start point of recBuf is (0,0) of current block
+
+  for (int y = 0; y < height; y++)
+  {
+    for (int x = 0; x < templateWidth; x++)
+    {
+      *piAd = abs(*piRec - *piPred);   // same per-sample store as in the top template
+      sad += *piAd;
+      piRec++;
+      piPred++;
+      piAd++;
+    }
+    piPred += (iPredStride - templateWidth);   // advance each pointer to the start of its next row
+    piAd += (iAdStride - templateWidth);
+    piRec += (iRecStride - templateWidth);
+    
+  }
+  return sad;
+}
+
+int InterpolationFilter::xSgpmSadTM(const PredictionUnit &pu, const int width, const int height,
+                                         const int templateWidth, const int templateHeight, const ComponentID compIdx,
+                                        const uint8_t splitDir, PelBuf &adBuf)
+{   // Weighted template cost: returns the sum over the top and left template of adBuf sample * mask weight for splitDir; compIdx is not read here
+  int16_t angle = g_GeoParams[splitDir][0];
+  int16_t wIdx  = floorLog2(pu.lwidth()) - GEO_MIN_CU_LOG2_EX;   // log2 width index into g_weightOffsetEx
+  int16_t hIdx  = floorLog2(pu.lheight()) - GEO_MIN_CU_LOG2_EX;   // log2 height index into g_weightOffsetEx
+  int16_t stepX = 1;   // per-sample step through the mask in x (+1 or -1)
+  int      maskStride  = 0;   // per-row step through the mask (+/- GEO_WEIGHT_MASK_SIZE_EXT)
+  int16_t *weight      = nullptr;   // mask entry that corresponds to sample (0,0) of the current block
+
+  if (g_angle2mirror[angle] == 2)   // mirror type 2: rows are walked in reverse (negative maskStride) from a vertically flipped row offset
+  {
+    stepX      = 1;
+    maskStride = -GEO_WEIGHT_MASK_SIZE_EXT;
+    weight     = &g_globalGeoWeightsTpl[g_angle2mask[angle]]
+                                   [(GEO_WEIGHT_MASK_SIZE_EXT - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][1]
+                                     - GEO_TM_ADDED_WEIGHT_MASK_SIZE)
+                                      * GEO_WEIGHT_MASK_SIZE_EXT
+                                    + g_weightOffsetEx[splitDir][hIdx][wIdx][0] + GEO_TM_ADDED_WEIGHT_MASK_SIZE];
+  }
+  else if (g_angle2mirror[angle] == 1)   // mirror type 1: columns are walked in reverse (stepX = -1) from a horizontally flipped column offset
+  {
+    stepX      = -1;
+    maskStride = GEO_WEIGHT_MASK_SIZE_EXT;
+    weight     = &g_globalGeoWeightsTpl[g_angle2mask[angle]]
+                                   [(g_weightOffsetEx[splitDir][hIdx][wIdx][1] + GEO_TM_ADDED_WEIGHT_MASK_SIZE)
+                                      * GEO_WEIGHT_MASK_SIZE_EXT
+                                    + (GEO_WEIGHT_MASK_SIZE_EXT - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][0]
+                                       - GEO_TM_ADDED_WEIGHT_MASK_SIZE)];
+  }
+  else   // any other mirror value: plain forward walk in both directions
+  {
+    stepX      = 1;
+    maskStride = GEO_WEIGHT_MASK_SIZE_EXT;
+    weight     = &g_globalGeoWeightsTpl[g_angle2mask[angle]]
+                                   [(g_weightOffsetEx[splitDir][hIdx][wIdx][1] + GEO_TM_ADDED_WEIGHT_MASK_SIZE)
+                                      * GEO_WEIGHT_MASK_SIZE_EXT
+                                    + g_weightOffsetEx[splitDir][hIdx][wIdx][0] + GEO_TM_ADDED_WEIGHT_MASK_SIZE];
+  }
+  
+  int32_t iAdStride  = adBuf.stride;
+
+  // top template
+  Pel *piAd = adBuf.buf + templateWidth;   // start point of adBuf is (-templateWidth, -templateHeight) of current block
+  Pel *weightTmp = weight - templateHeight * maskStride;   // templateHeight mask rows before `weight` in walk direction; NOTE(review): int16_t* to Pel* only compiles if Pel is int16_t - confirm for other Pel configurations
+
+  int sum = 0;
+
+
+  for (int y = 0; y < templateHeight; y++)
+  {
+    for (int x = 0; x < width; x++)
+    {
+      sum += *piAd * (*weightTmp);   // weight the stored absolute difference by the mask value at the same position
+      piAd++;
+      weightTmp += stepX;
+    }
+    piAd += (iAdStride - width);
+    weightTmp += (maskStride - width * stepX);   // undo the x walk and move to the next mask row
+  }
+
+  // left template
+  piAd = adBuf.buf + templateHeight * iAdStride;
+  // start point of adBuf is (-templateWidth, -templateHeight) of current block
+  weightTmp = weight - templateWidth * stepX;   // templateWidth mask columns before `weight` in walk direction
+
+  for (int y = 0; y < height; y++)
+  {
+    for (int x = 0; x < templateWidth; x++)
+    {
+      sum += *piAd * (*weightTmp);   // same weighting as in the top template
+      piAd++;
+      weightTmp += stepX;
+    }
+    piAd += (iAdStride - templateWidth);
+    weightTmp += (maskStride - templateWidth * stepX);   // undo the x walk and move to the next mask row
+  }
+  return sum;
+}
+
+void InterpolationFilter::xWeightedSgpm(const PredictionUnit &pu, const uint32_t width, const uint32_t height,
+                                        const ComponentID compIdx, const uint8_t splitDir, PelBuf &predDst,
+                                        PelBuf &predSrc0, PelBuf &predSrc1)
+{   // Blend: predDst = ClipPel(rightShift(w * predSrc0 + (32 - w) * predSrc1 + 16, 5)), with w read from g_globalGeoWeights for splitDir
+  Pel *   dst        = predDst.buf;   // may be the same buffer as predSrc0 (predIntraAng passes piPred for both)
+  Pel *   src0       = predSrc0.buf;
+  Pel *   src1       = predSrc1.buf;
+  int32_t strideDst  = predDst.stride - width;   // these three locals are row padding: stride minus the `width` samples consumed per row
+  int32_t strideSrc0 = predSrc0.stride - width;
+  int32_t strideSrc1 = predSrc1.stride - width;
+
+  const ClpRng  clipRng        = pu.cu->slice->clpRngs().comp[compIdx];
+
+  const int32_t shiftWeighted  = 5;    // the two weights always sum to 32 = 1 << 5
+  const int32_t offsetWeighted = 16;   // added before the shift by 5
+  const uint32_t scaleX         = getComponentScaleX(compIdx, pu.chromaFormat);
+  const uint32_t scaleY         = getComponentScaleY(compIdx, pu.chromaFormat);
+
+  int16_t  angle  = g_GeoParams[splitDir][0];
+  int16_t  wIdx   = floorLog2(pu.lwidth()) - GEO_MIN_CU_LOG2_EX;   // log2 width index into g_weightOffsetEx
+  int16_t  hIdx   = floorLog2(pu.lheight()) - GEO_MIN_CU_LOG2_EX;   // log2 height index into g_weightOffsetEx
+  int16_t  stepX  = 1 << scaleX;   // mask step per output sample in x
+  int16_t  stepY  = 0;   // mask step applied after each row, i.e. after the x loop has advanced weight by width * stepX
+  int16_t *weight = nullptr;
+
+  if (g_angle2mirror[angle] == 2)   // mirror type 2: mask rows are walked in reverse from a vertically flipped row offset
+  {
+    stepY  = -(int) ((GEO_WEIGHT_MASK_SIZE << scaleY) + pu.lwidth());
+    weight = &g_globalGeoWeights
+               [GET_SGPM_BLD_IDX(pu.lwidth(), pu.lheight())]
+               [g_angle2mask[angle]]
+               [(GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][1])
+                  * GEO_WEIGHT_MASK_SIZE
+                + g_weightOffsetEx[splitDir][hIdx][wIdx][0]];
+  }
+  else if (g_angle2mirror[angle] == 1)   // mirror type 1: mask columns are walked in reverse from a horizontally flipped column offset
+  {
+    stepX  = -(1 << scaleX);   // parenthesised: left-shifting the negative value -1 is undefined behaviour before C++20
+    stepY  = (GEO_WEIGHT_MASK_SIZE << scaleY) + pu.lwidth();
+    weight = &g_globalGeoWeights
+               [GET_SGPM_BLD_IDX(pu.lwidth(), pu.lheight())]
+               [g_angle2mask[angle]]
+               [g_weightOffsetEx[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE
+                + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][0])];
+  }
+  else   // any other mirror value: plain forward walk in both directions
+  {
+    stepY  = (GEO_WEIGHT_MASK_SIZE << scaleY) - pu.lwidth();
+    weight = &g_globalGeoWeights
+               [GET_SGPM_BLD_IDX(pu.lwidth(), pu.lheight())]
+               [g_angle2mask[angle]]
+               [g_weightOffsetEx[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE
+                + g_weightOffsetEx[splitDir][hIdx][wIdx][0]];
+  }
+  for (int y = 0; y < height; y++)
+  {
+    for (int x = 0; x < width; x++)
+    {
+      *dst++ = ClipPel(rightShift((*weight * (*src0++) + ((32 - *weight) * (*src1++)) + offsetWeighted), shiftWeighted), clipRng);
+      weight += stepX;
+    }
+    dst += strideDst;
+    src0 += strideSrc0;
+    src1 += strideSrc1;
+    weight += stepY;   // NOTE(review): the +/- pu.lwidth() term in stepY assumes width << scaleX == pu.lwidth() - confirm at the call sites
+  }
+}
+#endif
 #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
 template <bool trueTFalseL>
 void InterpolationFilter::xWeightedGeoTpl(const PredictionUnit &pu, const uint8_t splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1)
diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h
index ed2c33291aeb35a4892f741989627b6a68c98c63..57c3d879b5bf4360c61d097a34e7677fc7972c23 100644
--- a/source/Lib/CommonLib/InterpolationFilter.h
+++ b/source/Lib/CommonLib/InterpolationFilter.h
@@ -212,6 +212,25 @@ public:
 #else
   void( *m_weightedGeoBlkRounded )(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const uint8_t splitDir, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1);
 #endif
+#endif
+#if JVET_AB0155_SGPM
+  void (*m_weightedSgpm)(const PredictionUnit &pu, const uint32_t width, const uint32_t height,
+                         const ComponentID compIdx, const uint8_t splitDir, PelBuf &predDst, PelBuf &predSrc0,
+                         PelBuf &predSrc1);
+  static void xWeightedSgpm(const PredictionUnit &pu, const uint32_t width, const uint32_t height,
+                         const ComponentID compIdx, const uint8_t splitDir, PelBuf &predDst, PelBuf &predSrc0,
+                         PelBuf &predSrc1);
+
+  int (*m_sadTM)(const PredictionUnit &pu, const int width, const int height, const int templateWidth,
+                 const int templateHeight, const ComponentID compIdx, PelBuf &predBuf, PelBuf &recBuf, PelBuf &adBuf);
+  static int xSadTM(const PredictionUnit &pu, const int width, const int height, const int templateWidth,
+                    const int templateHeight, const ComponentID compIdx, PelBuf &predBuf, PelBuf &recBuf,
+                    PelBuf &adBuf);
+  int (*m_sgpmSadTM)(const PredictionUnit &pu, const int width, const int height, const int templateWidth,
+                     const int templateHeight, const ComponentID compIdx, const uint8_t splitDir, PelBuf &adBuf);
+  static int xSgpmSadTM(const PredictionUnit &pu, const int width, const int height, const int templateWidth,
+                            const int templateHeight, const ComponentID compIdx, const uint8_t splitDir,
+                              PelBuf &adBuf);
 #endif
 
   void initInterpolationFilter( bool enable );
diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp
index 9d9afd03cb7b7cb474b34995f51ee7d2b69374b0..18762a82377b4916760abd66ce7f011b9656ede7 100644
--- a/source/Lib/CommonLib/IntraPrediction.cpp
+++ b/source/Lib/CommonLib/IntraPrediction.cpp
@@ -148,6 +148,14 @@ void IntraPrediction::destroy()
 #if JVET_W0123_TIMD_FUSION
   delete m_timdSatdCost;
 #endif
+#if JVET_AB0155_SGPM
+  for (auto &buffer: m_sgpmBuffer)
+  {
+    buffer.destroy();
+  }
+  m_sgpmBuffer.clear();
+#endif
+
   delete[] m_piTemp;
   m_piTemp = nullptr;
   delete[] m_pMdlmTemp;
@@ -202,6 +210,10 @@ void IntraPrediction::destroy()
 
 void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepthY)
 {
+#if JVET_AB0155_SGPM
+  m_if.initInterpolationFilter(true);
+#endif
+
 #if MERGE_ENC_OPT
   if (m_currChromaFormat != chromaFormatIDC)
   {
@@ -246,6 +258,21 @@ void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepth
     m_timdSatdCost = new RdCost;
   }
 #endif
+#if JVET_AB0155_SGPM
+  for (auto &buffer: m_sgpmBuffer)
+  {
+    buffer.destroy();
+  }
+
+  // the total number of temporary buffers can be adjusted by changing the size passed to resize() here
+  m_sgpmBuffer.resize(1);
+
+  for (auto &buffer: m_sgpmBuffer)
+  {
+    buffer.create(CHROMA_400, Area(0, 0, MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE, MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE));
+  }
+#endif
+
   if (m_piTemp == nullptr)
   {
     m_piTemp = new Pel[(MAX_CU_SIZE + 1) * (MAX_CU_SIZE + 1)];
@@ -807,7 +834,11 @@ int IntraPrediction::getModifiedWideAngle( int width, int height, int predMode )
 }
 
 #if JVET_W0123_TIMD_FUSION
+#if JVET_AB0155_SGPM
+int IntraPrediction::getWideAngleExt(int width, int height, int predMode, bool bSgpm)
+#else
 int IntraPrediction::getWideAngleExt( int width, int height, int predMode )
+#endif
 {
   if ( predMode > DC_IDX && predMode <= EXT_VDIA_IDX )
   {
@@ -815,11 +846,33 @@ int IntraPrediction::getWideAngleExt( int width, int height, int predMode )
     int deltaSize = abs(floorLog2(width) - floorLog2(height));
     if (width > height && predMode < 2 + modeShift[deltaSize])
     {
+#if JVET_AB0155_SGPM
+      if (bSgpm)
+      {
+        predMode += EXT_VDIA_IDX;
+      }
+      else
+      {
+        predMode += (EXT_VDIA_IDX - 1);
+      }
+#else
       predMode += (EXT_VDIA_IDX - 1);
+#endif
     }
     else if (height > width && predMode > EXT_VDIA_IDX - modeShift[deltaSize])
     {
+#if JVET_AB0155_SGPM
+      if (bSgpm)
+      {
+        predMode -= EXT_VDIA_IDX;
+      }
+      else
+      {
+        predMode -= (EXT_VDIA_IDX - 1);
+      }
+#else
       predMode -= (EXT_VDIA_IDX - 1);
+#endif
     }
   }
   return predMode;
@@ -1101,6 +1154,46 @@ void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, co
   }
 #endif
 
+#if JVET_AB0155_SGPM
+  
+  if(PU::isSgpm(pu, channelType))
+  {
+    int            width  = piPred.width;
+    int            height = piPred.height;
+    const UnitArea localUnitArea(pu.chromaFormat, Area(0, 0, width, height));
+    PelBuf predFusion = m_tempBuffer[1].getBuf(localUnitArea.Y());
+    IntraPredParam m_ipaParam2 = m_ipaParam;
+    CompArea compArea = (compID == COMPONENT_Y) ? pu.Y()
+                                               : (compID == COMPONENT_Cb) ? pu.Cb() : pu.Cr();
+    initIntraPatternChType(*pu.cu, compArea, false, 1); 
+    const uint32_t uiDirMode2 = PU::getFinalIntraMode(pu, channelType, 1);
+    const CPelBuf &srcBuf2 = CPelBuf(getPredictorPtr(compID), srcStride, srcHStride);
+    switch (uiDirMode2)
+    {
+    case (PLANAR_IDX): xPredIntraPlanar(srcBuf2, predFusion); break;
+    case (DC_IDX): xPredIntraDc(srcBuf2, predFusion, channelType, false); break;
+    default: xPredIntraAng(srcBuf2, predFusion, channelType, clpRng, bExtIntraDir); break;
+    }
+
+    #if JVET_X0148_TIMD_PDPC
+#if CIIP_PDPC
+    if ((m_ipaParam.applyPDPC || pu.ciipPDPC) && (uiDirMode2 == PLANAR_IDX || uiDirMode2 == DC_IDX))
+#else
+    if (m_ipaParam.applyPDPC && (uiDirMode2 == PLANAR_IDX || uiDirMode2 == DC_IDX))
+#endif
+    {
+      xIntraPredPlanarDcPdpc(srcBuf2, m_tempBuffer[1].getBuf(localUnitArea.Y()).buf,
+                             m_tempBuffer[1].getBuf(localUnitArea.Y()).stride, iWidth, iHeight, pu.ciipPDPC);
+    }
+#endif
+    
+    m_ipaParam           = m_ipaParam2;
+    
+    int     splitDir   = pu.cu->sgpmSplitDir;
+    m_if.m_weightedSgpm(pu, width, height, compID, splitDir, piPred, piPred, predFusion);
+  }
+#endif
+
 #if !JVET_X0148_TIMD_PDPC
 #if CIIP_PDPC
   if (m_ipaParam.applyPDPC || pu.ciipPDPC)
@@ -1370,7 +1463,11 @@ void IntraPrediction::xPredIntraDc( const CPelBuf &pSrc, PelBuf &pDst, const Cha
 }
 
 // Function for initialization of intra prediction parameters
+#if JVET_AB0155_SGPM
+void IntraPrediction::initPredIntraParams(const PredictionUnit &pu, const CompArea area, const SPS &sps, const int partIdx)
+#else
 void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompArea area, const SPS& sps)
+#endif
 {
   const ComponentID compId = area.compID;
   const ChannelType chType = toChannelType(compId);
@@ -1383,7 +1480,11 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA
   const Size   cuSize    = Size( pu.cu->blocks[compId].width, pu.cu->blocks[compId].height );
   const Size   puSize    = Size( area.width, area.height );
   const Size&  blockSize = useISP ? cuSize : puSize;
+#if JVET_AB0155_SGPM
+  const int dirMode = PU::getFinalIntraMode(pu, chType, partIdx);
+#else
   const int      dirMode = PU::getFinalIntraMode(pu, chType);
+#endif
 #if JVET_W0123_TIMD_FUSION
   const int     predMode = bExtIntraDir ? getWideAngleExt( blockSize.width, blockSize.height, dirMode ) : getModifiedWideAngle( blockSize.width, blockSize.height, dirMode );
 #else
@@ -2302,7 +2403,12 @@ inline int  isLeftAvailable       ( const CodingUnit &cu, const ChannelType &chT
 inline int  isAboveRightAvailable ( const CodingUnit &cu, const ChannelType &chType, const Position &posRT, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *validFlags );
 inline int  isBelowLeftAvailable  ( const CodingUnit &cu, const ChannelType &chType, const Position &posLB, const uint32_t uiNumUnitsInPU, const uint32_t unitHeight, bool *validFlags );
 
+#if JVET_AB0155_SGPM
+void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag,
+                                             const int partIdx)
+#else
 void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag)
+#endif
 {
 #if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
   CHECK(area.width == 2, "Width of 2 is not supported");
@@ -2311,16 +2417,27 @@ void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompAre
 
   if (!forceRefFilterFlag)
   {
+#if JVET_AB0155_SGPM
+    initPredIntraParams(*cu.firstPU, area, *cs.sps, partIdx);
+#else
     initPredIntraParams(*cu.firstPU, area, *cs.sps);
+#endif
   }
 
   Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
   Pel *refBufFiltered   = m_refBuffer[area.compID][PRED_BUF_FILTERED];
 
   setReferenceArrayLengths( area );
-
+#if JVET_AB0155_SGPM
+  if (!partIdx) 
+  {
+    // ----- Step 1: unfiltered reference samples -----
+    xFillReferenceSamples(cs.picture->getRecoBuf(area), refBufUnfiltered, area, cu);
+  }
+#else
   // ----- Step 1: unfiltered reference samples -----
   xFillReferenceSamples( cs.picture->getRecoBuf( area ), refBufUnfiltered, area, cu );
+#endif
   // ----- Step 2: filtered reference samples -----
   if( m_ipaParam.refFilterFlag || forceRefFilterFlag )
   {
@@ -3202,12 +3319,19 @@ void IntraPrediction::xPredTimdIntraDc( const PredictionUnit &pu, const CPelBuf
   }
 }
 
+#if JVET_AB0155_SGPM
+void IntraPrediction::initPredTimdIntraParams(const PredictionUnit &pu, const CompArea area, int dirMode, bool bSgpm)
+#else
 void IntraPrediction::initPredTimdIntraParams(const PredictionUnit & pu, const CompArea area, int dirMode)
+#endif
 {
   const Size   puSize    = Size( area.width, area.height );
   const Size&  blockSize = puSize;
+#if JVET_AB0155_SGPM
+  const int predMode = getWideAngleExt(blockSize.width, blockSize.height, dirMode, bSgpm);
+#else
   const int     predMode = getWideAngleExt( blockSize.width, blockSize.height, dirMode );
-
+#endif
   m_ipaParam.isModeVer            = predMode >= EXT_DIA_IDX;
   m_ipaParam.refFilterFlag        = false;
   m_ipaParam.interpolationFlag    = false;
@@ -3838,6 +3962,662 @@ void IntraPrediction::xFillTimdReferenceSamples(const CPelBuf &recoBuf, Pel* ref
   }
 }
 
+#if JVET_AB0155_SGPM
+void IntraPrediction::deriveSgpmModeOrdered(const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu,
+                                            static_vector<SgpmInfo, SGPM_NUM> &candModeList,
+                                            static_vector<double, SGPM_NUM> &  candCostList)
+{   // Fills candModeList/candCostList (via updateCandList) with up to SGPM_NUM (splitDir, mode0, mode1) combinations chosen by template cost. NOTE: recoBuf is not referenced in this body.
+  SizeType uiWidth         = cu.lwidth();
+  SizeType uiHeight        = cu.lheight();
+
+  int      iCurX = cu.lx();
+  int      iCurY = cu.ly();
+  int      iRefX = -1, iRefY = -1;
+  uint32_t uiRefWidth = 0, uiRefHeight = 0;
+
+  const int iTempWidth = SGPM_TEMPLATE_SIZE, iTempHeight = SGPM_TEMPLATE_SIZE;   // same template thickness on both sides
+
+  TEMPLATE_TYPE eTempType = CU::deriveTimdRefType(iCurX, iCurY, uiWidth, uiHeight, iTempWidth, iTempHeight, iRefX,
+                                                  iRefY, uiRefWidth, uiRefHeight);   // iRefX/iRefY/uiRefWidth/uiRefHeight are outputs of this call
+  auto &        pu        = *cu.firstPU;
+  uint32_t      uiRealW   = uiRefWidth + (eTempType == LEFT_NEIGHBOR ? iTempWidth : 0);
+  uint32_t      uiRealH   = uiRefHeight + (eTempType == ABOVE_NEIGHBOR ? iTempHeight : 0);
+
+  const UnitArea localUnitArea(pu.chromaFormat, Area(0, 0, uiRealW, uiRealH));
+  uint32_t       uiPredStride = m_sgpmBuffer[0].getBuf(localUnitArea.Y()).stride;
+  CHECK(eTempType != LEFT_ABOVE_NEIGHBOR, "left and above both should exist");   // this routine requires both the left and the above template
+  
+  const CodingStructure &cs = *cu.cs;
+  m_ipaParam.multiRefIndex  = iTempWidth;
+  Pel *piOrg                = cs.picture->getRecoBuf(area).buf;   // NOTE(review): piOrg is only assigned here and offset below; nothing later in this function reads it
+  int  iOrgStride           = cs.picture->getRecoBuf(area).stride;
+  piOrg += (iRefY - iCurY) * iOrgStride + (iRefX - iCurX);
+
+  initTimdIntraPatternLuma(cu, area, eTempType != ABOVE_NEIGHBOR ? iTempWidth : 0,
+                           eTempType != LEFT_NEIGHBOR ? iTempHeight : 0, uiRefWidth, uiRefHeight);
+
+  Distortion sadWholeTM[NUM_LUMA_MODE];                           // per-mode cost from m_sadTM; written only for modes flagged in ipmNeeded
+  Distortion sadPartsTM[NUM_LUMA_MODE][GEO_NUM_PARTITION_MODE];   // per-(mode, splitDir) cost from m_sgpmSadTM; written only where sadPartsNeeded is set
+  uint8_t    ipmList[GEO_NUM_PARTITION_MODE][2][SGPM_NUM_MPM];    // MPM list per split direction and partition; filled only for enabled split directions
+  bool       sadPartsNeeded[NUM_LUMA_MODE][GEO_NUM_PARTITION_MODE] = {};
+  bool       ipmNeeded[NUM_LUMA_MODE]                                 = {};
+
+  for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)   // pass 1: gather the MPMs and mark which costs must be computed
+  {
+    if (!g_sgpm_splitDir[splitDir])   // split direction not enabled for SGPM
+    {
+      continue;
+    }
+
+    int16_t angle = g_GeoParams[splitDir][0];
+    for (int partIdx = 0; partIdx < 2; partIdx++)
+    {
+      PU::getSgpmIntraMPMs(pu, ipmList[splitDir][partIdx], splitDir, g_geoTmShape[partIdx][angle]);
+      for (int modeIdx = 0; modeIdx < SGPM_NUM_MPM; modeIdx++)
+      {
+        int ipmIdx                       = ipmList[splitDir][partIdx][modeIdx];   // used as an index into NUM_LUMA_MODE-sized arrays; assumes MPM values < NUM_LUMA_MODE — TODO confirm
+        ipmNeeded[ipmIdx]                = true;
+        sadPartsNeeded[ipmIdx][splitDir] = true;
+      }
+    }
+  }
+
+  for (int ipmIdx = 0; ipmIdx < NUM_LUMA_MODE; ipmIdx++)   // pass 2: predict the template once per needed mode and measure its costs
+  {
+    if (ipmNeeded[ipmIdx])
+    {
+      int iMode = MAP67TO131(ipmIdx);
+      initPredTimdIntraParams(pu, area, iMode, true);   // last argument is the bSgpm flag
+      Pel *tempPred = m_sgpmBuffer[0].getBuf(localUnitArea.Y()).buf;
+      predTimdIntraAng(COMPONENT_Y, pu, iMode, tempPred, uiPredStride, uiRealW, uiRealH, eTempType,
+                       (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth, (eTempType == LEFT_NEIGHBOR) ? 0 : iTempHeight);
+
+      PelBuf predBuf = m_sgpmBuffer[0].getBuf(localUnitArea.Y());   // same m_sgpmBuffer[0] area that tempPred points into
+      PelBuf recBuf  = cs.picture->getRecoBuf(area);
+      PelBuf adBuf   = m_sgpmBuffer[0].getBuf(localUnitArea.Y());   // aliases predBuf (identical getBuf call)
+
+      sadWholeTM[ipmIdx] =
+        m_if.m_sadTM(pu, uiWidth, uiHeight, iTempWidth, iTempHeight, COMPONENT_Y, predBuf, recBuf, adBuf);   // presumably also leaves per-sample differences in adBuf for m_sgpmSadTM below — TODO confirm against InterpolationFilter
+
+      for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
+      {
+        if (sadPartsNeeded[ipmIdx][splitDir])
+        {
+          sadPartsTM[ipmIdx][splitDir] =
+            m_if.m_sgpmSadTM(pu, uiWidth, uiHeight, iTempWidth, iTempHeight, COMPONENT_Y, splitDir, adBuf);
+        }
+      }
+    }
+  }
+  // check every possible combination
+  uint32_t cntComb = 0;   // number of combinations considered so far
+  for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
+  {
+    if (!g_sgpm_splitDir[splitDir])
+    {
+      continue;
+    }
+
+    for (int mode0Idx = 0; mode0Idx < SGPM_NUM_MPM; mode0Idx++)
+    {
+      for (int mode1Idx = 0; mode1Idx < SGPM_NUM_MPM; mode1Idx++)
+      {
+        int ipm0Idx = ipmList[splitDir][0][mode0Idx];
+        int ipm1Idx = ipmList[splitDir][1][mode1Idx];
+        if (ipm0Idx == ipm1Idx)   // both partitions would use the same intra mode: skipped
+        {
+          continue;
+        }
+
+        double cost = static_cast<double>(sadPartsTM[ipm0Idx][splitDir]) + static_cast<double>(sadWholeTM[ipm1Idx])
+                      - static_cast<double>(sadPartsTM[ipm1Idx][splitDir]);   // parts(mode0) + whole(mode1) - parts(mode1); presumably mode0 on its partition plus mode1 on the remainder — TODO confirm
+
+        cntComb++;
+
+        if ((cntComb > SGPM_NUM && cost < candCostList[SGPM_NUM - 1]) || cntComb <= SGPM_NUM)   // first SGPM_NUM combinations always go in; later ones only when cheaper than the entry at index SGPM_NUM - 1
+        {
+          updateCandList(SgpmInfo(splitDir, ipm0Idx, ipm1Idx), cost, candModeList, candCostList, SGPM_NUM);
+        }
+      }
+    }
+  }
+}
+#endif
+
+#if JVET_AB0155_SGPM
+int IntraPrediction::deriveTimdMode(const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu, bool bFull, bool bHorVer)
+{   // Template-based intra mode derivation. bFull: run the full search and write cu.timdMode / blending fields. bHorVer: also write cu.timdHor / cu.timdVer. Returns the best mode found (PLANAR_IDX when no template exists).
+  int      channelBitDepth = cu.slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA);
+  SizeType uiWidth         = cu.lwidth();
+  SizeType uiHeight        = cu.lheight();
+
+  static Pel PredLuma[(MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE) * (MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE)];   // function-local static scratch buffer, zeroed on every call
+  memset(PredLuma, 0, (MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE) * (MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE) * sizeof(Pel));
+  Pel *    piPred       = PredLuma;
+  uint32_t uiPredStride = MAX_CU_SIZE + DIMD_MAX_TEMP_SIZE;
+
+  int      iCurX = cu.lx();
+  int      iCurY = cu.ly();
+  int      iRefX = -1, iRefY = -1;
+  uint32_t uiRefWidth = 0, uiRefHeight = 0;
+
+  int iTempWidth = 4, iTempHeight = 4;   // template thickness; reduced to 2 below when the matching block dimension is <= 8
+  if (uiWidth <= 8)
+  {
+    iTempWidth = 2;
+  }
+  if (uiHeight <= 8)
+  {
+    iTempHeight = 2;
+  }
+
+  TEMPLATE_TYPE eTempType = CU::deriveTimdRefType(iCurX, iCurY, uiWidth, uiHeight, iTempWidth, iTempHeight, iRefX,
+                                                  iRefY, uiRefWidth, uiRefHeight);
+
+  if (eTempType != NO_NEIGHBOR)   // at least one template side is available
+  {
+    const CodingStructure &cs = *cu.cs;
+    m_ipaParam.multiRefIndex  = iTempWidth;
+    Pel *piOrg                = cs.picture->getRecoBuf(area).buf;
+    int  iOrgStride           = cs.picture->getRecoBuf(area).stride;
+    piOrg += (iRefY - iCurY) * iOrgStride + (iRefX - iCurX);
+    DistParam distParamSad[2];   // [0] = above template, [1] = left template
+    distParamSad[0].applyWeight = false;
+    distParamSad[0].useMR       = false;
+    distParamSad[1].applyWeight = false;
+    distParamSad[1].useMR       = false;
+    if (eTempType == LEFT_ABOVE_NEIGHBOR)
+    {
+      m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg + iTempWidth, piPred + iTempWidth, iOrgStride,
+                                       uiPredStride, channelBitDepth, COMPONENT_Y, uiWidth, iTempHeight, 0, 1,
+                                       true);   // Use HAD (SATD) cost
+      m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg + iTempHeight * iOrgStride,
+                                       piPred + iTempHeight * uiPredStride, iOrgStride, uiPredStride, channelBitDepth,
+                                       COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true);   // Use HAD (SATD) cost
+    }
+    else if (eTempType == LEFT_NEIGHBOR)
+    {
+      m_timdSatdCost->setTimdDistParam(distParamSad[1], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth,
+                                       COMPONENT_Y, iTempWidth, uiHeight, 0, 1, true);
+    }
+    else if (eTempType == ABOVE_NEIGHBOR)
+    {
+      m_timdSatdCost->setTimdDistParam(distParamSad[0], piOrg, piPred, iOrgStride, uiPredStride, channelBitDepth,
+                                       COMPONENT_Y, uiWidth, iTempHeight, 0, 1, true);
+    }
+    initTimdIntraPatternLuma(cu, area, eTempType != ABOVE_NEIGHBOR ? iTempWidth : 0,
+                             eTempType != LEFT_NEIGHBOR ? iTempHeight : 0, uiRefWidth, uiRefHeight);
+
+    uint32_t uiIntraDirNeighbor[5] = { 0 }, modeIdx = 0;   // distinct intra modes of up to five neighbours; modeIdx counts the stored entries
+    bool     includedMode[EXT_VDIA_IDX + 1];   // marks modes already stored, used for de-duplication
+    memset(includedMode, false, (EXT_VDIA_IDX + 1) * sizeof(bool));
+    auto &   pu      = *cu.firstPU;
+    uint32_t uiRealW = uiRefWidth + (eTempType == LEFT_NEIGHBOR ? iTempWidth : 0);
+    uint32_t uiRealH = uiRefHeight + (eTempType == ABOVE_NEIGHBOR ? iTempHeight : 0);
+    uint64_t maxCost = (uint64_t)(iTempWidth * cu.lheight() + iTempHeight * cu.lwidth());   // early-termination threshold, equal to the number of template samples (left + above)
+
+    uint64_t uiBestCost      = MAX_UINT64;
+    int      iBestMode       = PLANAR_IDX;
+    uint64_t uiSecondaryCost = MAX_UINT64;
+    int      iSecondaryMode  = PLANAR_IDX;
+
+    uint64_t uiBestCostHor = MAX_UINT64;
+    uint64_t uiBestCostVer = MAX_UINT64;
+    int      iBestModeHor  = PLANAR_IDX;
+    int      iBestModeVer  = PLANAR_IDX;
+
+    const Position posLTx = pu.Y().topLeft();
+    const Position posRTx = pu.Y().topRight();
+    const Position posLBx = pu.Y().bottomLeft();
+
+    // left
+    const PredictionUnit *puLeftx = pu.cs->getPURestricted(posLBx.offset(-1, 0), pu, pu.chType);
+    if (puLeftx && CU::isIntra(*puLeftx->cu))
+    {
+      uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma(*puLeftx);
+      if (!puLeftx->cu->timd)   // non-TIMD neighbour: remap via MAP67TO131 (presumably 67-mode to extended numbering — TODO confirm)
+      {
+        uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]);
+      }
+      if (!includedMode[uiIntraDirNeighbor[modeIdx]])   // keep the entry only if this mode has not been stored yet
+      {
+        includedMode[uiIntraDirNeighbor[modeIdx]] = true;
+        modeIdx++;
+      }
+    }
+    // above
+    const PredictionUnit *puAbovex = pu.cs->getPURestricted(posRTx.offset(0, -1), pu, pu.chType);
+    if (puAbovex && CU::isIntra(*puAbovex->cu) && CU::isSameCtu(*pu.cu, *puAbovex->cu))   // only this neighbour is additionally required to be in the same CTU
+    {
+      uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma(*puAbovex);
+      if (!puAbovex->cu->timd)
+      {
+        uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]);
+      }
+      if (!includedMode[uiIntraDirNeighbor[modeIdx]])
+      {
+        includedMode[uiIntraDirNeighbor[modeIdx]] = true;
+        modeIdx++;
+      }
+    }
+    // below left
+    const PredictionUnit *puLeftBottomx = cs.getPURestricted(posLBx.offset(-1, 1), pu, pu.chType);
+    if (puLeftBottomx && CU::isIntra(*puLeftBottomx->cu))
+    {
+      uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma(*puLeftBottomx);
+      if (!puLeftBottomx->cu->timd)
+      {
+        uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]);
+      }
+      if (!includedMode[uiIntraDirNeighbor[modeIdx]])
+      {
+        includedMode[uiIntraDirNeighbor[modeIdx]] = true;
+        modeIdx++;
+      }
+    }
+    // above right
+    const PredictionUnit *puAboveRightx = cs.getPURestricted(posRTx.offset(1, -1), pu, pu.chType);
+    if (puAboveRightx && CU::isIntra(*puAboveRightx->cu))
+    {
+      uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma(*puAboveRightx);
+      if (!puAboveRightx->cu->timd)
+      {
+        uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]);
+      }
+      if (!includedMode[uiIntraDirNeighbor[modeIdx]])
+      {
+        includedMode[uiIntraDirNeighbor[modeIdx]] = true;
+        modeIdx++;
+      }
+    }
+    // above left
+    const PredictionUnit *puAboveLeftx = cs.getPURestricted(posLTx.offset(-1, -1), pu, pu.chType);
+    if (puAboveLeftx && CU::isIntra(*puAboveLeftx->cu))
+    {
+      uiIntraDirNeighbor[modeIdx] = PU::getIntraDirLuma(*puAboveLeftx);
+      if (!puAboveLeftx->cu->timd)
+      {
+        uiIntraDirNeighbor[modeIdx] = MAP67TO131(uiIntraDirNeighbor[modeIdx]);
+      }
+      if (!includedMode[uiIntraDirNeighbor[modeIdx]])
+      {
+        includedMode[uiIntraDirNeighbor[modeIdx]] = true;
+        modeIdx++;
+      }
+    }
+    bool bNoAngular = false;   // becomes true when at least two distinct neighbour modes were stored and none exceeds DC_IDX
+    if (modeIdx >= 2)
+    {
+      bNoAngular = true;
+      for (uint32_t i = 0; i < modeIdx; i++)
+      {
+        if (uiIntraDirNeighbor[i] > DC_IDX)
+        {
+          bNoAngular = false;
+          break;
+        }
+      }
+    }
+
+    if (bNoAngular)   // shortcut: only modes 0 and 1 are tested, no blending and no hor/ver search
+    {
+      if (bFull)
+      {
+        for (int iMode = 0; iMode <= 1; iMode++)
+        {
+          uint64_t uiCost = 0;
+          initPredTimdIntraParams(pu, area, iMode);
+          predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType,
+                           (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth,
+                           (eTempType == LEFT_NEIGHBOR) ? 0 : iTempHeight);
+          if (eTempType == LEFT_ABOVE_NEIGHBOR)
+          {
+            uiCost += distParamSad[0].distFunc(distParamSad[0]);
+            uiCost += distParamSad[1].distFunc(distParamSad[1]);
+          }
+          else if (eTempType == LEFT_NEIGHBOR)
+          {
+            uiCost = distParamSad[1].distFunc(distParamSad[1]);
+          }
+          else if (eTempType == ABOVE_NEIGHBOR)
+          {
+            uiCost += distParamSad[0].distFunc(distParamSad[0]);
+          }
+          else
+          {
+            assert(0);
+          }
+
+          if (uiCost < uiBestCost)
+          {
+            uiBestCost = uiCost;
+            iBestMode  = iMode;
+          }
+          if (uiBestCost <= maxCost)   // cost already at or below the threshold: stop testing
+          {
+            break;
+          }
+        }
+        cu.timdMode      = iBestMode;
+        cu.timdIsBlended = false;
+      }
+      if (bHorVer)
+      {
+        cu.timdHor = PLANAR_IDX;
+        cu.timdVer = PLANAR_IDX;
+      }
+      return iBestMode;   // still PLANAR_IDX when bFull is false, because the loop above did not run
+    }
+#if SECONDARY_MPM
+    uint8_t mpmList[NUM_MOST_PROBABLE_MODES];
+    uint8_t intraNonMPM[NUM_NON_MPM_MODES];
+    PU::getIntraMPMs(pu, mpmList, intraNonMPM);
+#else
+    unsigned mpmList[NUM_MOST_PROBABLE_MODES];
+    PU::getIntraMPMs(pu, mpmList);
+#endif
+    unsigned mpmExtraList[NUM_MOST_PROBABLE_MODES + 3];   // +DC/VER/HOR
+    int      maxModeNum      = NUM_MOST_PROBABLE_MODES;
+    unsigned modeCandList[3] = { DC_IDX, HOR_IDX, VER_IDX };
+    bool     bNotExist[3]    = { true, true, true };   // bNotExist[k]: modeCandList[k] is missing from the MPM list
+    for (int i = 0; i < NUM_MOST_PROBABLE_MODES; i++)
+    {
+      mpmExtraList[i] = mpmList[i];
+      if (bNotExist[0] && mpmList[i] == DC_IDX)
+      {
+        bNotExist[0] = false;
+      }
+      if (bNotExist[1] && mpmList[i] == HOR_IDX)
+      {
+        bNotExist[1] = false;
+      }
+      if (bNotExist[2] && mpmList[i] == VER_IDX)
+      {
+        bNotExist[2] = false;
+      }
+    }
+    for (int i = 0; i < 3; i++)
+    {
+      if (bNotExist[i])   // append whichever of DC/HOR/VER the MPM list lacked
+      {
+        mpmExtraList[maxModeNum++] = modeCandList[i];
+      }
+    }
+    bool updateFull = true;   // cleared once uiSecondaryCost <= maxCost; after that only the hor/ver search continues
+    for (int i = 0; i < maxModeNum; i++)
+    {
+      uint64_t uiCost    = 0;
+      int      iMode     = mpmExtraList[i];
+      uint64_t uiCostVer = -1;   // -1 converts to the maximum uint64_t value
+      uint64_t uiCostHor = -1;   // -1 converts to the maximum uint64_t value
+      uint64_t tmpCost0  = 0;
+      uint64_t tmpCost1  = 0;
+      if (iMode > DC_IDX)
+      {
+        iMode = MAP67TO131(iMode);
+      }
+      else
+      {
+        if (!bFull && bHorVer)   // hor/ver-only search ignores modes <= DC_IDX
+        {
+          continue;
+        }
+      }
+      initPredTimdIntraParams(pu, area, iMode);
+      predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType,
+                       (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth, (eTempType == LEFT_NEIGHBOR) ? 0 : iTempHeight);
+      if (eTempType == LEFT_ABOVE_NEIGHBOR)
+      {
+        if (bFull && updateFull)   // full search still active: measure both templates (tmpCost0 = above, tmpCost1 = left)
+        {
+          tmpCost0 = distParamSad[0].distFunc(distParamSad[0]);
+          tmpCost1 = distParamSad[1].distFunc(distParamSad[1]);
+        }
+        else   // otherwise measure only one template: above for iMode > EXT_DIA_IDX, left for the rest
+        {
+          if (iMode > EXT_DIA_IDX)
+          {
+            tmpCost0 = distParamSad[0].distFunc(distParamSad[0]);
+          }
+          else
+          {
+            tmpCost1 = distParamSad[1].distFunc(distParamSad[1]);
+          }
+        }
+      }
+      else if (eTempType == LEFT_NEIGHBOR)
+      {
+        tmpCost0 = distParamSad[1].distFunc(distParamSad[1]);   // here the LEFT cost lands in tmpCost0; tmpCost1 stays 0
+      }
+      else if (eTempType == ABOVE_NEIGHBOR)
+      {
+        tmpCost1 = distParamSad[0].distFunc(distParamSad[0]);   // here the ABOVE cost lands in tmpCost1; tmpCost0 stays 0
+      }
+      else
+      {
+        assert(0);
+      }
+
+      if (bFull && updateFull)
+      {
+        uiCost = tmpCost0 + tmpCost1;
+        if (uiCost < uiBestCost)   // new best: the previous best becomes the secondary candidate
+        {
+          uiSecondaryCost = uiBestCost;
+          iSecondaryMode  = iBestMode;
+          uiBestCost      = uiCost;
+          iBestMode       = iMode;
+        }
+        else if (uiCost < uiSecondaryCost)
+        {
+          uiSecondaryCost = uiCost;
+          iSecondaryMode  = iMode;
+        }
+        if (uiSecondaryCost <= maxCost)   // both best and secondary are at or below the threshold: stop the full search
+        {
+          updateFull = false;
+          if (!bHorVer)
+          {
+            break;
+          }
+        }
+      }
+      if (bHorVer && iMode > DC_IDX)
+      {
+        if (eTempType == LEFT_ABOVE_NEIGHBOR)
+        {
+          if (iMode > EXT_DIA_IDX)
+          {
+            uiCostVer += tmpCost0;   // NOTE(review): uiCostVer starts at max uint64_t, so this unsigned += wraps to tmpCost0 - 1 and stays at max when tmpCost0 == 0; confirm whether plain assignment was intended
+          }
+          else
+          {
+            uiCostHor += tmpCost1;   // NOTE(review): same wrap-around as above (result is tmpCost1 - 1, or max when tmpCost1 == 0)
+          }
+        }
+        else if (eTempType == LEFT_NEIGHBOR)
+        {
+          uiCostHor += tmpCost1;   // NOTE(review): tmpCost1 is 0 for LEFT_NEIGHBOR (the left cost was stored in tmpCost0), so uiCostHor stays at max and iBestModeHor is never updated; confirm
+        }
+        else if (eTempType == ABOVE_NEIGHBOR)
+        {
+          uiCostVer += tmpCost0;   // NOTE(review): tmpCost0 is 0 for ABOVE_NEIGHBOR (the above cost was stored in tmpCost1), so uiCostVer stays at max and iBestModeVer is never updated; confirm
+        }
+        if (uiCostHor < uiBestCostHor)
+        {
+          uiBestCostHor = uiCostHor;
+          iBestModeHor  = iMode;
+        }
+        if (uiCostVer < uiBestCostVer)
+        {
+          uiBestCostVer = uiCostVer;
+          iBestModeVer  = iMode;
+        }
+      }
+      
+    }
+
+    if(bFull)
+    {
+      int midMode = iBestMode;
+      if (midMode > DC_IDX && uiBestCost > maxCost)   // refine the best mode by testing its two adjacent modes (midMode - 1, midMode + 1)
+      {
+        for (int i = -1; i <= 1; i += 2)
+        {
+          int iMode = midMode + i;
+          if (iMode <= DC_IDX || iMode > EXT_VDIA_IDX)   // adjacent mode outside (DC_IDX, EXT_VDIA_IDX]: skip
+          {
+            continue;
+          }
+          initPredTimdIntraParams(pu, area, iMode);
+          predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType,
+                           (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth,
+                           (eTempType == LEFT_NEIGHBOR) ? 0 : iTempHeight);
+          uint64_t uiCost = 0;
+          if (eTempType == LEFT_ABOVE_NEIGHBOR)
+          {
+            uiCost += distParamSad[0].distFunc(distParamSad[0]);
+            uiCost += distParamSad[1].distFunc(distParamSad[1]);
+          }
+          else if (eTempType == LEFT_NEIGHBOR)
+          {
+            uiCost = distParamSad[1].distFunc(distParamSad[1]);
+          }
+          else if (eTempType == ABOVE_NEIGHBOR)
+          {
+            uiCost += distParamSad[0].distFunc(distParamSad[0]);
+          }
+          else
+          {
+            assert(0);
+          }
+
+          if (uiCost < uiBestCost)
+          {
+            uiBestCost = uiCost;
+            iBestMode  = iMode;
+          }
+          if (uiBestCost <= maxCost)
+          {
+            break;
+          }
+        }
+      }
+
+      midMode = iSecondaryMode;
+      if (midMode > DC_IDX && uiSecondaryCost > maxCost)   // same +/-1 refinement for the secondary mode
+      {
+        for (int i = -1; i <= 1; i += 2)
+        {
+          int iMode = midMode + i;
+          if (iMode <= DC_IDX || iMode > EXT_VDIA_IDX)
+          {
+            continue;
+          }
+          initPredTimdIntraParams(pu, area, iMode);
+          predTimdIntraAng(COMPONENT_Y, pu, iMode, piPred, uiPredStride, uiRealW, uiRealH, eTempType,
+                           (eTempType == ABOVE_NEIGHBOR) ? 0 : iTempWidth,
+                           (eTempType == LEFT_NEIGHBOR) ? 0 : iTempHeight);
+          uint64_t uiCost = 0;
+          if (eTempType == LEFT_ABOVE_NEIGHBOR)
+          {
+            uiCost += distParamSad[0].distFunc(distParamSad[0]);
+            uiCost += distParamSad[1].distFunc(distParamSad[1]);
+          }
+          else if (eTempType == LEFT_NEIGHBOR)
+          {
+            uiCost = distParamSad[1].distFunc(distParamSad[1]);
+          }
+          else if (eTempType == ABOVE_NEIGHBOR)
+          {
+            uiCost += distParamSad[0].distFunc(distParamSad[0]);
+          }
+          else
+          {
+            assert(0);
+          }
+
+          if (uiCost < uiSecondaryCost)
+          {
+            uiSecondaryCost = uiCost;
+            iSecondaryMode  = iMode;
+          }
+          if (uiSecondaryCost <= maxCost)
+          {
+            break;
+          }
+        }
+      }
+
+      // if( uiSecondaryCost < 2 * uiBestCost ), 2 * uiBestCost can overflow uint64_t
+      if (uiSecondaryCost < uiBestCost || (uiSecondaryCost - uiBestCost < uiBestCost))   // blend when the secondary cost is below twice the best cost; the first test covers a secondary that became cheaper than best during refinement
+      {
+        cu.timdMode          = iBestMode;
+        cu.timdIsBlended     = true;
+        cu.timdModeSecondary = iSecondaryMode;
+
+        const int blend_sum_weight = 6;
+        int       sum_weight       = 1 << blend_sum_weight;   // the two fusion weights sum to 64
+
+#if JVET_X0149_TIMD_DIMD_LUT
+        int      g_gradDivTable[16] = { 0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0 };   // local table indexed by norm_s1 below
+        uint64_t s0                 = uiSecondaryCost;
+        // uiBestCost + uiSecondaryCost can overflow uint64_t
+        uint64_t s1 = (MAX_UINT64 - uiSecondaryCost < uiBestCost) ? MAX_UINT64 : (uiBestCost + uiSecondaryCost);   // saturating sum
+        int      x  = floorLog2_uint64(s1);
+        CHECK(x < 0, "floor log2 value should be no negative");
+        int norm_s1 = int(s1 << 4 >> x) & 15;
+        int v       = g_gradDivTable[norm_s1] | 8;
+        x += (norm_s1 != 0);
+        int shift  = x + 3;
+        int add    = (1 << (shift - 1));
+        int iRatio = int((s0 * v * sum_weight + add) >> shift);   // table-based counterpart of the #else branch below (secondary / (best + secondary), scaled by sum_weight)
+
+        if (iRatio > sum_weight)   // clamp to sum_weight
+        {
+          iRatio = sum_weight;
+        }
+
+        CHECK(iRatio > sum_weight, "Wrong DIMD ratio");
+#else
+        double dRatio = 0.0;
+        dRatio        = (double) uiSecondaryCost / (double) (uiBestCost + uiSecondaryCost);
+        int iRatio    = static_cast<int>(dRatio * sum_weight + 0.5);
+#endif
+        cu.timdFusionWeight[0] = iRatio;                // derived from the secondary cost's share
+        cu.timdFusionWeight[1] = sum_weight - iRatio;   // remainder of sum_weight
+      }
+      else
+      {
+        cu.timdMode      = iBestMode;
+        cu.timdIsBlended = false;
+      }
+    }
+    if (bHorVer)
+    {
+      cu.timdHor = iBestModeHor;
+      cu.timdVer = iBestModeVer;
+    }
+
+    return iBestMode;
+  }
+  else   // NO_NEIGHBOR: no template available, fall back to PLANAR_IDX everywhere
+  {
+    if (bFull)
+    {
+      cu.timdMode      = PLANAR_IDX;
+      cu.timdIsBlended = false;
+    }
+    if (bHorVer)
+    {
+      cu.timdHor = PLANAR_IDX;
+      cu.timdVer = PLANAR_IDX;
+    }
+    return PLANAR_IDX;
+  }
+}
+#else   // SGPM
+
 int IntraPrediction::deriveTimdMode( const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu )
 {
   int channelBitDepth = cu.slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA);
@@ -4260,6 +5040,7 @@ int IntraPrediction::deriveTimdMode( const CPelBuf &recoBuf, const CompArea &are
     return PLANAR_IDX;
   }
 }
+#endif
 #if INTRA_TRANS_ENC_OPT
 void IntraPrediction::timdBlending(Pel *pDst, int strideDst, Pel *pSrc, int strideSrc, int w0, int w1, int width, int height)
 {
diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h
index 65225dff96000c1ccce2ed826e91ce7cc9c8805f..2a49bfa090905e8b443d3ac86f3263dcfa1f781e 100644
--- a/source/Lib/CommonLib/IntraPrediction.h
+++ b/source/Lib/CommonLib/IntraPrediction.h
@@ -48,7 +48,9 @@
 #endif
 
 #include "MatrixIntraPrediction.h"
-
+#if JVET_AB0155_SGPM
+#include "CommonLib/InterpolationFilter.h"
+#endif
 //! \ingroup CommonLib
 //! \{
 
@@ -164,6 +166,9 @@ public:
 protected:
   Pel      m_refBuffer[MAX_NUM_COMPONENT][NUM_PRED_BUF][(MAX_CU_SIZE * 2 + 1 + MAX_REF_LINE_IDX) * 2];
   uint32_t m_refBufferStride[MAX_NUM_COMPONENT];
+#if JVET_AB0155_SGPM
+  InterpolationFilter m_if;
+#endif
 
 private:
 
@@ -248,7 +253,9 @@ protected:
   ScanElement* m_scanOrder;
   bool         m_bestScanRotationMode;
   std::vector<PelStorage>   m_tempBuffer;
-
+#if JVET_AB0155_SGPM
+  std::vector<PelStorage>   m_sgpmBuffer;
+#endif
 #if JVET_V0130_INTRA_TMP
   int          m_uiPartLibSize;
   TempLibFast  m_tempLibFast;
@@ -267,8 +274,11 @@ protected:
 #else
   void xPredIntraAng              ( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const ClpRng& clpRng);
 #endif
-
+#if JVET_AB0155_SGPM
+  void initPredIntraParams(const PredictionUnit &pu, const CompArea compArea, const SPS &sps, const int partIdx = 0);
+#else
   void initPredIntraParams        ( const PredictionUnit & pu,  const CompArea compArea, const SPS& sps );
+#endif
 
   static bool isIntegerSlope(const int absAng) { return (0 == (absAng & 0x1F)); }
 #if JVET_W0123_TIMD_FUSION
@@ -288,8 +298,13 @@ protected:
 
   static int getModifiedWideAngle         ( int width, int height, int predMode );
 #if JVET_W0123_TIMD_FUSION
+#if JVET_AB0155_SGPM
+  static int getWideAngleExt(int width, int height, int predMode, bool bSgpm = false);
+#else
   static int getWideAngleExt      ( int width, int height, int predMode );
 #endif
+#endif
+
   void setReferenceArrayLengths   ( const CompArea &area );
 
   void destroy                    ();
@@ -362,9 +377,17 @@ public:
   void xIntraPredTimdAngPdpc(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height, int xOffset, int yOffset, int scale, int invAngle);
   void xFillTimdReferenceSamples  ( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu, int iTemplateWidth, int iTemplateHeight );
   Pel  xGetPredTimdValDc          ( const CPelBuf &pSrc, const Size &dstSize, TEMPLATE_TYPE eTempType, int iTempHeight, int iTempWidth );
+#if JVET_AB0155_SGPM
+  void initPredTimdIntraParams(const PredictionUnit &pu, const CompArea area, int dirMode, bool bSgpm = false);
+#else
   void initPredTimdIntraParams    (const PredictionUnit & pu, const CompArea area, int dirMode);
+#endif
   void predTimdIntraAng           ( const ComponentID compId, const PredictionUnit &pu, uint32_t uiDirMode, Pel* pPred, uint32_t uiStride, uint32_t iWidth, uint32_t iHeight, TEMPLATE_TYPE eTempType, int32_t iTemplateWidth, int32_t iTemplateHeight);
+#if JVET_AB0155_SGPM
+  int deriveTimdMode              ( const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu, bool bFull = true, bool bHorVer = false );
+#else
   int deriveTimdMode              ( const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu );
+#endif
   void initTimdIntraPatternLuma   (const CodingUnit &cu, const CompArea &area, int iTemplateWidth, int iTemplateHeight, uint32_t uiRefWidth, uint32_t uiRefHeight);
 #if GRAD_PDPC
   void xIntraPredTimdAngGradPdpc  (Pel* pDsty, const int dstStride, Pel* refMain, Pel* refSide, const int width, const int height, int xOffset, int yOffset, int scale, int deltaPos, int intraPredAngle, const ClpRng& clpRng);
@@ -377,6 +400,11 @@ public:
 #endif
 #endif
 #endif
+#if JVET_AB0155_SGPM
+  void deriveSgpmModeOrdered(const CPelBuf &recoBuf, const CompArea &area, CodingUnit &cu,
+                             static_vector<SgpmInfo, SGPM_NUM> &candModeList,
+                             static_vector<double, SGPM_NUM> &  candCostList);
+#endif
 #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING && JVET_Y0065_GPM_INTRA
 protected:
   bool    m_abFilledIntraGPMRefTpl[NUM_INTRA_MODE];
@@ -411,7 +439,14 @@ public:
   Pel xGlmGetLumaVal    (const int s[6], const int c[6], const int glmIdx, const Pel val) const;
 #endif
   /// set parameters from CU data for accessing intra data
-  void initIntraPatternChType     (const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag = false); // use forceRefFilterFlag to get both filtered and unfiltered buffers
+  
+#if JVET_AB0155_SGPM
+  void initIntraPatternChType(const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag = false,
+    const int partIdx = 0);   // use forceRefFilterFlag to get both filtered and unfiltered buffers
+#else   // SGPM
+  void initIntraPatternChType(
+    const CodingUnit &cu, const CompArea &area, const bool forceRefFilterFlag = false);   // use forceRefFilterFlag to get both filtered and unfiltered buffers
+#endif
   void initIntraPatternChTypeISP  (const CodingUnit& cu, const CompArea& area, PelBuf& piReco, const bool forceRefFilterFlag = false); // use forceRefFilterFlag to get both filtered and unfiltered buffers
 
   // Matrix-based intra prediction
diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp
index 4244a632d8e679d4a2b0425ce5e01be6615238b7..b17ede6742cedb5b73028195893deb3d1f874b10 100644
--- a/source/Lib/CommonLib/Rom.cpp
+++ b/source/Lib/CommonLib/Rom.cpp
@@ -5132,7 +5132,7 @@ void initGeoTemplate()
       modeIdx++;
     }
   }
-#if JVET_AA0058_GPM_ADP_BLD
+#if JVET_AA0058_GPM_ADP_BLD || JVET_AB0155_SGPM
   // initialization of blending weights
   for (int angleIdx = 0; angleIdx < (GEO_NUM_ANGLES >> 2) + 1; angleIdx++)
   {
@@ -5140,7 +5140,11 @@ void initGeoTemplate()
     {
       continue;
     }
+#if JVET_AB0155_SGPM
+    for (int bldIdx = 0; bldIdx < TOTAL_GEO_NUM_BLD; bldIdx++)
+#else
     for (int bldIdx = 0; bldIdx < GEO_NUM_BLD; bldIdx++)
+#endif
     {
       g_globalGeoWeights[bldIdx][g_angle2mask[angleIdx]] = new int16_t[GEO_WEIGHT_MASK_SIZE * GEO_WEIGHT_MASK_SIZE];
 
@@ -5179,7 +5183,7 @@ void initGeoTemplate()
     {
       continue;
     }
-#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
+#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM
     g_globalGeoWeightsTpl[g_angle2mask[angleIdx]] = new Pel[GEO_WEIGHT_MASK_SIZE_EXT * GEO_WEIGHT_MASK_SIZE_EXT];
 #endif
 #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
@@ -5193,7 +5197,7 @@ void initGeoTemplate()
     int16_t rho = (g_Dis[distanceX] << (GEO_MAX_CU_LOG2 + 1)) + (g_Dis[distanceY] << (GEO_MAX_CU_LOG2 + 1));
     static const int16_t maskOffset = (2 * GEO_MAX_CU_SIZE - GEO_WEIGHT_MASK_SIZE) >> 1;
     int index = 0;
-#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
+#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM
     int indexGeoWeight = 0;
     for (int y = -GEO_TM_ADDED_WEIGHT_MASK_SIZE; y < GEO_WEIGHT_MASK_SIZE + GEO_TM_ADDED_WEIGHT_MASK_SIZE; y++)
     {
@@ -5231,7 +5235,7 @@ void initGeoTemplate()
       continue;
     }
     g_globalGeoWeights[g_angle2mask[angleIdx]] = new int16_t[GEO_WEIGHT_MASK_SIZE * GEO_WEIGHT_MASK_SIZE];
-#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
+#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM
     g_globalGeoWeightsTpl[g_angle2mask[angleIdx]] = new Pel[GEO_WEIGHT_MASK_SIZE_EXT * GEO_WEIGHT_MASK_SIZE_EXT];
 #endif
 #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
@@ -5245,7 +5249,7 @@ void initGeoTemplate()
     int16_t rho = (g_Dis[distanceX] << (GEO_MAX_CU_LOG2+1)) + (g_Dis[distanceY] << (GEO_MAX_CU_LOG2 + 1));
     static const int16_t maskOffset = (2*GEO_MAX_CU_SIZE - GEO_WEIGHT_MASK_SIZE) >> 1;
     int index = 0;
-#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
+#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM
     int indexGeoWeight = 0;
     for( int y = -GEO_TM_ADDED_WEIGHT_MASK_SIZE; y < GEO_WEIGHT_MASK_SIZE + GEO_TM_ADDED_WEIGHT_MASK_SIZE; y++ )
     {
@@ -5309,18 +5313,50 @@ void initGeoTemplate()
       }
     }
   }
-
+#if JVET_AB0155_SGPM
+  for (int hIdx = 0; hIdx < GEO_NUM_CU_SIZE_EX; hIdx++)   // fill g_weightOffsetEx for every (height, width, splitDir) combination
+  {
+    int16_t height = 1 << (hIdx + GEO_MIN_CU_LOG2_EX);   // block height is a power of two starting at 1 << GEO_MIN_CU_LOG2_EX
+    for (int wIdx = 0; wIdx < GEO_NUM_CU_SIZE_EX; wIdx++)
+    {
+      int16_t width = 1 << (wIdx + GEO_MIN_CU_LOG2_EX);
+      for (int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; splitDir++)
+      {
+        int16_t angle    = g_GeoParams[splitDir][0];
+        int16_t distance = g_GeoParams[splitDir][1];
+        int16_t offsetX  = (GEO_WEIGHT_MASK_SIZE - width) >> 1;    // start from the block centred horizontally in the weight mask
+        int16_t offsetY  = (GEO_WEIGHT_MASK_SIZE - height) >> 1;   // start from the block centred vertically in the weight mask
+        if (distance > 0)   // non-zero distance: shift along exactly one axis
+        {
+          if (angle % 16 == 8 || (angle % 16 != 0 && height >= width))   // vertical shift; sign depends on angle < 16
+          {
+            offsetY += angle < 16 ? ((distance * (int32_t) height) >> 3) : -((distance * (int32_t) height) >> 3);
+          }
+          else   // horizontal shift; sign depends on angle < 16
+          {
+            offsetX += angle < 16 ? ((distance * (int32_t) width) >> 3) : -((distance * (int32_t) width) >> 3);
+          }
+        }
+        g_weightOffsetEx[splitDir][hIdx][wIdx][0] = offsetX;   // index 0 holds the X offset
+        g_weightOffsetEx[splitDir][hIdx][wIdx][1] = offsetY;   // index 1 holds the Y offset
+      }
+    }
+  }
+#endif
 
 }
 
 int16_t** g_GeoParams;
-#if JVET_AA0058_GPM_ADP_BLD
+#if JVET_AB0155_SGPM
+int16_t *g_globalGeoWeights[TOTAL_GEO_NUM_BLD][GEO_NUM_PRESTORED_MASK];
+int      g_bld2Width[TOTAL_GEO_NUM_BLD] = { 1, 2, 4, 8, 16, 32 };
+#elif JVET_AA0058_GPM_ADP_BLD
 int16_t*  g_globalGeoWeights[GEO_NUM_BLD][GEO_NUM_PRESTORED_MASK];
 int       g_bld2Width[GEO_NUM_BLD] = { 1, 2, 4, 8, 16 };
 #else
 int16_t*  g_globalGeoWeights   [GEO_NUM_PRESTORED_MASK];
 #endif
-#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
+#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM
 Pel*      g_globalGeoWeightsTpl[GEO_NUM_PRESTORED_MASK];
 #endif
 #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
@@ -5328,6 +5364,20 @@ Pel*      g_globalGeoEncSADmask[GEO_NUM_PRESTORED_MASK];
 #else
 int16_t*  g_globalGeoEncSADmask[GEO_NUM_PRESTORED_MASK];
 #endif
+#if JVET_AB0155_SGPM
+int16_t g_weightOffsetEx[GEO_NUM_PARTITION_MODE][GEO_NUM_CU_SIZE_EX][GEO_NUM_CU_SIZE_EX][2];   // [splitDir][hIdx][wIdx][0 = offsetX, 1 = offsetY]
+int8_t g_sgpm_splitDir[GEO_NUM_PARTITION_MODE] = {   // one 0/1 entry per partition mode; presumably 1 = split direction usable by SGPM - TODO confirm
+1,1,0,0,0,0,1,0,
+1,0,1,0,1,0,1,0,
+1,0,1,1,1,0,1,0,
+1,0,1,0,1,0,1,0,
+0,0,0,0,1,1,0,0,
+0,0,1,0,0,1,0,0,
+1,0,1,1,0,1,0,0,
+1,0,0,1,0,0,0,0
+};
+#endif
+
 int16_t   g_weightOffset       [GEO_NUM_PARTITION_MODE][GEO_NUM_CU_SIZE][GEO_NUM_CU_SIZE][2];
 int8_t    g_angle2mask[GEO_NUM_ANGLES] = { 0, -1, 1, 2, 3, 4, -1, -1, 5, -1, -1, 4, 3, 2, 1, -1, 0, -1, 1, 2, 3, 4, -1, -1, 5, -1, -1, 4, 3, 2, 1, -1 };
 int8_t    g_Dis[GEO_NUM_ANGLES] = { 8, 8, 8, 8, 4, 4, 2, 1, 0, -1, -2, -4, -4, -8, -8, -8, -8, -8, -8, -8, -4, -4, -2, -1, 0, 1, 2, 4, 4, 8, 8, 8 };
diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h
index b782d80f43558205873c2ff0d03596435dfcd961..ad83eea26461b138a12b4b2fd2db7b86ee4180ed 100644
--- a/source/Lib/CommonLib/Rom.h
+++ b/source/Lib/CommonLib/Rom.h
@@ -342,13 +342,16 @@ const int g_IBCBufferSize = 256 * 128;
 
 void initGeoTemplate();
 extern int16_t** g_GeoParams;
-#if JVET_AA0058_GPM_ADP_BLD
+#if JVET_AB0155_SGPM
+extern int16_t *g_globalGeoWeights[TOTAL_GEO_NUM_BLD][GEO_NUM_PRESTORED_MASK];
+extern int      g_bld2Width[TOTAL_GEO_NUM_BLD];
+#elif JVET_AA0058_GPM_ADP_BLD
 extern int16_t*  g_globalGeoWeights   [GEO_NUM_BLD][GEO_NUM_PRESTORED_MASK];
 extern int       g_bld2Width          [GEO_NUM_BLD];
 #else
 extern int16_t*  g_globalGeoWeights   [GEO_NUM_PRESTORED_MASK];
 #endif
-#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
+#if JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_AB0155_SGPM
 extern Pel*      g_globalGeoWeightsTpl[GEO_NUM_PRESTORED_MASK];
 #endif
 #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
@@ -360,6 +363,11 @@ extern int16_t   g_weightOffset       [GEO_NUM_PARTITION_MODE][GEO_NUM_CU_SIZE][
 extern int8_t    g_angle2mask         [GEO_NUM_ANGLES];
 extern int8_t    g_Dis[GEO_NUM_ANGLES];
 extern int8_t    g_angle2mirror[GEO_NUM_ANGLES];
+
+#if JVET_AB0155_SGPM
+extern int16_t   g_weightOffsetEx[GEO_NUM_PARTITION_MODE][GEO_NUM_CU_SIZE_EX][GEO_NUM_CU_SIZE_EX][2];
+extern int8_t g_sgpm_splitDir[GEO_NUM_PARTITION_MODE];
+#endif
 #if JVET_Y0065_GPM_INTRA
 extern int8_t    g_geoAngle2IntraAng  [GEO_NUM_ANGLES];
 #endif
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index 4e498324b34cd1bb678335c3437e10a8efd7da58..b15d84665caa77ec5fc0789a7d92d2cc7c8bca2c 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -325,6 +325,9 @@ class ConstraintInfo
 #if JVET_W0123_TIMD_FUSION
   bool              m_noTimdConstraintFlag;
 #endif
+#if JVET_AB0155_SGPM
+  bool              m_noSgpmConstraintFlag;
+#endif
 #if ENABLE_OBMC
   bool              m_noObmcConstraintFlag;
 #endif
@@ -635,6 +638,10 @@ public:
   bool          getNoTimdConstraintFlag() const { return m_noTimdConstraintFlag; }
   void          setNoTimdConstraintFlag(bool bVal) { m_noTimdConstraintFlag = bVal; }
 #endif
+#if JVET_AB0155_SGPM
+  bool          getNoSgpmConstraintFlag() const { return m_noSgpmConstraintFlag; }
+  void          setNoSgpmConstraintFlag(bool bVal) { m_noSgpmConstraintFlag = bVal; }
+#endif
 #if ENABLE_OBMC
   bool          getNoObmcConstraintFlag() const { return m_noObmcConstraintFlag; }
   void          setNoObmcConstraintFlag(bool bVal) { m_noObmcConstraintFlag = bVal; }
@@ -1717,6 +1724,9 @@ private:
 #if JVET_W0123_TIMD_FUSION
   bool              m_timd;
 #endif
+#if JVET_AB0155_SGPM
+  bool              m_sgpm;
+#endif
 #if JVET_V0130_INTRA_TMP
   bool              m_intraTMP;                                       ///< intra Template Matching 
   unsigned          m_intraTmpMaxSize;                               ///< max CU size for which intra TMP is allowed
@@ -2258,6 +2268,11 @@ void                    setCCALFEnabledFlag( bool b )
   void      setUseTimd         ( bool b )                                        { m_timd = b; }
   bool      getUseTimd         ()                                      const     { return m_timd; }
 #endif
+#if JVET_AB0155_SGPM
+  void      setUseSgpm         (bool b)                                          { m_sgpm = b; }
+  bool      getUseSgpm         ()                                      const     { return m_sgpm; }
+#endif
+
 #if ENABLE_OBMC
   void      setUseOBMC         ( bool b )                                        { m_OBMC = b; }
   bool      getUseOBMC         ()                                      const     { return m_OBMC; }
diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp
index f8b39827d48ff6fe169d73854f7c3c7af5261caa..aa7735cfb4e9a8cc7fea2d0361a3f427ea613516 100644
--- a/source/Lib/CommonLib/TrQuant.cpp
+++ b/source/Lib/CommonLib/TrQuant.cpp
@@ -502,6 +502,12 @@ void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID )
     {
       intraMode = PLANAR_IDX;
     }
+#if JVET_AB0155_SGPM
+    if (PU::isSgpm(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID)))
+    {
+      intraMode = g_geoAngle2IntraAng[g_GeoParams[tu.cu->sgpmSplitDir][0]];
+    }
+#endif
 #if JVET_V0130_INTRA_TMP
     if( PU::isTmp( *tu.cs->getPU( area.pos(), toChannelType( compID ) ), toChannelType( compID ) ) )
     {
@@ -734,6 +740,12 @@ void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, cons
       intraMode = PLANAR_IDX;
     }
 #endif
+#if JVET_AB0155_SGPM
+    if (PU::isSgpm(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID)))
+    {
+      intraMode = g_geoAngle2IntraAng[g_GeoParams[tu.cu->sgpmSplitDir][0]];
+    }
+#endif
 #if JVET_W0123_TIMD_FUSION
     if (tu.cu->timd && compID == COMPONENT_Y)
     {
@@ -1128,6 +1140,12 @@ void TrQuant::getTrTypes(const TransformUnit tu, const ComponentID compID, int &
       {
         predMode = MAP131TO67(predMode);
       }
+#endif
+#if JVET_AB0155_SGPM
+      if (tu.cu->sgpm)
+      {
+        predMode = g_geoAngle2IntraAng[g_GeoParams[tu.cu->sgpmSplitDir][0]];
+      }
 #endif
       int ucMode;
       int nMdIdx;
@@ -2202,6 +2220,12 @@ int TrQuant::getLfnstIdx(const TransformUnit &tu, ComponentID compID)
     intraMode = PLANAR_IDX;
   }
 #endif
+#if JVET_AB0155_SGPM
+  if (PU::isSgpm(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID)))
+  {
+    intraMode = g_geoAngle2IntraAng[g_GeoParams[tu.cu->sgpmSplitDir][0]];
+  }
+#endif
 #if JVET_W0123_TIMD_FUSION
   if (tu.cu->timd && compID == COMPONENT_Y)
   {
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 9f04d107643a32c94485b1207436faa2b86dd943..bfa93f969d4e21016310b87cef69476c3a7cbbca 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -146,7 +146,7 @@
 #define JVET_AB0174_CCCM_DIV_FREE                         1 // JVET-AB0174: CCCM with division free operation
 #endif
 #define JVET_AB0061_ITMP_BV_FOR_IBC                       1 // JVET-AB0061: Storing IntraTMP BV for IBC BV prediction
-
+#define JVET_AB0155_SGPM                                  1 // JVET-AB0155: spatial geometric partitioning mode
 
 //IBC
 #define JVET_Y0058_IBC_LIST_MODIFY                        1 // JVET-Y0058: Modifications of IBC merge/AMVP list construction, ARMC-TM-IBC part is included under JVET_W0090_ARMC_TM
@@ -1651,7 +1651,24 @@ enum RESHAPE_SIGNAL_TYPE
   RESHAPE_SIGNAL_NULL = 100,
 };
 
+#if JVET_AB0155_SGPM
+// Bundles the three values that describe one SGPM choice: a split direction index
+// and a pair of modes.
+struct SgpmInfo
+{
+  int sgpmSplitDir;   // split direction index
+  int sgpmMode0;      // first mode of the pair
+  int sgpmMode1;      // second mode of the pair
 
+  // Default: all three fields are zero.
+  SgpmInfo() : sgpmSplitDir(0), sgpmMode0(0), sgpmMode1(0) {}
+  // Field-by-field construction.
+  SgpmInfo(const int sd, const int sm0, const int sm1) : sgpmSplitDir(sd), sgpmMode0(sm0), sgpmMode1(sm1) {}
+  // Copy assignment is deliberately left implicit: the compiler-generated operator does the
+  // same member-wise copy, whereas a user-provided one deprecates the implicit copy
+  // constructor (-Wdeprecated-copy) and makes the struct non-trivially copyable.
+};
+#endif
 // ---------------------------------------------------------------------------
 // exception class
 // ---------------------------------------------------------------------------
diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp
index 86590433b8627bcf5e142ea15dc77d8669764ddf..77695d1ba500ac28a8ff0faf1e16c80d62c1db70 100644
--- a/source/Lib/CommonLib/Unit.cpp
+++ b/source/Lib/CommonLib/Unit.cpp
@@ -301,6 +301,15 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other )
   timdFusionWeight[0] = other.timdFusionWeight[0];
   timdFusionWeight[1] = other.timdFusionWeight[1];
 #endif
+#if JVET_AB0155_SGPM
+  timdHor      = other.timdHor;
+  timdVer      = other.timdVer;
+  sgpm         = other.sgpm;
+  sgpmIdx      = other.sgpmIdx;
+  sgpmSplitDir = other.sgpmSplitDir;
+  sgpmMode0    = other.sgpmMode0;
+  sgpmMode1    = other.sgpmMode1;
+#endif
 #if ENABLE_OBMC
   obmcFlag          = other.obmcFlag;
   isobmcMC          = other.isobmcMC;
@@ -400,6 +409,15 @@ void CodingUnit::initData()
   timdFusionWeight[0] = -1;
   timdFusionWeight[1] = -1;
 #endif
+#if JVET_AB0155_SGPM
+  timdHor      = -1;
+  timdVer      = -1;
+  sgpm         = false;
+  sgpmIdx      = -1;
+  sgpmSplitDir = -1;
+  sgpmMode0    = -1;
+  sgpmMode1    = -1;
+#endif
 #if ENABLE_OBMC
   obmcFlag          = true;
   isobmcMC          = false;
@@ -639,6 +657,10 @@ void PredictionUnit::initData()
 
   intraDir[0] = DC_IDX;
   intraDir[1] = PLANAR_IDX;
+#if JVET_AB0155_SGPM
+  intraDir1[0] = DC_IDX;
+  intraDir1[1] = PLANAR_IDX;
+#endif
 #if JVET_Z0050_DIMD_CHROMA_FUSION
   isChromaFusion = false;
 #endif
@@ -769,6 +791,9 @@ PredictionUnit& PredictionUnit::operator=(const IntraPredictionData& predData)
   for (uint32_t i = 0; i < MAX_NUM_CHANNEL_TYPE; i++)
   {
     intraDir[i] = predData.intraDir[i];
+#if JVET_AB0155_SGPM
+    intraDir1[i] = predData.intraDir1[i];
+#endif
   }
 #if JVET_Z0050_DIMD_CHROMA_FUSION
   isChromaFusion = predData.isChromaFusion;
@@ -904,6 +929,9 @@ PredictionUnit& PredictionUnit::operator=( const PredictionUnit& other )
   for( uint32_t i = 0; i < MAX_NUM_CHANNEL_TYPE; i++ )
   {
     intraDir[ i ] = other.intraDir[ i ];
+#if JVET_AB0155_SGPM
+    intraDir1[i] = other.intraDir1[i];
+#endif
   }
 #if JVET_Z0050_DIMD_CHROMA_FUSION
   isChromaFusion = other.isChromaFusion;
diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h
index e3ba3f5c7b1765e7dc772728fd4ed9a3fe458f50..270a98d8fc738d396766e903017c5c6d1cf5cba0 100644
--- a/source/Lib/CommonLib/Unit.h
+++ b/source/Lib/CommonLib/Unit.h
@@ -335,6 +335,15 @@ struct CodingUnit : public UnitArea
   bool timdIsBlended;
   int8_t timdFusionWeight[2];
 #endif
+#if JVET_AB0155_SGPM
+  int timdHor;        // offered as an SGPM MPM candidate when > DC_IDX (PU::getSgpmIntraMPMs); initData() sets -1
+  int timdVer;        // second such candidate, handled the same way as timdHor; initData() sets -1
+  bool sgpm;          // value returned by PU::isSgpm() for luma; initData() sets false
+  int  sgpmIdx;       // initData() sets -1; NOTE(review): presumably the index of the chosen SGPM candidate - confirm
+  int  sgpmSplitDir;  // used as the first index into g_GeoParams; initData() sets -1
+  int  sgpmMode0;     // one of the two modes PU::spanIpmInfoSgpm() writes into the IPM buffer; initData() sets -1
+  int  sgpmMode1;     // the other mode written by PU::spanIpmInfoSgpm(); initData() sets -1
+#endif
 #if ENABLE_OBMC
   bool           obmcFlag;
   bool           isobmcMC;
@@ -425,6 +434,9 @@ struct IntraPredictionData
   uint8_t intraNonMPM[NUM_NON_MPM_MODES];
 #endif
   uint8_t  intraDir[MAX_NUM_CHANNEL_TYPE];
+#if JVET_AB0155_SGPM
+  uint8_t intraDir1[MAX_NUM_CHANNEL_TYPE];
+#endif
 #if JVET_Z0050_DIMD_CHROMA_FUSION
   bool      isChromaFusion;
 #endif
diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp
index 3028d15f6cb83e4f5f7d6e1fcb2ab4d8bda04ef5..1fb99538d0c148935bfcd67999dbfe1832cb4d9a 100644
--- a/source/Lib/CommonLib/UnitTools.cpp
+++ b/source/Lib/CommonLib/UnitTools.cpp
@@ -1517,6 +1517,286 @@ void PU::getGeoIntraMPMs( const PredictionUnit &pu, uint8_t* mpm, uint8_t splitD
   mpm[numValidMPM] = PLANAR_IDX;
 }
 #endif
+#if JVET_AB0155_SGPM
+// Builds the SGPM MPM list in mpm[]: candidates are tried in a fixed priority order, duplicates are skipped,
+// and the function returns as soon as SGPM_NUM_MPM distinct modes are stored; PLANAR_IDX is the last fallback.
+void PU::getSgpmIntraMPMs(const PredictionUnit &pu, uint8_t *mpm, uint8_t splitDir, uint8_t shape)
+{
+  bool includedMode[NUM_INTRA_MODE] = {};   // true once a mode has been stored in mpm[]
+  int numValidMPM = 0;
+  CodingUnit *cu          = pu.cu;
+  bool timdDerived = !(cu->lwidth() * cu->lheight() > 1024);   // TIMD candidates only if lwidth * lheight <= 1024
+  if (timdDerived)
+  {
+    if (cu->timdHor > DC_IDX && includedMode[MAP131TO67(cu->timdHor)] == false)   // range check before indexing (timdHor may be -1)
+    {
+      mpm[numValidMPM] = MAP131TO67(cu->timdHor);
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+
+    if (cu->timdVer > DC_IDX && includedMode[MAP131TO67(cu->timdVer)] == false)   // range check before indexing (timdVer may be -1)
+    {
+      mpm[numValidMPM] = MAP131TO67(cu->timdVer);
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+  }
+  // Mode looked up in g_geoAngle2IntraAng for the angle parameter of splitDir.
+  mpm[numValidMPM] = g_geoAngle2IntraAng[g_GeoParams[splitDir][0]];
+  if (!includedMode[mpm[numValidMPM]])
+  {
+    includedMode[mpm[numValidMPM++]] = true;
+    if (numValidMPM == SGPM_NUM_MPM)
+    {
+      return;
+    }
+  }
+
+#if ENABLE_DIMD
+  if (cu->slice->getSPS()->getUseDimd())
+  {
+    if (cu->dimdMode != -1)
+    {
+      mpm[numValidMPM] = cu->dimdMode;
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+  }
+#endif
+  // Neighbouring positions probed below (relative to the luma block of pu).
+  const CompArea &area  = pu.block(COMPONENT_Y);
+  const Position  posA  = area.topRight().offset(0, -1);
+  const Position  posAR = area.topRight().offset(1, -1);
+  const Position  posL  = area.bottomLeft().offset(-1, 0);
+  const Position  posBL = area.bottomLeft().offset(-1, 1);
+  const Position  posAL = area.topLeft().offset(-1, -1);
+  // Intra-coded left / above neighbours; shape selects which sides are consulted.
+  if (shape == GEO_TM_SHAPE_L || shape == GEO_TM_SHAPE_AL)
+  {
+    const PredictionUnit *puLeft = pu.cs->getPURestricted(posL, pu, CHANNEL_TYPE_LUMA);
+    if (puLeft && CU::isIntra(*puLeft->cu))
+    {
+#if JVET_W0123_TIMD_FUSION
+      mpm[numValidMPM] = puLeft->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puLeft)) : PU::getIntraDirLuma(*puLeft);
+#else
+      mpm[numValidMPM] = PU::getIntraDirLuma(*puLeft);
+#endif
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+  }
+
+  if (shape == GEO_TM_SHAPE_A || shape == GEO_TM_SHAPE_AL)
+  {
+    const PredictionUnit *puAbove = pu.cs->getPURestricted(posA, pu, CHANNEL_TYPE_LUMA);
+    if (puAbove && CU::isIntra(*puAbove->cu))
+    {
+#if JVET_W0123_TIMD_FUSION
+      mpm[numValidMPM] = puAbove->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puAbove)) : PU::getIntraDirLuma(*puAbove);
+#else
+      mpm[numValidMPM] = PU::getIntraDirLuma(*puAbove);
+#endif
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+  }
+  // Inter-coded left / above neighbours contribute the value returned by getIpmInfo().
+#if JVET_W0123_TIMD_FUSION
+  if (shape == GEO_TM_SHAPE_L || shape == GEO_TM_SHAPE_AL)
+  {
+    const PredictionUnit *puLeft = pu.cs->getPURestricted(posL, pu, CHANNEL_TYPE_LUMA);
+    if (puLeft && CU::isInter(*puLeft->cu))
+    {
+      mpm[numValidMPM] = puLeft->getIpmInfo(posL);
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+  }
+
+  if (shape == GEO_TM_SHAPE_A || shape == GEO_TM_SHAPE_AL)
+  {
+    const PredictionUnit *puAbove = pu.cs->getPURestricted(posA, pu, CHANNEL_TYPE_LUMA);
+    if (puAbove && CU::isInter(*puAbove->cu))
+    {
+      mpm[numValidMPM] = puAbove->getIpmInfo(posA);
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+  }
+#endif
+  // Intra-coded below-left / above-right / above-left neighbours.
+  if (shape == GEO_TM_SHAPE_L || shape == GEO_TM_SHAPE_AL)
+  {
+    const PredictionUnit *puBelowLeft = pu.cs->getPURestricted(posBL, pu, CHANNEL_TYPE_LUMA);
+    if (puBelowLeft && CU::isIntra(*puBelowLeft->cu))
+    {
+#if JVET_W0123_TIMD_FUSION
+      mpm[numValidMPM] =
+        puBelowLeft->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puBelowLeft)) : PU::getIntraDirLuma(*puBelowLeft);
+#else
+      mpm[numValidMPM] = PU::getIntraDirLuma(*puBelowLeft);
+#endif
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+  }
+
+  if (shape == GEO_TM_SHAPE_A || shape == GEO_TM_SHAPE_AL)
+  {
+    const PredictionUnit *puAboveRight = pu.cs->getPURestricted(posAR, pu, CHANNEL_TYPE_LUMA);
+    if (puAboveRight && CU::isIntra(*puAboveRight->cu))
+    {
+#if JVET_W0123_TIMD_FUSION
+      mpm[numValidMPM] =
+        puAboveRight->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puAboveRight)) : PU::getIntraDirLuma(*puAboveRight);
+#else
+      mpm[numValidMPM] = PU::getIntraDirLuma(*puAboveRight);
+#endif
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+  }
+
+  {
+    const PredictionUnit *puAboveLeft = pu.cs->getPURestricted(posAL, pu, CHANNEL_TYPE_LUMA);
+    if (puAboveLeft && CU::isIntra(*puAboveLeft->cu))
+    {
+#if JVET_W0123_TIMD_FUSION
+      mpm[numValidMPM] =
+        puAboveLeft->cu->timd ? MAP131TO67(PU::getIntraDirLuma(*puAboveLeft)) : PU::getIntraDirLuma(*puAboveLeft);
+#else
+      mpm[numValidMPM] = PU::getIntraDirLuma(*puAboveLeft);
+#endif
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+  }
+  // Inter-coded below-left / above-right / above-left neighbours.
+#if JVET_W0123_TIMD_FUSION
+  if (shape == GEO_TM_SHAPE_L || shape == GEO_TM_SHAPE_AL)
+  {
+    const PredictionUnit *puBelowLeft = pu.cs->getPURestricted(posBL, pu, CHANNEL_TYPE_LUMA);
+    if (puBelowLeft && CU::isInter(*puBelowLeft->cu))
+    {
+      mpm[numValidMPM] = puBelowLeft->getIpmInfo(posBL);
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+  }
+
+  if (shape == GEO_TM_SHAPE_A || shape == GEO_TM_SHAPE_AL)
+  {
+    const PredictionUnit *puAboveRight = pu.cs->getPURestricted(posAR, pu, CHANNEL_TYPE_LUMA);
+    if (puAboveRight && CU::isInter(*puAboveRight->cu))
+    {
+      mpm[numValidMPM] = puAboveRight->getIpmInfo(posAR);
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+  }
+
+  {
+    const PredictionUnit *puAboveLeft = pu.cs->getPURestricted(posAL, pu, CHANNEL_TYPE_LUMA);
+    if (puAboveLeft && CU::isInter(*puAboveLeft->cu))
+    {
+      mpm[numValidMPM] = puAboveLeft->getIpmInfo(posAL);
+      if (!includedMode[mpm[numValidMPM]])
+      {
+        includedMode[mpm[numValidMPM++]] = true;
+        if (numValidMPM == SGPM_NUM_MPM)
+        {
+          return;
+        }
+      }
+    }
+  }
+#endif
+  // Mode 32 steps away from mpm[0] (downwards when mpm[0] > DIA_IDX, upwards otherwise).
+  mpm[numValidMPM] = (mpm[0] > DIA_IDX) ? (mpm[0] - 32) : (mpm[0] + 32);
+  if (!includedMode[mpm[numValidMPM]])
+  {
+    includedMode[mpm[numValidMPM++]] = true;
+    if (numValidMPM == SGPM_NUM_MPM)
+    {
+      return;
+    }
+  }
+  mpm[numValidMPM] = PLANAR_IDX;   // final fallback entry
+}
+#endif
 
 bool PU::isMIP(const PredictionUnit &pu, const ChannelType &chType)
 {
@@ -1681,6 +1961,48 @@ bool PU::hasChromaFusionFlag(const PredictionUnit &pu, int intraMode)
 }
 #endif
 
+#if JVET_AB0155_SGPM
+// Reports whether the given channel of pu counts as SGPM. Luma (also what callers get when
+// they omit chType) returns the CU-level sgpm flag; any other channel requires DM chroma
+// mode together with isDMChromaSgpm().
+bool PU::isSgpm(const PredictionUnit &pu, const ChannelType &chType)
+{
+  if (chType != CHANNEL_TYPE_LUMA)
+  {
+    // Non-luma query.
+    return isDMChromaSgpm(pu) && (pu.intraDir[CHANNEL_TYPE_CHROMA] == DM_CHROMA_IDX);
+  }
+  return pu.cu->sgpm;
+}
+bool PU::isDMChromaSgpm(const PredictionUnit &pu)   // pu is not inspected
+{
+  return false;   // unconditionally false, so the non-luma branch of PU::isSgpm() is always false
+}
+#endif
+
+#if JVET_AB0155_SGPM
+// Returns the luma intra direction of pu. MIP blocks (and intra-TMP blocks when
+// JVET_V0130_INTRA_TMP is enabled) are reported as PLANAR_IDX. Otherwise a non-zero
+// partIdx selects intraDir1 and partIdx == 0 selects intraDir.
+uint32_t PU::getIntraDirLuma(const PredictionUnit &pu, const int partIdx)
+{
+#if JVET_V0130_INTRA_TMP
+  const bool reportAsPlanar = isMIP(pu) || isTmp(pu);
+#else
+  const bool reportAsPlanar = isMIP(pu);
+#endif
+  if (reportAsPlanar)
+  {
+    return PLANAR_IDX;
+  }
+
+  // intraDir1 holds the direction returned for partIdx != 0.
+  const uint8_t lumaDir = (partIdx != 0) ? pu.intraDir1[CHANNEL_TYPE_LUMA]
+                                         : pu.intraDir[CHANNEL_TYPE_LUMA];
+  return lumaDir;
+}
+#else
+
 uint32_t PU::getIntraDirLuma( const PredictionUnit &pu )
 {
 #if JVET_V0130_INTRA_TMP
@@ -1696,6 +2018,7 @@ uint32_t PU::getIntraDirLuma( const PredictionUnit &pu )
     return pu.intraDir[CHANNEL_TYPE_LUMA];
   }
 }
+#endif
 
 void PU::getIntraChromaCandModes(const PredictionUnit &pu, unsigned modeList[NUM_CHROMA_MODE])
 {
@@ -1732,6 +2055,12 @@ void PU::getIntraChromaCandModes(const PredictionUnit &pu, unsigned modeList[NUM
   {
     return;
   }
+#if JVET_AB0155_SGPM
+  if (isDMChromaSgpm(pu))
+  {
+    return;
+  }
+#endif
 
   const uint32_t lumaMode = getCoLocatedIntraLumaMode(pu);
   for (int i = 0; i < 4; i++)
@@ -1824,8 +2153,32 @@ bool PU::isChromaIntraModeCrossCheckMode( const PredictionUnit &pu )
   return !pu.cu->bdpcmModeChroma && pu.intraDir[CHANNEL_TYPE_CHROMA] == DM_CHROMA_IDX;
 }
 
+#if JVET_AB0155_SGPM
+uint32_t PU::getFinalIntraMode(const PredictionUnit &pu, const ChannelType &chType, const int partIdx)
+{
+  if (partIdx)
+  {
+    uint32_t uiIntraMode = pu.intraDir[chType];
+    if (isLuma(chType))
+    {
+      uiIntraMode = pu.intraDir1[chType];
+    }
+
+    if (uiIntraMode == DM_CHROMA_IDX && !isLuma(chType))
+    {
+      uiIntraMode = getCoLocatedIntraLumaMode(pu, 1);
+    }
+    if (pu.chromaFormat == CHROMA_422 && !isLuma(chType)
+        && uiIntraMode < NUM_LUMA_MODE)   // map directional, planar and dc
+    {
+      uiIntraMode = g_chroma422IntraAngleMappingTable[uiIntraMode];
+    }
+    return uiIntraMode;
+  }
+#else
 uint32_t PU::getFinalIntraMode( const PredictionUnit &pu, const ChannelType &chType )
 {
+#endif
   uint32_t uiIntraMode = pu.intraDir[chType];
 
   if( uiIntraMode == DM_CHROMA_IDX && !isLuma( chType ) )
@@ -1860,6 +2213,18 @@ const PredictionUnit &PU::getCoLocatedLumaPU(const PredictionUnit &pu)
   return lumaPU;
 }
 
+#if JVET_AB0155_SGPM
+// Luma intra mode of the co-located luma PU; for a TIMD CU it is passed through MAP131TO67.
+uint32_t PU::getCoLocatedIntraLumaMode(const PredictionUnit &pu, const int partIdx)
+{
+  const PredictionUnit &lumaPU   = PU::getCoLocatedLumaPU(pu);
+  uint32_t              lumaMode = PU::getIntraDirLuma(lumaPU, partIdx);
+#if JVET_W0123_TIMD_FUSION
+  lumaMode = lumaPU.cu->timd ? MAP131TO67(lumaMode) : lumaMode;
+#endif
+  return lumaMode;
+}
+#else
 uint32_t PU::getCoLocatedIntraLumaMode(const PredictionUnit &pu)
 {
 #if JVET_W0123_TIMD_FUSION
@@ -1870,6 +2235,7 @@ uint32_t PU::getCoLocatedIntraLumaMode(const PredictionUnit &pu)
 #endif
   return PU::getIntraDirLuma(PU::getCoLocatedLumaPU(pu));
 }
+#endif
 
 int PU::getWideAngle( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID )
 {
@@ -12518,6 +12884,14 @@ void PU::spanMotionInfo2( PredictionUnit &pu, const MergeCtx &mrgCtx )
 
 void PU::spanIpmInfoIntra( PredictionUnit &pu)
 {
+#if JVET_AB0155_SGPM
+  if (pu.cu->sgpm)
+  {
+    spanIpmInfoSgpm(pu);
+    return;
+  }
+#endif
+
   int ipm = PU::getIntraDirLuma(pu);
   if (pu.cu->timd)
   {
@@ -12526,6 +12900,58 @@ void PU::spanIpmInfoIntra( PredictionUnit &pu)
   IpmBuf ib = pu.getIpmBuf();
   ib.fill(ipm);
 }
+#if JVET_AB0155_SGPM
+// Fills the IPM buffer of an SGPM pu: each position gets the CU's sgpmMode0 or sgpmMode1, selected by
+// the sign of a position-dependent linear expression built from the parameters of sgpmSplitDir.
+void PU::spanIpmInfoSgpm(PredictionUnit &pu)
+{
+  int sgpmMode0 = pu.cu->sgpmMode0;
+  int sgpmMode1 = pu.cu->sgpmMode1;
+  int splitDir  = pu.cu->sgpmSplitDir;
+
+  // g_GeoParams[splitDir][0] is used as the angle index, [1] as the distance index.
+  int16_t angle = g_GeoParams[splitDir][0];
+  int tpmMask = 0;
+  int lookUpY = 0, motionIdx = 0;
+  bool isFlip = angle >= 13 && angle <= 27;
+  int distanceIdx = g_GeoParams[splitDir][1];
+  int distanceX = angle;
+  int distanceY = (distanceX + (GEO_NUM_ANGLES >> 2)) % GEO_NUM_ANGLES;
+  int offsetX = (-(int)pu.lwidth()) >> 1;
+  int offsetY = (-(int)pu.lheight()) >> 1;
+
+  // A non-zero distance index shifts one of the two offsets by distanceIdx / 8 of that block dimension.
+  if (distanceIdx > 0)
+  {
+    if( angle % 16 == 8 || ( angle % 16 != 0 && pu.lheight() >= pu.lwidth() ) )
+    {
+      offsetY += angle < 16 ? ( ( distanceIdx * pu.lheight() ) >> 3 ) : -( int ) ( ( distanceIdx * pu.lheight() ) >> 3 );
+    }
+    else
+    {
+      offsetX += angle < 16 ? ( ( distanceIdx * pu.lwidth() ) >> 3 ) : -( int ) ( ( distanceIdx * pu.lwidth() ) >> 3 );
+    }
+  }
+
+  IpmBuf   ib      = pu.getIpmBuf();
+  uint8_t *ipmSgpm = ib.buf;   // start of the current buffer row
+
+  // NOTE(review): the 4 * x / 4 * y scaling suggests one buffer entry per 4x4 samples - confirm.
+  for (int y = 0; y < ib.height; y++)
+  {
+    lookUpY = (((4 * y + offsetY) << 1) + 5) * g_Dis[distanceY];
+    for (int x = 0; x < ib.width; x++)
+    {
+      motionIdx = (((4 * x + offsetX) << 1) + 5) * g_Dis[distanceX] + lookUpY;
+      tpmMask = motionIdx <= 0 ? (1 - isFlip) : isFlip;
+      // Index by x: writing through the bare row pointer would only ever touch column 0
+      // and leave every other entry of the row unassigned.
+      ipmSgpm[x] = (tpmMask == 0) ? sgpmMode0 : sgpmMode1;
+    }
+    ipmSgpm += ib.stride;   // advance to the next row
+  }
+}
+#endif
 
 #if RPR_ENABLE
 void scalePositionInRef( PredictionUnit& pu, const PPS& pps, RefPicList refList, int refIdx, Position& PosY )
diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h
index 9227d158ff35fa4cbd208110eb107ab5bfe85bc0..8b9494ac25c15d7ce8f6ff95b658e4f79bc1b63f 100644
--- a/source/Lib/CommonLib/UnitTools.h
+++ b/source/Lib/CommonLib/UnitTools.h
@@ -151,24 +151,43 @@ namespace PU
   void getGeoIntraMPMs( const PredictionUnit &pu, uint8_t* mpm, uint8_t splitDir, uint8_t shape, bool doInit, bool doInitAL = true, bool doInitA = true, bool doInitL = true);
 #endif
   void getGeoIntraMPMs( const PredictionUnit &pu, uint8_t* mpm, uint8_t splitDir, uint8_t shape );
+#endif
+#if JVET_AB0155_SGPM
+  void getSgpmIntraMPMs(const PredictionUnit &pu, uint8_t *mpm, uint8_t splitDir, uint8_t shape);
 #endif
   bool          isMIP                 (const PredictionUnit &pu, const ChannelType &chType = CHANNEL_TYPE_LUMA);
 #if JVET_V0130_INTRA_TMP
   bool          isTmp(const PredictionUnit& pu, const ChannelType& chType = CHANNEL_TYPE_LUMA);
 #endif
   bool          isDMChromaMIP         (const PredictionUnit &pu);
+#if JVET_AB0155_SGPM
+  bool isSgpm(const PredictionUnit &pu, const ChannelType &chType = CHANNEL_TYPE_LUMA);
+  bool isDMChromaSgpm(const PredictionUnit &pu);
+#endif
+#if JVET_AB0155_SGPM
+  uint32_t getIntraDirLuma(const PredictionUnit &pu, const int partIdx = 0);
+#else
   uint32_t      getIntraDirLuma       (const PredictionUnit &pu);
+#endif
   void getIntraChromaCandModes(const PredictionUnit &pu, unsigned modeList[NUM_CHROMA_MODE]);
 
   const PredictionUnit &getCoLocatedLumaPU(const PredictionUnit &pu);
+#if JVET_AB0155_SGPM
+  uint32_t getFinalIntraMode              (const PredictionUnit &pu, const ChannelType &chType, const int partIdx = 0);
+#else
   uint32_t getFinalIntraMode              (const PredictionUnit &pu, const ChannelType &chType);
+#endif
 #if JVET_W0119_LFNST_EXTENSION
   int      getLFNSTMatrixDim          ( int width, int height );
   bool     getUseLFNST8               ( int width, int height );
   uint8_t  getLFNSTIdx                ( int intraMode, int mtsMode = 0 );
   bool     getUseLFNST16              ( int width, int height );
 #endif
+#if JVET_AB0155_SGPM
+  uint32_t getCoLocatedIntraLumaMode(const PredictionUnit &pu, const int partIdx = 0);
+#else
   uint32_t getCoLocatedIntraLumaMode      (const PredictionUnit &pu);
+#endif
   int      getWideAngle                   ( const TransformUnit &tu, const uint32_t dirMode, const ComponentID compID );
 #if MULTI_PASS_DMVR || JVET_W0097_GPM_MMVD_TM
   uint32_t getBDMVRMvdThreshold       (const PredictionUnit &pu);
@@ -344,6 +363,9 @@ namespace PU
   void spanIpmInfoIntra               (      PredictionUnit &pu );
   void spanIpmInfoInter               (      PredictionUnit &pu, MotionBuf &mb, IpmBuf &ib );
 #endif
+#if JVET_AB0155_SGPM
+  void spanIpmInfoSgpm                (      PredictionUnit &pu);
+#endif
 #if !JVET_Z0054_BLK_REF_PIC_REORDER
   void applyImv                       (      PredictionUnit &pu, MergeCtx &mrgCtx, InterPrediction *interPred = NULL );
 #endif
diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
index 94a978ed68d72053d4d8abb661c894c8bb61550d..0149463d0b37780a3a18f8b7d53dba2c6be58b35 100644
--- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h
+++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
@@ -3459,6 +3459,717 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel
   }
 }
 
+#if JVET_AB0155_SGPM
+
+template<X86_VEXT vext>   // SIMD sum of absolute differences between reconstruction and prediction over the top and left templates
+int xSadTM_SSE(const PredictionUnit &pu, const int width, const int height, const int templateWidth,
+                                const int templateHeight, const ComponentID compIdx, PelBuf &predBuf, PelBuf &recBuf,
+                                PelBuf &adBuf)   // adBuf receives each |rec - pred|; pu and compIdx are not read in this function
+{   // returns the SAD of the top region (width x templateHeight) plus the left region (templateWidth x height)
+  int     sad        = 0;
+  int32_t iPredStride = predBuf.stride;
+  int32_t iRecStride  = recBuf.stride;
+  int32_t iAdStride   = adBuf.stride;
+
+  // top template: width x templateHeight samples, rows start templateWidth samples into predBuf/adBuf
+  Pel *piPred = predBuf.buf + templateWidth;
+  // start point of predBuf is (-templateWidth, -templateHeight) of current block
+  Pel *piAd  = adBuf.buf + templateWidth;
+  Pel *piRec = recBuf.buf - templateHeight * iRecStride;   // start point of recBuf is (0,0) of current block
+
+  if (width == 4)
+  {
+    __m128i vzero = _mm_setzero_si128();
+    __m128i vsum32 = vzero;
+    // one 64-bit load per row (4 samples with the 16-bit lanes the epi16 arithmetic uses); original note: for luma, to be confirmed
+    for (int y = 0; y < templateHeight; y++)
+    {
+      __m128i vPred = _mm_loadl_epi64((__m128i *) (piPred));
+      __m128i vRec  = _mm_loadl_epi64((__m128i *) (piRec));
+      __m128i vAd   = _mm_abs_epi16(_mm_sub_epi16(vRec, vPred));
+      _mm_storel_epi64((__m128i *) (piAd), vAd);
+      __m128i vsumtemp = _mm_unpacklo_epi16(vAd, vzero);   // zero-extend the 4 differences to 32 bits
+      vsum32           = _mm_add_epi32(vsum32, vsumtemp);
+      piPred += iPredStride;
+      piAd += iAdStride;
+      piRec += iRecStride;
+    }
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e));   // 0x4e = 01001110b: add the swapped 64-bit halves
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));   // 0xb1 = 10110001b: add the swapped adjacent 32-bit lanes
+    sad  = _mm_cvtsi128_si32(vsum32);
+  }
+#if USE_AVX2
+  else if (0 == (width % 16))
+  {
+    __m256i vzero  = _mm256_setzero_si256();
+    __m256i vsum32 = vzero;
+    for (int y = 0; y < templateHeight; y++)
+    {
+      __m256i vsum16 = vzero;   // per-row 16-bit accumulator; NOTE(review): assumes a row's partial sums fit in 16 bits - confirm for max width/bit depth
+      for (int x = 0; x < width; x += 16)
+      {
+        __m256i vPred = _mm256_lddqu_si256((__m256i *) (piPred + x));   // lddqu is an unaligned load; original note: why not aligned with 128/256 bit boundaries
+        __m256i vRec  = _mm256_lddqu_si256((__m256i *) (piRec + x));
+        __m256i vAd   = _mm256_abs_epi16(_mm256_sub_epi16(vRec, vPred));
+        _mm256_storeu_si256((__m256i *) (piAd + x), vAd);
+
+        vsum16 = _mm256_add_epi16(vsum16, vAd);
+      }
+      __m256i vsumtemp = _mm256_add_epi32(_mm256_unpacklo_epi16(vsum16, vzero), _mm256_unpackhi_epi16(vsum16, vzero));
+      vsum32           = _mm256_add_epi32(vsum32, vsumtemp);
+      piPred += iPredStride;
+      piAd += iAdStride;
+      piRec += iRecStride;
+    }
+    vsum32 = _mm256_hadd_epi32(vsum32, vzero);   // horizontal add inside each 128-bit lane
+    vsum32 = _mm256_hadd_epi32(vsum32, vzero);   // element 0 of each lane now holds that lane's total
+    sad  = _mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32))
+            + _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute2x128_si256(vsum32, vsum32, 0x11)));   // low-lane total + high-lane total
+  }
+#endif
+  else   // every other width goes 8 samples at a time; no scalar fallback here - assumes width is a multiple of 8, TODO confirm
+  {
+    __m128i vzero  = _mm_setzero_si128();
+    __m128i vsum32 = vzero;
+    for (int y = 0; y < templateHeight; y++)
+    {
+      __m128i vsum16 = vzero;   // per-row 16-bit accumulator, widened to 32 bits after the x loop
+      for (int x = 0; x < width; x += 8)
+      {
+        __m128i vPred = _mm_lddqu_si128((__m128i *) (piPred + x));
+        __m128i vRec  = _mm_lddqu_si128((__m128i *) (piRec + x));
+        __m128i vAd   = _mm_abs_epi16(_mm_sub_epi16(vRec, vPred));
+        _mm_storeu_si128((__m128i *) (piAd + x), vAd);
+        vsum16 = _mm_add_epi16(vsum16, vAd);
+      }
+      __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero));
+      vsum32           = _mm_add_epi32(vsum32, vsumtemp);
+      piPred += iPredStride;
+      piAd += iAdStride;
+      piRec += iRecStride;
+    }
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e));   // 0x4e = 01001110b: add the swapped 64-bit halves
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));   // 0xb1 = 10110001b: add the swapped adjacent 32-bit lanes
+    sad  = _mm_cvtsi128_si32(vsum32);
+  }
+
+  // left template: templateWidth x height samples, rows start templateHeight rows into predBuf/adBuf
+  piPred = predBuf.buf + templateHeight * iPredStride;
+  // start point of predBuf is (-templateWidth, -templateHeight) of current block
+  piAd  = adBuf.buf + templateHeight * iAdStride;
+  piRec = recBuf.buf - templateWidth;   // start point of recBuf is (0,0) of current block
+
+  if (templateWidth == 4)
+  {
+    __m128i vzero  = _mm_setzero_si128();
+    __m128i vsum32 = vzero;
+    // one 64-bit load per row, same scheme as the width == 4 top-template path; original note: for luma, to be confirmed
+    for (int y = 0; y < height; y++)
+    {
+      __m128i vPred = _mm_loadl_epi64((__m128i *) (piPred));
+      __m128i vRec  = _mm_loadl_epi64((__m128i *) (piRec));
+      __m128i vAd   = _mm_abs_epi16(_mm_sub_epi16(vRec, vPred));
+      _mm_storel_epi64((__m128i *) (piAd), vAd);
+      __m128i vsumtemp = _mm_unpacklo_epi16(vAd, vzero);
+      vsum32           = _mm_add_epi32(vsum32, vsumtemp);
+      piPred += iPredStride;
+      piAd += iAdStride;
+      piRec += iRecStride;
+    }
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e));   // 0x4e = 01001110b: add the swapped 64-bit halves
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));   // 0xb1 = 10110001b: add the swapped adjacent 32-bit lanes
+    sad    += _mm_cvtsi128_si32(vsum32);   // accumulate on top of the top-template SAD
+  }
+#if USE_AVX2
+  else if (0 == (templateWidth % 16))
+  {
+    __m256i vzero  = _mm256_setzero_si256();
+    __m256i vsum32 = vzero;
+    for (int y = 0; y < height; y++)
+    {
+      __m256i vsum16 = vzero;
+      for (int x = 0; x < templateWidth; x += 16)
+      {
+        __m256i vPred = _mm256_lddqu_si256((__m256i *) (piPred + x));   // lddqu is an unaligned load; original note: why not aligned with 128/256 bit boundaries
+        __m256i vRec  = _mm256_lddqu_si256((__m256i *) (piRec + x));
+        __m256i vAd   = _mm256_abs_epi16(_mm256_sub_epi16(vRec, vPred));
+        _mm256_storeu_si256((__m256i *) (piAd + x), vAd);
+
+        vsum16 = _mm256_add_epi16(vsum16, vAd);
+      }
+      __m256i vsumtemp = _mm256_add_epi32(_mm256_unpacklo_epi16(vsum16, vzero), _mm256_unpackhi_epi16(vsum16, vzero));
+      vsum32           = _mm256_add_epi32(vsum32, vsumtemp);
+      piPred += iPredStride;
+      piAd += iAdStride;
+      piRec += iRecStride;
+    }
+    vsum32 = _mm256_hadd_epi32(vsum32, vzero);   // horizontal add inside each 128-bit lane
+    vsum32 = _mm256_hadd_epi32(vsum32, vzero);   // element 0 of each lane now holds that lane's total
+    sad    += _mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32))
+          + _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute2x128_si256(vsum32, vsum32, 0x11)));
+  }
+#endif
+  else if (0 == (templateWidth % 8))
+  {
+    __m128i vzero  = _mm_setzero_si128();
+    __m128i vsum32 = vzero;
+    for (int y = 0; y < height; y++)
+    {
+      __m128i vsum16 = vzero;
+      for (int x = 0; x < templateWidth; x += 8)
+      {
+        __m128i vPred = _mm_lddqu_si128((__m128i *) (piPred + x));
+        __m128i vRec  = _mm_lddqu_si128((__m128i *) (piRec + x));
+        __m128i vAd   = _mm_abs_epi16(_mm_sub_epi16(vRec, vPred));
+        _mm_storeu_si128((__m128i *) (piAd + x), vAd);
+        vsum16 = _mm_add_epi16(vsum16, vAd);
+      }
+      __m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero));
+      vsum32           = _mm_add_epi32(vsum32, vsumtemp);
+      piPred += iPredStride;
+      piAd += iAdStride;
+      piRec += iRecStride;
+    }
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e));   // 0x4e = 01001110b: add the swapped 64-bit halves
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));   // 0xb1 = 10110001b: add the swapped adjacent 32-bit lanes
+    sad  += _mm_cvtsi128_si32(vsum32);
+  }
+  else   // scalar fallback for template widths that are neither 4 nor a multiple of 8
+  {
+    for (int y = 0; y < height; y++)
+    {
+      for (int x = 0; x < templateWidth; x++)
+      {
+        *piAd = abs(*piRec - *piPred);
+        sad += *piAd;
+        piRec++;
+        piPred++;
+        piAd++;
+      }
+
+      piPred += (iPredStride - templateWidth);   // rewind the templateWidth steps taken above, then advance one row
+      piAd += (iAdStride - templateWidth);
+      piRec += (iRecStride - templateWidth);
+    }
+  }
+
+  return sad;
+}
+
+template<X86_VEXT vext>   // SIMD weighted template cost: sum of adBuf values times per-sample weights from g_globalGeoWeightsTpl
+int xSgpmSadTM_SSE(const PredictionUnit &pu, const int width, const int height, const int templateWidth,
+                    const int templateHeight, const ComponentID compIdx, const uint8_t splitDir, PelBuf &adBuf)   // adBuf is only read here
+{   // returns the weighted sum over the top (width x templateHeight) and left (templateWidth x height) template regions
+  int      sum        = 0;
+  int16_t  wIdx   = floorLog2(pu.lwidth()) - GEO_MIN_CU_LOG2_EX;
+  int16_t  hIdx   = floorLog2(pu.lheight()) - GEO_MIN_CU_LOG2_EX;
+  int16_t  angle  = g_GeoParams[splitDir][0];
+  int16_t  stepY  = 0;
+  int16_t  stepX  = 1;
+  int16_t *weightMask = nullptr;
+
+  if (g_angle2mirror[angle] == 2)   // mirror type 2: mask rows are walked backwards (negative stepY)
+  {
+    stepY = -GEO_WEIGHT_MASK_SIZE_EXT;
+    weightMask = &g_globalGeoWeightsTpl[g_angle2mask[angle]]
+                                       [(GEO_WEIGHT_MASK_SIZE_EXT - 1
+                                                       - g_weightOffsetEx[splitDir][hIdx][wIdx][1] - GEO_TM_ADDED_WEIGHT_MASK_SIZE)
+                                                        * GEO_WEIGHT_MASK_SIZE_EXT
+                                                      + g_weightOffsetEx[splitDir][hIdx][wIdx][0] + GEO_TM_ADDED_WEIGHT_MASK_SIZE];
+  }
+  else if (g_angle2mirror[angle] == 1)   // mirror type 1: mask columns are walked backwards (stepX = -1)
+  {
+    stepX  = -1;
+    stepY  = GEO_WEIGHT_MASK_SIZE_EXT;
+    weightMask = &g_globalGeoWeightsTpl[g_angle2mask[angle]]
+                                     [(g_weightOffsetEx[splitDir][hIdx][wIdx][1] + GEO_TM_ADDED_WEIGHT_MASK_SIZE)
+                                        * GEO_WEIGHT_MASK_SIZE_EXT
+                                      + (GEO_WEIGHT_MASK_SIZE_EXT - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][0]
+                                         - GEO_TM_ADDED_WEIGHT_MASK_SIZE)];
+  }
+  else   // no mirroring: mask is walked forwards in both directions
+  {
+    stepY = GEO_WEIGHT_MASK_SIZE_EXT;
+    weightMask = &g_globalGeoWeightsTpl[g_angle2mask[angle]]
+                                       [(g_weightOffsetEx[splitDir][hIdx][wIdx][1] + GEO_TM_ADDED_WEIGHT_MASK_SIZE)
+                                                        * GEO_WEIGHT_MASK_SIZE_EXT
+                                                      + g_weightOffsetEx[splitDir][hIdx][wIdx][0] + GEO_TM_ADDED_WEIGHT_MASK_SIZE];
+  }
+
+  int32_t iAdStride = adBuf.stride;
+
+  if (compIdx != COMPONENT_Y && pu.chromaFormat == CHROMA_420)
+  {
+    stepY <<= 1;   // 4:2:0 chroma: advance two mask rows per sample row
+  }
+
+  // top template
+  Pel *piAd = adBuf.buf + templateWidth;   // start point of adBuf is (-templateWidth, -templateHeight) of current block
+  int16_t *weightBackup = weightMask;   // mask position of the block origin, reused for the left template
+  weightMask            = weightMask - templateHeight * stepY;   // step back templateHeight mask rows to the first template row
+  if (width == 4)
+  {
+    __m128i vzero  = _mm_setzero_si128();
+    __m128i vsum32 = vzero;
+    for (int y = 0; y < templateHeight; y++)
+    {
+      __m128i vAd = _mm_loadl_epi64((__m128i *) (piAd));
+      __m128i vMask;
+
+      if (g_angle2mirror[angle] == 1)
+      {
+        vMask                      = _mm_loadl_epi64((__m128i *) (weightMask - (4 - 1)));   // the 4 weights ending at weightMask
+        const __m128i shuffle_mask = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 1, 0, 3, 2, 5, 4, 7, 6);   // reverses the 4 low 16-bit words
+        vMask                      = _mm_shuffle_epi8(vMask, shuffle_mask);
+      }
+      else
+      {
+        vMask = _mm_loadl_epi64((__m128i *) weightMask);
+      }
+      vsum32 = _mm_add_epi32(vsum32, _mm_madd_epi16(vMask, vAd));   // multiply 16-bit pairs and add adjacent products into 32-bit lanes
+
+      piAd += iAdStride;
+      weightMask += stepY;
+    }
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));   // 0xb1 = 10110001b: lane0 + lane1 (upper lanes are zero after the 64-bit loads)
+    sum    = _mm_cvtsi128_si32(vsum32);
+  }
+#if USE_AVX2
+  else if (0 == (width % 16))
+  {
+    __m256i vzero  = _mm256_setzero_si256();
+    __m256i vsum32 = vzero;
+    for (int y = 0; y < templateHeight; y++)
+    {
+      for (int x = 0; x < width; x += 16)
+      {
+        __m256i vAd = _mm256_lddqu_si256((__m256i *) (piAd + x));   
+
+        __m256i vMask;
+
+        if (g_angle2mirror[angle] == 1)
+        {
+          vMask                         = _mm256_lddqu_si256((__m256i *) (weightMask - x - (16 - 1)));   // the 16 weights ending at weightMask - x
+          const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2,
+                                                       5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+          vMask                         = _mm256_shuffle_epi8(vMask, shuffle_mask);   // reverse the 8 words inside each 128-bit lane
+          vMask                         = _mm256_permute4x64_epi64(vMask, _MM_SHUFFLE(1, 0, 3, 2));   // swap the two lanes to finish the 16-word reversal
+        }
+        else
+        {
+          vMask = _mm256_lddqu_si256((__m256i *) (weightMask + x));
+        }
+        vsum32 = _mm256_add_epi32(vsum32, _mm256_madd_epi16(vMask, vAd));
+      }
+      piAd += iAdStride;
+      weightMask += stepY;
+    }
+    vsum32 = _mm256_hadd_epi32(vsum32, vzero);   // horizontal add inside each 128-bit lane
+    vsum32 = _mm256_hadd_epi32(vsum32, vzero);   // element 0 of each lane now holds that lane's total
+    sum    = _mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32))
+          + _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute2x128_si256(vsum32, vsum32, 0x11)));
+  }
+#endif
+  else   // every other width goes 8 samples at a time; no scalar fallback here - assumes width is a multiple of 8, TODO confirm
+  {
+    __m128i vzero  = _mm_setzero_si128();
+    __m128i vsum32 = vzero;
+    for (int y = 0; y < templateHeight; y++)
+    {
+      for (int x = 0; x < width; x += 8)
+      {
+        __m128i vAd = _mm_lddqu_si128((__m128i *) (piAd + x));
+        __m128i vMask;
+
+        if (g_angle2mirror[angle] == 1)
+        {
+          vMask                         = _mm_lddqu_si128((__m128i *) (weightMask - x - (8 - 1)));   // the 8 weights ending at weightMask - x
+          const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);   // reverses all 8 words
+          vMask                         = _mm_shuffle_epi8(vMask, shuffle_mask);
+        }
+        else
+        {
+          vMask = _mm_lddqu_si128((__m128i *) (weightMask + x));
+        }
+        vsum32     = _mm_add_epi32(vsum32, _mm_madd_epi16(vMask, vAd));
+      }
+      piAd += iAdStride;
+      weightMask += stepY;
+    }
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e));   // 0x4e = 01001110b: add the swapped 64-bit halves
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));   // 0xb1 = 10110001b: add the swapped adjacent 32-bit lanes
+    sum    = _mm_cvtsi128_si32(vsum32);
+  }
+
+  // left template: rows start templateHeight rows into adBuf; mask starts templateWidth columns (in stepX direction) before the origin
+  piAd   = adBuf.buf + templateHeight * iAdStride;
+  weightMask = weightBackup - templateWidth * stepX;
+  if (templateWidth == 4)
+  {
+    __m128i vzero  = _mm_setzero_si128();
+    __m128i vsum32 = vzero;
+    for (int y = 0; y < height; y++)
+    {
+      __m128i vAd = _mm_loadl_epi64((__m128i *) (piAd));
+      __m128i vMask;
+
+      if (g_angle2mirror[angle] == 1)
+      {
+        vMask                      = _mm_loadl_epi64((__m128i *) (weightMask - (4 - 1)));   // the 4 weights ending at weightMask
+        const __m128i shuffle_mask = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 1, 0, 3, 2, 5, 4, 7, 6);   // reverses the 4 low 16-bit words
+        vMask                      = _mm_shuffle_epi8(vMask, shuffle_mask);
+      }
+      else
+      {
+        vMask = _mm_loadl_epi64((__m128i *) weightMask);
+      }
+      vsum32 = _mm_add_epi32(vsum32, _mm_madd_epi16(vMask, vAd));
+
+      piAd += iAdStride;
+      weightMask += stepY;
+    }
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));   // 0xb1 = 10110001b: lane0 + lane1 (upper lanes are zero after the 64-bit loads)
+    sum    += _mm_cvtsi128_si32(vsum32);   // accumulate on top of the top-template sum
+  }
+#if USE_AVX2
+  else if (0 == (templateWidth % 16))
+  {
+    __m256i vzero  = _mm256_setzero_si256();
+    __m256i vsum32 = vzero;
+    for (int y = 0; y < height; y++)
+    {
+      for (int x = 0; x < templateWidth; x += 16)
+      {
+        __m256i vAd = _mm256_lddqu_si256((__m256i *) (piAd + x));
+
+        __m256i vMask;
+
+        if (g_angle2mirror[angle] == 1)
+        {
+          vMask                      = _mm256_lddqu_si256((__m256i *) (weightMask - x - (16 - 1)));   // the 16 weights ending at weightMask - x
+          const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3, 2,
+                                                       5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+          vMask                      = _mm256_shuffle_epi8(vMask, shuffle_mask);   // reverse the 8 words inside each 128-bit lane
+          vMask                      = _mm256_permute4x64_epi64(vMask, _MM_SHUFFLE(1, 0, 3, 2));   // swap the two lanes to finish the 16-word reversal
+        }
+        else
+        {
+          vMask = _mm256_lddqu_si256((__m256i *) (weightMask + x));
+        }
+        vsum32 = _mm256_add_epi32(vsum32, _mm256_madd_epi16(vMask, vAd));
+      }
+      piAd += iAdStride;
+      weightMask += stepY;
+    }
+    vsum32 = _mm256_hadd_epi32(vsum32, vzero);   // horizontal add inside each 128-bit lane
+    vsum32 = _mm256_hadd_epi32(vsum32, vzero);   // element 0 of each lane now holds that lane's total
+    sum    += _mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32))
+          + _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute2x128_si256(vsum32, vsum32, 0x11)));
+  }
+#endif
+  else if (0 == (templateWidth % 8))
+  {
+    __m128i vzero  = _mm_setzero_si128();
+    __m128i vsum32 = vzero;
+    for (int y = 0; y < height; y++)
+    {
+      for (int x = 0; x < templateWidth; x += 8)
+      {
+        __m128i vAd = _mm_lddqu_si128((__m128i *) (piAd + x));
+        __m128i vMask;
+
+        if (g_angle2mirror[angle] == 1)
+        {
+          vMask                      = _mm_lddqu_si128((__m128i *) (weightMask - x - (8 - 1)));   // the 8 weights ending at weightMask - x
+          const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);   // reverses all 8 words
+          vMask                      = _mm_shuffle_epi8(vMask, shuffle_mask);
+        }
+        else
+        {
+          vMask = _mm_lddqu_si128((__m128i *) (weightMask + x));
+        }
+        vsum32 = _mm_add_epi32(vsum32, _mm_madd_epi16(vMask, vAd));
+      }
+      piAd += iAdStride;
+      weightMask += stepY;
+    }
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e));   // 0x4e = 01001110b: add the swapped 64-bit halves
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));   // 0xb1 = 10110001b: add the swapped adjacent 32-bit lanes
+    sum    += _mm_cvtsi128_si32(vsum32);
+  }
+  else   // scalar fallback for template widths that are neither 4 nor a multiple of 8
+  {
+    for (int y = 0; y < height; y++)
+    {
+      for (int x = 0; x < templateWidth; x++)
+      {
+        sum += *piAd * (*weightMask);
+        piAd++;
+        weightMask += stepX;   // +1 or -1 depending on the mirror type
+      }
+
+      piAd += (iAdStride - templateWidth);
+      weightMask += (stepY - templateWidth * stepX);   // undo the templateWidth column steps, then move one mask row
+    }
+  }
+
+  return sum;
+}
+
+template<X86_VEXT vext>   // SIMD blend of two predictions: dst = clip((src0 * w + src1 * (32 - w) + 16) >> 5), w read from g_globalGeoWeights
+void xWeightedSgpm_SSE(const PredictionUnit &pu, const uint32_t width, const uint32_t height,
+                         const ComponentID compIdx, const uint8_t splitDir, PelBuf &predDst, PelBuf &predSrc0,
+                         PelBuf &predSrc1)   // predDst is written; predSrc0/predSrc1 are only read; splitDir selects the weight mask and its offset
+{
+  Pel *   dst        = predDst.buf;
+  Pel *   src0       = predSrc0.buf;
+  Pel *   src1       = predSrc1.buf;
+  int32_t strideDst  = predDst.stride;
+  int32_t strideSrc0 = predSrc0.stride;
+  int32_t strideSrc1 = predSrc1.stride;
+
+  //const char   log2WeightBase = 3;
+  const ClpRng clpRng         = pu.cu->slice->clpRngs().comp[compIdx];   // output is clamped to [clpRng.min, clpRng.max]
+
+  const int32_t shiftWeighted  = 5;
+  const int32_t offsetWeighted = 16;   // rounding offset, half of 1 << shiftWeighted
+  int16_t  wIdx   = floorLog2(pu.lwidth()) - GEO_MIN_CU_LOG2_EX;
+  int16_t  hIdx   = floorLog2(pu.lheight()) - GEO_MIN_CU_LOG2_EX;
+  int16_t  angle  = g_GeoParams[splitDir][0];
+  int16_t  stepY  = 0;
+  int16_t *weight = nullptr;
+  
+  if (g_angle2mirror[angle] == 2)   // mirror type 2: weight rows are walked backwards (negative stepY)
+  {
+    stepY = -GEO_WEIGHT_MASK_SIZE;
+    weight =
+      &g_globalGeoWeights[GET_SGPM_BLD_IDX(pu.lwidth(), pu.lheight())][g_angle2mask[angle]]
+                         [(GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][1])
+                            * GEO_WEIGHT_MASK_SIZE
+                          + g_weightOffsetEx[splitDir][hIdx][wIdx][0]];
+  }
+  else if (g_angle2mirror[angle] == 1)   // mirror type 1: weight columns are read backwards (see the reversed loads below)
+  {
+    stepY = GEO_WEIGHT_MASK_SIZE;
+    weight =
+      &g_globalGeoWeights[GET_SGPM_BLD_IDX(pu.lwidth(), pu.lheight())][g_angle2mask[angle]]
+                         [g_weightOffsetEx[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE
+                          + (GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffsetEx[splitDir][hIdx][wIdx][0])];
+  }
+  else   // no mirroring
+  {
+    stepY = GEO_WEIGHT_MASK_SIZE;
+    weight =
+      &g_globalGeoWeights[GET_SGPM_BLD_IDX(pu.lwidth(), pu.lheight())][g_angle2mask[angle]]
+                         [g_weightOffsetEx[splitDir][hIdx][wIdx][1] * GEO_WEIGHT_MASK_SIZE
+                          + g_weightOffsetEx[splitDir][hIdx][wIdx][0]];
+  }
+  const __m128i mmEight = _mm_set1_epi16(32);   // holds 32 (= 1 << shiftWeighted) despite its name; w1 = 32 - w0
+  const __m128i mmOffset = _mm_set1_epi32(offsetWeighted);
+  const __m128i mmShift  = _mm_cvtsi32_si128(shiftWeighted);
+  const __m128i mmMin    = _mm_set1_epi16(clpRng.min);
+  const __m128i mmMax    = _mm_set1_epi16(clpRng.max);
+
+  if (compIdx != COMPONENT_Y && pu.chromaFormat == CHROMA_420)
+    stepY <<= 1;   // 4:2:0 chroma: advance two weight rows per sample row
+  if (width == 4)
+  {
+    // 4 samples per row via 64-bit loads; original note: for luma, to be confirmed
+    for (int y = 0; y < height; y++)
+    {
+      __m128i s0 = _mm_loadl_epi64((__m128i *) (src0));
+      __m128i s1 = _mm_loadl_epi64((__m128i *) (src1));
+      __m128i w0;
+      if (compIdx != COMPONENT_Y && pu.chromaFormat != CHROMA_444)   // horizontally subsampled chroma: use every other weight
+      {
+        if (g_angle2mirror[angle] == 1)
+        {
+          w0                         = _mm_loadu_si128((__m128i *) (weight - (8 - 1)));   // the 8 weights ending at weight
+          const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);   // reverses all 8 words
+          w0                         = _mm_shuffle_epi8(w0, shuffle_mask);
+        }
+        else
+        {
+          w0 = _mm_loadu_si128((__m128i *) (weight));
+        }
+        w0         = _mm_shuffle_epi8(w0, _mm_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0));   // keep words 0, 2, 4, 6 in the low half
+      }
+      else
+      {
+        if (g_angle2mirror[angle] == 1)
+        {
+          w0                         = _mm_loadl_epi64((__m128i *) (weight - (4 - 1)));   // the 4 weights ending at weight
+          const __m128i shuffle_mask = _mm_set_epi8(15,14,13,12,11,10,9,8,1,0,3,2,5,4,7,6);   // reverses the 4 low 16-bit words
+          w0                         = _mm_shuffle_epi8(w0, shuffle_mask);
+        }
+        else
+        {
+          w0 = _mm_loadl_epi64((__m128i *) weight);
+        }
+      }
+      
+      __m128i w1 = _mm_sub_epi16(mmEight, w0);
+      s0         = _mm_unpacklo_epi16(s0, s1);   // interleave (s0, s1) and (w0, w1) so madd yields s0*w0 + s1*w1 per sample
+      w0         = _mm_unpacklo_epi16(w0, w1);
+      s0         = _mm_add_epi32(_mm_madd_epi16(s0, w0), mmOffset);
+      s0         = _mm_sra_epi32(s0, mmShift);
+      s0         = _mm_packs_epi32(s0, s0);
+      s0         = _mm_min_epi16(mmMax, _mm_max_epi16(s0, mmMin));
+      _mm_storel_epi64((__m128i *) (dst), s0);
+      dst += strideDst;
+      src0 += strideSrc0;
+      src1 += strideSrc1;
+      weight += stepY;
+    }
+  }
+#if USE_AVX2
+  else if (0 == (width % 16))
+  {
+    const __m256i mmEightAVX2 = _mm256_set1_epi16(32);   // holds 32, same role as mmEight
+    const __m256i mmOffsetAVX2 = _mm256_set1_epi32(offsetWeighted);
+    const __m256i mmMinAVX2    = _mm256_set1_epi16(clpRng.min);
+    const __m256i mmMaxAVX2    = _mm256_set1_epi16(clpRng.max);
+    for (int y = 0; y < height; y++)
+    {
+      for (int x = 0; x < width; x += 16)
+      {
+        __m256i s0 = _mm256_lddqu_si256((__m256i *) (src0 + x));   // lddqu is an unaligned load; original note: why not aligned with 128/256 bit boundaries
+        __m256i s1 = _mm256_lddqu_si256((__m256i *) (src1 + x));
+
+        __m256i w0;   // assigned in every branch below; the former eager load from weight + x was dead and read forward even in mirrored mode
+        if (compIdx != COMPONENT_Y && pu.chromaFormat != CHROMA_444)   // horizontally subsampled chroma: use every other weight
+        {
+          const __m256i mask = _mm256_set_epi16(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);   // multiplying by it zeroes the odd-indexed words
+          __m256i       w0p0, w0p1;
+          if (g_angle2mirror[angle] == 1)
+          {
+            w0p0 = _mm256_lddqu_si256(
+              (__m256i *) (weight - (x << 1) - (16 - 1)));   // first sub-sample the required weights.
+            w0p1                       = _mm256_lddqu_si256((__m256i *) (weight - (x << 1) - 16 - (16 - 1)));
+            const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3,
+                                                         2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+            w0p0                       = _mm256_shuffle_epi8(w0p0, shuffle_mask);   // reverse the 8 words inside each 128-bit lane
+            w0p0                       = _mm256_permute4x64_epi64(w0p0, _MM_SHUFFLE(1, 0, 3, 2));   // swap lanes to finish the 16-word reversal
+            w0p1                       = _mm256_shuffle_epi8(w0p1, shuffle_mask);
+            w0p1                       = _mm256_permute4x64_epi64(w0p1, _MM_SHUFFLE(1, 0, 3, 2));
+          }
+          else
+          {
+            w0p0 = _mm256_lddqu_si256((__m256i *) (weight + (x << 1)));   // first sub-sample the required weights.
+            w0p1 = _mm256_lddqu_si256((__m256i *) (weight + (x << 1) + 16));
+          }
+          w0p0 = _mm256_mullo_epi16(w0p0, mask);
+          w0p1 = _mm256_mullo_epi16(w0p1, mask);
+          w0   = _mm256_packs_epi16(w0p0, w0p1);   // 16->8 bit pack: each (weight, 0) byte pair reads back as one 16-bit weight; relies on weights fitting in int8 - presumably <= 32, confirm
+          w0   = _mm256_permute4x64_epi64(w0, _MM_SHUFFLE(3, 1, 2, 0));   // undo the per-lane interleave of the pack so weights are in sample order
+        }
+        else
+        {
+          if (g_angle2mirror[angle] == 1)
+          {
+            w0                         = _mm256_lddqu_si256((__m256i *) (weight - x - (16 - 1)));   // the 16 weights ending at weight - x
+            const __m256i shuffle_mask = _mm256_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 1, 0, 3,
+                                                         2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+            w0                         = _mm256_shuffle_epi8(w0, shuffle_mask);
+            w0                         = _mm256_permute4x64_epi64(w0, _MM_SHUFFLE(1, 0, 3, 2));
+          }
+          else
+          {
+            w0 = _mm256_lddqu_si256((__m256i *) (weight + x));
+          }
+        }
+        __m256i w1 = _mm256_sub_epi16(mmEightAVX2, w0);
+
+        __m256i s0tmp = _mm256_unpacklo_epi16(s0, s1);   // low half of each lane: interleave so madd yields s0*w0 + s1*w1
+        __m256i w0tmp = _mm256_unpacklo_epi16(w0, w1);
+        s0tmp         = _mm256_add_epi32(_mm256_madd_epi16(s0tmp, w0tmp), mmOffsetAVX2);
+        s0tmp         = _mm256_sra_epi32(s0tmp, mmShift);
+
+        s0 = _mm256_unpackhi_epi16(s0, s1);   // high half of each lane, same computation
+        w0 = _mm256_unpackhi_epi16(w0, w1);
+        s0 = _mm256_add_epi32(_mm256_madd_epi16(s0, w0), mmOffsetAVX2);
+        s0 = _mm256_sra_epi32(s0, mmShift);
+
+        s0 = _mm256_packs_epi32(s0tmp, s0);   // back to 16-bit; per-lane packing restores the original sample order
+        s0 = _mm256_min_epi16(mmMaxAVX2, _mm256_max_epi16(s0, mmMinAVX2));
+        _mm256_storeu_si256((__m256i *) (dst + x), s0);
+      }
+      dst += strideDst;
+      src0 += strideSrc0;
+      src1 += strideSrc1;
+      weight += stepY;
+    }
+  }
+#endif
+  else   // every other width goes 8 samples at a time; no scalar fallback - assumes width is a multiple of 8, TODO confirm
+  {
+    for (int y = 0; y < height; y++)
+    {
+      for (int x = 0; x < width; x += 8)
+      {
+        __m128i s0 = _mm_lddqu_si128((__m128i *) (src0 + x));
+        __m128i s1 = _mm_lddqu_si128((__m128i *) (src1 + x));
+        __m128i w0;
+        if (compIdx != COMPONENT_Y && pu.chromaFormat != CHROMA_444)   // horizontally subsampled chroma: use every other weight
+        {
+          const __m128i mask = _mm_set_epi16(0, 1, 0, 1, 0, 1, 0, 1);   // multiplying by it zeroes the odd-indexed words
+          __m128i       w0p0, w0p1;
+          if (g_angle2mirror[angle] == 1)
+          {
+            w0p0 =
+              _mm_lddqu_si128((__m128i *) (weight - (x << 1) - (8 - 1)));   // first sub-sample the required weights.
+            w0p1                       = _mm_lddqu_si128((__m128i *) (weight - (x << 1) - 8 - (8 - 1)));
+            const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);   // reverses all 8 words
+            w0p0                       = _mm_shuffle_epi8(w0p0, shuffle_mask);
+            w0p1                       = _mm_shuffle_epi8(w0p1, shuffle_mask);
+          }
+          else
+          {
+            w0p0 = _mm_lddqu_si128((__m128i *) (weight + (x << 1)));   // first sub-sample the required weights.
+            w0p1 = _mm_lddqu_si128((__m128i *) (weight + (x << 1) + 8));
+          }
+          w0p0 = _mm_mullo_epi16(w0p0, mask);
+          w0p1 = _mm_mullo_epi16(w0p1, mask);
+          w0   = _mm_packs_epi32(w0p0, w0p1);   // each masked 32-bit element (weight in its low word) becomes one 16-bit weight
+        }
+        else
+        {
+          if (g_angle2mirror[angle] == 1)
+          {
+            w0                         = _mm_lddqu_si128((__m128i *) (weight - x - (8 - 1)));   // the 8 weights ending at weight - x
+            const __m128i shuffle_mask = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);   // reverses all 8 words
+            w0                         = _mm_shuffle_epi8(w0, shuffle_mask);
+          }
+          else
+          {
+            w0 = _mm_lddqu_si128((__m128i *) (weight + x));
+          }
+        }
+        __m128i w1 = _mm_sub_epi16(mmEight, w0);
+
+        __m128i s0tmp = _mm_unpacklo_epi16(s0, s1);   // low 4 samples: interleave so madd yields s0*w0 + s1*w1
+        __m128i w0tmp = _mm_unpacklo_epi16(w0, w1);
+        s0tmp         = _mm_add_epi32(_mm_madd_epi16(s0tmp, w0tmp), mmOffset);
+        s0tmp         = _mm_sra_epi32(s0tmp, mmShift);
+
+        s0 = _mm_unpackhi_epi16(s0, s1);   // high 4 samples, same computation
+        w0 = _mm_unpackhi_epi16(w0, w1);
+        s0 = _mm_add_epi32(_mm_madd_epi16(s0, w0), mmOffset);
+        s0 = _mm_sra_epi32(s0, mmShift);
+
+        s0 = _mm_packs_epi32(s0tmp, s0);
+        s0 = _mm_min_epi16(mmMax, _mm_max_epi16(s0, mmMin));
+        _mm_storeu_si128((__m128i *) (dst + x), s0);
+      }
+      dst += strideDst;
+      src0 += strideSrc0;
+      src1 += strideSrc1;
+      weight += stepY;
+    }
+  }
+}
+#endif
+
 template< X86_VEXT vext >
 #if JVET_AA0058_GPM_ADP_BLD
 void xWeightedGeoBlk_SSE(const PredictionUnit &pu, const uint32_t width, const uint32_t height, const ComponentID compIdx, const uint8_t splitDir, const uint8_t bldIdx, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1)
@@ -4243,6 +4954,11 @@ void InterpolationFilter::_initInterpolationFilterX86()
 #if JVET_Z0056_GPM_SPLIT_MODE_REORDERING
   m_weightedGeoTplA = xWeightedGeoTpl_SSE<vext, true>;
 #endif
+#if JVET_AB0155_SGPM
+  m_weightedSgpm = xWeightedSgpm_SSE<vext>;
+  m_sadTM = xSadTM_SSE<vext>;
+  m_sgpmSadTM = xSgpmSadTM_SSE<vext>;
+#endif
 }
 
 template void InterpolationFilter::_initInterpolationFilterX86<SIMDX86>();
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index 7803dd7a0d4ce7f2b64f7464834f307894119bfe..ece1232db8bb2d65def6b358397ec7ae3b5b7d6d 100644
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -1812,6 +1812,9 @@ void CABACReader::extend_ref_line(CodingUnit& cu)
   if ( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.bdpcmMode
 #if ENABLE_DIMD
     || cu.dimd
+#endif
+#if JVET_AB0155_SGPM
+    || cu.sgpm
 #endif
     )
   {
@@ -1933,6 +1936,14 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
 #if JVET_W0123_TIMD_FUSION
   cu_timd_flag(cu);
 #endif
+#if JVET_AB0155_SGPM
+  sgpm_flag(cu);
+  if (cu.sgpm)
+  {
+    return;
+  }
+#endif
+
   extend_ref_line( cu );
   isp_mode( cu );
 #if ENABLE_DIMD
@@ -2132,6 +2143,50 @@ void CABACReader::cu_timd_flag( CodingUnit& cu )
 }
 #endif
 
+#if JVET_AB0155_SGPM
+void CABACReader::sgpm_flag(CodingUnit &cu)   // sets cu.sgpm (decoded, or false when SGPM cannot apply) and, when set, cu.sgpmIdx
+{
+  if (!cu.cs->sps->getUseSgpm())   // tool disabled in the SPS: nothing is read from the bitstream
+  {
+    cu.sgpm = false;
+    return;
+  }
+  if (!(cu.lwidth() >= GEO_MIN_CU_SIZE_EX && cu.lheight() >= GEO_MIN_CU_SIZE_EX && cu.lwidth() <= GEO_MAX_CU_SIZE_EX
+        && cu.lheight() <= GEO_MAX_CU_SIZE_EX && cu.lwidth() < 8 * cu.lheight() && cu.lheight() < 8 * cu.lwidth()
+        && cu.lwidth() * cu.lheight() >= SGPM_MIN_PIX))   // size limits, aspect ratio below 8:1 both ways, minimum area
+  {
+    cu.sgpm = false;
+    return;
+  }
+
+  if (cu.dimd || cu.timd || cu.mipFlag || cu.tmpFlag)   // no flag is read when any of these CU modes is already set
+  {
+    cu.sgpm = false;
+    return;
+  }
+  if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType))   // only intra CUs with a valid luma block in a luma channel type
+  {
+    cu.sgpm = false;
+    return;
+  }
+  if (!(cu.lx() && cu.ly()))   // lx() or ly() is 0; presumably no left/above template is available there - TODO confirm
+  {
+    cu.sgpm = false;
+    return;
+  }
+
+  unsigned ctxId = DeriveCtx::CtxSgpmFlag(cu);
+  cu.sgpm        = m_BinDecoder.decodeBin(Ctx::SgpmFlag(ctxId));   // context-coded flag
+
+  if (cu.sgpm)
+  {
+    uint32_t sgpmIdx = 0;
+    xReadTruncBinCode(sgpmIdx, SGPM_NUM);   // presumably a truncated-binary index over SGPM_NUM candidates - TODO confirm
+    cu.sgpmIdx = sgpmIdx;
+  }
+}
+#endif
+
 void CABACReader::intra_chroma_pred_modes( CodingUnit& cu )
 {
 #if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
@@ -5946,6 +6001,9 @@ void CABACReader::isp_mode( CodingUnit& cu )
   if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || !cu.cs->sps->getUseISP() || cu.bdpcmMode || !CU::canUseISP( cu, getFirstComponentOfChannel( cu.chType ) ) || cu.colorTransform 
 #if ENABLE_DIMD && JVET_V0087_DIMD_NO_ISP
       || cu.dimd
+#endif
+#if JVET_AB0155_SGPM
+      || cu.sgpm
 #endif
     )
   {
diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h
index 1f5b452f32fac785920aa8f91ce02836a6c9ab3b..581e8ee449aee73f418566d04a7bd44bf8bf22ff 100644
--- a/source/Lib/DecoderLib/CABACReader.h
+++ b/source/Lib/DecoderLib/CABACReader.h
@@ -115,6 +115,9 @@ public:
   void        intra_luma_pred_modes     ( CodingUnit&                   cu );
 #if JVET_W0123_TIMD_FUSION
   void        cu_timd_flag              ( CodingUnit&                   cu );
+#endif
+#if JVET_AB0155_SGPM
+  void        sgpm_flag                 ( CodingUnit&                   cu );
 #endif
   void        intra_chroma_pred_modes   ( CodingUnit&                   cu );
   bool        intra_chroma_lmc_mode     ( PredictionUnit&               pu );
diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp
index 8e18da77bd600f5adaa53060ff46ebe81b378e8d..ab4a4e139ce3c72af3b084462760df496acd9b93 100644
--- a/source/Lib/DecoderLib/DecCu.cpp
+++ b/source/Lib/DecoderLib/DecCu.cpp
@@ -322,6 +322,35 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea )
           pu->intraDir[0] = currCU.timdMode;
         }
 #endif
+
+#if JVET_AB0155_SGPM
+        else if (currCU.sgpm)
+        {
+          PredictionUnit *pu   = currCU.firstPU;
+          const CompArea &area = currCU.Y();
+#if SECONDARY_MPM
+          IntraPrediction::deriveDimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU);
+#endif
+          static_vector<SgpmInfo, SGPM_NUM> sgpmInfoList;
+          static_vector<double, SGPM_NUM>   sgpmCostList;
+          int                         sgpmIdx = currCU.sgpmIdx;
+
+          if (currCU.lwidth() * currCU.lheight() <= 1024)
+          {
+            m_pcIntraPred->deriveTimdMode(currCU.cs->picture->getRecoBuf(area), area, currCU, false, true);
+          }
+
+          m_pcIntraPred->deriveSgpmModeOrdered(currCU.cs->picture->getRecoBuf(area), area, currCU, sgpmInfoList, sgpmCostList);
+
+          currCU.sgpmSplitDir = sgpmInfoList[sgpmIdx].sgpmSplitDir;
+          currCU.sgpmMode0    = sgpmInfoList[sgpmIdx].sgpmMode0;
+          currCU.sgpmMode1    = sgpmInfoList[sgpmIdx].sgpmMode1;
+          
+          pu->intraDir[0]  = currCU.sgpmMode0;
+          pu->intraDir1[0] = currCU.sgpmMode1;
+        }
+#endif
+
         else if (currCU.firstPU->parseLumaMode)
         {
           const CompArea &area = currCU.Y();
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index ec6ed9279c29bd33902e20e10752590062cb4c95..845353c65786d5aec2bd34fe0706af49964546ce 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -2494,6 +2494,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
 #endif
 #if JVET_W0123_TIMD_FUSION
   READ_FLAG(uiCode, "sps_timd_enabled_flag");                        pcSPS->setUseTimd( uiCode != 0 );
+#endif
+#if JVET_AB0155_SGPM
+  READ_FLAG(uiCode, "sps_sgpm_enabled_flag");                       pcSPS->setUseSgpm(uiCode != 0);
 #endif
   if( pcSPS->getChromaFormatIdc() != CHROMA_400)
   {
@@ -5352,6 +5355,9 @@ void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo)
 #endif
 #if JVET_W0123_TIMD_FUSION
     READ_FLAG(symbol, "gci_no_timd_constraint_flag");                    cinfo->setNoTimdConstraintFlag(symbol > 0 ? true : false);
+#endif
+#if JVET_AB0155_SGPM
+    READ_FLAG(symbol, "gci_no_sgpm_constraint_flag");                    cinfo->setNoSgpmConstraintFlag(symbol > 0 ? true : false);
 #endif
     /* inter */
     READ_FLAG(symbol, "gci_no_ref_pic_resampling_constraint_flag");      cinfo->setNoRprConstraintFlag(symbol > 0 ? true : false);
diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp
index c1f94280388c47a7d5e81a48859124e9043f1328..8ffe3b5959b617875f661647de15d4f321b79b2f 100644
--- a/source/Lib/EncoderLib/CABACWriter.cpp
+++ b/source/Lib/EncoderLib/CABACWriter.cpp
@@ -1278,6 +1278,9 @@ void CABACWriter::extend_ref_line(const PredictionUnit& pu)
   if( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma( cu.chType ) || cu.bdpcmMode
 #if ENABLE_DIMD
     || cu.dimd
+#endif
+#if JVET_AB0155_SGPM
+      || cu.sgpm
 #endif
     )
   {
@@ -1352,6 +1355,9 @@ void CABACWriter::extend_ref_line(const CodingUnit& cu)
   if ( !cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType) || cu.bdpcmMode
 #if ENABLE_DIMD 
     || cu.dimd
+#endif
+#if JVET_AB0155_SGPM
+    || cu.sgpm
 #endif
     )
   {
@@ -1461,6 +1467,13 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu )
   }
 #if JVET_W0123_TIMD_FUSION
   cu_timd_flag(cu);
+#endif
+#if JVET_AB0155_SGPM
+  sgpm_flag(cu);
+  if (cu.sgpm)
+  {
+    return;
+  }
 #endif
   extend_ref_line( cu );
 
@@ -1681,6 +1694,13 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu )
   }
 #if JVET_W0123_TIMD_FUSION
   cu_timd_flag(*pu.cu);
+#endif
+#if JVET_AB0155_SGPM
+  sgpm_flag(*pu.cu);
+  if (pu.cu->sgpm)
+  {
+    return;
+  }
 #endif
   extend_ref_line( pu );
   isp_mode( *pu.cu );
@@ -1846,6 +1866,43 @@ void CABACWriter::cu_timd_flag( const CodingUnit& cu )
 }
 #endif
 
+#if JVET_AB0155_SGPM
+void CABACWriter::sgpm_flag(const CodingUnit &cu)
+{ // Writes the SGPM flag and, when cu.sgpm is set, the SGPM candidate index; the gating checks are the same as in CABACReader::sgpm_flag.
+  if (!cu.cs->sps->getUseSgpm()) // tool switched off in the SPS: nothing is written
+  {
+    return;
+  }
+  if (!(cu.lwidth() >= GEO_MIN_CU_SIZE_EX && cu.lheight() >= GEO_MIN_CU_SIZE_EX && cu.lwidth() <= GEO_MAX_CU_SIZE_EX
+        && cu.lheight() <= GEO_MAX_CU_SIZE_EX && cu.lwidth() < 8 * cu.lheight() && cu.lheight() < 8 * cu.lwidth()
+        && cu.lwidth() * cu.lheight() >= SGPM_MIN_PIX))
+  { // size gate failed: both sides in [GEO_MIN_CU_SIZE_EX, GEO_MAX_CU_SIZE_EX], aspect ratio below 8, area >= SGPM_MIN_PIX
+    return;
+  }
+
+  if (cu.dimd || cu.timd || cu.mipFlag || cu.tmpFlag) // flag is not written when DIMD, TIMD, MIP or TMP is already set on the CU
+  {
+    return;
+  }
+  if (!cu.Y().valid() || cu.predMode != MODE_INTRA || !isLuma(cu.chType)) // only intra CUs with a valid luma block, luma channel
+  {
+    return;
+  }
+  if (!(cu.lx() && cu.ly())) // both luma coordinates must be non-zero (presumably so left/above neighbours exist - TODO confirm)
+  {
+    return;
+  }
+
+  unsigned ctxId = DeriveCtx::CtxSgpmFlag(cu); // context index for the flag bin
+  m_BinEncoder.encodeBin(cu.sgpm, Ctx::SgpmFlag(ctxId));
+
+  if (cu.sgpm)
+  {
+    xWriteTruncBinCode(cu.sgpmIdx, SGPM_NUM); // index is written with xWriteTruncBinCode, passing SGPM_NUM
+  }
+}
+#endif
+
 #if ENABLE_DIMD
 void CABACWriter::cu_dimd_flag(const CodingUnit& cu)
 {
@@ -5754,6 +5811,9 @@ void CABACWriter::isp_mode( const CodingUnit& cu )
   if( !CU::isIntra( cu ) || !isLuma( cu.chType ) || cu.firstPU->multiRefIdx || !cu.cs->sps->getUseISP() || cu.bdpcmMode || !CU::canUseISP( cu, getFirstComponentOfChannel( cu.chType ) ) || cu.colorTransform 
 #if  ENABLE_DIMD && JVET_V0087_DIMD_NO_ISP
     || cu.dimd
+#endif
+#if JVET_AB0155_SGPM
+      || cu.sgpm
 #endif
     )
   {
diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h
index 09e5b24573169b537d54923b396f3bcc9940ac69..049f84d3cc236d46c8bba45ab69c389dcb107d00 100644
--- a/source/Lib/EncoderLib/CABACWriter.h
+++ b/source/Lib/EncoderLib/CABACWriter.h
@@ -128,6 +128,9 @@ public:
 #endif
 #if JVET_W0123_TIMD_FUSION
   void        cu_timd_flag              ( const CodingUnit&             cu );
+#endif
+#if JVET_AB0155_SGPM
+  void        sgpm_flag                 (const CodingUnit&              cu );
 #endif
   void        intra_chroma_pred_modes   ( const CodingUnit&             cu );
   void        intra_chroma_lmc_mode     ( const PredictionUnit&         pu );
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index c5503db4efa1f016fa33e3711425e2d83c480d52..890e29d692df671fece66ed1d0819478f45edd52 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -252,6 +252,9 @@ protected:
 #if JVET_W0123_TIMD_FUSION
   bool      m_noTimdConstraintFlag;
 #endif
+#if JVET_AB0155_SGPM
+  bool      m_noSgpmConstraintFlag;
+#endif
 #if ENABLE_OBMC
   bool      m_noObmcConstraintFlag;
 #endif
@@ -464,6 +467,9 @@ protected:
 #if JVET_W0123_TIMD_FUSION
   bool      m_timd;
 #endif
+#if JVET_AB0155_SGPM
+  bool      m_sgpm;
+#endif
 #if ENABLE_OBMC
   bool      m_OBMC;
 #endif
@@ -1098,6 +1104,10 @@ public:
   bool      getNoTimdConstraintFlag() const { return m_noTimdConstraintFlag; }
   void      setNoTimdConstraintFlag(bool val) { m_noTimdConstraintFlag = val; }
 #endif
+#if JVET_AB0155_SGPM
+  bool      getNoSgpmConstraintFlag() const { return m_noSgpmConstraintFlag; }
+  void      setNoSgpmConstraintFlag(bool val) { m_noSgpmConstraintFlag = val; }
+#endif
 #if ENABLE_OBMC
   bool      getNoObmcConstraintFlag() const { return m_noObmcConstraintFlag; }
   void      setNoObmcConstraintFlag(bool bVal) { m_noObmcConstraintFlag = bVal; }
@@ -1464,6 +1474,10 @@ public:
   void      setUseTimd                   ( bool b )       { m_timd = b; }
   bool      getUseTimd                   () const         { return m_timd; }
 #endif
+#if JVET_AB0155_SGPM
+  void      setUseSgpm                   (bool b)         { m_sgpm = b; }
+  bool      getUseSgpm                   () const         { return m_sgpm; }
+#endif
 #if ENABLE_OBMC
   void      setUseObmc                   ( bool b )       { m_OBMC = b; }
   bool      getUseObmc                   ()         const { return m_OBMC; }
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index 38b32dc9ec467abb11263d2d48176e43237c328a..a4f547186d8acbd956c41b27728b2a751f3e6845 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -2238,6 +2238,10 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
   bool timdIsBlended = false;
   int  timdFusionWeight[2] = { 0 };
 #endif
+#if JVET_AB0155_SGPM
+  int timdHorMode = 0;
+  int timdVerMode = 0;
+#endif
 
 
   double dct2Cost                =   MAX_DOUBLE;
@@ -2439,13 +2443,22 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
             if (!timdDerived)
             {
               const CompArea &area = cu.Y();
+              
+#if JVET_AB0155_SGPM
+              cu.timdMode = m_pcIntraSearch->deriveTimdMode(bestCS->picture->getRecoBuf(area), area, cu, true, true);
+#else
               cu.timdMode = m_pcIntraSearch->deriveTimdMode(bestCS->picture->getRecoBuf(area), area, cu);
+#endif
               timdMode = cu.timdMode;
               timdDerived = true;
               timdModeSecondary = cu.timdModeSecondary;
               timdIsBlended     = cu.timdIsBlended;
               timdFusionWeight[0] = cu.timdFusionWeight[0];
               timdFusionWeight[1] = cu.timdFusionWeight[1];
+#if JVET_AB0155_SGPM
+              timdHorMode = cu.timdHor;
+              timdVerMode = cu.timdVer;
+#endif
             }
             else
             {
@@ -2454,6 +2467,10 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
               cu.timdIsBlended     = timdIsBlended;
               cu.timdFusionWeight[0] = timdFusionWeight[0];
               cu.timdFusionWeight[1] = timdFusionWeight[1];
+#if JVET_AB0155_SGPM
+              cu.timdHor = timdHorMode;
+              cu.timdVer = timdVerMode;
+#endif
             }
           }
 #endif
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index 55cfebe1c5d87f6108b61af80eaa5ac25c2b52e4..06e97509250c3c4ea809ce99e78e450c4d3b921f 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -1453,6 +1453,9 @@ void EncLib::xInitSPS( SPS& sps )
 #if JVET_W0123_TIMD_FUSION
   cinfo->setNoTimdConstraintFlag(m_noTimdConstraintFlag);
 #endif
+#if JVET_AB0155_SGPM
+  cinfo->setNoSgpmConstraintFlag(m_noSgpmConstraintFlag);
+#endif
 #if ENABLE_OBMC
   cinfo->setNoObmcConstraintFlag(m_noObmcConstraintFlag);
 #endif
@@ -1641,6 +1644,9 @@ void EncLib::xInitSPS( SPS& sps )
 #if JVET_W0123_TIMD_FUSION
   sps.setUseTimd            ( m_timd );
 #endif
+#if JVET_AB0155_SGPM
+  sps.setUseSgpm            ( m_sgpm );
+#endif
 #if ENABLE_OBMC
   sps.setUseOBMC            ( m_OBMC );
 #endif
diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp
index 51d7f4ca7658483bdfd8dd81eebfd68efa02b775..00573527c4e28a88a41cb0af24babb49979c741e 100644
--- a/source/Lib/EncoderLib/IntraSearch.cpp
+++ b/source/Lib/EncoderLib/IntraSearch.cpp
@@ -74,6 +74,17 @@ IntraSearch::IntraSearch()
   {
     m_pSharedPredTransformSkip[ch] = nullptr;
   }
+#if JVET_AB0155_SGPM
+  for (int i = 0; i < NUM_LUMA_MODE; i++)
+  {
+    m_intraPredBuf[i] = nullptr;
+  }
+  for (int i = 0; i < SGPM_NUM; i++)
+  {
+    m_sgpmPredBuf[i] = nullptr;
+  }
+#endif
+
   m_truncBinBits = nullptr;
   m_escapeNumBins = nullptr;
   m_minErrorIndexMap = nullptr;
@@ -175,6 +186,21 @@ void IntraSearch::destroy()
 
   m_tmpStorageLCU.destroy();
   m_colorTransResiBuf.destroy();
+  
+#if JVET_AB0155_SGPM
+  for (int i = 0; i < NUM_LUMA_MODE; i++)
+  {
+    delete[] m_intraPredBuf[i];
+    m_intraPredBuf[i] = nullptr;
+  }
+
+  for (int i = 0; i < SGPM_NUM; i++)
+  {
+    delete[] m_sgpmPredBuf[i];
+    m_sgpmPredBuf[i] = nullptr;
+  }
+#endif
+
   m_isInitialized = false;
   if (m_truncBinBits != nullptr)
   {
@@ -260,6 +286,16 @@ void IntraSearch::init( EncCfg*        pcEncCfg,
     m_cccmStorage[cccmIdx].create(UnitArea(cform, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE)));
   }
 #endif
+#if JVET_AB0155_SGPM
+  for (int i = 0; i < NUM_LUMA_MODE; i++)
+  {
+    m_intraPredBuf[i] = new Pel[GEO_MAX_CU_SIZE_EX * GEO_MAX_CU_SIZE_EX];
+  }
+  for (int i = 0; i < SGPM_NUM; i++)
+  {
+    m_sgpmPredBuf[i] = new Pel[GEO_MAX_CU_SIZE_EX * GEO_MAX_CU_SIZE_EX];
+  }
+#endif
 
   for( uint32_t ch = 0; ch < MAX_NUM_TBLOCKS; ch++ )
   {
@@ -473,6 +509,10 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 #if JVET_W0123_TIMD_FUSION
   const TempCtx ctxStartTimdFlag   ( m_CtxCache, SubCtx( Ctx::TimdFlag,      m_CABACEstimator->getCtx() ) );
 #endif
+#if JVET_AB0155_SGPM
+  const TempCtx ctxStartSgpmFlag(m_CtxCache, SubCtx(Ctx::SgpmFlag, m_CABACEstimator->getCtx()));
+#endif
+
   const TempCtx ctxStartIspMode    ( m_CtxCache, SubCtx( Ctx::ISPMode,          m_CABACEstimator->getCtx() ) );
 #if SECONDARY_MPM
   const TempCtx ctxStartMPMIdxFlag(m_CtxCache, SubCtx(Ctx::IntraLumaMPMIdx, m_CABACEstimator->getCtx()));
@@ -490,6 +530,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
   bool LFNSTSaveFlag = sps.getUseLFNST() && cu.lfnstIdx == 0;
 
   LFNSTSaveFlag &= sps.getUseIntraMTS() ? cu.mtsFlag == 0 : true;
+#if JVET_AB0155_SGPM
+  bool SGPMSaveFlag = LFNSTSaveFlag;
+#endif
 
   const uint32_t lfnstIdx = cu.lfnstIdx;
 #if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
@@ -538,6 +581,15 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
   bool ispCanBeUsed   = sps.getUseISP() && cu.mtsFlag == 0 && cu.lfnstIdx == 0 && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize());
   bool saveDataForISP = ispCanBeUsed && (!colorTransformIsEnabled || isFirstColorSpace);
   bool testISP        = ispCanBeUsed && (!colorTransformIsEnabled || !cu.colorTransform);
+
+#if JVET_AB0155_SGPM
+  const bool sgpmAllowed = sps.getUseSgpm() && isLuma(partitioner.chType);
+  bool testSgpm = sgpmAllowed && cu.lwidth() >= GEO_MIN_CU_SIZE_EX && cu.lheight() >= GEO_MIN_CU_SIZE_EX
+                  && cu.lwidth() <= GEO_MAX_CU_SIZE_EX && cu.lheight() <= GEO_MAX_CU_SIZE_EX
+                  && cu.lwidth() < 8 * cu.lheight() && cu.lheight() < 8 * cu.lwidth() && cu.lx() && cu.ly()
+                  && cu.lwidth() * cu.lheight() >= SGPM_MIN_PIX;
+#endif
+
 #if JVET_W0103_INTRA_MTS 
   if (testISP && m_pcEncCfg->getUseFastISP())
   {
@@ -616,6 +668,12 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 #if JVET_W0123_TIMD_FUSION
     bool bestTimdMode = false;
 #endif
+#if JVET_AB0155_SGPM
+    bool bestSgpmMode = false;
+    const CompArea &area = pu.Y();
+    CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+#endif
+
     if (isSecondColorSpace)
     {
       uiRdModeList.clear();
@@ -635,6 +693,15 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
     {
       if (mtsUsageFlag != 2)
       {
+#if JVET_AB0155_SGPM
+        if (testSgpm && SGPMSaveFlag)
+        {
+          for (int i = 0; i < NUM_LUMA_MODE; i++)
+          {
+            m_intraModeReady[i] = 0;
+          }
+        }
+#endif
         // this should always be true
         CHECK(!pu.Y().valid(), "PU is not valid");
         bool isFirstLineOfCtu     = (((pu.block(COMPONENT_Y).y) & ((pu.cs->sps)->getMaxCUWidth() - 1)) == 0);
@@ -678,8 +745,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
         if (numModesForFullRD != numModesAvailable)
         {
           CHECK(numModesForFullRD >= numModesAvailable, "Too many modes for full RD search");
-
+#if !JVET_AB0155_SGPM
           const CompArea &area = pu.Y();
+#endif
 
           PelBuf piOrg  = cs.getOrgBuf(area);
           PelBuf piPred = cs.getPredBuf(area);
@@ -688,7 +756,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
           DistParam distParamHad;
           if (cu.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
           {
+#if !JVET_AB0155_SGPM
             CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+#endif
             PelBuf   tmpOrg = m_tmpStorageLCU.getBuf(tmpArea);
             tmpOrg.rspSignal( piOrg, m_pcReshape->getFwdLUT() );
             m_pcRdCost->setDistParam(distParamSad, tmpOrg, piPred, sps.getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y,
@@ -725,6 +795,25 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
           const int numHadCand = (testMip ? 2 : 1) * 3;
 #endif
 
+#if JVET_AB0155_SGPM
+          static_vector<SgpmInfo, SGPM_NUM> sgpmInfoList;
+          static_vector<double, SGPM_NUM>   sgpmCostList;
+          int                               sgpmNeededMode[NUM_LUMA_MODE] = {0};
+
+          if (testSgpm && SGPMSaveFlag)
+          {
+            deriveSgpmModeOrdered(bestCS->picture->getRecoBuf(area), area, cu, sgpmInfoList, sgpmCostList);
+            for (int sgpmIdx = 0; sgpmIdx < SGPM_NUM; sgpmIdx++)
+            {
+              int      sgpmMode[2];
+              sgpmMode[0]                 = sgpmInfoList[sgpmIdx].sgpmMode0;
+              sgpmMode[1]                 = sgpmInfoList[sgpmIdx].sgpmMode1;
+              sgpmNeededMode[sgpmMode[0]] = 1;
+              sgpmNeededMode[sgpmMode[1]] = 1;
+            }
+          }
+#endif
+
           //*** Derive (regular) candidates using Hadamard
           cu.mipFlag = false;
 
@@ -752,6 +841,15 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 
               initPredIntraParams(pu, pu.Y(), sps);
               predIntraAng(COMPONENT_Y, piPred, pu);
+#if JVET_AB0155_SGPM
+              if (testSgpm && SGPMSaveFlag && sgpmNeededMode[uiMode])
+              {
+                PelBuf   predBuf(m_intraPredBuf[uiMode], tmpArea);
+                predBuf.copyFrom(piPred);
+                m_intraModeReady[uiMode] = 1;
+              }
+#endif
+
               // Use the min between SAD and HAD as the cost criterion
               // SAD is scaled by 2 to align with the scaling of HAD
               minSadHad += std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad));
@@ -764,6 +862,10 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 #if JVET_W0123_TIMD_FUSION
               m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag );
 #endif
+#if JVET_AB0155_SGPM
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::SgpmFlag, ctxStartSgpmFlag);
+#endif
+
               m_CABACEstimator->getCtx() = SubCtx( Ctx::ISPMode, ctxStartIspMode );
 #if SECONDARY_MPM
               m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMPMIdx, ctxStartMPMIdxFlag);
@@ -829,6 +931,14 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 
                     initPredIntraParams(pu, pu.Y(), sps);
                     predIntraAng(COMPONENT_Y, piPred, pu);
+#if JVET_AB0155_SGPM
+                    if (testSgpm && SGPMSaveFlag && sgpmNeededMode[mode])
+                    {
+                      PelBuf   predBuf(m_intraPredBuf[mode], tmpArea);
+                      predBuf.copyFrom(piPred);
+                      m_intraModeReady[mode] = 1;
+                    }
+#endif
 
                     // Use the min between SAD and SATD as the cost criterion
                     // SAD is scaled by 2 to align with the scaling of HAD
@@ -843,6 +953,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
                     m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
 #if JVET_W0123_TIMD_FUSION
                     m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag );
+#endif
+#if JVET_AB0155_SGPM
+                    m_CABACEstimator->getCtx() = SubCtx(Ctx::SgpmFlag, ctxStartSgpmFlag);
 #endif
                     m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
 #if SECONDARY_MPM
@@ -914,6 +1027,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
                   m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
 #if JVET_W0123_TIMD_FUSION
                   m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag );
+#endif
+#if JVET_AB0155_SGPM
+                  m_CABACEstimator->getCtx() = SubCtx(Ctx::SgpmFlag, ctxStartSgpmFlag);
 #endif
                   m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
 #if SECONDARY_MPM
@@ -1097,6 +1213,118 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
             CandHadList   = m_dSavedHadListLFNST;
           }
 
+#if JVET_AB0155_SGPM
+          if (testSgpm)
+          {
+            if (SGPMSaveFlag)
+            {
+              m_uiSavedRdModeListSGPM.clear();
+              m_dSavedModeCostSGPM.clear();
+              m_uiSavedHadModeListSGPM.clear();
+              m_dSavedHadListSGPM.clear();
+
+              cu.tmpFlag      = false;
+              pu.multiRefIdx  = 0;
+              cu.mipFlag      = false;
+              
+              initIntraPatternChType(cu, pu.Y(), true);
+
+              // get single mode predictions
+              for (int sgpmIdx = 0; sgpmIdx < SGPM_NUM; sgpmIdx++)
+              {
+                int      sgpmMode[2];
+                sgpmMode[0] = sgpmInfoList[sgpmIdx].sgpmMode0;
+                sgpmMode[1] = sgpmInfoList[sgpmIdx].sgpmMode1;
+                for (int idxIn2 = 0; idxIn2 < 2; idxIn2++)
+                {
+                  if (!m_intraModeReady[sgpmMode[idxIn2]])
+                  {
+                    pu.intraDir[0] = sgpmMode[idxIn2];
+
+                    initPredIntraParams(pu, pu.Y(), sps);
+                    predIntraAng(COMPONENT_Y, piPred, pu);
+
+                    PelBuf predBuf(m_intraPredBuf[sgpmMode[idxIn2]], tmpArea);
+                    predBuf.copyFrom(piPred);
+                    m_intraModeReady[sgpmMode[idxIn2]] = 1;
+                  }
+                }
+              }
+
+              cu.sgpm = true;
+              // frac bits are calculated only once because they are the same for all SGPM candidates
+              cu.sgpmIdx      = 0;
+              cu.sgpmSplitDir = sgpmInfoList[0].sgpmSplitDir;
+              cu.sgpmMode0    = sgpmInfoList[0].sgpmMode0;
+              cu.sgpmMode1    = sgpmInfoList[0].sgpmMode1;
+              pu.intraDir[0]  = cu.sgpmMode0;
+              pu.intraDir1[0] = cu.sgpmMode1;
+              
+              // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
+#if JVET_V0130_INTRA_TMP
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::TmpFlag, ctxStartTpmFlag);
+#endif
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
+#if JVET_W0123_TIMD_FUSION
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::TimdFlag, ctxStartTimdFlag);
+#endif
+#if JVET_AB0155_SGPM
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::SgpmFlag, ctxStartSgpmFlag);
+#endif
+
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
+#if SECONDARY_MPM
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMPMIdx, ctxStartMPMIdxFlag);
+#endif
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaPlanarFlag, ctxStartPlanarFlag);
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaMpmFlag, ctxStartIntraMode);
+#if SECONDARY_MPM
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::IntraLumaSecondMpmFlag, ctxStartIntraMode2);
+#endif
+              m_CABACEstimator->getCtx() = SubCtx(Ctx::MultiRefLineIdx, ctxStartMrlIdx);
+
+              uint64_t fracModeBits = xFracModeBitsIntra(pu, 0, CHANNEL_TYPE_LUMA);
+
+              for (int sgpmIdx = 0; sgpmIdx < SGPM_NUM; sgpmIdx++)
+              {
+                int sgpmMode0 = sgpmInfoList[sgpmIdx].sgpmMode0;
+                int sgpmMode1 = sgpmInfoList[sgpmIdx].sgpmMode1;
+                PelBuf src0(m_intraPredBuf[sgpmMode0], tmpArea);
+                PelBuf src1(m_intraPredBuf[sgpmMode1], tmpArea);
+
+                m_if.m_weightedSgpm(pu, width, height, COMPONENT_Y, sgpmInfoList[sgpmIdx].sgpmSplitDir, piPred, src0, src1);
+
+                PelBuf predBuf(m_sgpmPredBuf[sgpmIdx], tmpArea);
+                predBuf.copyFrom(piPred);
+
+                Distortion minSadHad = 0;
+                minSadHad += std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad));
+                double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass;
+
+                updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, SGPM_IDX, 0, 1,
+                                        sgpmInfoList[sgpmIdx].sgpmSplitDir, sgpmInfoList[sgpmIdx].sgpmMode0,
+                                        sgpmInfoList[sgpmIdx].sgpmMode1, sgpmIdx),
+                               cost, m_uiSavedRdModeListSGPM, m_dSavedModeCostSGPM, SGPM_NUM);
+                updateCandList(ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, SGPM_IDX, 0, 1,
+                                        sgpmInfoList[sgpmIdx].sgpmSplitDir, sgpmInfoList[sgpmIdx].sgpmMode0,
+                                        sgpmInfoList[sgpmIdx].sgpmMode1, sgpmIdx),
+                               double(minSadHad), m_uiSavedHadModeListSGPM, m_dSavedHadListSGPM, SGPM_NUM);
+              }
+
+              cu.sgpm = false;
+            }
+
+            int updateNum = (numModesForFullRD + 1) / 2;
+            for (auto listIdx = 0; listIdx < updateNum; listIdx++)
+            {
+              updateCandList(m_uiSavedRdModeListSGPM[listIdx], m_dSavedModeCostSGPM[listIdx], uiRdModeList,
+                             CandCostList, numModesForFullRD);
+              updateCandList(m_uiSavedHadModeListSGPM[listIdx], m_dSavedHadListSGPM[listIdx], uiHadModeList,
+                             CandHadList, numHadCand);
+            }
+          }
+#endif
+
           if (m_pcEncCfg->getFastUDIUseMPMEnabled())
           {
 
@@ -1290,6 +1518,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
           m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
 #if JVET_W0123_TIMD_FUSION
           m_CABACEstimator->getCtx() = SubCtx( Ctx::TimdFlag, ctxStartTimdFlag );
+#endif
+#if JVET_AB0155_SGPM
+          m_CABACEstimator->getCtx() = SubCtx(Ctx::SgpmFlag, ctxStartSgpmFlag);
 #endif
           m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
 #if SECONDARY_MPM
@@ -1494,6 +1725,18 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
         cu.dimd = true;
       }
 #endif
+#if JVET_AB0155_SGPM
+      cu.sgpm = uiOrgMode.sgpmFlag;
+      if (cu.sgpm)
+      {
+        uiOrgMode.modeId = uiOrgMode.sgpmMode0;
+        cu.sgpmSplitDir  = uiOrgMode.sgpmSplitDir;
+        cu.sgpmMode0     = uiOrgMode.sgpmMode0;
+        cu.sgpmMode1     = uiOrgMode.sgpmMode1;
+        cu.sgpmIdx       = uiOrgMode.sgpmIdx;
+        pu.intraDir1[CHANNEL_TYPE_LUMA] = uiOrgMode.sgpmMode1;
+      }
+#endif
 #if JVET_V0130_INTRA_TMP
       cu.tmpFlag = uiOrgMode.tmpFlag;
 #if JVET_W0103_INTRA_MTS
@@ -1557,6 +1800,16 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
       CHECK( cu.tmpFlag && cu.ispMode, "Error: combination of TPM and ISP not supported" );
       CHECK( cu.tmpFlag && pu.multiRefIdx, "Error: combination of TPM and MRL not supported" );
 #endif
+#if JVET_AB0155_SGPM
+      CHECK(cu.sgpm && cu.tmpFlag, "Error: combination of SGPM and TPM not supported");
+      CHECK(cu.sgpm && cu.ispMode, "Error: combination of SGPM and ISP not supported");
+      CHECK(cu.sgpm && pu.multiRefIdx, "Error: combination of SGPM and MRL not supported");
+      CHECK(cu.sgpm && cu.mipFlag, "Error: combination of SGPM and MIP not supported");
+      CHECK(cu.sgpm && cu.timd, "Error: combination of SGPM and TIMD not supported");
+      CHECK(cu.sgpm && cu.dimd, "Error: combination of SGPM and DIMD not supported");
+      CHECK(cu.sgpm && cu.bdpcmMode, "Error: combination of SGPM and BDPCM not supported");
+#endif
+
 #if ENABLE_DIMD && JVET_V0087_DIMD_NO_ISP
       CHECK(cu.ispMode && cu.dimd, "Error: combination of ISP and DIMD not supported");
 #endif
@@ -1645,7 +1898,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 #endif
 #if JVET_V0130_INTRA_TMP
 #if JVET_W0123_TIMD_FUSION
-      if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && !cu.tmpFlag && testISP && !cu.timd)
+      if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && !cu.tmpFlag && testISP && !cu.timd
+#if JVET_AB0155_SGPM
+        && !cu.sgpm
+#endif
+        )
 #else
       if( !cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && !cu.tmpFlag && testISP )
 #endif
@@ -1741,6 +1998,10 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 #if JVET_W0123_TIMD_FUSION
           bestTimdMode = cu.timd;
 #endif
+#if JVET_AB0155_SGPM
+          bestSgpmMode = cu.sgpm;
+#endif
+
           if( sps.getUseLFNST() && mtsUsageFlag == 1 && !cu.ispMode )
           {
             m_bestModeCostStore[ lfnstIdx ] = csBest->cost; //cs.cost;
@@ -1864,6 +2125,20 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
         pu.intraDir[ CHANNEL_TYPE_LUMA ] = cu.timdMode;
       }
 #endif
+#if JVET_AB0155_SGPM
+      cu.sgpm = uiBestPUMode.sgpmFlag;
+      if (cu.sgpm)
+      {
+        CHECK(!bestSgpmMode, "mode not same");
+        pu.intraDir[CHANNEL_TYPE_LUMA]  = uiBestPUMode.sgpmMode0;
+        pu.intraDir1[CHANNEL_TYPE_LUMA] = uiBestPUMode.sgpmMode1;
+        cu.sgpmSplitDir                 = uiBestPUMode.sgpmSplitDir;
+        cu.sgpmMode0                    = uiBestPUMode.sgpmMode0;
+        cu.sgpmMode1                    = uiBestPUMode.sgpmMode1;
+        cu.sgpmIdx                      = uiBestPUMode.sgpmIdx;
+      }
+#endif
+
       if (cu.colorTransform)
       {
         CHECK(pu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "chroma should use DM mode for adaptive color transform");
@@ -4664,6 +4939,16 @@ void IntraSearch::xSelectAMTForFullRD(TransformUnit &tu)
   }
   else
   {
+#if JVET_AB0155_SGPM
+    if (pu.cu->sgpm)
+    {
+      CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+      PelBuf predBuf(m_sgpmPredBuf[pu.cu->sgpmIdx], tmpArea);
+      piPred.copyFrom(predBuf);
+    }
+    else
+#endif
+
     predIntraAng(COMPONENT_Y, piPred, pu);
   }
 
@@ -4863,6 +5148,16 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp
           }
           else
           {
+#if JVET_AB0155_SGPM
+            if (pu.cu->sgpm &&compID == COMPONENT_Y)
+            {
+              CompArea tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+              PelBuf   predBuf(m_sgpmPredBuf[pu.cu->sgpmIdx], tmpArea);
+              piPred.copyFrom(predBuf);
+            }
+            else
+#endif
+
             predIntraAng(compID, piPred, pu);
           }
 #if JVET_Z0050_DIMD_CHROMA_FUSION
@@ -7904,8 +8199,12 @@ void IntraSearch::reduceHadCandList(static_vector<T, N>& candModeList, static_ve
 
       if (!alreadyIncluded)
       {
+#if JVET_AB0155_SGPM
+        updateCandList(mipMode, sortedMipCost[idx], tempRdModeList, tempCandCostList, tempRdModeList.size() + 1);
+#else
         tempRdModeList.push_back(mipMode);
         tempCandCostList.push_back(0);
+#endif
         if( fastMip ) break;
       }
     }
diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h
index 93795e081e7d8064f26c64b6a0be311cd371ea3f..49faf5dfe7623dd03fcb1c4780e2544ccf688665 100644
--- a/source/Lib/EncoderLib/IntraSearch.h
+++ b/source/Lib/EncoderLib/IntraSearch.h
@@ -227,7 +227,53 @@ private:
     uint32_t modeId; // PU::intraDir[CHANNEL_TYPE_LUMA]
 #if JVET_V0130_INTRA_TMP
 	  bool     tmpFlag; // CU::tmpFlag
-
+#endif
+#if JVET_AB0155_SGPM
+          bool sgpmFlag;   // CU::sgpm
+          int  sgpmSplitDir;   // CU::sgpmSplitDir
+          int  sgpmMode0;   // CU::sgpmMode0, also written to PU::intraDir[CHANNEL_TYPE_LUMA]
+          int  sgpmMode1;   // CU::sgpmMode1, also written to PU::intraDir1[CHANNEL_TYPE_LUMA]
+          int  sgpmIdx;   // CU::sgpmIdx, index into m_sgpmPredBuf
+#endif
+#if JVET_AB0155_SGPM && JVET_V0130_INTRA_TMP
+    ModeInfo() : mipFlg(false), mipTrFlg(false), mRefId(0), ispMod(NOT_INTRA_SUBPARTITIONS), modeId(0), tmpFlag(0), sgpmFlag(0), sgpmSplitDir(0), sgpmMode0(0), sgpmMode1(0), sgpmIdx(0){}
+    ModeInfo(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, // first five arguments are mandatory
+             const bool tpmf = 0, const bool sf = 0, const int sd = 0, const int sm0 = 0, const int sm1 = 0, const int si = 0) // TMP and SGPM arguments default to 0
+      : mipFlg(mipf)
+      , mipTrFlg(miptf)
+      , mRefId(mrid)
+      , ispMod(ispm)
+      , modeId(mode)
+      , tmpFlag(tpmf)
+      , sgpmFlag(sf)       // SGPM on/off for this candidate
+      , sgpmSplitDir(sd)   // SGPM split direction
+      , sgpmMode0(sm0)     // first SGPM intra mode
+      , sgpmMode1(sm1)     // second SGPM intra mode
+      , sgpmIdx(si)        // SGPM candidate index
+    {
+    }
+    ModeInfo &operator=(const ModeInfo &other) // member-wise copy of every field; returns *this
+    {
+      mipFlg       = other.mipFlg;     // CU::mipFlag
+      mipTrFlg     = other.mipTrFlg;   // PU::mipTransposedFlag
+      mRefId       = other.mRefId;     // PU::multiRefIdx
+      ispMod       = other.ispMod;     // CU::ispMode
+      modeId       = other.modeId;     // PU::intraDir[CHANNEL_TYPE_LUMA]
+      tmpFlag      = other.tmpFlag;    // CU::tmpFlag
+      sgpmFlag     = other.sgpmFlag;   // CU::sgpm
+      sgpmSplitDir = other.sgpmSplitDir;   // CU::sgpmSplitDir
+      sgpmMode0    = other.sgpmMode0;   // CU::sgpmMode0
+      sgpmMode1    = other.sgpmMode1;   // CU::sgpmMode1
+      sgpmIdx      = other.sgpmIdx;   // CU::sgpmIdx
+      return *this;
+    }
+    bool operator==(const ModeInfo cmp) const // equality over every field except sgpmMode0, sgpmMode1 and sgpmIdx
+    {
+      return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod
+                && modeId == cmp.modeId && tmpFlag == cmp.tmpFlag && sgpmFlag == cmp.sgpmFlag
+                && sgpmSplitDir == cmp.sgpmSplitDir); // sgpmMode0 and sgpmMode1 do not seem to be needed here; NOTE(review): sgpmIdx is not compared either - confirm this is intended
+    }
+#elif JVET_V0130_INTRA_TMP
 	  ModeInfo() : mipFlg(false), mipTrFlg(false), mRefId(0), ispMod(NOT_INTRA_SUBPARTITIONS), modeId(0), tmpFlag(0) {}
 	  ModeInfo(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, const bool tpmf = 0) : mipFlg(mipf), mipTrFlg(miptf), mRefId(mrid), ispMod(ispm), modeId(mode), tmpFlag(tpmf) {}
 	  bool operator==(const ModeInfo cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId && tmpFlag == cmp.tmpFlag); }
@@ -241,7 +287,19 @@ private:
   {
     double rdCost;
     ModeInfoWithCost() : ModeInfo(), rdCost(MAX_DOUBLE) {}
-#if JVET_V0130_INTRA_TMP
+#if JVET_AB0155_SGPM && JVET_V0130_INTRA_TMP
+    ModeInfoWithCost(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode,
+                     const bool tpmf, double cost, const bool sf = 0, const int sd = 0, const int sm0 = 0, const int sm1 = 0) // SGPM arguments are optional and follow the cost
+      : ModeInfo(mipf, miptf, mrid, ispm, mode, tpmf, sf, sd, sm0, sm1), rdCost(cost) // no sgpmIdx is forwarded, so the base keeps its default sgpmIdx of 0
+    {
+    }
+    bool operator==(const ModeInfoWithCost cmp) const // field-wise equality; rdCost must match exactly (double ==)
+    {
+      return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod
+              && modeId == cmp.modeId && tmpFlag == cmp.tmpFlag && rdCost == cmp.rdCost && sgpmFlag == cmp.sgpmFlag
+              && sgpmSplitDir == cmp.sgpmSplitDir);   // sgpmMode0 and sgpmMode1 do not seem to be needed here; NOTE(review): sgpmIdx is not compared either - confirm this is intended
+    }
+#elif JVET_V0130_INTRA_TMP
 	  ModeInfoWithCost(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, const bool tpmf, double cost) : ModeInfo(mipf, miptf, mrid, ispm, mode, tpmf), rdCost(cost) {}
 	  bool operator==(const ModeInfoWithCost cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId && tmpFlag == cmp.tmpFlag && rdCost == cmp.rdCost); }
 #else
@@ -403,6 +461,19 @@ private:
   static_vector<double,   FAST_UDI_MAX_RDMODE_NUM> m_dSavedModeCostLFNST;
   static_vector<double,   FAST_UDI_MAX_RDMODE_NUM> m_dSavedHadListLFNST;
 
+#if JVET_AB0155_SGPM
+  static_vector<ModeInfo, SGPM_NUM> m_uiSavedRdModeListSGPM;
+  static_vector<ModeInfo, SGPM_NUM> m_uiSavedHadModeListSGPM;
+  static_vector<double, SGPM_NUM>   m_dSavedModeCostSGPM;
+  static_vector<double, SGPM_NUM>   m_dSavedHadListSGPM;
+
+  Pel* m_intraPredBuf[NUM_LUMA_MODE]; // one pointer per luma intra mode; NOTE(review): allocation and ownership are not visible in this hunk
+  Pel* m_sgpmPredBuf[SGPM_NUM]; // luma prediction samples indexed by CU::sgpmIdx; copied into piPred when cu.sgpm is set
+  uint8_t    m_intraModeReady[NUM_LUMA_MODE]; // presumably marks which m_intraPredBuf entries are filled - TODO confirm
+
+  size_t m_numSGPMCands;
+#endif
+
   PelStorage      m_tmpStorageLCU;
   PelStorage      m_colorTransResiBuf;
 #if JVET_AB0143_CCCM_TS
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index 7f6f217e580919818ddeba4d3e563159a24a2934..a5a139542f6861d8eb962330198a1e16614c504a 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -1566,6 +1566,9 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
 
 #if JVET_W0123_TIMD_FUSION
   WRITE_FLAG( pcSPS->getUseTimd() ? 1 : 0,                                          "sps_timd_enabled_flag");
+#endif
+#if JVET_AB0155_SGPM
+  WRITE_FLAG(pcSPS->getUseSgpm() ? 1 : 0, "sps_sgpm_enabled_flag");
 #endif
   if( pcSPS->getChromaFormatIdc() != CHROMA_400)
   {
@@ -3230,6 +3233,9 @@ void  HLSWriter::codeConstraintInfo  ( const ConstraintInfo* cinfo )
 #if JVET_W0123_TIMD_FUSION
     WRITE_FLAG(cinfo->getNoTimdConstraintFlag() ? 1 : 0, "gci_no_timd_constraint_flag" );
 #endif
+#if JVET_AB0155_SGPM
+    WRITE_FLAG(cinfo->getNoSgpmConstraintFlag() ? 1 : 0, "gci_no_sgpm_constraint_flag");
+#endif
 
     /* inter */
     WRITE_FLAG(cinfo->getNoRprConstraintFlag() ? 1 : 0, "gci_no_ref_pic_resampling_constraint_flag");