diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 7ae48e0eee59ca7dc35c0dbd3b94dc9155a361c1..a56cbd4c749e6bbaea39c863fc5635dfed397f19 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -788,6 +788,10 @@ void EncApp::xInitLibCfg()
 
   m_cEncLib.setUseWrapAround                                     ( m_wrapAround );
   m_cEncLib.setWrapAroundOffset                                  ( m_wrapAroundOffset );
+#if IDCC_TPM_JEM
+  m_cEncLib.setUseIntraTMP(m_IntraTMP);
+  m_cEncLib.setIntraTMPMaxSize(m_IntraTMP_MaxSize);
+#endif
 #if ERICSSON_BIF
   m_cEncLib.setUseBIF                                            ( m_BIF );
   m_cEncLib.setBIFStrength                                       ( m_BIFStrength );
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index 68d9004719c2221b31f795f23f4aefdc2cff4211..bcfca9603ea36deba5ce19617c870b39fc173806 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -1044,12 +1044,15 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("AdditionalInterHypRefFrames",                     m_maxNumAddHypRefFrames,                              4, "max. number of ref frames for additional inter hypotheseis")
   ("AdditionalInterHypTries",                         m_addHypTries,                                        1, "number of tries for additional inter prediction hypotheseis")
 #endif
+#if IDCC_TPM_JEM
+  ("IntraTMP",                                        m_IntraTMP,                                        false, "intra Template Matching (0: off, 1:on)  [default: off]")
+  ("IntraTMPMaxSize",                                 m_IntraTMP_MaxSize,                                 64u, "intra Template Matching max CU size  [default: 64]")
+#endif
 #if ERICSSON_BIF
-  ("BIF",                                             m_BIF,                                            false, "bilateral filter   (0: off, 1:on)  [default: off]")
+  ("BIF",                                             m_BIF,                                            true, "bilateral filter   (0: off, 1:on)  [default: on]")
   ("BIFStrength",                                     m_BIFStrength,                                       1u, "bilateral filter strength  (0: half, 1: full, 2: double)  [default: full]")
   ("BIFQPOffset",                                     m_BIFQPOffset,                                        0, "bilateral filter QP offset (0: no offset)  [default: 0]")
 #endif
-  
   // ADD_NEW_TOOL : (encoder app) add parsing parameters here
   ( "VirtualBoundariesPresentInSPSFlag",              m_virtualBoundariesPresentFlag,                    true, "Virtual Boundary position information is signalled in SPS or PH (1:SPS, 0:PH)  [default: on]" )
   ("NumVerVirtualBoundaries",                         m_numVerVirtualBoundaries,                           0u, "Number of vertical virtual boundaries (0-3, inclusive)")
@@ -4173,6 +4176,17 @@ void EncAppCfg::xPrintParameter()
     }
 #endif
   }
+#if IDCC_TPM_JEM
+  msg(DETAILS, "Intra TMP: %d\n", m_IntraTMP);
+  msg(DETAILS, "Max CU size of TMP: %u\n", m_IntraTMP_MaxSize);   // unsigned member, hence %u
+#if IDCC_FixedComparisonPerPixel
+  msg(DETAILS, "dynamic search range with fixed comparison per pixel: \n");
+  msg(DETAILS, "	searchRangeWidth = %d*Width \n", IDCC_SearchRangeMultFactor);
+  msg(DETAILS, "	searchRangeHeight = %d*Height \n", IDCC_SearchRangeMultFactor);
+#else
+  msg(DETAILS, "search range: %d\n", IDCC_SEARCHRANGEINTRA);
+#endif
+#endif
 
   msg( DETAILS, "Max Num Merge Candidates               : %d\n", m_maxNumMergeCand );
   msg( DETAILS, "Max Num Affine Merge Candidates        : %d\n", m_maxNumAffineMergeCand );
@@ -4306,6 +4320,10 @@ void EncAppCfg::xPrintParameter()
   {
     msg( VERBOSE, "WrapAroundOffset:%d ", m_wrapAroundOffset );
   }
+#if IDCC_TPM_JEM
+  msg( VERBOSE, "IntraTMP:%d ", m_IntraTMP);
+  msg( VERBOSE, "IntraTMP_MaxSize:%d ", m_IntraTMP_MaxSize);
+#endif
 #if ERICSSON_BIF
   msg( VERBOSE, "BIF:%d ", m_BIF);
   msg( VERBOSE, "BIFStrength:%d ", m_BIFStrength);
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index 69abf35de5ed53d22ab7147ee4ba3e72a7dfa945..cc0289f8d3fa427b8ee4d562d6f882419eb5ceeb 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -411,6 +411,10 @@ protected:
   int       m_maxNumAddHypRefFrames;                          ///< max. number of ref frames for additional inter hypotheseis
   int       m_addHypTries;                                    ///< max. number of tries for additional inter hypotheseis
 #endif
+#if IDCC_TPM_JEM
+  bool      m_IntraTMP;                                       ///< intra Template Matching 
+  unsigned  m_IntraTMP_MaxSize;                               ///< max CU size for which intra TMP is allowed
+#endif
 #if ERICSSON_BIF
   bool      m_BIF;                                            ///< bilateral filter
   unsigned  m_BIFStrength;                                    /// Bilateral filter strength
diff --git a/source/App/Parcat/parcat.cpp b/source/App/Parcat/parcat.cpp
index c19b982e04c947fd115232c40842ae193492d420..a40fc93a23a684be02840da74a940e10252a59e8 100644
--- a/source/App/Parcat/parcat.cpp
+++ b/source/App/Parcat/parcat.cpp
@@ -52,9 +52,8 @@ class ParcatHLSyntaxReader : public VLCReader
     bool parsePictureHeaderInSliceHeaderFlag ( ParameterSetManager *parameterSetManager );
 };
 
-bool ParcatHLSyntaxReader::parsePictureHeaderInSliceHeaderFlag(ParameterSetManager *parameterSetManager) {
-
-
+bool ParcatHLSyntaxReader::parsePictureHeaderInSliceHeaderFlag(ParameterSetManager *parameterSetManager)
+{
   uint32_t  uiCode;
   READ_FLAG(uiCode, "picture_header_in_slice_header_flag");
   return (uiCode==1);
@@ -172,7 +171,7 @@ const char * NALU_TYPE[] =
     "NAL_UNIT_CODED_SLICE_GDR",
     "NAL_UNIT_RESERVED_IRAP_VCL11",
     "NAL_UNIT_RESERVED_IRAP_VCL12",
-    "NAL_UNIT_DPS",
+    "NAL_UNIT_DCI",
     "NAL_UNIT_VPS",
     "NAL_UNIT_SPS",
     "NAL_UNIT_PPS",
@@ -195,8 +194,8 @@ const char * NALU_TYPE[] =
 
 int calc_poc(int iPOClsb, int prevTid0POC, int getBitsForPOC, int nalu_type)
 {
-  int iPrevPOC = prevTid0POC;
-  int iMaxPOClsb = 1<< getBitsForPOC;
+  int iPrevPOC    = prevTid0POC;
+  int iMaxPOClsb  = 1<< getBitsForPOC;
   int iPrevPOClsb = iPrevPOC & (iMaxPOClsb - 1);
   int iPrevPOCmsb = iPrevPOC-iPrevPOClsb;
   int iPOCmsb;
@@ -218,9 +217,9 @@ int calc_poc(int iPOClsb, int prevTid0POC, int getBitsForPOC, int nalu_type)
 
 std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int * poc_base, int * last_idr_poc)
 {
-  const uint8_t * p = v.data();
+  const uint8_t * p   = v.data();
   const uint8_t * buf = v.data();
-  int sz = (int) v.size();
+  int sz  = (int) v.size();
   int nal_start, nal_end;
   int off = 0;
   int cnt = 0;
@@ -229,9 +228,9 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
   std::vector<uint8_t> out;
   out.reserve(v.size());
 
-  int bits_for_poc = 8;
+  int  bits_for_poc  = 8;
   bool skip_next_sei = false;
-  bool change_poc = false;
+  bool change_poc    = false;
   bool first_idr_slice_after_ph_nal = false;
 
   while(find_nal_unit(p, sz, &nal_start, &nal_end) > 0)
@@ -252,7 +251,7 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
 #if ENABLE_TRACING
     printf ("NALU Type: %d (%s)\n", nalu_type, NALU_TYPE[nalu_type]);
 #endif
-    int poc = -1;
+    int poc     = -1;
     int poc_lsb = -1;
     int new_poc = -1;
 
@@ -307,28 +306,41 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
         // beginning of picture header parsing
         parcatHLSReader.parsePictureHeaderUpToPoc(&parameterSetManager);
         int num_bits_up_to_poc_lsb = parcatHLSReader.getBitstream()->getNumBitsRead();
-        int offset = num_bits_up_to_poc_lsb;
+        int num_emul_prev_code_before_poc = 0;
+        for (int i=0; i<parcatHLSReader.getBitstream()->numEmulationPreventionBytesRead(); i++)
+        {
+          if (8*parcatHLSReader.getBitstream()->getEmulationPreventionByteLocation(i) <= num_bits_up_to_poc_lsb)
+            num_emul_prev_code_before_poc++;
+        }
+        int offset = num_bits_up_to_poc_lsb + (num_emul_prev_code_before_poc << 3);
 
         int byte_offset = offset / 8;
-        int hi_bits = offset % 8;
-        uint16_t data = (nalu[byte_offset] << 8) | nalu[byte_offset + 1];
-        int low_bits = 16 - hi_bits - bits_for_poc;
+        int hi_bits     = offset % 8;
+        uint16_t data   = (nalu[byte_offset] << 8) | nalu[byte_offset + 1];
+        int low_bits    = 16 - hi_bits - bits_for_poc;
         poc_lsb = (data >> low_bits) & 0xff;
-        poc = poc_lsb; //calc_poc(poc_lsb, 0, bits_for_poc, nalu_type);
+        poc     = poc_lsb; //calc_poc(poc_lsb, 0, bits_for_poc, nalu_type);
 
         new_poc = poc + *poc_base;
         // int picOrderCntLSB = (pcSlice->getPOC()-pcSlice->getLastIDR()+(1<<pcSlice->getSPS()->getBitsForPOC())) & ((1<<pcSlice->getSPS()->getBitsForPOC())-1);
         unsigned picOrderCntLSB = (new_poc - *last_idr_poc + (1 << bits_for_poc)) & ((1 << bits_for_poc) - 1);
 
         int low = data & ((1 << low_bits) - 1);
-        int hi = data >> (16 - hi_bits);
-        data = (hi << (16 - hi_bits)) | (picOrderCntLSB << low_bits) | low;
+        int hi  = data >> (16 - hi_bits);
+        data    = (hi << (16 - hi_bits)) | (picOrderCntLSB << low_bits) | low;
 
         nalu[byte_offset] = data >> 8;
         nalu[byte_offset + 1] = data & 0xff;
 
 #if ENABLE_TRACING
-        std::cout << "Changed poc " << poc << " to " << new_poc << std::endl;
+        std::cout << "Changed poc " << poc << " to " << new_poc << " at offset " << offset << " bits";
+        if (num_emul_prev_code_before_poc)
+        {
+          std::cout << " with " << num_emul_prev_code_before_poc << " emulation prevention code at byte pos ";
+          for (int i=0; i<num_emul_prev_code_before_poc; i++)
+            std::cout << parcatHLSReader.getBitstream()->getEmulationPreventionByteLocation(i) << " ";
+        }
+        std::cout << std::endl;
 #endif
         ++cnt;
         change_poc = false;
@@ -340,7 +352,8 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
       skip_next_sei = true;
       idr_found = true;
     }
-    if ((idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP)) || ((idx > 1 && !idr_found) && (nalu_type == NAL_UNIT_DCI || nalu_type == NAL_UNIT_VPS || nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS || nalu_type == NAL_UNIT_PREFIX_APS || nalu_type == NAL_UNIT_SUFFIX_APS || nalu_type == NAL_UNIT_PH || nalu_type == NAL_UNIT_ACCESS_UNIT_DELIMITER))
+    if ((idx > 1 && (nalu_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nalu_type == NAL_UNIT_CODED_SLICE_IDR_N_LP))
+      || ((idx > 1 && !idr_found) && (nalu_type == NAL_UNIT_DCI || nalu_type == NAL_UNIT_VPS || nalu_type == NAL_UNIT_SPS || nalu_type == NAL_UNIT_PPS || nalu_type == NAL_UNIT_PREFIX_APS || nalu_type == NAL_UNIT_SUFFIX_APS || nalu_type == NAL_UNIT_PH || nalu_type == NAL_UNIT_ACCESS_UNIT_DELIMITER))
       || (nalu_type == NAL_UNIT_SUFFIX_SEI && skip_next_sei))
     {
     }
@@ -355,8 +368,7 @@ std::vector<uint8_t> filter_segment(const std::vector<uint8_t> & v, int idx, int
       skip_next_sei = false;
     }
 
-
-    p += (nal_end - nal_start);
+    p  += (nal_end - nal_start);
     sz -= nal_end;
   }
 
@@ -408,12 +420,12 @@ int main(int argc, char * argv[])
   }
 
   FILE * fdo = fopen(argv[argc - 1], "wb");
-  if (fdo==NULL)
+  if (fdo == NULL)
   {
     fprintf(stderr, "Error: could not open output file: %s", argv[argc - 1]);
     exit(1);
   }
-  int poc_base = 0;
+  int poc_base     = 0;
   int last_idr_poc = 0;
 
   initROM();
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index bcb690f4cc368cb182653964b15c00c49b0e287b..77b337940d2250c8f59061bf36275758c294ed6d 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -477,6 +477,14 @@ static const int ALF_VB_POS_ABOVE_CTUROW_CHMA = 2;
 #if W0038_DB_OPT
 static const int MAX_ENCODER_DEBLOCKING_QUALITY_LAYERS =           8 ;
 #endif
+#if IDCC_TPM_JEM && !IDCC_FixedComparisonPerPixel
+static const int SEARCHRANGEINTRA = IDCC_SEARCHRANGEINTRA; ///< Intra search range (-SEARCHRANGE,+SEARCHRANGE)
+#endif
+
+#if IDCC_TPM_JEM
+static const int USE_MORE_BLOCKSIZE_DEPTH_MAX = IDCC_TMP_MaxSize_Depth - 1; ///< number of block-size depths for which TrQuant allocates intra TMP target patch buffers
+static const int INIT_THRESHOULD_SHIFTBITS = 2;  ///< (default 2) Early skip threshold for checking distance: right shift applied to (1 << bitDepth) in TempLibFast::initTemplateDiff.
+#endif
 
 #if SHARP_LUMA_DELTA_QP
 static const uint32_t LUMA_LEVEL_TO_DQP_LUT_MAXSIZE =                1024; ///< max LUT size for QP offset based on luma
diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp
index 803d7266d331157498a4f7e4edd92e706b55f3e8..7855d636f03d74734a51f7f92acb5460e63cabfc 100644
--- a/source/Lib/CommonLib/ContextModelling.cpp
+++ b/source/Lib/CommonLib/ContextModelling.cpp
@@ -778,6 +778,24 @@ void MergeCtx::setMmvdMergeCandiInfo(PredictionUnit& pu, int candIdx)
   PU::restrictBiPredMergeCandsOne(pu);
 }
 
+#if IDCC_TPM_JEM
+unsigned DeriveCtx::CtxTmpFlag(const CodingUnit& cu)
+{
+  // Context index for the TMP flag: 3 when one luma dimension exceeds twice the other,
+  // otherwise the number of left/above neighbouring CUs that have TmpFlag set (0..2).
+  const CodingStructure& codingStruct = *cu.cs;
+  const CodingUnit* const leftCu  = codingStruct.getCURestricted(cu.lumaPos().offset(-1, 0), cu, CH_L);
+  const CodingUnit* const aboveCu = codingStruct.getCURestricted(cu.lumaPos().offset(0, -1), cu, CH_L);
+
+  const bool elongated = cu.lwidth() > 2 * cu.lheight() || cu.lheight() > 2 * cu.lwidth();
+  if (elongated)
+  {
+    return 3;
+  }
+  return ((leftCu && leftCu->TmpFlag) ? 1u : 0u) + ((aboveCu && aboveCu->TmpFlag) ? 1u : 0u);
+}
+#endif
+
 unsigned DeriveCtx::CtxMipFlag( const CodingUnit& cu )
 {
   const CodingStructure *cs = cu.cs;
diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h
index 5f26cface3f1284188fa5a79e9e8efad9274cba0..6e3ad4616ecfce51ec1eacf7418eb2608fce7406 100644
--- a/source/Lib/CommonLib/ContextModelling.h
+++ b/source/Lib/CommonLib/ContextModelling.h
@@ -611,6 +611,9 @@ unsigned CtxAffineFlag( const CodingUnit& cu );
 unsigned CtxPredModeFlag( const CodingUnit& cu );
 unsigned CtxIBCFlag(const CodingUnit& cu);
 unsigned CtxMipFlag   ( const CodingUnit& cu );
+#if IDCC_TPM_JEM
+unsigned CtxTmpFlag(const CodingUnit& cu);
+#endif
 unsigned CtxPltCopyFlag( const unsigned prevRunType, const unsigned dist );
 #if ENABLE_DIMD
 unsigned CtxDIMDFlag(const CodingUnit& cu);
diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp
index 62e393cbc28c505f0b5c0875757e43e728383c33..db49d40b90b8583e77883a1d410ba6feddce1652 100644
--- a/source/Lib/CommonLib/Contexts.cpp
+++ b/source/Lib/CommonLib/Contexts.cpp
@@ -1000,6 +1000,17 @@ const CtxSet ContextSetCfg::MipFlag = ContextSetCfg::addCtxSet
 	{ 9,  9,  8,  6 },
 	{ 10, 10,  9,  6 }
 });
+#if IDCC_TPM_JEM
+const CtxSet ContextSetCfg::TmpFlag = ContextSetCfg::addCtxSet // four entries per row; presumably indexed by DeriveCtx::CtxTmpFlag (0..3) - TODO confirm
+({
+  {  CNU,  CNU,  CNU,  CNU, },
+  {  CNU,  CNU,  CNU,  CNU, },
+  {  CNU,  CNU,  CNU,  CNU, },
+  {   DWS,  DWS,   DWS,   DWS, },
+  {   DWS,  DWS,   DWS,   DWS, },
+  {   DWS,  DWS,   DWS,   DWS, },
+	});
+#endif
 
 #if MMLM
 const CtxSet ContextSetCfg::MMLMFlag = ContextSetCfg::addCtxSet
@@ -2078,6 +2089,16 @@ const CtxSet ContextSetCfg::MipFlag = ContextSetCfg::addCtxSet
   {  33,  49,  50,  25, },
   {   9,  10,   9,   6, },
 });
+#if IDCC_TPM_JEM
+const CtxSet ContextSetCfg::TmpFlag = ContextSetCfg::addCtxSet // four entries per row; presumably indexed by DeriveCtx::CtxTmpFlag (0..3) - TODO confirm
+({
+  {  CNU,  CNU,  CNU,  CNU, },
+  {  CNU,  CNU,  CNU,  CNU, },
+  {  CNU,  CNU,  CNU,  CNU, },
+  {   DWS,  DWS,   DWS,   DWS, },
+	});
+#endif
+
 
 #if MMLM
 const CtxSet ContextSetCfg::MMLMFlag = ContextSetCfg::addCtxSet
diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h
index c00f64f86d104c13a8549cf0d7bc7b344dc8f8a9..41b9f313e1d7fa7bb0e2a98ac730fd0b5fd44f1c 100644
--- a/source/Lib/CommonLib/Contexts.h
+++ b/source/Lib/CommonLib/Contexts.h
@@ -245,6 +245,9 @@ public:
   static const CtxSet   CclmModeIdx;
   static const CtxSet   IntraChromaPredMode;
   static const CtxSet   MipFlag;
+#if IDCC_TPM_JEM
+  static const CtxSet   TmpFlag;
+#endif
 #if MMLM
   static const CtxSet   MMLMFlag;
 #endif
diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp
index 621d983f2f27720ab7eee8823a55c923e0bb574e..ea5eff9a761587742da206c07731ba1a8c8cc9b3 100644
--- a/source/Lib/CommonLib/IntraPrediction.cpp
+++ b/source/Lib/CommonLib/IntraPrediction.cpp
@@ -679,6 +679,9 @@ void IntraPrediction::initPredIntraParams(const PredictionUnit & pu, const CompA
   if(   sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag()
     || !isLuma( chType )
     || useISP
+#if IDCC_TPM_JEM
+	  || PU::isTmp(pu, chType)
+#endif
     || PU::isMIP( pu, chType )
     || m_ipaParam.multiRefIndex
     || DC_IDX == dirMode
@@ -1384,7 +1387,53 @@ void IntraPrediction::initIntraPatternChTypeISP(const CodingUnit& cu, const Comp
   }
 }
 
+#if IDCC_TPM_JEM
+bool IntraPrediction::isRefTemplateAvailable(CodingUnit& cu, CompArea& area)
+{
+	const ChannelType      chType = toChannelType(area.compID);
+	const CodingStructure& cs = *cu.cs;
+	const SPS& sps = *cs.sps;
+	const PreCalcValues& pcv = *cs.pcv;
+	// Returns true only if the above-left, above and left neighbours of 'area' are all reported
+	// as available for 'cu'; returns false early when any derived unit count is not positive.
+	const int  tuWidth = area.width;
+	const int  tuHeight = area.height;
+	const int  predSize = m_topRefLength;    // length of the top reference row (member state)
+	const int  predHSize = m_leftRefLength;  // length of the left reference column (member state)
+	// Availability is evaluated in units of the minimum CU size, scaled for the component.
+
+
+	const int  unitWidth = pcv.minCUWidth >> getComponentScaleX(area.compID, sps.getChromaFormatIdc());
+	const int  unitHeight = pcv.minCUHeight >> getComponentScaleY(area.compID, sps.getChromaFormatIdc());
+
+	const int  totalAboveUnits = (predSize + (unitWidth - 1)) / unitWidth;
+	const int  totalLeftUnits = (predHSize + (unitHeight - 1)) / unitHeight;
+	const int  totalUnits = totalAboveUnits + totalLeftUnits + 1; //+1 for top-left
+	const int  numAboveUnits = std::max<int>(tuWidth / unitWidth, 1);
+	const int  numLeftUnits = std::max<int>(tuHeight / unitHeight, 1);
+	const int  numAboveRightUnits = totalAboveUnits - numAboveUnits;
+	const int  numLeftBelowUnits = totalLeftUnits - numLeftUnits;
+	// The reference lengths must extend beyond the block itself (above-right / below-left units > 0).
+	if (numAboveUnits <= 0 || numLeftUnits <= 0 || numAboveRightUnits <= 0 || numLeftBelowUnits <= 0)
+		return false;
 
+	// ----- Step 1: analyze neighborhood -----
+	const Position posLT = area;
+	// Only the top-left position of the area is needed for the three availability queries below.
+
+
+	bool  neighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1];
+	// Scratch flags handed to the availability helpers; the values written are not read here.
+
+	memset(neighborFlags, 0, totalUnits);
+
+	// The && chain short-circuits: later helpers are not called once one check fails.
+
+	return isAboveLeftAvailable(cu, chType, posLT) && isAboveAvailable(cu, chType, posLT, numAboveUnits, unitWidth, (neighborFlags + totalLeftUnits + 1)) && isLeftAvailable(cu, chType, posLT, numLeftUnits, unitHeight, (neighborFlags + totalLeftUnits - 1));
+
+
+}
+#endif
 void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu )
 {
   const ChannelType      chType = toChannelType( area.compID );
diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h
index 87796b451d113951a92405c36b26859b1fffe8db..5d7ae3173634dd702b00ccf6248c3021e7e5f51e 100644
--- a/source/Lib/CommonLib/IntraPrediction.h
+++ b/source/Lib/CommonLib/IntraPrediction.h
@@ -144,6 +144,9 @@ protected:
 
   void xPredIntraBDPCM            ( const CPelBuf &pSrc, PelBuf &pDst, const uint32_t dirMode, const ClpRng& clpRng );
   Pel  xGetPredValDc              ( const CPelBuf &pSrc, const Size &dstSize );
+#if IDCC_TPM_JEM
+  bool isRefTemplateAvailable(CodingUnit& cu, CompArea& area);
+#endif
 
   void xFillReferenceSamples      ( const CPelBuf &recoBuf,      Pel* refBufUnfiltered, const CompArea &area, const CodingUnit &cu );
   void xFilterReferenceSamples(const Pel *refBufUnfiltered, Pel *refBufFiltered, const CompArea &area, const SPS &sps,
diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp
index 96be6fd96a2556a72ea798e4a8b795ea838f67a7..6c40305b13876f969e58eef7eb492ca2cca28c00 100644
--- a/source/Lib/CommonLib/Slice.cpp
+++ b/source/Lib/CommonLib/Slice.cpp
@@ -3113,6 +3113,10 @@ SPS::SPS()
 #if ENABLE_DIMD
 , m_dimd                      ( false )
 #endif
+#if IDCC_TPM_JEM
+, m_IntraTMP                  ( false )
+, m_IntraTMP_MaxSize          ( 64 )                             
+#endif
 #if ENABLE_OBMC
 , m_OBMC                      ( false )
 #endif
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index 41ffe7e35bcfca51949725357ad24990f6e93492..fde7178a8208baf651a05f9c407ca10ceb78e228 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -1651,6 +1651,10 @@ private:
 #if ENABLE_DIMD
   bool              m_dimd;
 #endif
+#if IDCC_TPM_JEM
+  bool              m_IntraTMP;                                       ///< intra Template Matching 
+  unsigned          m_IntraTMP_MaxSize;                               ///< max CU size for which intra TMP is allowed
+#endif
 #if ENABLE_OBMC
   bool              m_OBMC;
 #endif
@@ -2073,6 +2077,12 @@ void                    setCCALFEnabledFlag( bool b )
   void      setUseDimd         ( bool b )                                        { m_dimd = b; }
   bool      getUseDimd         ()                                      const     { return m_dimd; }
 #endif
+#if IDCC_TPM_JEM
+  void      setUseIntraTMP(bool b) { m_IntraTMP = b; }                   ///< set the intra Template Matching flag
+  bool      getUseIntraTMP() const { return m_IntraTMP; }                ///< get the intra Template Matching flag
+  void      setIntraTMPMaxSize(unsigned n) { m_IntraTMP_MaxSize = n; }   ///< set the max CU size for which intra TMP is allowed
+  unsigned  getIntraTMPMaxSize() const { return m_IntraTMP_MaxSize; }    ///< get the max CU size for which intra TMP is allowed
+#endif
 #if ENABLE_OBMC
   void      setUseOBMC         ( bool b )                                        { m_OBMC = b; }
   bool      getUseOBMC         ()                                      const     { return m_OBMC; }
diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp
index d67d07aa2477be9d02a7689b4715d82b2679fe5c..0ad97c3e3efc6fbceafb7aa6d143713650e4774a 100644
--- a/source/Lib/CommonLib/TrQuant.cpp
+++ b/source/Lib/CommonLib/TrQuant.cpp
@@ -55,6 +55,16 @@
 #include "CommonLib/CodingStatistics.h"
 #endif
 
+#if IDCC_TMP_SIMD
+#include "CommonDefX86.h"
+#endif
+
+#if IDCC_TPM_JEM
+
+unsigned int g_uiDepth2Width[5] = { 4, 8, 16, 32, 64 };
+#endif
+
+
 struct coeffGroupRDStats
 {
   int    iNNZbeforePos0;
@@ -187,6 +197,9 @@ TrQuant::TrQuant() : m_quant( nullptr )
     m_fwdICT[-2]  = fwdTransformCbCr<-2>;
     m_fwdICT[ 3]  = fwdTransformCbCr< 3>;
     m_fwdICT[-3]  = fwdTransformCbCr<-3>;
+#if IDCC_TPM_JEM
+	m_pppTarPatch = NULL;
+#endif
   }
 }
 
@@ -197,6 +210,33 @@ TrQuant::~TrQuant()
     delete m_quant;
     m_quant = nullptr;
   }
+#if IDCC_TPM_JEM
+#endif
+
+#if IDCC_TPM_JEM
+  // Release the per-depth target patch buffers that init() allocates.
+  if (m_pppTarPatch != nullptr)
+  {
+    for (unsigned int uiDepth = 0; uiDepth < USE_MORE_BLOCKSIZE_DEPTH_MAX; uiDepth++)
+    {
+      if (m_pppTarPatch[uiDepth] == nullptr)   // check the row table before touching its rows
+      {
+        continue;
+      }
+      // Same patch dimension as at allocation time: block size plus template margin.
+      const unsigned int patchSize = g_uiDepth2Width[uiDepth] + IDCC_TemplateSize;
+      for (unsigned int uiRow = 0; uiRow < patchSize; uiRow++)
+      {
+        delete[] m_pppTarPatch[uiDepth][uiRow];   // delete[] of a null pointer is a no-op
+        m_pppTarPatch[uiDepth][uiRow] = nullptr;
+      }
+      delete[] m_pppTarPatch[uiDepth];
+      m_pppTarPatch[uiDepth] = nullptr;
+    }
+    delete[] m_pppTarPatch;
+    m_pppTarPatch = nullptr;
+  }
+#endif
 }
 
 #if ENABLE_SPLIT_PARALLELISM
@@ -234,6 +274,27 @@ void TrQuant::init( const Quant* otherQuant,
     m_quant->init( uiMaxTrSize, bUseRDOQ, bUseRDOQTS, useSelectiveRDOQ );
   }
 
+
+#if IDCC_TPM_JEM
+  // Allocate one square target patch buffer per block-size depth, unless already allocated.
+  unsigned int blkSize;
+  if (!m_pppTarPatch)
+  {
+    m_pppTarPatch = new Pel**[USE_MORE_BLOCKSIZE_DEPTH_MAX];
+    for (unsigned int depthIdx = 0; depthIdx < USE_MORE_BLOCKSIZE_DEPTH_MAX; ++depthIdx)
+    {
+      blkSize = g_uiDepth2Width[depthIdx];
+      const unsigned int patchDim = blkSize + IDCC_TemplateSize;   // block plus template margin
+      m_pppTarPatch[depthIdx] = new Pel*[patchDim];
+      Pel** const patchRows = m_pppTarPatch[depthIdx];
+      for (unsigned int rowIdx = 0; rowIdx < patchDim; ++rowIdx)
+      {
+        patchRows[rowIdx] = new Pel[patchDim];
+      }
+    }
+  }
+#endif
+
 #if TU_256
   fastFwdTrans =
   { {
@@ -374,6 +435,581 @@ void TrQuant::invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32
   }
 }
 
+#if IDCC_TPM_JEM
+void insertNode(DistType diff, int& iXOffset, int& iYOffset, DistType& pDiff, int& pX, int& pY, short& pId, unsigned int& setId)
+{ // Overwrites the stored candidate (pDiff, pX, pY, pId) with the given cost, offsets and set id.
+	pDiff = diff;
+	pX = iXOffset;
+	pY = iYOffset;
+	pId = setId; // implicit conversion from unsigned int to short
+}
+#if IDCC_TPM_JEM
+#if IDCC_FixedComparisonPerPixel
+void clipMvIntraConstraint(CodingUnit* pcCU, int regionId, int& iHorMin, int& iHorMax, int& iVerMin, int& iVerMax, unsigned int uiTemplateSize, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int iCurrY, int iCurrX, int offsetLCUY, int offsetLCUX)
+#else
+void clipMvIntraConstraint(CodingUnit* pcCU, int regionId, int& iHorMin, int& iHorMax, int& iVerMin, int& iVerMax, int iRange, unsigned int uiTemplateSize, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int iCurrY, int iCurrX, int offsetLCUY, int offsetLCUX)
+#endif
+{
+  // Derives the search window, expressed as offsets relative to (iCurrX, iCurrY), for one region:
+  //   regionId 0: above, outside the current LCU
+  //   regionId 1: left, outside the current LCU
+  //   regionId 2: left, outside the current LCU (can reach the bottom row of the LCU)
+  // Any other regionId leaves iHorMin / iHorMax / iVerMin / iVerMax untouched.
+#if IDCC_FixedComparisonPerPixel
+  // Search range proportional to the block dimensions.
+  const int rangeHor = IDCC_SearchRangeMultFactor * uiBlkWidth;
+  const int rangeVer = IDCC_SearchRangeMultFactor * uiBlkHeight;
+#else
+  // Fixed search range; the iRange argument is not used.
+  const int rangeHor = IDCC_SEARCHRANGEINTRA;
+  const int rangeVer = IDCC_SEARCHRANGEINTRA;
+#endif
+  const int mvShift  = 0;   // zero shift: bounds are left unscaled
+  const int tmplSize = uiTemplateSize;
+  const int blkW     = uiBlkWidth;
+  const int blkH     = uiBlkHeight;
+
+  switch (regionId)
+  {
+  // above outside LCU
+  case 0:
+    iHorMax = std::min((iCurrX + rangeHor) << mvShift, (int)((pcCU->cs->sps->getMaxPicWidthInLumaSamples() - blkW) << mvShift));
+    iHorMin = std::max((tmplSize) << mvShift, (iCurrX - rangeHor) << mvShift);
+    iVerMax = (iCurrY - blkH - offsetLCUY) << mvShift;
+    iVerMin = std::max(((tmplSize) << mvShift), ((iCurrY - rangeVer) << mvShift));
+    break;
+  // left outside LCU
+  case 1:
+    iHorMax = (iCurrX - offsetLCUX - blkW) << mvShift;
+    iHorMin = std::max((tmplSize) << mvShift, (iCurrX - rangeHor) << mvShift);
+    iVerMin = std::max((tmplSize) << mvShift, (iCurrY - blkH - offsetLCUY) << mvShift);
+    iVerMax = (iCurrY) << mvShift;
+    break;
+  // left outside LCU (can reach the bottom row of LCU)
+  case 2:
+    iHorMin = std::max((tmplSize) << mvShift, (iCurrX - rangeHor) << mvShift);
+    iHorMax = (iCurrX - offsetLCUX - blkW) << mvShift;
+    iVerMin = (iCurrY + 1) << mvShift;
+    iVerMax = std::min(pcCU->cs->sps->getMaxPicHeightInLumaSamples() - blkH, (iCurrY - offsetLCUY + pcCU->cs->sps->getCTUSize() - blkH) << mvShift);
+    break;
+  default:
+    return;
+  }
+
+  // Convert the absolute bounds into offsets relative to the current position.
+  iHorMin = iHorMin - iCurrX;
+  iHorMax = iHorMax - iCurrX;
+  iVerMax = iVerMax - iCurrY;
+  iVerMin = iVerMin - iCurrY;
+}
+#endif
+#endif
+
+#if IDCC_TPM_JEM
+TempLibFast::TempLibFast()
+{ // empty body: no member is assigned here; m_diffMax and m_pDiff are set by initTemplateDiff()
+}
+
+TempLibFast::~TempLibFast()
+{ // empty body: no cleanup is performed here
+}
+#endif
+
+#if IDCC_TPM_JEM
+void TempLibFast::initTemplateDiff(unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int bitDepth)
+{
+  // Initial cost threshold: per-sample bound (1 << bitDepth) >> INIT_THRESHOULD_SHIFTBITS (squared for SSD)
+  // times the number of template samples, i.e. patch area minus block area.
+#if VCEG_AZ08_USE_SAD_DISTANCE
+  DistType maxValue = ((1 << bitDepth) >> (INIT_THRESHOULD_SHIFTBITS)) * (uiPatchHeight * uiPatchWidth - uiBlkHeight * uiBlkWidth);
+#endif
+#if VCEG_AZ08_USE_SSD_DISTANCE
+  DistType maxValue = ((1 << bitDepth) >> (INIT_THRESHOULD_SHIFTBITS)) * ((1 << bitDepth) >> (INIT_THRESHOULD_SHIFTBITS)) * (uiPatchHeight * uiPatchWidth - uiBlkHeight * uiBlkWidth);
+#endif
+  m_diffMax = maxValue;   // bound against which a found candidate is later compared
+  m_pDiff   = maxValue;   // stored cost starts at the bound
+}
+
+void TrQuant::getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight)
+{
+  // Copies the template of the current luma block from the reconstructed picture into the
+  // target patch buffer: IDCC_TemplateSize rows above the block (including the top-left
+  // corner) and IDCC_TemplateSize columns to its left. The block interior is not written.
+  const ComponentID compID = COMPONENT_Y;
+  const unsigned int patchW = uiBlkWidth + IDCC_TemplateSize;
+  const unsigned int patchH = uiBlkHeight + IDCC_TemplateSize;
+
+  // Buffer index: floor(log2) of the larger block dimension, minus 2.
+  const unsigned int depthIdx = floorLog2(std::max(uiBlkHeight, uiBlkWidth)) - 2;
+  Pel** const dstRows = m_pppTarPatch[depthIdx];
+
+  CompArea lumaArea = pcCU->blocks[compID];
+  Pel* const blkOrigin = pcCU->cs->picture->getRecoBuf(lumaArea).buf;
+  const unsigned int recoStride = pcCU->cs->picture->getRecoBuf(compID).stride;
+
+  // Start at the top-left corner of the patch:
+  // IDCC_TemplateSize rows above and IDCC_TemplateSize columns left of the block origin.
+  const Pel* srcRow = blkOrigin - IDCC_TemplateSize * recoStride - IDCC_TemplateSize;
+
+  for (unsigned int row = 0; row < patchH; row++)
+  {
+    // Rows above the block are copied over the full patch width;
+    // the remaining rows only contribute the columns to the left of the block.
+    const unsigned int numCols = (row < IDCC_TemplateSize) ? patchW : IDCC_TemplateSize;
+
+    Pel* const dstRow = dstRows[row];
+    for (unsigned int col = 0; col < numCols; col++)
+    {
+      dstRow[col] = srcRow[col];
+    }
+
+    // Advance to the next picture row.
+    srcRow += recoStride;
+  }
+
+}
+
+void TrQuant::candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight)
+{
+  // Runs the intra template matching search for the current luma block and sets
+  // m_uiVaildCandiNum to 1 if the stored cost ends up below the initial threshold, else 0.
+  const ComponentID compID = COMPONENT_Y;
+  const int lumaBitDepth = pcCU->cs->sps->getBitDepth(toChannelType(compID));
+  const unsigned int patchW = uiBlkWidth + IDCC_TemplateSize;
+  const unsigned int patchH = uiBlkHeight + IDCC_TemplateSize;
+  const unsigned int depthIdx = floorLog2(std::max(uiBlkWidth, uiBlkHeight)) - 2;
+  Pel** const targetPatch = getTargetPatch(depthIdx);
+
+  // Initialize the library for saving the best candidates.
+  m_tempLibFast.initTemplateDiff(patchW, patchH, uiBlkWidth, uiBlkHeight, lumaBitDepth);
+
+  const short refSetId = 0;   // record the reference picture
+  searchCandidateFromOnePicIntra(pcCU, targetPatch, patchW, patchH, refSetId);
+
+  // Count collected candidates: at most one is kept.
+  const DistType bestCost = m_tempLibFast.getDiff();
+  const DistType costBound = m_tempLibFast.getDiffMax();
+  m_uiVaildCandiNum = (bestCost < costBound) ? 1 : 0;
+}
+
+void  TrQuant::searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId)
+{ // Scans displacement windows in the current picture's luma reconstruction; each position whose template cost beats the running best is handed to insertNode().
+	const ComponentID compID = COMPONENT_Y;
+	unsigned int uiBlkWidth = uiPatchWidth - IDCC_TemplateSize;	// the patch is the block plus the template border
+	unsigned int uiBlkHeight = uiPatchHeight - IDCC_TemplateSize;
+	// Running best candidate, seeded from m_tempLibFast and written back to it before returning.
+	int pX = m_tempLibFast.getX();
+	int pY = m_tempLibFast.getY();
+	DistType pDiff = m_tempLibFast.getDiff();
+	short pId = m_tempLibFast.getId();
+	CompArea area = pcCU->blocks[compID];
+	int  refStride = pcCU->cs->picture->getRecoBuf(compID).stride;
+	// Base pointer of this CU's luma area in the reconstruction buffer; candidate offsets are applied relative to it.
+	Pel* ref = pcCU->cs->picture->getRecoBuf(area).buf;
+	// Pointer and stride are stored so that generateTMPrediction() can read the chosen candidate back.
+	setRefPicUsed(ref); //facilitate the access of each candidate point
+
+	setStride(refStride);
+#if !IDCC_FixedComparisonPerPixel
+	int     iSrchRng = SEARCHRANGEINTRA; // NOTE(review): TypeDef.h names this macro IDCC_SEARCHRANGEINTRA - confirm SEARCHRANGEINTRA is defined
+#endif
+
+	// All positions are in luma samples.
+	//   (iCurrX, iCurrY)         top-left corner of the block in the picture
+	//   (offsetLCUX, offsetLCUY) the same corner measured from the top-left corner of its CTU
+	//   (blkX, blkY)             always zero here
+	unsigned int uiCUPelY = area.pos().y;
+	unsigned int uiCUPelX = area.pos().x;
+	int blkX = 0;
+	int blkY = 0;
+	int iCurrY = uiCUPelY + blkY;
+	int iCurrX = uiCUPelX + blkX;
+
+	Position  ctuRsAddr = CU::getCtuXYAddr(*pcCU);
+	int offsetLCUY = iCurrY - ctuRsAddr.y;
+	int offsetLCUX = iCurrX - ctuRsAddr.x;
+
+
+	int iYOffset, iXOffset;
+	DistType diff;
+	Pel* refCurr;
+
+#if IDCC_SignleSearchRegion
+	int mvYMins;
+	int mvYMaxs;
+	int mvXMins;
+	int mvXMaxs;
+#else
+#define REGION_NUM 3
+	int mvYMins[REGION_NUM];
+	int mvYMaxs[REGION_NUM];
+	int mvXMins[REGION_NUM];
+	int mvXMaxs[REGION_NUM];
+	int regionNum = REGION_NUM;
+	int regionId = 0;
+#endif
+
+#if IDCC_TMP_Within_CTU && !IDCC_SignleSearchRegion
+	//1. check the near pixels within LCU
+	//above pixels in LCU
+	int iTemplateSize = IDCC_TemplateSize;
+	int iBlkWidth = uiBlkWidth;
+	int iBlkHeight = uiBlkHeight;
+	regionId = 0;
+	int iMvShift = 0;
+	// Vertical range: [max(template size, CTU top - block height + 1), block top - block height];
+	// the horizontal range is built the same way. All four bounds carry the same (zero) shift.
+	int iVerMin = std::max(((iTemplateSize) << iMvShift), (iCurrY - offsetLCUY - iBlkHeight + 1) << iMvShift);
+	int iVerMax = (iCurrY - iBlkHeight) << iMvShift;
+	int iHorMin = std::max((iTemplateSize) << iMvShift, (iCurrX - offsetLCUX - iBlkWidth + 1) << iMvShift);
+	int iHorMax = (iCurrX - iBlkWidth) << iMvShift;
+
+	mvXMins[regionId] = iHorMin - iCurrX;
+	mvXMaxs[regionId] = iHorMax - iCurrX;
+	mvYMins[regionId] = iVerMin - iCurrY;
+	mvYMaxs[regionId] = iVerMax - iCurrY;
+
+
+
+	//check within CTU pixels
+	for (regionId = 0; regionId < 1; regionId++)
+	{
+		int mvYMin = mvYMins[regionId];
+		int mvYMax = mvYMaxs[regionId];
+		int mvXMin = mvXMins[regionId];
+		int mvXMax = mvXMaxs[regionId];
+		if (mvYMax < mvYMin || mvXMax < mvXMin)
+		{
+			continue;
+		}
+		for (iYOffset = mvYMax; iYOffset >= mvYMin && pDiff != 0; iYOffset--)
+		{
+			for (iXOffset = mvXMax; iXOffset >= mvXMin && pDiff != 0; iXOffset--)
+			{
+				refCurr = ref + iYOffset * refStride + iXOffset;
+				diff = calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff);
+				if (diff < (pDiff))
+				{
+					insertNode(diff, iXOffset, iYOffset, pDiff, pX, pY, pId, setId);
+				}
+				// A cost of zero cannot be improved (costs are sums of
+				// non-negative terms), so the "pDiff != 0" loop conditions
+				// end the scan as soon as it is reached.
+				// insertNode() is expected to update pDiff - TODO confirm.
+			}
+		}
+	}
+#endif
+#if IDCC_SignleSearchRegion
+	int iTempSize = IDCC_TemplateSize; // declared ahead of its first use in the search-range formulas below
+#if IDCC_FixedComparisonPerPixel
+	int SearchRange_Height, SearchRange_Width;
+	// No. of comparison per pixel is:
+	// (searchRange_width - Width - TempSize_width + 1)  *( searchRange_height - Height - TempSize + 1) / Width / Height
+	// to have a constant comparison per pixel:
+	// (searchRange_width - Width - TempSize_width + 1)/Width must be const  = CC
+	// (searchRange_height - Height - TempSize + 1)/ Height must be constant = CC
+
+	//searchRange_width  = CC*Width + Width + TempSize_width - 1;
+	//searchRange_height  = CC*Height + Height + TempSize_height - 1;
+
+
+	SearchRange_Width = IDCC_SearchRangeMultFactor * uiBlkWidth + uiBlkWidth + iTempSize - 1;
+	SearchRange_Height = IDCC_SearchRangeMultFactor * uiBlkHeight + uiBlkHeight + iTempSize - 1;
+#endif
+
+
+	//int  iMvShift = 0;
+	int iBlkWidth = uiBlkWidth;
+	int iBlkHeight = uiBlkHeight;
+
+#if IDCC_FixedComparisonPerPixel
+	mvYMins = std::max(iTempSize, iCurrY - SearchRange_Height);
+#else
+	mvYMins = std::max(iTempSize, (iCurrY - iSrchRng));
+#endif
+	mvYMaxs = iCurrY - iBlkHeight;
+#if IDCC_FixedComparisonPerPixel
+	mvXMins = std::max(iTempSize, (iCurrX - SearchRange_Width));
+#else
+	mvXMins = std::max(iTempSize, (iCurrX - iSrchRng));
+#endif
+	mvXMaxs = (iCurrX - iBlkWidth);
+
+	mvXMins = mvXMins - iCurrX;
+	mvXMaxs = mvXMaxs - iCurrX;
+	mvYMaxs = mvYMaxs - iCurrY;
+	mvYMins = mvYMins - iCurrY;
+#endif
+
+#if !IDCC_SignleSearchRegion
+	//2. check the pixels outside CTU
+	for (regionId = 0; regionId < regionNum; regionId++)
+	{// this function fills in the range the template matching for pixels outside the current CTU
+#if IDCC_FixedComparisonPerPixel
+		clipMvIntraConstraint(pcCU, regionId, mvXMins[regionId], mvXMaxs[regionId], mvYMins[regionId], mvYMaxs[regionId], IDCC_TemplateSize, uiBlkWidth, uiBlkHeight, iCurrY, iCurrX, offsetLCUY, offsetLCUX);
+#else
+		clipMvIntraConstraint(pcCU, regionId, mvXMins[regionId], mvXMaxs[regionId], mvYMins[regionId], mvYMaxs[regionId], iSrchRng, IDCC_TemplateSize, uiBlkWidth, uiBlkHeight, iCurrY, iCurrX, offsetLCUY, offsetLCUX);
+#endif
+	}
+#endif
+#if !IDCC_SignleSearchRegion
+	for (regionId = 0; regionId < regionNum; regionId++)
+#endif
+	{
+#if IDCC_SignleSearchRegion
+		int mvYMin = mvYMins;
+		int mvYMax = mvYMaxs;
+		int mvXMin = mvXMins;
+		int mvXMax = mvXMaxs;
+#else
+		int mvYMin = mvYMins[regionId];
+		int mvYMax = mvYMaxs[regionId];
+		int mvXMin = mvXMins[regionId];
+		int mvXMax = mvXMaxs[regionId];
+#endif
+		if ( mvYMax < mvYMin || mvXMax < mvXMin )
+		{
+#if IDCC_SignleSearchRegion
+			return;
+#else
+			continue;
+#endif
+		}
+		for (iYOffset = mvYMax; iYOffset >= mvYMin && pDiff != 0; iYOffset--)
+		{
+			for (iXOffset = mvXMax; iXOffset >= mvXMin && pDiff != 0; iXOffset--)
+			{
+				refCurr = ref + iYOffset * refStride + iXOffset;
+				diff = calcTemplateDiff(refCurr, refStride, tarPatch, uiPatchWidth, uiPatchHeight, pDiff);
+				if (diff < (pDiff))
+				{
+					insertNode(diff, iXOffset, iYOffset, pDiff, pX, pY, pId, setId);
+				}
+				// Once the best cost is zero the "pDiff != 0" conditions
+				// stop this scan and keep the loops of all remaining
+				// regions from starting, so no region bookkeeping
+				// (regionNum) is needed in this shared loop body.
+			}
+		}
+	}
+	m_tempLibFast.m_pX = pX;
+	m_tempLibFast.m_pY = pY;
+	m_tempLibFast.m_pDiff = pDiff;
+	m_tempLibFast.m_pId = pId;
+}
+bool TrQuant::generateTMPrediction(Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int& foundCandiNum)
+{
+	// Copies the block selected by the preceding template search into
+	// the caller's prediction buffer.
+	//
+	//   piPred         destination buffer (top-left sample of the block)
+	//   uiStride       distance between destination rows, in samples
+	//   uiBlkWidth     number of samples copied per row
+	//   uiBlkHeight    number of rows copied
+	//   foundCandiNum  out: receives m_uiVaildCandiNum
+	//
+	// Returns false without touching piPred when no valid candidate is
+	// recorded, true once the block has been copied.
+	foundCandiNum = m_uiVaildCandiNum;
+	if (foundCandiNum < 1)
+	{
+		return false;
+	}
+
+	// The source is the pointer stored by setRefPicUsed(), displaced by
+	// the stored candidate offset: getY() rows down, getX() samples across.
+	const int srcStride = getStride();
+	const int offsetX = m_tempLibFast.getX();
+	const int offsetY = m_tempLibFast.getY();
+	Pel* srcRow = getRefPicUsed() + offsetY * srcStride + offsetX;
+	Pel* dstRow = piPred;
+
+	// Row-by-row sample copy; both pointers advance by their own stride.
+	unsigned int rowsLeft = uiBlkHeight;
+	while (rowsLeft-- > 0)
+	{
+		for (unsigned int col = 0; col < uiBlkWidth; col++)
+		{
+			dstRow[col] = srcRow[col];
+		}
+		srcRow += srcStride;
+		dstRow += uiStride;
+	}
+	return true;
+}
+
+DistType TrQuant::calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, DistType iMax)
+{ // Template cost between tarPatch and the patch whose block starts at 'ref': the first IDCC_TemplateSize rows over the full patch width, then the first IDCC_TemplateSize columns of the remaining rows. Returns the partial sum as soon as it exceeds iMax.
+#if IDCC_TMP_SIMD
+	DistType iDiffSum = 0;
+	int iY;
+	Pel* refPatchRow = ref - IDCC_TemplateSize * uiStride - IDCC_TemplateSize;	// top-left corner of the reference patch (template included)
+	Pel* tarPatchRow;
+	// NOTE(review): the casts to 'const short*' below assume Pel is a 16-bit type - confirm.
+	uint32_t uiSum;
+	// horizontal difference
+	for (iY = 0; iY < IDCC_TemplateSize; iY++)
+	{
+		tarPatchRow = tarPatch[iY];
+		const short* pSrc1 = (const short*)tarPatchRow;
+		const short* pSrc2 = (const short*)refPatchRow;
+
+		// SIMD difference
+		//int  iRows = uiPatchHeight;
+		int  iCols = uiPatchWidth;
+		if ((iCols & 7) == 0)
+		{
+			// Do with step of 8
+			__m128i vzero = _mm_setzero_si128();
+			__m128i vsum32 = vzero;
+			//for (int iY = 0; iY < iRows; iY += iSubStep)
+			{
+				__m128i vsum16 = vzero;	// 16-bit lane sums for this row, widened to 32 bit below
+				for (int iX = 0; iX < iCols; iX += 8)
+				{
+					__m128i vsrc1 = _mm_loadu_si128((const __m128i*)(&pSrc1[iX]));
+					__m128i vsrc2 = _mm_lddqu_si128((const __m128i*)(&pSrc2[iX]));
+					vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2)));
+				}
+				__m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero));
+				vsum32 = _mm_add_epi32(vsum32, vsumtemp);
+				//pSrc1 += iStrideSrc1;
+				//pSrc2 += iStrideSrc2;
+			}
+			vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e));   // 01001110
+			vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));   // 10110001
+			uiSum = _mm_cvtsi128_si32(vsum32);
+		}
+		else
+		{
+			// Do with step of 4
+			__m128i vzero = _mm_setzero_si128();
+			__m128i vsum32 = vzero;
+			//for (int iY = 0; iY < iRows; iY += iSubStep)
+			{
+				__m128i vsum16 = vzero;
+				for (int iX = 0; iX < iCols; iX += 4)
+				{
+					__m128i vsrc1 = _mm_loadl_epi64((const __m128i*) & pSrc1[iX]);
+					__m128i vsrc2 = _mm_loadl_epi64((const __m128i*) & pSrc2[iX]);
+					vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2)));
+				}
+				__m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero));
+				vsum32 = _mm_add_epi32(vsum32, vsumtemp);
+				//pSrc1 += iStrideSrc1;
+				//pSrc2 += iStrideSrc2;
+			}
+			vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e));   // 01001110
+			vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));   // 10110001
+			uiSum = _mm_cvtsi128_si32(vsum32);
+		}
+		iDiffSum += uiSum;
+
+		if (iDiffSum > iMax) //for speeding up
+		{
+			return iDiffSum;
+		}
+		// update location
+		refPatchRow += uiStride;
+	}
+
+	// vertical difference
+	int  iCols = IDCC_TemplateSize;
+	for (iY = IDCC_TemplateSize; iY < uiPatchHeight; iY++)
+	{
+		tarPatchRow = tarPatch[iY];
+		const short* pSrc1 = (const short*)tarPatchRow;
+		const short* pSrc2 = (const short*)refPatchRow ;
+
+		// SIMD difference
+
+		// Do with step of 4
+		__m128i vzero = _mm_setzero_si128();
+		__m128i vsum32 = vzero;
+		//for (int iY = 0; iY < iRows; iY += iSubStep)
+		{
+			__m128i vsum16 = vzero;
+			for (int iX = 0; iX < iCols; iX += 4)
+			{
+				__m128i vsrc1 = _mm_loadl_epi64((const __m128i*) & pSrc1[iX]);
+				__m128i vsrc2 = _mm_loadl_epi64((const __m128i*) & pSrc2[iX]);
+				vsum16 = _mm_add_epi16(vsum16, _mm_abs_epi16(_mm_sub_epi16(vsrc1, vsrc2)));
+			}
+			__m128i vsumtemp = _mm_add_epi32(_mm_unpacklo_epi16(vsum16, vzero), _mm_unpackhi_epi16(vsum16, vzero));
+			vsum32 = _mm_add_epi32(vsum32, vsumtemp);
+			//pSrc1 += iStrideSrc1;
+			//pSrc2 += iStrideSrc2;
+		}
+		vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e));   // 01001110
+		vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));   // 10110001
+		uiSum = _mm_cvtsi128_si32(vsum32);
+
+		iDiffSum += uiSum;
+
+		if (iDiffSum > iMax) //for speeding up
+		{
+			return iDiffSum;
+		}
+		// update location
+		refPatchRow += uiStride;
+	}
+
+	return iDiffSum;
+
+#else
+	int iY, iX;
+#if VCEG_AZ08_USE_SSD_DISTANCE
+	int iDiff;
+#endif
+	DistType iDiffSum = 0;
+	Pel* refPatchRow = ref - IDCC_TemplateSize * uiStride - IDCC_TemplateSize;
+	Pel* tarPatchRow;
+	for (iY = 0; iY < IDCC_TemplateSize; iY++)
+	{
+		tarPatchRow = tarPatch[iY];
+		for (iX = 0; iX < uiPatchWidth; iX++)
+		{
+#if VCEG_AZ08_USE_SAD_DISTANCE
+			iDiffSum += abs(refPatchRow[iX] - tarPatchRow[iX]);
+#endif
+#if VCEG_AZ08_USE_SSD_DISTANCE
+			iDiff = refPatchRow[iX] - tarPatchRow[iX];
+			iDiffSum += iDiff * iDiff;
+#endif
+		}
+		if (iDiffSum > iMax) //for speeding up
+		{
+			return iDiffSum;
+		}
+		refPatchRow += uiStride;
+	}
+	for (iY = IDCC_TemplateSize; iY < uiPatchHeight; iY++)
+	{
+		tarPatchRow = tarPatch[iY];
+		for (iX = 0; iX < IDCC_TemplateSize; iX++)	// same column count as the SIMD path (was an undeclared uiTempSize)
+		{
+#if VCEG_AZ08_USE_SAD_DISTANCE
+			iDiffSum += abs(refPatchRow[iX] - tarPatchRow[iX]);
+#endif
+#if VCEG_AZ08_USE_SSD_DISTANCE
+			iDiff = refPatchRow[iX] - tarPatchRow[iX];
+			iDiffSum += iDiff * iDiff;
+#endif
+		}
+		if (iDiffSum > iMax) //for speeding up
+		{
+			return iDiffSum;
+		}
+		refPatchRow += uiStride;
+	}
+	return iDiffSum;
+#endif
+}
+#endif
+
+
+
 uint32_t TrQuant::getLFNSTIntraMode( int wideAngPredMode )
 {
   uint32_t intraMode;
@@ -427,6 +1063,12 @@ void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID )
     {
       intraMode = PLANAR_IDX;
     }
+#if IDCC_TPM_JEM
+	if (PU::isTmp(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID)))
+	{
+		intraMode = PLANAR_IDX;
+  }
+#endif
     CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" );
 
 #if EXTENDED_LFNST
@@ -567,6 +1209,12 @@ void TrQuant::xFwdLfnst( const TransformUnit &tu, const ComponentID compID, cons
     {
       intraMode = PLANAR_IDX;
     }
+#if IDCC_TPM_JEM
+	if (PU::isTmp(*tu.cs->getPU(area.pos(), toChannelType(compID)), toChannelType(compID)))
+	{
+		intraMode = PLANAR_IDX;
+  }
+#endif
     CHECK( intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode" );
 
 #if EXTENDED_LFNST
@@ -787,7 +1435,11 @@ std::vector<int> TrQuant::selectICTCandidates( const TransformUnit &tu, CompStor
 void TrQuant::getTrTypes(const TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer)
 {
   const bool isExplicitMTS = (CU::isIntra(*tu.cu) ? tu.cs->sps->getUseIntraMTS() : tu.cs->sps->getUseInterMTS() && CU::isInter(*tu.cu)) && isLuma(compID);
+#if IDCC_TPM_JEM && !IDCC_TMP_ImplicitMTS
+  const bool isImplicitMTS = CU::isIntra(*tu.cu) && tu.cs->sps->getUseImplicitMTS() && isLuma(compID) && tu.cu->lfnstIdx == 0 && tu.cu->mipFlag == 0 && tu.cu->TmpFlag == 0;
+#else
   const bool isImplicitMTS = CU::isIntra(*tu.cu) && tu.cs->sps->getUseImplicitMTS() && isLuma(compID) && tu.cu->lfnstIdx == 0 && tu.cu->mipFlag == 0;
+#endif
   const bool isISP = CU::isIntra(*tu.cu) && tu.cu->ispMode && isLuma(compID);
   const bool isSBT = CU::isInter(*tu.cu) && tu.cu->sbtInfo && isLuma(compID);
 
@@ -804,7 +1456,11 @@ void TrQuant::getTrTypes(const TransformUnit tu, const ComponentID compID, int &
     return;
   }
 
+#if IDCC_TMP_ImplicitMTS
+  if (isImplicitMTS || isISP || tu.cu->TmpFlag)
+#else
   if (isImplicitMTS || isISP)
+#endif
   {
     int  width = tu.blocks[compID].width;
     int  height = tu.blocks[compID].height;
diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h
index cc6e2c1d9f8498cac91408855e7eca33f290be79..f116d83e256f8ab45ccf75ef0296a4a6ca14bcf1 100644
--- a/source/Lib/CommonLib/TrQuant.h
+++ b/source/Lib/CommonLib/TrQuant.h
@@ -55,6 +55,51 @@
 typedef void FwdTrans(const TCoeff*, TCoeff*, int, int, int, int);
 typedef void InvTrans(const TCoeff*, TCoeff*, int, int, int, int, const TCoeff, const TCoeff);
 
+
+
+#if IDCC_TPM_JEM
+
+
+#define MAX_1DTRANS_LEN         (1 << (((USE_MORE_BLOCKSIZE_DEPTH_MAX) + 1) << 1)) ///< 4x4 = 16, 8x8 = 64, 16x16=256, 32x32 = 1024
+extern unsigned int g_uiDepth2Width[5];
+extern unsigned int g_uiDepth2MaxCandiNum[5];
+
+class TempLibFast
+{ // Holds the best template-matching candidate (offset, cost, id) that TrQuant's intra template search reads at start and writes back at the end.
+public:
+	int m_pX;    //offset X
+	int m_pY;    //offset Y
+	int m_pXInteger;    //offset X for integer pixel search
+	int m_pYInteger;    //offset Y for integer pixel search
+	DistType m_pDiffInteger;
+	int getXInteger() const { return m_pXInteger; }
+	int getYInteger() const { return m_pYInteger; }
+	DistType getDiffInteger() const { return m_pDiffInteger; }
+	short m_pIdInteger; //frame id
+	short getIdInteger() const { return m_pIdInteger; }
+	DistType m_pDiff; //template matching cost of the stored candidate (SAD or SSD, see VCEG_AZ08_USE_*_DISTANCE)
+	short m_pId; //frame id
+	// All accessors are const-qualified so they can be used on const objects.
+
+	TempLibFast();
+	~TempLibFast();
+	//void init();
+	int getX() const { return m_pX; }
+	int getY() const { return m_pY; }
+	DistType getDiff() const { return m_pDiff; }
+	short getId() const { return m_pId; }
+	/*void initDiff(unsigned int uiPatchSize, int bitDepth);
+	void initDiff(unsigned int uiPatchSize, int bitDepth, int iCandiNumber);*/
+	void initTemplateDiff(unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int bitDepth);
+	int m_diffMax;
+	int getDiffMax() const { return m_diffMax; }
+};
+
+
+typedef short TrainDataType;
+#endif
+
+
 // ====================================================================================================================
 // Class definition
 // ====================================================================================================================
@@ -87,6 +132,19 @@ public:
   void fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize );
   void invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize );
 #endif
+#if IDCC_TPM_JEM
+  DistType calcTemplateDiff(Pel* ref, unsigned int uiStride, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, DistType iMax);
+  Pel** getTargetPatch(unsigned int uiDepth) { return m_pppTarPatch[uiDepth]; }
+  Pel* getRefPicUsed() { return m_refPicUsed; }
+  void setRefPicUsed(Pel* ref) { m_refPicUsed = ref; }
+  unsigned int getStride() { return m_uiPicStride; }
+  void setStride(unsigned int uiPicStride) { m_uiPicStride = uiPicStride; }
+
+  void searchCandidateFromOnePicIntra(CodingUnit* pcCU, Pel** tarPatch, unsigned int uiPatchWidth, unsigned int uiPatchHeight, unsigned int setId);
+  void candidateSearchIntra(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight);
+  bool generateTMPrediction(Pel* piPred, unsigned int uiStride, unsigned int uiBlkWidth, unsigned int uiBlkHeight, int& foundCandiNum);
+  void getTargetTemplate(CodingUnit* pcCU, unsigned int uiBlkWidth, unsigned int uiBlkHeight);
+#endif
 
   uint32_t getLFNSTIntraMode( int wideAngPredMode );
   bool     getTransposeFlag ( uint32_t intraMode  );
@@ -141,6 +199,15 @@ public:
 
 protected:
   TCoeff   m_tempCoeff[MAX_TB_SIZEY * MAX_TB_SIZEY];
+#if IDCC_TPM_JEM
+  int m_uiPartLibSize;
+  TempLibFast m_tempLibFast;
+  Pel* m_refPicUsed;
+  Picture* m_refPicBuf;
+  unsigned int m_uiPicStride;
+  unsigned int m_uiVaildCandiNum;
+  Pel*** m_pppTarPatch;
+#endif
 #if SIGN_PREDICTION
   Pel      m_tempSignPredResid[SIGN_PRED_MAX_BS * SIGN_PRED_MAX_BS * 2]{0};
   Pel      m_signPredTemplate[SIGN_PRED_FREQ_RANGE*SIGN_PRED_FREQ_RANGE*SIGN_PRED_MAX_BS*2];
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 06a94c5765e059c9ddc7c349fd1aec6dd5b79db8..9527da1e4f7d6b50f8f23ef489913e4f650ca214 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -1,4 +1,4 @@
-/* The copyright in this software is being made available under the BSD
+/* The copyright in this software is being made available under the BSD
  * License, included below. This software may be subject to other third party
  * and contributor rights, including patent rights, and no such rights are
  * granted under this license.
@@ -50,6 +50,41 @@
 #include <assert.h>
 #include <cassert>
 
+#define IDCC_TPM_JEM									  1 // template matching prediction as implemented in JEM-7.2
+#if IDCC_TPM_JEM
+
+#define IDCC_TMP_SIMD									1
+//#define IDCC_TMP_MaxSize								64
+#define IDCC_SignleSearchRegion							0 // single region starting from current position up-left on
+
+#define IDCC_FixedComparisonPerPixel					1
+#if IDCC_FixedComparisonPerPixel
+#define IDCC_SearchRangeMultFactor						5
+#endif
+
+#if !IDCC_SignleSearchRegion
+#define IDCC_TMP_Within_CTU								1
+#endif
+#if !IDCC_FixedComparisonPerPixel
+#define IDCC_SEARCHRANGEINTRA							 70 // should be larger than IDCC_TMP_MaxSize + IDCC_TemplateSize
+#endif
+#if IDCC_TMP_SIMD
+#define IDCC_TemplateSize								4 // must be multiple of 4 for SIMD
+#else
+#define IDCC_TemplateSize								4
+#endif
+#define IDCC_TMP_ImplicitMTS							1
+
+#define IDCC_TMP_MaxSize_Depth							6 // should be log2(IDCC_TMP_MaxSize): keep as 6 to avoid any error
+
+#define VCEG_AZ08_USE_SSD_DISTANCE                        0  ///< (default 0) If defined, use SSD distance.
+#define VCEG_AZ08_USE_SAD_DISTANCE                        1  ///< (default 1) If defined, use SAD distance.
+
+#if VCEG_AZ08_USE_SSD_DISTANCE || VCEG_AZ08_USE_SAD_DISTANCE
+typedef       int             DistType;
+#endif
+#endif
+
 // Run test with the following config file parameters:
 //
 // BIF : 1
diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp
index 17937b452261e8ba85e87b3e1ca4b4015f930026..c281acf6ffe27554228e07829e54c7becf1f8a15 100644
--- a/source/Lib/CommonLib/Unit.cpp
+++ b/source/Lib/CommonLib/Unit.cpp
@@ -303,6 +303,9 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other )
   smvdMode        = other.smvdMode;
   ispMode           = other.ispMode;
   mipFlag           = other.mipFlag;
+#if IDCC_TPM_JEM
+  TmpFlag = other.TmpFlag;
+#endif
 #if INTER_LIC
   LICFlag           = other.LICFlag;
 #endif
@@ -384,6 +387,9 @@ void CodingUnit::initData()
   smvdMode        = 0;
   ispMode           = 0;
   mipFlag           = false;
+#if IDCC_TPM_JEM
+  TmpFlag = false;
+#endif
 #if INTER_LIC
   LICFlag = false;
 #endif
diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h
index a59bb6f63bf053df8d7013bbd2bc8f7b862d4348..e24b8c58bc8be25c0646fcbe72bf6af3eb96bd8a 100644
--- a/source/Lib/CommonLib/Unit.h
+++ b/source/Lib/CommonLib/Unit.h
@@ -334,6 +334,9 @@ struct CodingUnit : public UnitArea
   uint8_t         BcwIdx;
   int8_t          refIdxBi[2];
   bool           mipFlag;
+#if IDCC_TPM_JEM
+  bool			 TmpFlag;
+#endif
 #if INTER_LIC
   bool           LICFlag;
 #endif
diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp
index 651de98a9ed1d305633c5d5dffa0d7dadad838ea..9f6b464441c3aac5f52ae109540b67491cf1415b 100644
--- a/source/Lib/CommonLib/UnitTools.cpp
+++ b/source/Lib/CommonLib/UnitTools.cpp
@@ -333,7 +333,12 @@ uint32_t CU::getCtuAddr( const CodingUnit &cu )
 {
   return getCtuAddr( cu.blocks[cu.chType].lumaPos(), *cu.cs->pcv );
 }
-
+#if IDCC_TPM_JEM
+Position CU::getCtuXYAddr(const CodingUnit& cu)
+{ // Returns the CU's luma position with x and y rounded down to multiples of the maximum CU width and height (shift right, then left, by their log2 values).
+	return Position((cu.blocks[cu.chType].lumaPos().x >> cu.cs->pcv->maxCUWidthLog2) << cu.cs->pcv->maxCUWidthLog2, (cu.blocks[cu.chType].lumaPos().y >> cu.cs->pcv->maxCUHeightLog2) << cu.cs->pcv->maxCUHeightLog2);
+}
+#endif
 int CU::predictQP( const CodingUnit& cu, const int prevQP )
 {
   const CodingStructure &cs = *cu.cs;
@@ -939,7 +944,12 @@ bool PU::isMIP(const PredictionUnit &pu, const ChannelType &chType)
     return isDMChromaMIP(pu) && (pu.intraDir[CHANNEL_TYPE_CHROMA] == DM_CHROMA_IDX);
   }
 }
-
+#if IDCC_TPM_JEM
+bool PU::isTmp(const PredictionUnit& pu, const ChannelType& chType)
+{ // True only when chType is the luma channel and the PU's coding unit has TmpFlag set.
+	return (chType == CHANNEL_TYPE_LUMA && pu.cu->TmpFlag);
+}
+#endif
 bool PU::isDMChromaMIP(const PredictionUnit &pu)
 {
 #if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
@@ -951,7 +961,11 @@ bool PU::isDMChromaMIP(const PredictionUnit &pu)
 
 uint32_t PU::getIntraDirLuma( const PredictionUnit &pu )
 {
+#if IDCC_TPM_JEM
+	if (isMIP(pu) || isTmp(pu))
+#else
   if (isMIP(pu))
+#endif
   {
     return PLANAR_IDX;
   }
@@ -4970,6 +4984,9 @@ bool CU::isMTSAllowed(const CodingUnit &cu, const ComponentID compID)
   mtsAllowed &= cuWidth <= maxSize && cuHeight <= maxSize;
   mtsAllowed &= !cu.ispMode;
   mtsAllowed &= !cu.sbtInfo;
+#if IDCC_TMP_ImplicitMTS
+  mtsAllowed &= !cu.TmpFlag;
+#endif
   mtsAllowed &= !(cu.bdpcmMode && cuWidth <= tsMaxSize && cuHeight <= tsMaxSize);
   return mtsAllowed;
 }
@@ -5304,7 +5321,12 @@ bool allowLfnstWithMip(const Size& block)
   }
   return false;
 }
-
+#if IDCC_TPM_JEM
+bool allowLfnstWithTpm()
+{ // Always true: callers that test "TmpFlag && !allowLfnstWithTpm()" therefore never exclude LFNST for template-matching CUs.
+	return true;
+}
+#endif
 #if INTER_LIC
 bool CU::isLICFlagPresent(const CodingUnit& cu)
 {
diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h
index e85f668de33d8a2f7b87c23a681c3bcdf159fbca..a9c47df9814bcf6a67ce72f5f47f0f8dae446d50 100644
--- a/source/Lib/CommonLib/UnitTools.h
+++ b/source/Lib/CommonLib/UnitTools.h
@@ -70,6 +70,9 @@ namespace CU
   bool isSameSubPic                   (const CodingUnit &cu, const CodingUnit &cu2);
   bool isLastSubCUOfCtu               (const CodingUnit &cu);
   uint32_t getCtuAddr                     (const CodingUnit &cu);
+#if IDCC_TPM_JEM
+  Position getCtuXYAddr(const CodingUnit& cu);
+#endif
   int  predictQP                      (const CodingUnit& cu, const int prevQP );
 
   uint32_t getNumPUs                      (const CodingUnit& cu);
@@ -138,6 +141,9 @@ namespace PU
   int  getIntraMPMs(const PredictionUnit &pu, unsigned *mpm, const ChannelType &channelType = CHANNEL_TYPE_LUMA);
 #endif
   bool          isMIP                 (const PredictionUnit &pu, const ChannelType &chType = CHANNEL_TYPE_LUMA);
+#if IDCC_TPM_JEM
+  bool          isTmp(const PredictionUnit& pu, const ChannelType& chType = CHANNEL_TYPE_LUMA);
+#endif
   bool          isDMChromaMIP         (const PredictionUnit &pu);
   uint32_t      getIntraDirLuma       (const PredictionUnit &pu);
   void getIntraChromaCandModes(const PredictionUnit &pu, unsigned modeList[NUM_CHROMA_MODE]);
@@ -268,6 +274,9 @@ uint32_t getCtuAddr        (const Position& pos, const PreCalcValues &pcv);
 int  getNumModesMip   (const Size& block);
 int getMipSizeId      (const Size& block);
 bool allowLfnstWithMip(const Size& block);
+#if IDCC_TPM_JEM
+bool allowLfnstWithTpm();
+#endif
 
 template<typename T, size_t N>
 uint32_t updateCandList(T uiMode, double uiCost, static_vector<T, N>& candModeList, static_vector<double, N>& candCostList
diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.cpp b/source/Lib/CommonLib/dtrace_blockstatistics.cpp
index 260c7cc03c7e61173ad82535c7ec61a761c3fe57..a5e071fbcb35f0572326c2367eaedf59a7de3cd4 100644
--- a/source/Lib/CommonLib/dtrace_blockstatistics.cpp
+++ b/source/Lib/CommonLib/dtrace_blockstatistics.cpp
@@ -884,6 +884,9 @@ void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea)
           if(chType == CHANNEL_TYPE_LUMA)
           {
             DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::MIPFlag), cu.mipFlag);
+#if IDCC_TPM_JEM
+			DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::TmpFlag), cu.TmpFlag);
+#endif
             DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::ISPMode), cu.ispMode);
           }
 
diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.h b/source/Lib/CommonLib/dtrace_blockstatistics.h
index c3ef3fd6d3473ae3f8c69272baa900fc032013a1..a416227b6dbfe3abf96f6b3f34b7ae77b0fcd397 100644
--- a/source/Lib/CommonLib/dtrace_blockstatistics.h
+++ b/source/Lib/CommonLib/dtrace_blockstatistics.h
@@ -78,6 +78,9 @@ enum class BlockStatistic {
   Chroma_IntraMode,
   MultiRefIdx,
   MIPFlag,
+#if IDCC_TPM_JEM
+  TmpFlag,
+#endif
   ISPMode,
 
   // inter
@@ -170,6 +173,9 @@ static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType
   { BlockStatistic::JointCbCr,              std::tuple<std::string, BlockStatisticType, std::string>{"JointCbCr",                   BlockStatisticType::Flag,                   ""}},
 
   { BlockStatistic::MIPFlag,                std::tuple<std::string, BlockStatisticType, std::string>{"MIPFlag",                     BlockStatisticType::Flag,                   ""}},
+#if IDCC_TPM_JEM
+  { BlockStatistic::TmpFlag,                std::tuple<std::string, BlockStatisticType, std::string>{"TmpFlag",                     BlockStatisticType::Flag,                   ""}},
+#endif
   { BlockStatistic::ISPMode,                std::tuple<std::string, BlockStatisticType, std::string>{"ISPMode",                     BlockStatisticType::Integer,                "[0, " + std::to_string(NUM_INTRA_SUBPARTITIONS_MODES) + "]"}},
   { BlockStatistic::Depth,                  std::tuple<std::string, BlockStatisticType, std::string>{"Depth",                       BlockStatisticType::Integer,                "[0, 7]"}},
   { BlockStatistic::QT_Depth,               std::tuple<std::string, BlockStatisticType, std::string>{"QT_Depth",                    BlockStatisticType::Integer,                "[0, 7]"}},
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index a5286cdebd4effbc775579307c7bbf88bde2edef..d6dc6a190cbda6fcec3f508bb1dae27364e653ab 100644
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -1606,7 +1606,17 @@ void CABACReader::intra_luma_pred_modes( CodingUnit &cu )
     cu.firstPU->intraDir[0] = cu.bdpcmMode == 2? VER_IDX : HOR_IDX;
     return;
   }
-
+#if IDCC_TPM_JEM
+  int TMP_MaxSize=cu.cs->sps->getIntraTMPMaxSize();
+  if (cu.lwidth() <= TMP_MaxSize && cu.lheight() <= TMP_MaxSize)
+  {
+	  Tmp_Flag(cu);
+	  if (cu.TmpFlag)
+		  return;
+  }
+  else
+	  cu.TmpFlag = 0;
+#endif
   mip_flag(cu);
   if (cu.mipFlag)
   {
@@ -3848,7 +3858,11 @@ void CABACReader::residual_lfnst_mode( CodingUnit& cu,  CUCtx& cuCtx  )
   int chIdx = cu.isSepTree() && cu.chType == CHANNEL_TYPE_CHROMA ? 1 : 0;
 #endif
   if ((cu.ispMode && !CU::canUseLfnstWithISP(cu, cu.chType))
+#if IDCC_TPM_JEM
+   || (cu.cs->sps->getUseLFNST() && CU::isIntra(cu) && ((cu.mipFlag && !allowLfnstWithMip(cu.firstPU->lumaSize())) || (cu.TmpFlag && !allowLfnstWithTpm())))
+#else
       || (cu.cs->sps->getUseLFNST() && CU::isIntra(cu) && cu.mipFlag && !allowLfnstWithMip(cu.firstPU->lumaSize()))
+#endif
 #if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
     || (CS::isDualITree(*cu.cs) && cu.chType == CHANNEL_TYPE_CHROMA && std::min(cu.blocks[1].width, cu.blocks[1].height) < 4)
 #else
@@ -4488,7 +4502,27 @@ unsigned CABACReader::code_unary_fixed( unsigned ctxId, unsigned unary_max, unsi
   }
   return idx;
 }
+#if IDCC_TPM_JEM
+void CABACReader::Tmp_Flag(CodingUnit& cu)
+{ // Parses the intra template-matching flag of 'cu' and stores it in cu.TmpFlag.
+	RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__OTHER);
 
+	if (!cu.Y().valid()) // no valid luma block: nothing is read and cu.TmpFlag is left as it was
+	{
+		return;
+	}
+	// Tool disabled in the SPS: no bin is read and the flag is forced to false.
+  if( !cu.cs->sps->getUseIntraTMP() )
+  {
+    cu.TmpFlag = false;
+    return;
+  }
+	// Otherwise read one bin, using the context index supplied by DeriveCtx::CtxTmpFlag().
+	unsigned ctxId = DeriveCtx::CtxTmpFlag(cu);
+	cu.TmpFlag = m_BinDecoder.decodeBin(Ctx::TmpFlag(ctxId));
+	DTRACE(g_trace_ctx, D_SYNTAX, "Tmp_Flag() pos=(%d,%d) mode=%d\n", cu.lumaPos().x, cu.lumaPos().y, cu.TmpFlag ? 1 : 0);
+}
+#endif
 void CABACReader::mip_flag( CodingUnit& cu )
 {
   RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET( STATS__CABAC_BITS__OTHER );
diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h
index 31b0b3449563cbb80472acb93b74927fc422aa98..9697cde83b12b7624877c0f9527f1aa6a1d76f2c 100644
--- a/source/Lib/DecoderLib/CABACReader.h
+++ b/source/Lib/DecoderLib/CABACReader.h
@@ -107,6 +107,9 @@ public:
   void        adaptive_color_transform(CodingUnit&             cu);
   void        sbt_mode                  ( CodingUnit&                   cu );
   void        end_of_ctu                ( CodingUnit&                   cu,     CUCtx&          cuCtx );
+#if IDCC_TPM_JEM
+  void        Tmp_Flag(CodingUnit& cu);
+#endif
   void        mip_flag                  ( CodingUnit&                   cu );
   void        mip_pred_modes            ( CodingUnit&                   cu );
   void        mip_pred_mode             ( PredictionUnit&               pu );
diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp
index 4a7817d835ea8c329826c0804d9e341f569eb4d1..b20d81b6efc70f8061829531de46d87bc5ba5fec 100644
--- a/source/Lib/DecoderLib/DecCu.cpp
+++ b/source/Lib/DecoderLib/DecCu.cpp
@@ -307,7 +307,19 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID )
   }
   else
   {
+#if IDCC_TPM_JEM
+	  if (PU::isTmp(pu, chType))
+	  {
+		  int foundCandiNum;
+		  m_pcTrQuant->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight());
+		  m_pcTrQuant->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight());
+		  m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum);
+		  assert(foundCandiNum >= 1);
+	  }
+	  else if (PU::isMIP(pu, chType))
+#else
     if( PU::isMIP( pu, chType ) )
+#endif
     {
       m_pcIntraPred->initIntraMip( pu, area );
       m_pcIntraPred->predIntraMip( compID, piPred, pu );
@@ -500,7 +512,19 @@ void DecCu::xIntraRecACTBlk(TransformUnit& tu)
 
     PelBuf piPred = cs.getPredBuf(area);
     m_pcIntraPred->initIntraPatternChType(*tu.cu, area);
+#if IDCC_TPM_JEM
+	if (PU::isTmp(pu, chType))
+	{
+		int foundCandiNum;
+		const unsigned int           uiStride = cs.picture->getRecoBuf(COMPONENT_Y).stride;
+		m_pcTrQuant->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight());
+		m_pcTrQuant->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight());
+		m_pcTrQuant->generateTMPrediction(piPred.buf, uiStride, pu.lwidth(), pu.lheight(), foundCandiNum);
+	}
+	else if (PU::isMIP(pu, chType))
+#else
     if (PU::isMIP(pu, chType))
+#endif
     {
       m_pcIntraPred->initIntraMip(pu, area);
       m_pcIntraPred->predIntraMip(compID, piPred, pu);
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index d8d43d08c6f6a8801880616fddec6878bccdfdc1..ce6f1c1c7571f761ae5404c19374b3fa1dbdadbc 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -2259,6 +2259,13 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   READ_FLAG(uiCode, "sps_mip_enabled_flag");                        pcSPS->setUseMIP( uiCode != 0 );
 #if ENABLE_DIMD
   READ_FLAG(uiCode, "sps_dimd_enabled_flag");                           pcSPS->setUseDimd(uiCode != 0);
+#endif
+#if IDCC_TPM_JEM
+  READ_FLAG(uiCode, "sps_intraTMP_enabled_flag");                   pcSPS->setUseIntraTMP( uiCode != 0 );
+  if(pcSPS->getUseIntraTMP())
+  {
+    READ_UVLC(uiCode, "sps_log2_intra_tmp_max_size");                 pcSPS->setIntraTMPMaxSize(1 << uiCode);
+  }
 #endif
   if( pcSPS->getChromaFormatIdc() != CHROMA_400)
   {
diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp
index d0c2961f688d0838f1694f51a69dba42155114aa..0a40c34d59411eb269116eab45898cd9dc388234 100644
--- a/source/Lib/EncoderLib/CABACWriter.cpp
+++ b/source/Lib/EncoderLib/CABACWriter.cpp
@@ -1200,7 +1200,15 @@ void CABACWriter::intra_luma_pred_modes( const CodingUnit& cu )
     cu.firstPU->intraDir[0] = cu.bdpcmMode == 2? VER_IDX : HOR_IDX;
     return;
   }
-
+#if IDCC_TPM_JEM
+  int TMP_MaxSize=cu.cs->sps->getIntraTMPMaxSize();
+  if (cu.lwidth() <= TMP_MaxSize && cu.lheight() <= TMP_MaxSize)
+  {
+	  Tmp_Flag(cu);
+	  if (cu.TmpFlag)
+		  return;
+  }
+#endif
   mip_flag(cu);
   if (cu.mipFlag)
   {
@@ -1381,6 +1389,17 @@ void CABACWriter::intra_luma_pred_mode( const PredictionUnit& pu )
 {
 
   if( pu.cu->bdpcmMode ) return;
+#if IDCC_TPM_JEM
+  // check if sufficient search range is available
+  //bool bCheck = pu.cu->
+  int TMP_MaxSize=pu.cu->cs->sps->getIntraTMPMaxSize();
+  if (pu.cu->lwidth() <= TMP_MaxSize && pu.cu->lheight() <= TMP_MaxSize)
+  {
+	  Tmp_Flag(*pu.cu);
+	  if (pu.cu->TmpFlag)
+		  return;
+  }
+#endif
   mip_flag(*pu.cu);
   if (pu.cu->mipFlag)
   {
@@ -3569,7 +3588,11 @@ void CABACWriter::residual_lfnst_mode( const CodingUnit& cu, CUCtx& cuCtx )
   int chIdx = cu.isSepTree() && cu.chType == CHANNEL_TYPE_CHROMA ? 1 : 0;
 #endif
   if( ( cu.ispMode && !CU::canUseLfnstWithISP( cu, cu.chType ) ) ||
+#if IDCC_TPM_JEM
+  (cu.cs->sps->getUseLFNST() && CU::isIntra(cu) && ((cu.mipFlag && !allowLfnstWithMip(cu.firstPU->lumaSize())) || (cu.TmpFlag && !allowLfnstWithTpm()))) ||
+#else
       (cu.cs->sps->getUseLFNST() && CU::isIntra(cu) && cu.mipFlag && !allowLfnstWithMip(cu.firstPU->lumaSize())) ||
+#endif
 #if INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
     (CS::isDualITree(*cu.cs) && cu.chType == CHANNEL_TYPE_CHROMA && std::min(cu.blocks[1].width, cu.blocks[1].height) < 4)
 #else
@@ -4195,6 +4218,25 @@ void CABACWriter::code_unary_fixed( unsigned symbol, unsigned ctxId, unsigned un
   }
 }
 
+#if IDCC_TPM_JEM
+// Codes the intra template-matching flag (cu.TmpFlag) of a CU as one context-coded bin.
+// Nothing is written when cu.Y() is not valid or when the SPS has intra TMP disabled.
+// The context model is selected through DeriveCtx::CtxTmpFlag( cu ).
+void CABACWriter::Tmp_Flag(const CodingUnit& cu)
+{
+  // both conditions are pure early-outs; evaluation order matches the original guards
+  if( !cu.Y().valid() || !cu.cs->sps->getUseIntraTMP() )
+  {
+    return;
+  }
+
+  const unsigned tmpCtxIdx = DeriveCtx::CtxTmpFlag(cu);
+  m_BinEncoder.encodeBin(cu.TmpFlag, Ctx::TmpFlag(tmpCtxIdx));
+
+  DTRACE(g_trace_ctx, D_SYNTAX, "Tmp_Flag() pos=(%d,%d) mode=%d\n", cu.lumaPos().x, cu.lumaPos().y, cu.TmpFlag ? 1 : 0);
+}
+#endif
+
 void CABACWriter::mip_flag( const CodingUnit& cu )
 {
 #if ENABLE_DIMD
diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h
index 9999cad3d326c406fbfacaa2379c8268f541019d..f5ab58565bba7ff45566ed42f8cd666de34826f7 100644
--- a/source/Lib/EncoderLib/CABACWriter.h
+++ b/source/Lib/EncoderLib/CABACWriter.h
@@ -116,6 +116,9 @@ public:
   void        adaptive_color_transform(const CodingUnit&             cu);
   void        sbt_mode                  ( const CodingUnit&             cu );
   void        end_of_ctu                ( const CodingUnit&             cu,       CUCtx&            cuCtx );
+#if IDCC_TPM_JEM
+  void        Tmp_Flag(const CodingUnit& cu);
+#endif
   void        mip_flag                  ( const CodingUnit&             cu );
   void        mip_pred_modes            ( const CodingUnit&             cu );
   void        mip_pred_mode             ( const PredictionUnit&         pu );
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index 2f642584d5875d0a5517728a6befe99b795affee..756a4aacdea73d238c2594453e96620aedff7a5f 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -806,6 +806,10 @@ protected:
 
   bool        m_alf;                                          ///< Adaptive Loop Filter
 
+#if IDCC_TPM_JEM
+  bool      m_IntraTMP;                                       ///< intra Template Matching 
+  unsigned  m_IntraTMP_MaxSize;                               ///< max CU size for which intra TMP is allowed
+#endif
 #if ERICSSON_BIF
   bool        m_BIF;
   int         m_BIFStrength;
@@ -1283,6 +1287,12 @@ public:
   bool      getUseWrapAround                ()         const { return m_wrapAround; }
   void      setWrapAroundOffset             ( unsigned u )   { m_wrapAroundOffset = u; }
   unsigned  getWrapAroundOffset             ()         const { return m_wrapAroundOffset; }
+#if IDCC_TPM_JEM
+  void      setUseIntraTMP                  ( bool b )       { m_IntraTMP = b; }               ///< enable/disable intra template matching
+  bool      getUseIntraTMP                  ()         const { return m_IntraTMP; }
+  void      setIntraTMPMaxSize              ( unsigned n )   { m_IntraTMP_MaxSize = n; }       ///< max CU size for which intra TMP is allowed
+  unsigned  getIntraTMPMaxSize              ()         const { return m_IntraTMP_MaxSize; }
+#endif
 #if ERICSSON_BIF
   void      setUseBIF                       ( bool b )       { m_BIF = b; }
   bool      getUseBIF                       ()         const { return m_BIF; }
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index ce35b27035c47511363293ccaf6b7450029e3fa4..107624a0110b106cab1cf1d1d4d2241eec4f030c 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -2008,6 +2008,9 @@ bool EncCu::xCheckRDCostIntra(CodingStructure *&tempCS, CodingStructure *&bestCS
               m_modeCtrl->setISPMode(cu.ispMode);
               m_modeCtrl->setISPLfnstIdx(cu.lfnstIdx);
               m_modeCtrl->setMIPFlagISPPass(cu.mipFlag);
+#if IDCC_TPM_JEM
+			  m_modeCtrl->setTPMFlagISPPass(cu.TmpFlag);
+#endif
               m_modeCtrl->setBestISPIntraModeRelCU(cu.ispMode ? PU::getFinalIntraMode(*cu.firstPU, CHANNEL_TYPE_LUMA) : UINT8_MAX);
               m_modeCtrl->setBestDCT2NonISPCostRelCU(m_modeCtrl->getMtsFirstPassNoIspCost());
             }
@@ -3878,6 +3881,9 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure
   cu.mmvdSkip = false;
   cu.skip = false;
   cu.mipFlag = false;
+#if IDCC_TPM_JEM
+  cu.TmpFlag = false;
+#endif
   cu.bdpcmMode = 0;
 
   PredictionUnit &pu = tempCS->addPU(cu, pm.chType);
@@ -4103,6 +4109,9 @@ void EncCu::xCheckRDCostMergeGeo2Nx2N(CodingStructure *&tempCS, CodingStructure
       cu.mmvdSkip = false;
       cu.skip = false;
       cu.mipFlag = false;
+#if IDCC_TPM_JEM
+	  cu.TmpFlag = false;
+#endif
       cu.bdpcmMode = 0;
       PredictionUnit &pu = tempCS->addPU(cu, pm.chType);
       pu.mergeFlag = true;
@@ -4799,6 +4808,9 @@ void EncCu::xCheckSATDCostGeoMerge(CodingStructure *&tempCS, CodingUnit &cu, Pre
   cu.mmvdSkip = false;
   cu.skip = false;
   cu.mipFlag = false;
+#if IDCC_TPM_JEM
+  cu.TmpFlag = false;
+#endif
   cu.bdpcmMode = 0;
   pu.mergeFlag = true;
   pu.regularMergeFlag = false;
@@ -5783,7 +5795,10 @@ void EncCu::xCheckRDCostTMMerge2Nx2N(CodingStructure *&tempCS, CodingStructure *
           pu.bdmvrRefine = true;
           m_pcInterSearch->setBdmvrSubPuMvBuf(m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[(uiMergeCand << 1) + 1]);
         }
-        PU::spanMotionInfo(pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1]);
+        else
+        {
+          PU::spanMotionInfo(pu, mergeCtx);
+        }
 #else
         PU::spanMotionInfo(pu, mergeCtx);
 #endif
@@ -5795,6 +5810,13 @@ void EncCu::xCheckRDCostTMMerge2Nx2N(CodingStructure *&tempCS, CodingStructure *
 
         m_pcInterSearch->motionCompensation(pu, acMergeRealBuffer[uiMergeCand], REF_PIC_LIST_X, true, true);
 
+#if MULTI_PASS_DMVR
+        if( pu.bdmvrRefine )
+        {
+          ::memcpy( m_mvBufEncBDOF4TM[uiMergeCand], m_pcInterSearch->getBdofSubPuMvOffset(), sizeof( Mv ) * BDOF_SUBPU_MAX_NUM );
+          PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_mvBufEncBDOF4TM[uiMergeCand] );
+        }
+#endif
         distParam.cur = acMergeRealBuffer[uiMergeCand].Y();
         Distortion uiSad = distParam.distFunc(distParam);
         m_CABACEstimator->getCtx() = ctxStart;
@@ -5874,7 +5896,10 @@ void EncCu::xCheckRDCostTMMerge2Nx2N(CodingStructure *&tempCS, CodingStructure *
 #endif
       }
 #if MULTI_PASS_DMVR
-      PU::spanMotionInfo(pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1]);
+      if (!pu.bdmvrRefine)
+      {
+        PU::spanMotionInfo(pu, mergeCtx);
+      }
 #else
       PU::spanMotionInfo(pu, mergeCtx);
 #endif
@@ -5882,6 +5907,12 @@ void EncCu::xCheckRDCostTMMerge2Nx2N(CodingStructure *&tempCS, CodingStructure *
       if( mrgTempBufSet )
       {
         tempCS->getPredBuf().copyFrom(acMergeRealBuffer[uiMergeCand]);
+#if MULTI_PASS_DMVR
+        if( pu.bdmvrRefine )
+        {
+          PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_mvBufEncBDOF4TM[uiMergeCand] );
+        }
+#endif
       }
       else
       {
@@ -5890,6 +5921,13 @@ void EncCu::xCheckRDCostTMMerge2Nx2N(CodingStructure *&tempCS, CodingStructure *
         m_pcInterSearch->m_storeBeforeLIC = false;
 #endif
         m_pcInterSearch->motionCompensation( pu );
+#if MULTI_PASS_DMVR
+        if( pu.bdmvrRefine )
+        {
+          ::memcpy( m_mvBufEncBDOF4TM[uiMergeCand], m_pcInterSearch->getBdofSubPuMvOffset(), sizeof( Mv ) * BDOF_SUBPU_MAX_NUM );
+          PU::spanMotionInfo( pu, mergeCtx, m_mvBufBDMVR4TM[uiMergeCand << 1], m_mvBufBDMVR4TM[( uiMergeCand << 1 ) + 1], m_mvBufEncBDOF4TM[uiMergeCand] );
+        }
+#endif
       }
 
       xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL );
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index 46cb2b097d8c5f53e0cc301ae11d5aa27fb82f04..e2055c70e760d21fa206c1121c50816dcfffb6ad 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -1428,6 +1428,10 @@ void EncLib::xInitSPS( SPS& sps )
   sps.setMaxNumAddHyps(m_maxNumAddHyps);
   sps.setNumAddHypWeights(m_numAddHypWeights);
   sps.setMaxNumAddHypRefFrames(m_maxNumAddHypRefFrames);
+#endif
+#if IDCC_TPM_JEM
+  sps.setUseIntraTMP(m_IntraTMP);
+  sps.setIntraTMPMaxSize(m_IntraTMP_MaxSize);
 #endif
   // ADD_NEW_TOOL : (encoder lib) set tool enabling flags and associated parameters here
   sps.setUseISP                             ( m_ISP );
diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp
index ee16ae06196168b494f386ad92a530550a2f41ca..67b0825473d1807c127628dfe14551828a02688c 100644
--- a/source/Lib/EncoderLib/EncModeCtrl.cpp
+++ b/source/Lib/EncoderLib/EncModeCtrl.cpp
@@ -2209,6 +2209,9 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
             int bit4 = cuECtx.ispLfnstIdx == 2;
             int bit5 = cuECtx.mipFlag;
             int bit6 = cuECtx.bestCostIsp < cuECtx.bestNonDCT2Cost * 0.95;
+#if IDCC_TPM_JEM
+			int bit7 = cuECtx.TmpFlag;
+#endif
             int val =
               (bit0) |
               (bit1 << 1) |
@@ -2217,6 +2220,9 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
               (bit4 << 4) |
               (bit5 << 5) |
               (bit6 << 6) |
+#if IDCC_TPM_JEM
+			  (bit7 << 7) |
+#endif
               ( cuECtx.bestPredModeDCT2 << 9 );
             relatedCU.ispPredModeVal     = val;
             relatedCU.bestDCT2NonISPCost = cuECtx.bestDCT2NonISPCost;
diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h
index b55472cbd08c90acd5b93b78b5d519a0083239ad..5b62615ea0b94bb1cf4ee74eebe77560cf92ad82 100644
--- a/source/Lib/EncoderLib/EncModeCtrl.h
+++ b/source/Lib/EncoderLib/EncModeCtrl.h
@@ -238,6 +238,9 @@ struct ComprCUCtx
                     ( MAX_DOUBLE )
     , bestISPIntraMode
                     ( UINT8_MAX )
+#if IDCC_TPM_JEM
+	  , TmpFlag(false)
+#endif
     , mipFlag       ( false )
     , ispMode       ( NOT_INTRA_SUBPARTITIONS )
     , ispLfnstIdx   ( 0 )
@@ -283,6 +286,9 @@ struct ComprCUCtx
   double                            bestDCT2NonISPCost;
   double                            bestNonDCT2Cost;
   uint8_t                           bestISPIntraMode;
+#if IDCC_TPM_JEM
+  bool								TmpFlag;
+#endif
   bool                              mipFlag;
   uint8_t                           ispMode;
   uint8_t                           ispLfnstIdx;
@@ -390,6 +396,9 @@ public:
   void   setBestNonDCT2Cost           ( double val )            { m_ComprCUCtxList.back().bestNonDCT2Cost = val; }
   uint8_t getBestISPIntraModeRelCU    ()                  const { return m_ComprCUCtxList.back().bestISPIntraMode; }
   void   setBestISPIntraModeRelCU     ( uint8_t val )           { m_ComprCUCtxList.back().bestISPIntraMode = val; }
+#if IDCC_TPM_JEM
+  void   setTPMFlagISPPass(bool val) { m_ComprCUCtxList.back().TmpFlag = val; }
+#endif
   void   setMIPFlagISPPass            ( bool val )              { m_ComprCUCtxList.back().mipFlag = val; }
   void   setISPMode                   ( uint8_t val )           { m_ComprCUCtxList.back().ispMode = val; }
   void   setISPLfnstIdx               ( uint8_t val )           { m_ComprCUCtxList.back().ispLfnstIdx = val; }
diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp
index 7429a84079b34d2743fe9dd75e8c0b824e1b15d8..a3ca8a54f75929355dd71784137ee03b11b79589 100644
--- a/source/Lib/EncoderLib/InterSearch.cpp
+++ b/source/Lib/EncoderLib/InterSearch.cpp
@@ -8710,7 +8710,8 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa
       }
     }
 #if WCG_EXT
-    if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
+    if (m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() || (
+      m_pcEncCfg->getLmcs() && (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())))
     {
       const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMPONENT_Y] );
       if (compID == COMPONENT_Y )
diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp
index ea77b2de720bac5fdf4ed442ac551681269bd88b..205d5618e35047c08e3c84838b334ee4c14db092 100644
--- a/source/Lib/EncoderLib/IntraSearch.cpp
+++ b/source/Lib/EncoderLib/IntraSearch.cpp
@@ -394,6 +394,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 
   const TempCtx ctxStart          ( m_CtxCache, m_CABACEstimator->getCtx() );
   const TempCtx ctxStartMipFlag    ( m_CtxCache, SubCtx( Ctx::MipFlag,          m_CABACEstimator->getCtx() ) );
+#if IDCC_TPM_JEM
+  const TempCtx ctxStartTpmFlag(m_CtxCache, SubCtx(Ctx::TmpFlag, m_CABACEstimator->getCtx()));
+#endif
   const TempCtx ctxStartIspMode    ( m_CtxCache, SubCtx( Ctx::ISPMode,          m_CABACEstimator->getCtx() ) );
 #if SECONDARY_MPM
   const TempCtx ctxStartMPMIdxFlag(m_CtxCache, SubCtx(Ctx::IntraLumaMPMIdx, m_CABACEstimator->getCtx()));
@@ -491,6 +494,10 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
     const bool mipAllowed = sps.getUseMIP() && isLuma(partitioner.chType) && ((cu.lfnstIdx == 0) || allowLfnstWithMip(cu.firstPU->lumaSize()));
     const bool testMip = mipAllowed && !(cu.lwidth() > (8 * cu.lheight()) || cu.lheight() > (8 * cu.lwidth()));
     const bool supportedMipBlkSize = pu.lwidth() <= MIP_MAX_WIDTH && pu.lheight() <= MIP_MAX_HEIGHT;
+#if IDCC_TPM_JEM
+	const bool tpmAllowed = sps.getUseIntraTMP() && isLuma(partitioner.chType) && ((cu.lfnstIdx == 0) || allowLfnstWithTpm());
+	const bool testTpm = tpmAllowed && (cu.lwidth() <= sps.getIntraTMPMaxSize() && cu.lheight() <= sps.getIntraTMPMaxSize());
+#endif
 
     static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> uiRdModeList;
 
@@ -566,10 +573,19 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
                                    ? std::max(numModesForFullRD, floorLog2(std::min(pu.lwidth(), pu.lheight())) - 1)
                                    : numModesForFullRD;
           }
+#if IDCC_TPM_JEM
+		  if (testTpm)
+			  numModesForFullRD += 1; // testing tpm
+		  const int numHadCand = (testMip ? 2 : 1) * 3 + testTpm;
+#else
           const int numHadCand = (testMip ? 2 : 1) * 3;
+#endif
 
           //*** Derive (regular) candidates using Hadamard
           cu.mipFlag = false;
+#if IDCC_TPM_JEM
+		  cu.TmpFlag = false;
+#endif
 
           //===== init pattern for luma prediction =====
           initIntraPatternChType(cu, pu.Y(), true);
@@ -600,6 +616,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
               minSadHad += std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad));
 
               // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
+#if IDCC_TPM_JEM
+			  m_CABACEstimator->getCtx() = SubCtx(Ctx::TmpFlag, ctxStartTpmFlag);
+#endif
               m_CABACEstimator->getCtx() = SubCtx( Ctx::MipFlag, ctxStartMipFlag );
               m_CABACEstimator->getCtx() = SubCtx( Ctx::ISPMode, ctxStartIspMode );
 #if SECONDARY_MPM
@@ -674,6 +693,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
 
                     // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been
                     // pre-estimated.
+#if IDCC_TPM_JEM
+					m_CABACEstimator->getCtx() = SubCtx(Ctx::TmpFlag, ctxStartTpmFlag);
+#endif
                     m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
                     m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
 #if SECONDARY_MPM
@@ -739,6 +761,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
                     std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad));
 
                   // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
+#if IDCC_TPM_JEM
+				  m_CABACEstimator->getCtx() = SubCtx(Ctx::TmpFlag, ctxStartTpmFlag);
+#endif
                   m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
                   m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
 #if SECONDARY_MPM
@@ -781,6 +806,48 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
               m_dSavedHadListLFNST.resize(3);
               LFNSTSaveFlag = false;
             }
+#if IDCC_TPM_JEM
+			// derive TPM candidate using hadamard
+			if (testTpm)
+			{
+				cu.TmpFlag = true;
+				cu.mipFlag = false;
+				pu.multiRefIdx = 0;
+
+
+
+				int foundCandiNum = 0;
+				bool bsuccessfull = 0;
+				CodingUnit cu_cpy = cu;
+
+				if (isRefTemplateAvailable(cu_cpy, cu_cpy.blocks[COMPONENT_Y]))
+				{
+					m_pcTrQuant->getTargetTemplate(&cu_cpy, pu.lwidth(), pu.lheight());
+					m_pcTrQuant->candidateSearchIntra(&cu_cpy, pu.lwidth(), pu.lheight());
+					bsuccessfull = m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum);
+				}
+				if (bsuccessfull && foundCandiNum >= 1)
+				{
+					
+					Distortion minSadHad =
+						std::min(distParamSad.distFunc(distParamSad) * 2, distParamHad.distFunc(distParamHad));
+
+					m_CABACEstimator->getCtx() = SubCtx(Ctx::TmpFlag, ctxStartTpmFlag);
+
+					uint64_t fracModeBits = xFracModeBitsIntra(pu, 0, CHANNEL_TYPE_LUMA);
+
+					double cost = double(minSadHad) + double(fracModeBits) * sqrtLambdaForFirstPass;
+					DTRACE(g_trace_ctx, D_INTRA_COST, "IntraTPM: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost,
+						0);
+
+					updateCandList(ModeInfo(0, 0, 0, NOT_INTRA_SUBPARTITIONS, 0, 1), cost, uiRdModeList,
+						CandCostList, numModesForFullRD);
+					updateCandList(ModeInfo(0, 0, 0, NOT_INTRA_SUBPARTITIONS, 0, 1),
+						0.8 * double(minSadHad), uiHadModeList, CandHadList, numHadCand);
+				}
+				
+			}
+#endif
             //*** Derive MIP candidates using Hadamard
             if (testMip && !supportedMipBlkSize)
             {
@@ -799,6 +866,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
             }
             else if (testMip)
             {
+#if IDCC_TPM_JEM
+				cu.TmpFlag = 0;
+#endif
               cu.mipFlag     = true;
               pu.multiRefIdx = 0;
 
@@ -1025,6 +1095,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
           cs.interHad = 0;
 
           //===== reset context models =====
+#if IDCC_TPM_JEM
+		  m_CABACEstimator->getCtx() = SubCtx(Ctx::TmpFlag, ctxStartTpmFlag);
+#endif
           m_CABACEstimator->getCtx() = SubCtx(Ctx::MipFlag, ctxStartMipFlag);
           m_CABACEstimator->getCtx() = SubCtx(Ctx::ISPMode, ctxStartIspMode);
 #if SECONDARY_MPM
@@ -1128,6 +1201,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
         uiOrgMode.modeId = cu.dimdMode;
         cu.dimd = true;
       }
+#endif
+#if IDCC_TPM_JEM
+	  cu.TmpFlag = uiOrgMode.tpmFlg;
 #endif
       cu.mipFlag                     = uiOrgMode.mipFlg;
       pu.mipTransposedFlag           = uiOrgMode.mipTrFlg;
@@ -1140,6 +1216,11 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
       CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported");
       CHECK(cu.ispMode && pu.multiRefIdx, "Error: combination of ISP and MRL not supported");
       CHECK(cu.ispMode&& cu.colorTransform, "Error: combination of ISP and ACT not supported");
+#if IDCC_TPM_JEM
+	  CHECK(cu.mipFlag&& cu.TmpFlag, "Error: combination of MIP and TPM not supported");
+	  CHECK(cu.TmpFlag&& cu.ispMode, "Error: combination of TPM and ISP not supported");
+	  CHECK(cu.TmpFlag&& pu.multiRefIdx, "Error: combination of TPM and MRL not supported");
+#endif
 
       pu.intraDir[CHANNEL_TYPE_CHROMA] = cu.colorTransform ? DM_CHROMA_IDX : pu.intraDir[CHANNEL_TYPE_CHROMA];
 
@@ -1180,10 +1261,17 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
             uiBestPUMode.ispMod, mtsCheckRangeFlag, mtsFirstCheckId, mtsLastCheckId, moreProbMTSIdxFirst);
         }
       }
-
+#if IDCC_TPM_JEM
+	  if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && !cu.TmpFlag && testISP)
+#else
       if (!cu.ispMode && !cu.mtsFlag && !cu.lfnstIdx && !cu.bdpcmMode && !pu.multiRefIdx && !cu.mipFlag && testISP)
+#endif
       {
+#if IDCC_TPM_JEM
+		  m_regIntraRDListWithCosts.push_back(ModeInfoWithCost(cu.mipFlag, pu.mipTransposedFlag, pu.multiRefIdx, cu.ispMode, uiOrgMode.modeId, cu.TmpFlag, csTemp->cost));
+#else
         m_regIntraRDListWithCosts.push_back( ModeInfoWithCost( cu.mipFlag, pu.mipTransposedFlag, pu.multiRefIdx, cu.ispMode, uiOrgMode.modeId, csTemp->cost ) );
+#endif
       }
 
       if( cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMPONENT_Y] )
@@ -1198,10 +1286,15 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
       {
         m_modeCostStore[lfnstIdx][mode] = tmpValidReturn ? csTemp->cost : (MAX_DOUBLE / 2.0); //(MAX_DOUBLE / 2.0) ??
       }
-
+#if IDCC_TPM_JEM
+	  DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x,
+		  cu.blocks[0].y, (int)width, (int)height, csTemp->cost, uiOrgMode.modeId, uiOrgMode.ispMod,
+		  pu.multiRefIdx, cu.TmpFlag, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag);
+#else
       DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x,
              cu.blocks[0].y, (int) width, (int) height, csTemp->cost, uiOrgMode.modeId, uiOrgMode.ispMod,
              pu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag);
+#endif
 
       if( tmpValidReturn )
       {
@@ -1298,6 +1391,9 @@ bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, c
     if( validReturn )
     {
       //=== update PU data ====
+#if IDCC_TPM_JEM
+		cu.TmpFlag = uiBestPUMode.tpmFlg;
+#endif
       cu.mipFlag = uiBestPUMode.mipFlg;
       pu.mipTransposedFlag             = uiBestPUMode.mipTrFlg;
       pu.multiRefIdx = uiBestPUMode.mRefId;
@@ -3252,7 +3348,19 @@ void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID &comp
       }
       else
       {
+#if IDCC_TPM_JEM
+		  if (PU::isTmp(pu, chType))
+		  {
+			  int foundCandiNum;
+			  m_pcTrQuant->getTargetTemplate(tu.cu, pu.lwidth(), pu.lheight());
+			  m_pcTrQuant->candidateSearchIntra(tu.cu, pu.lwidth(), pu.lheight());
+			  m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum);
+			  assert(foundCandiNum >= 1);
+		  }
+		  else if (PU::isMIP(pu, chType))
+#else
         if( PU::isMIP( pu, chType ) )
+#endif
         {
           initIntraMip( pu, area );
           predIntraMip( compID, piPred, pu );
@@ -4474,7 +4582,20 @@ bool IntraSearch::xRecurIntraCodingACTQT(CodingStructure &cs, Partitioner &parti
       PelBuf         piResi = resiBuf.bufs[compID];
 
       initIntraPatternChType(*tu.cu, area);
+#if IDCC_TPM_JEM
+	  if (PU::isTmp(pu, chType))
+	  {
+		  int foundCandiNum;
+		  m_pcTrQuant->getTargetTemplate(pu.cu, pu.lwidth(), pu.lheight());
+		  m_pcTrQuant->candidateSearchIntra(pu.cu, pu.lwidth(), pu.lheight());
+		  m_pcTrQuant->generateTMPrediction(piPred.buf, piPred.stride, pu.lwidth(), pu.lheight(), foundCandiNum);
+		  assert(foundCandiNum >= 1);
+
+	  }
+	  else if (PU::isMIP(pu, chType))
+#else
       if (PU::isMIP(pu, chType))
+#endif
       {
         initIntraMip(pu, area);
         predIntraMip(compID, piPred, pu);
diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h
index a2246e326242e99a484215b66d7b43ba496dbb36..b11a37a36218fd800ba8ecc0f1aeec9266a62c9d 100644
--- a/source/Lib/EncoderLib/IntraSearch.h
+++ b/source/Lib/EncoderLib/IntraSearch.h
@@ -221,17 +221,30 @@ private:
     int      mRefId; // PU::multiRefIdx
     uint8_t  ispMod; // CU::ispMode
     uint32_t modeId; // PU::intraDir[CHANNEL_TYPE_LUMA]
-
+#if IDCC_TPM_JEM
+	bool     tpmFlg; // CU::TmpFlag
+#endif
+#if IDCC_TPM_JEM
+	ModeInfo() : mipFlg(false), mipTrFlg(false), mRefId(0), ispMod(NOT_INTRA_SUBPARTITIONS), modeId(0), tpmFlg(0) {}
+	ModeInfo(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, const bool tpmf = 0) : mipFlg(mipf), mipTrFlg(miptf), mRefId(mrid), ispMod(ispm), modeId(mode), tpmFlg(tpmf) {}
+	bool operator==(const ModeInfo cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId && tpmFlg == cmp.tpmFlg); }
+#else
     ModeInfo() : mipFlg(false), mipTrFlg(false), mRefId(0), ispMod(NOT_INTRA_SUBPARTITIONS), modeId(0) {}
     ModeInfo(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode) : mipFlg(mipf), mipTrFlg(miptf), mRefId(mrid), ispMod(ispm), modeId(mode) {}
     bool operator==(const ModeInfo cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId); }
+#endif
   };
   struct ModeInfoWithCost : public ModeInfo
   {
     double rdCost;
     ModeInfoWithCost() : ModeInfo(), rdCost(MAX_DOUBLE) {}
+#if IDCC_TPM_JEM
+	ModeInfoWithCost(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, const bool tpmf, double cost) : ModeInfo(mipf, miptf, mrid, ispm, mode, tpmf), rdCost(cost) {}
+	bool operator==(const ModeInfoWithCost cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId && tpmFlg == cmp.tpmFlg && rdCost == cmp.rdCost); }
+#else
     ModeInfoWithCost(const bool mipf, const bool miptf, const int mrid, const uint8_t ispm, const uint32_t mode, double cost) : ModeInfo(mipf, miptf, mrid, ispm, mode), rdCost(cost) {}
     bool operator==(const ModeInfoWithCost cmp) const { return (mipFlg == cmp.mipFlg && mipTrFlg == cmp.mipTrFlg && mRefId == cmp.mRefId && ispMod == cmp.ispMod && modeId == cmp.modeId && rdCost == cmp.rdCost); }
+#endif
     static bool compareModeInfoWithCost(ModeInfoWithCost a, ModeInfoWithCost b) { return a.rdCost < b.rdCost; }
   };
 
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index 08f2878c09d05e6b4a9816a1e4bb297af166ea1f..56c523b37f8b032a82eca66b49799432ac72fb89 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -132,6 +132,7 @@ void VLCWriter::xWriteUvlc     ( uint32_t uiCode )
   uint32_t uiLength = 1;
   uint32_t uiTemp = ++uiCode;
 
+  if(!uiTemp){std::cout << "integer overflow: uiCode=" << (uiCode - 1) << std::endl;}  // uiCode was already incremented above; report the caller's original value
   CHECK( !uiTemp, "Integer overflow" );
 
   while( 1 != uiTemp )
@@ -1383,6 +1384,14 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
 #if ENABLE_DIMD
   WRITE_FLAG( pcSPS->getUseDimd() ? 1 : 0,                                             "sps_dimd_enabled_flag");
 #endif
+#if IDCC_TPM_JEM
+  WRITE_FLAG( pcSPS->getUseIntraTMP() ? 1 : 0,                                         "sps_intraTMP_enabled_flag");
+  if(pcSPS->getUseIntraTMP())
+  {
+    WRITE_UVLC(floorLog2(pcSPS->getIntraTMPMaxSize()), "sps_log2_intra_tmp_max_size");
+  }
+#endif
+
   if( pcSPS->getChromaFormatIdc() != CHROMA_400)
   {
     WRITE_FLAG( pcSPS->getUseLMChroma() ? 1 : 0,                                      "sps_cclm_enabled_flag");