From 003824b2e90d9249756bdbc805fe4892da25da9a Mon Sep 17 00:00:00 2001
From: Xiaoyu Xiu <xiaoyuxiu@kwai.com>
Date: Wed, 3 Apr 2019 19:13:54 +0200
Subject: [PATCH] JVET-N0329: IBC Search Improvement

---
 source/Lib/CommonLib/IbcHashMap.cpp   |  99 +++++++++++
 source/Lib/CommonLib/IbcHashMap.h     |   4 +
 source/Lib/CommonLib/MotionInfo.h     |   7 +
 source/Lib/CommonLib/RdCost.h         |   4 +
 source/Lib/CommonLib/Slice.h          |   8 +-
 source/Lib/CommonLib/TypeDef.h        |   2 +
 source/Lib/CommonLib/UnitTools.cpp    | 113 ++++++++++++
 source/Lib/CommonLib/UnitTools.h      |   5 +
 source/Lib/EncoderLib/EncCu.cpp       |  16 ++
 source/Lib/EncoderLib/EncModeCtrl.cpp |   4 +
 source/Lib/EncoderLib/EncSlice.cpp    |  12 ++
 source/Lib/EncoderLib/InterSearch.cpp | 242 +++++++++++++++++++++++++-
 source/Lib/EncoderLib/InterSearch.h   |  20 +++
 source/Lib/EncoderLib/IntraSearch.cpp |   8 +
 14 files changed, 539 insertions(+), 5 deletions(-)

diff --git a/source/Lib/CommonLib/IbcHashMap.cpp b/source/Lib/CommonLib/IbcHashMap.cpp
index 9d876292de..0b2cce6eff 100644
--- a/source/Lib/CommonLib/IbcHashMap.cpp
+++ b/source/Lib/CommonLib/IbcHashMap.cpp
@@ -274,6 +274,9 @@ bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand,
   // find the block with least candidates
   size_t minSize = MAX_UINT;
   unsigned int targetHashOneBlock = 0;
+#if JVET_N0329_IBC_SEARCH_IMP 
+  Position targetBlockOffsetInCu(0, 0);
+#endif
   for (SizeType y = 0; y < lumaArea.height && minSize > 1; y += MIN_PU_SIZE)
   {
     for (SizeType x = 0; x < lumaArea.width && minSize > 1; x += MIN_PU_SIZE)
@@ -283,6 +286,9 @@ bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand,
       {
         minSize = m_hash2Pos[hash].size();
         targetHashOneBlock = hash;
+#if JVET_N0329_IBC_SEARCH_IMP 
+        targetBlockOffsetInCu.repositionTo(Position(x, y));
+#endif
       }
     }
   }
@@ -294,11 +300,20 @@ bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand,
     // check whether whole block match
     for (std::vector<Position>::iterator refBlockPos = candOneBlock.begin(); refBlockPos != candOneBlock.end(); refBlockPos++)
     {
+#if JVET_N0329_IBC_SEARCH_IMP 
+      Position topLeft = refBlockPos->offset(-targetBlockOffsetInCu.x, -targetBlockOffsetInCu.y);
+      Position bottomRight = topLeft.offset(lumaArea.width - 1, lumaArea.height - 1);
+#else
       Position bottomRight = refBlockPos->offset(lumaArea.width - 1, lumaArea.height - 1);
+#endif
       bool wholeBlockMatch = true;
       if (lumaArea.width > MIN_PU_SIZE || lumaArea.height > MIN_PU_SIZE)
       {
+#if JVET_N0329_IBC_SEARCH_IMP 
+        if (!cs.isDecomp(bottomRight, cs.chType) || bottomRight.x >= m_picWidth || bottomRight.y >= m_picHeight || topLeft.x < 0 || topLeft.y < 0)
+#else
         if (!cs.isDecomp(bottomRight, cs.chType) || bottomRight.x >= m_picWidth || bottomRight.y >= m_picHeight)
+#endif
         {
           continue;
         }
@@ -307,20 +322,33 @@ bool IbcHashMap::ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand,
           for (SizeType x = 0; x < lumaArea.width && wholeBlockMatch; x += MIN_PU_SIZE)
           {
             // whether the reference block and current block has the same hash
+#if JVET_N0329_IBC_SEARCH_IMP 
+            wholeBlockMatch &= (m_pos2Hash[lumaArea.pos().y + y][lumaArea.pos().x + x] == m_pos2Hash[topLeft.y + y][topLeft.x + x]);
+#else
             wholeBlockMatch &= (m_pos2Hash[lumaArea.pos().y + y][lumaArea.pos().x + x] == m_pos2Hash[refBlockPos->y + y][refBlockPos->x + x]);
+#endif
           }
         }
       }
       else
       {
+#if JVET_N0329_IBC_SEARCH_IMP 
+        CHECK(topLeft != *refBlockPos, "4x4 target block should not have offset!");
+        if (abs(topLeft.x - lumaArea.x) > searchRange4SmallBlk || abs(topLeft.y - lumaArea.y) > searchRange4SmallBlk || !cs.isDecomp(bottomRight, cs.chType))
+#else
         if (abs(refBlockPos->x - lumaArea.x) > searchRange4SmallBlk || abs(refBlockPos->y - lumaArea.y) > searchRange4SmallBlk || !cs.isDecomp(bottomRight, cs.chType))
+#endif
         {
           continue;
         }
       }
       if (wholeBlockMatch)
       {
+#if JVET_N0329_IBC_SEARCH_IMP 
+        cand.push_back(topLeft);
+#else
         cand.push_back(*refBlockPos);
+#endif
         if (cand.size() > maxCand)
         {
           break;
@@ -349,5 +377,76 @@ int IbcHashMap::getHashHitRatio(const Area& lumaArea)
   return 100 * hit / total;
 }
 
+#if JVET_N0329_IBC_SEARCH_IMP
+int IbcHashMap::calHashBlkMatchPerc(const Area& lumaArea)
+{
+  int maxX = std::min((int)(lumaArea.x + lumaArea.width), m_picWidth);
+  int maxY = std::min((int)(lumaArea.y + lumaArea.height), m_picHeight);
+  int          maxUsage[100];
+  unsigned int mostSelHash[100];
+
+  static   int numExcludedHashValue = 36;
+
+  for (int i = 0; i < numExcludedHashValue; i++)
+  {
+    maxUsage[i] = 0;
+    mostSelHash[i] = 0;
+  }
+
+  for (std::unordered_map<unsigned int, std::vector<Position>>::iterator it = m_hash2Pos.begin(); it != m_hash2Pos.end(); ++it)
+  {
+    unsigned int hash = it->first;
+    int usage = (int)it->second.size();
+    assert(usage == m_hash2Pos[hash].size());
+
+    int insertPos = -1;
+    for (insertPos = 0; insertPos < numExcludedHashValue; insertPos++)
+    {
+      if (usage > maxUsage[insertPos])
+      {
+        break;
+      }
+    }
+    assert(insertPos <= numExcludedHashValue);
+
+    if (insertPos < numExcludedHashValue)
+    {
+      for (int i = (numExcludedHashValue - 1); i >= (insertPos + 1); i--)
+      {
+        maxUsage[i] = maxUsage[i - 1];
+        mostSelHash[i] = mostSelHash[i - 1];
+      }
+      maxUsage[insertPos] = usage;
+      mostSelHash[insertPos] = hash;
+    }
+  }
+
+  int hit = 0, total = 0;
+  for (int y = lumaArea.y; y < maxY; y += MIN_PU_SIZE)
+  {
+    for (int x = lumaArea.x; x < maxX; x += MIN_PU_SIZE)
+    {
+      unsigned int hash = m_pos2Hash[y][x];
+
+      bool excludedHash = false;
+      for (int i = 0; i < numExcludedHashValue && !excludedHash; i++)
+      {
+        if (hash == mostSelHash[i])
+        {
+          excludedHash = true;
+        }
+      }
+
+      if (excludedHash)
+      {
+        continue;
+      }
 
+      hit += (m_hash2Pos[hash].size() > 1);
+      total++;
+    }
+  }
+  return 100 * hit / total;
+}
+#endif
 //! \}
diff --git a/source/Lib/CommonLib/IbcHashMap.h b/source/Lib/CommonLib/IbcHashMap.h
index e343aab6e0..46074a4b8c 100644
--- a/source/Lib/CommonLib/IbcHashMap.h
+++ b/source/Lib/CommonLib/IbcHashMap.h
@@ -82,6 +82,10 @@ public:
   bool    ibcHashMatch(const Area& lumaArea, std::vector<Position>& cand, const CodingStructure& cs, const int maxCand, const int searchRange4SmallBlk);
   int     getHashHitRatio(const Area& lumaArea);
 
+#if JVET_N0329_IBC_SEARCH_IMP
+  int     calHashBlkMatchPerc(const Area& lumaArea);
+#endif
+
 #ifdef TARGET_SIMD_X86
   void    initIbcHashMapX86();
   template <X86_VEXT vext>
diff --git a/source/Lib/CommonLib/MotionInfo.h b/source/Lib/CommonLib/MotionInfo.h
index 020323575f..609fd14adf 100644
--- a/source/Lib/CommonLib/MotionInfo.h
+++ b/source/Lib/CommonLib/MotionInfo.h
@@ -224,4 +224,11 @@ struct LutMotionCand
   static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> lutShare;
   static_vector<MotionInfo, MAX_NUM_HMVP_CANDS> lutShareIbc;
 };
+#if JVET_N0329_IBC_SEARCH_IMP
+struct PatentBvCand
+{
+  Mv m_bvCands[IBC_NUM_CANDIDATES];
+  int currCnt;
+};
+#endif
 #endif // __MOTIONINFO__
diff --git a/source/Lib/CommonLib/RdCost.h b/source/Lib/CommonLib/RdCost.h
index 64d2e64595..22e5b1b684 100644
--- a/source/Lib/CommonLib/RdCost.h
+++ b/source/Lib/CommonLib/RdCost.h
@@ -179,7 +179,11 @@ public:
 
   inline Distortion getBvCostMultiplePreds(int x, int y, bool useIMV)
   {
+#if JVET_N0329_IBC_SEARCH_IMP
+    return Distortion(m_dCost * getBitsMultiplePreds(x, y, useIMV));
+#else
     return Distortion((m_dCost * getBitsMultiplePreds(x, y, useIMV)) / 65536.0);
+#endif
   }
 
   unsigned int    getBitsMultiplePreds(int x, int y, bool useIMV)
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index de5979d8e6..f68f0f8aa9 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -1828,6 +1828,9 @@ private:
   int                        m_apsId;
   APS*                       m_aps;
   bool                       m_tileGroupAlfEnabledFlag;
+#if JVET_N0329_IBC_SEARCH_IMP 
+  bool                       m_disableSATDForRd;
+#endif
   SliceReshapeInfo           m_sliceReshapeInfo;
 public:
                               Slice();
@@ -2114,7 +2117,10 @@ public:
 
   bool                        getTileGroupAlfEnabledFlag() const { return m_tileGroupAlfEnabledFlag; }
   void                        setTileGroupAlfEnabledFlag(bool b) { m_tileGroupAlfEnabledFlag = b; }
-
+#if JVET_N0329_IBC_SEARCH_IMP 
+  void                        setDisableSATDForRD(bool b) { m_disableSATDForRd = b; }
+  bool                        getDisableSATDForRD() { return m_disableSATDForRd; }
+#endif
   const SliceReshapeInfo&     getReshapeInfo() const { return m_sliceReshapeInfo; }
         SliceReshapeInfo&     getReshapeInfo()       { return m_sliceReshapeInfo; }
 protected:
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index bd385f954b..a387b883c0 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -58,6 +58,8 @@
 
 #define JVET_N0242_NON_LINEAR_ALF                         1 // enable CE5-3.2, Non-linear ALF based on clipping function
 
+#define JVET_N0329_IBC_SEARCH_IMP                         1 // IBC encoder-side improvement
+
 #define JVET_N0325_BDOF                                   1  // unified right-shifts for BDOF derivation
 
 #define JVET_N0449_MMVD_SIMP                              1 // Configurable number of mmvd distance entries used
diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp
index f1915892a3..4ecabb63e7 100644
--- a/source/Lib/CommonLib/UnitTools.cpp
+++ b/source/Lib/CommonLib/UnitTools.cpp
@@ -2186,9 +2186,121 @@ bool PU::isDiffMER(const PredictionUnit &pu1, const PredictionUnit &pu2)
 
   return false;
 }
+
+#if JVET_N0329_IBC_SEARCH_IMP
+bool PU::isAddNeighborMv(const Mv& currMv, Mv* neighborMvs, int numNeighborMv)
+{
+  bool existed = false;
+  for (uint32_t cand = 0; cand < numNeighborMv && !existed; cand++)
+  {
+    if (currMv == neighborMvs[cand])
+    {
+      existed = true;
+    }
+  }
+
+  if (!existed)
+  {
+    return true;
+  }
+  else
+  {
+    return false;
+  }
+}
+#endif
+
+#if JVET_N0329_IBC_SEARCH_IMP
+void PU::getIbcMVPsEncOnly(PredictionUnit &pu, Mv* mvPred, int& nbPred)
+#else
 void PU::getIbcMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred)
+#endif
 {
+#if JVET_N0329_IBC_SEARCH_IMP
+  const PreCalcValues   &pcv = *pu.cs->pcv;
+  const int  cuWidth = pu.blocks[COMPONENT_Y].width;
+  const int  cuHeight = pu.blocks[COMPONENT_Y].height;
+  const int  log2UnitWidth = g_aucLog2[pcv.minCUWidth];
+  const int  log2UnitHeight = g_aucLog2[pcv.minCUHeight];
+  const int  totalAboveUnits = (cuWidth >> log2UnitWidth) + 1;
+  const int  totalLeftUnits = (cuHeight >> log2UnitHeight) + 1;
+
+  nbPred = 0;
+  Position posLT = pu.Y().topLeft();
+
+  // above-left
+  const PredictionUnit *aboveLeftPU = pu.cs->getPURestricted(posLT.offset(-1, -1), pu, pu.cs->chType);
+  if (aboveLeftPU && CU::isIBC(*aboveLeftPU->cu))
+  {
+    if (isAddNeighborMv(aboveLeftPU->bv, mvPred, nbPred))
+    {
+      mvPred[nbPred++] = aboveLeftPU->bv;
+    }
+  }
+
+  // above neighbors
+  for (uint32_t dx = 0; dx < totalAboveUnits && nbPred < IBC_NUM_CANDIDATES; dx++)
+  {
+    const PredictionUnit* tmpPU = pu.cs->getPURestricted(posLT.offset((dx << log2UnitWidth), -1), pu, pu.cs->chType);
+    if (tmpPU && CU::isIBC(*tmpPU->cu))
+    {
+      if (isAddNeighborMv(tmpPU->bv, mvPred, nbPred))
+      {
+        mvPred[nbPred++] = tmpPU->bv;
+      }
+    }
+  }
+
+  // left neighbors
+  for (uint32_t dy = 0; dy < totalLeftUnits && nbPred < IBC_NUM_CANDIDATES; dy++)
+  {
+    const PredictionUnit* tmpPU = pu.cs->getPURestricted(posLT.offset(-1, (dy << log2UnitHeight)), pu, pu.cs->chType);
+    if (tmpPU && CU::isIBC(*tmpPU->cu))
+    {
+      if (isAddNeighborMv(tmpPU->bv, mvPred, nbPred))
+      {
+        mvPred[nbPred++] = tmpPU->bv;
+      }
+    }
+  }
+
+  size_t numAvaiCandInLUT = pu.cs->motionLut.lutIbc.size();
+  for (uint32_t cand = 0; cand < numAvaiCandInLUT && nbPred < IBC_NUM_CANDIDATES; cand++)
+  {
+    MotionInfo neibMi = pu.cs->motionLut.lutIbc[cand];
+    if (isAddNeighborMv(neibMi.bv, mvPred, nbPred))
+    {
+      mvPred[nbPred++] = neibMi.bv;
+    }
+  }
 
+  bool isBvCandDerived[IBC_NUM_CANDIDATES];
+  ::memset(isBvCandDerived, false, IBC_NUM_CANDIDATES);
+
+  int curNbPred = nbPred;
+  if (curNbPred < IBC_NUM_CANDIDATES)
+  {
+    do
+    {
+      curNbPred = nbPred;
+      for (uint32_t idx = 0; idx < curNbPred && nbPred < IBC_NUM_CANDIDATES; idx++)
+      {
+        if (!isBvCandDerived[idx])
+        {
+          Mv derivedBv;
+          if (getDerivedBV(pu, mvPred[idx], derivedBv))
+          {
+            if (isAddNeighborMv(derivedBv, mvPred, nbPred))
+            {
+              mvPred[nbPred++] = derivedBv;
+            }
+          }
+          isBvCandDerived[idx] = true;
+        }
+      }
+    } while (nbPred > curNbPred && nbPred < IBC_NUM_CANDIDATES);
+  }
+#else
   //-- Get Spatial MV
   Position posLT = pu.Y().topLeft();
   Position posRT = pu.Y().topRight();
@@ -2257,6 +2369,7 @@ void PU::getIbcMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred)
     if (getDerivedBV(pu, MvPred[nbPred - 1], MvPred[nbPred]))
       nbPred++;
   }
+#endif
 }
 
 bool PU::getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv)
diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h
index 0cb4a09d9b..45b749c7a2 100644
--- a/source/Lib/CommonLib/UnitTools.h
+++ b/source/Lib/CommonLib/UnitTools.h
@@ -184,7 +184,12 @@ namespace PU
   bool isUniqueTriangleCandidates     (const PredictionUnit &pu, MergeCtx &triangleMrgCtx);
   void spanTriangleMotionInfo         (      PredictionUnit &pu, MergeCtx &triangleMrgCtx, const bool splitDir, const uint8_t candIdx0, const uint8_t candIdx1);
   int32_t mappingRefPic               (const PredictionUnit &pu, int32_t refPicPoc, bool targetRefPicList);
+#if JVET_N0329_IBC_SEARCH_IMP
+  bool isAddNeighborMv  (const Mv& currMv, Mv* neighborMvs, int numNeighborMv);
+  void getIbcMVPsEncOnly(PredictionUnit &pu, Mv* mvPred, int& nbPred);
+#else
   void getIbcMVPsEncOnly(PredictionUnit &pu, Mv* MvPred, int& nbPred);
+#endif
   bool getDerivedBV(PredictionUnit &pu, const Mv& currentMv, Mv& derivedMv);
   bool isBlockVectorValid(PredictionUnit& pu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xStartInCU, int yStartInCU, int xBv, int yBv, int ctuSize);
   bool checkDMVRCondition(const PredictionUnit& pu);
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index dda140f085..faefbe832d 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -1803,7 +1803,11 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
       PredictionUnit &pu  = tempCS->addPU( cu, partitioner.chType );
 
       DistParam distParam;
+#if JVET_N0329_IBC_SEARCH_IMP
+      const bool bUseHadamard = !encTestMode.lossless && !tempCS->slice->getDisableSATDForRD();
+#else
       const bool bUseHadamard= !encTestMode.lossless;
+#endif
       m_pcRdCost->setDistParam (distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth (CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard);
 
       const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height) );
@@ -2308,7 +2312,11 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru
   }
 
   DistParam distParam;
+#if JVET_N0329_IBC_SEARCH_IMP 
+  const bool useHadamard = !encTestMode.lossless && !tempCS->slice->getDisableSATDForRD();
+#else
   const bool useHadamard = !encTestMode.lossless;
+#endif
   m_pcRdCost->setDistParam( distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, useHadamard );
 
   const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height) );
@@ -2627,7 +2635,11 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
       PredictionUnit &pu = tempCS->addPU( cu, partitioner.chType );
 
       DistParam distParam;
+#if JVET_N0329_IBC_SEARCH_IMP
+      const bool bUseHadamard = !encTestMode.lossless && !tempCS->slice->getDisableSATDForRD();
+#else
       const bool bUseHadamard = !encTestMode.lossless;
+#endif
       m_pcRdCost->setDistParam( distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, bUseHadamard );
 
       const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height ) );
@@ -2887,7 +2899,11 @@ void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure *&tempCS, CodingStruct
       cu.mmvdSkip = false;
       cu.triangle = false;
       DistParam distParam;
+#if JVET_N0329_IBC_SEARCH_IMP
+      const bool bUseHadamard = !encTestMode.lossless && !cu.slice->getDisableSATDForRD();
+#else
       const bool bUseHadamard = !encTestMode.lossless;
+#endif
       PredictionUnit &pu = tempCS->addPU(cu, partitioner.chType); //tempCS->addPU(cu);
       pu.mmvdMergeFlag = false;
       Picture* refPic = pu.cu->slice->getPic();
diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp
index 0c8a5f7c6d..32a940e073 100644
--- a/source/Lib/EncoderLib/EncModeCtrl.cpp
+++ b/source/Lib/EncoderLib/EncModeCtrl.cpp
@@ -1533,7 +1533,11 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt
     if( lastTestMode().type != ETM_INTRA && cuECtx.bestCS && cuECtx.bestCU && interHadActive( cuECtx ) )
     {
       // Get SATD threshold from best Inter-CU
+#if JVET_N0329_IBC_SEARCH_IMP
+      if (!cs.slice->isIRAP() && m_pcEncCfg->getUsePbIntraFast() && !cs.slice->getDisableSATDForRD())
+#else
       if( !cs.slice->isIRAP() && m_pcEncCfg->getUsePbIntraFast() )
+#endif
       {
         CodingUnit* bestCU = cuECtx.bestCU;
         if (bestCU && !CU::isIntra(*bestCU))
diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp
index 825d3dd624..0a36a5c041 100644
--- a/source/Lib/EncoderLib/EncSlice.cpp
+++ b/source/Lib/EncoderLib/EncSlice.cpp
@@ -738,6 +738,9 @@ void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr
     rpcSlice->setMaxBTSizeIChroma( rpcSlice->getSPS()->getMaxBTSizeIChroma() );
     rpcSlice->setMaxTTSizeIChroma( rpcSlice->getSPS()->getMaxTTSizeIChroma() );
   }
+#if JVET_N0329_IBC_SEARCH_IMP 
+  rpcSlice->setDisableSATDForRD(false);
+#endif
 }
 
 
@@ -1584,11 +1587,20 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons
   if ( pcSlice->getSPS()->getDisFracMmvdEnabledFlag() ||
       (pcSlice->getSPS()->getIBCFlag() && m_pcCuEncoder->getEncCfg()->getIBCHashSearch()))
   {
+#if JVET_N0329_IBC_SEARCH_IMP
+    m_pcCuEncoder->getIbcHashMap().rebuildPicHashMap(cs.picture->getTrueOrigBuf());
+    if (m_pcCfg->getIntraPeriod() != -1)
+    {
+      int hashBlkHitPerc = m_pcCuEncoder->getIbcHashMap().calHashBlkMatchPerc(cs.area.Y());
+      cs.slice->setDisableSATDForRD(hashBlkHitPerc > 59);
+    }
+#else
     if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCFlag())
       cs.picture->getOrigBuf(COMPONENT_Y).rspSignal(m_pcLib->getReshaper()->getFwdLUT());
     m_pcCuEncoder->getIbcHashMap().rebuildPicHashMap( cs.picture->getOrigBuf() );
     if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCFlag())
       cs.picture->getOrigBuf().copyFrom(cs.picture->getTrueOrigBuf());
+#endif
   }
   checkDisFracMmvd( pcPic, startCtuTsAddr, boundingCtuTsAddr );
   // for every CTU in the slice segment (may terminate sooner if there is a byte limit on the slice-segment)
diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp
index 182e93a7a6..25cb3e3495 100644
--- a/source/Lib/EncoderLib/InterSearch.cpp
+++ b/source/Lib/EncoderLib/InterSearch.cpp
@@ -198,7 +198,15 @@ void InterSearch::init( EncCfg*        pcEncCfg,
 {
   CHECK(m_isInitialized, "Already initialized");
   m_numBVs = 0;
+#if JVET_N0329_IBC_SEARCH_IMP
+  for (int i = 0; i < IBC_NUM_CANDIDATES; i++)
+  {
+    m_defaultCachedBvs.m_bvCands[i].setZero();
+  }
+  m_defaultCachedBvs.currCnt = 0;
+#else
   m_numBV16s = 0;
+#endif
   m_pcEncCfg                     = pcEncCfg;
   m_pcTrQuant                    = pcTrQuant;
   m_iSearchRange                 = iSearchRange;
@@ -747,7 +755,11 @@ Distortion InterSearch::xGetInterPredictionError( PredictionUnit& pu, PelUnitBuf
   DistParam cDistParam;
   cDistParam.applyWeight = false;
 
+#if JVET_N0329_IBC_SEARCH_IMP 
+  m_pcRdCost->setDistParam(cDistParam, origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass && !pu.cu->slice->getDisableSATDForRD());
+#else
   m_pcRdCost->setDistParam( cDistParam, origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass );
+#endif
 
   return (Distortion)cDistParam.distFunc( cDistParam );
 }
@@ -805,6 +817,13 @@ int InterSearch::xIBCSearchMVChromaRefine(PredictionUnit& pu,
   UnitArea allCompBlocks(pu.chromaFormat, (Area)pu.block(COMPONENT_Y));
   for (int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
   {
+#if JVET_N0329_IBC_SEARCH_IMP
+    if (sadBestCand[cand] == std::numeric_limits<Distortion>::max())
+    {
+      continue;
+    }
+#endif
+
     if ((!cMVCand[cand].getHor()) && (!cMVCand[cand].getVer()))
       continue;
 
@@ -869,10 +888,22 @@ int InterSearch::xIBCSearchMVChromaRefine(PredictionUnit& pu,
   return bestCandIdx;
 }
 
+#if JVET_N0329_IBC_SEARCH_IMP
+static unsigned int xMergeCandLists(Mv *dst, unsigned int dn, unsigned int dstTotalLength, Mv *src, unsigned int sn)
+{
+  for (unsigned int cand = 0; cand < sn && dn < dstTotalLength; cand++)
+#else
 static unsigned int xMergeCandLists(Mv *dst, unsigned int dn, Mv *src, unsigned int sn)
 {
   for (unsigned int cand = 0; cand < sn && dn<IBC_NUM_CANDIDATES; cand++)
+#endif
   {
+#if JVET_N0329_IBC_SEARCH_IMP
+    if (src[cand] == Mv())
+    {
+      continue;
+    }
+#endif
     bool found = false;
     for (int j = 0; j<dn; j++)
     {
@@ -942,7 +973,12 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
     Distortion tempSadBest = 0;
 
     int srLeft = srchRngHorLeft, srRight = srchRngHorRight, srTop = srchRngVerTop, srBottom = srchRngVerBottom;
+#if JVET_N0329_IBC_SEARCH_IMP
+    m_numBVs = 0;
+    m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt);
 
+    Mv cMvPredEncOnly[IBC_NUM_CANDIDATES];
+#else
     if (roiWidth>8 || roiHeight>8)
     {
       m_numBVs = 0;
@@ -953,9 +989,14 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
     }
 
     Mv cMvPredEncOnly[16];
+#endif
     int nbPreds = 0;
     PU::getIbcMVPsEncOnly(pu, cMvPredEncOnly, nbPreds);
+#if JVET_N0329_IBC_SEARCH_IMP
+    m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), cMvPredEncOnly, nbPreds);
+#else
     m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, cMvPredEncOnly, nbPreds);
+#endif
 
     for (unsigned int cand = 0; cand < m_numBVs; cand++)
     {
@@ -1196,6 +1237,23 @@ void InterSearch::xIntraPatternSearch(PredictionUnit& pu, IntTZSearchStruct&  cS
   ruiCost = sadBest;
 
 end:
+#if JVET_N0329_IBC_SEARCH_IMP
+  m_numBVs = 0;
+  m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt);
+
+  m_defaultCachedBvs.currCnt = 0;
+  m_defaultCachedBvs.currCnt = xMergeCandLists(m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt, IBC_NUM_CANDIDATES, cMVCand, CHROMA_REFINEMENT_CANDIDATES);
+  m_defaultCachedBvs.currCnt = xMergeCandLists(m_defaultCachedBvs.m_bvCands, m_defaultCachedBvs.currCnt, IBC_NUM_CANDIDATES, m_acBVs, m_numBVs);
+
+  for (unsigned int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
+  {
+    if (cMVCand[cand].getHor() == 0 && cMVCand[cand].getVer() == 0)
+    {
+      continue;
+    }
+    m_ctuRecord[pu.lumaPos()][pu.lumaSize()].bvRecord[cMVCand[cand]] = sadBestCand[cand];
+  }
+#else
   if (roiWidth + roiHeight > 8)
   {
     m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, cMVCand, CHROMA_REFINEMENT_CANDIDATES);
@@ -1205,6 +1263,7 @@ end:
       m_numBV16s = m_numBVs;
     }
   }
+#endif
 
   return;
 }
@@ -1218,10 +1277,57 @@ void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf,
   Distortion &ruiCost, const int localSearchRangeX, const int localSearchRangeY
 )
 {
+#if JVET_N0329_IBC_SEARCH_IMP
+  const int iPicWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples();
+  const int iPicHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples();
+  const unsigned int  lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
+  const int           cuPelX = pu.Y().x;
+  const int           cuPelY = pu.Y().y;
+  int                 iRoiWidth = pu.lwidth();
+  int                 iRoiHeight = pu.lheight();
+
+  PelUnitBuf* pBuf = &origBuf;
+
+  //  Search key pattern initialization
+  CPelBuf  tmpPattern = pBuf->Y();
+  CPelBuf* pcPatternKey = &tmpPattern;
+
+  if ((pu.cs->slice->getReshapeInfo().getUseSliceReshaper() && m_pcReshape->getCTUFlag()))
+  {
+    const CompArea &area = pu.blocks[COMPONENT_Y];
+    CompArea    tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
+    PelBuf tmpOrgLuma = m_tmpStorageLCU.getBuf(tmpArea);
+    tmpOrgLuma.copyFrom(tmpPattern);
+    tmpOrgLuma.rspSignal(m_pcReshape->getFwdLUT());
+    pcPatternKey = (CPelBuf*)&tmpOrgLuma;
+  }
+
+  m_lumaClpRng = pu.cs->slice->clpRng(COMPONENT_Y);
+  Picture* refPic = pu.cu->slice->getPic();
+  const CPelBuf refBuf = refPic->getRecoBuf(pu.blocks[COMPONENT_Y]);
+
+  IntTZSearchStruct cStruct;
+  cStruct.pcPatternKey = pcPatternKey;
+  cStruct.iRefStride = refBuf.stride;
+  cStruct.piRefY = refBuf.buf;
+  cStruct.imvShift = pu.cu->imv << 1;
+  cStruct.subShiftMode = 0; // used by intra pattern search function
+
+  // disable weighted prediction
+  setWpScalingDistParam(-1, REF_PIC_LIST_X, pu.cs->slice);
+
+  m_pcRdCost->getMotionCost(0, pu.cu->transQuantBypass);
+  m_pcRdCost->setPredictors(pcMvPred);
+  m_pcRdCost->setCostScale(0);
+
+  m_cDistParam.useMR = false;
+  m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, cStruct.subShiftMode);
+#endif
   bool buffered = false;
   if (m_pcEncCfg->getIBCFastMethod() & IBC_FAST_METHOD_BUFFERBV)
   {
     ruiCost = MAX_UINT;
+#if !JVET_N0329_IBC_SEARCH_IMP
     const int iPicWidth = pu.cs->slice->getSPS()->getPicWidthInLumaSamples();
     const int iPicHeight = pu.cs->slice->getSPS()->getPicHeightInLumaSamples();
     const int   cuPelX = pu.Y().x;
@@ -1229,8 +1335,11 @@ void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf,
 
     int          iRoiWidth = pu.lwidth();
     int          iRoiHeight = pu.lheight();
+#endif
     std::unordered_map<Mv, Distortion>& history = m_ctuRecord[pu.lumaPos()][pu.lumaSize()].bvRecord;
+#if !JVET_N0329_IBC_SEARCH_IMP
     const unsigned int  lcuWidth = pu.cs->slice->getSPS()->getMaxCUWidth();
+#endif
     for (std::unordered_map<Mv, Distortion>::iterator p = history.begin(); p != history.end(); p++)
     {
       const Mv& bv = p->first;
@@ -1238,7 +1347,28 @@ void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf,
       int xBv = bv.hor;
       int yBv = bv.ver;
       if (PU::isBlockVectorValid(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, 0, 0, xBv, yBv, lcuWidth))
-       {
+      {
+#if JVET_N0329_IBC_SEARCH_IMP
+        buffered = true;
+        Distortion sad = m_pcRdCost->getBvCostMultiplePreds(xBv, yBv, pu.cs->sps->getAMVREnabledFlag());
+        m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yBv + xBv;
+        sad += m_cDistParam.distFunc(m_cDistParam);
+        if (sad < ruiCost)
+        {
+          rcMv = bv;
+          ruiCost = sad;
+        }
+        else if (sad == ruiCost)
+        {
+          // stabilise the search through the unordered list
+          if (bv.hor < rcMv.getHor()
+            || (bv.hor == rcMv.getHor() && bv.ver < rcMv.getVer()))
+          {
+            // update the vector.
+            rcMv = bv;
+          }
+        }
+#else
         if (p->second < ruiCost)
         {
           rcMv = bv;
@@ -1255,12 +1385,59 @@ void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf,
             rcMv = bv;
           }
         }
+#endif
+      }
+    }
+
+#if JVET_N0329_IBC_SEARCH_IMP
+    if (buffered)
+    {
+      Mv cMvPredEncOnly[IBC_NUM_CANDIDATES];
+      int nbPreds = 0;
+      PU::getIbcMVPsEncOnly(pu, cMvPredEncOnly, nbPreds);
+
+      for (unsigned int cand = 0; cand < nbPreds; cand++)
+      {
+        int xPred = cMvPredEncOnly[cand].getHor();
+        int yPred = cMvPredEncOnly[cand].getVer();
+
+        if (PU::isBlockVectorValid(pu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, 0, 0, xPred, yPred, lcuWidth))
+        {
+          Distortion sad = m_pcRdCost->getBvCostMultiplePreds(xPred, yPred, pu.cs->sps->getAMVREnabledFlag());
+          m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yPred + xPred;
+          sad += m_cDistParam.distFunc(m_cDistParam);
+          if (sad < ruiCost)
+          {
+            rcMv.set(xPred, yPred);
+            ruiCost = sad;
+          }
+          else if (sad == ruiCost)
+          {
+            // stabilise the search through the unordered list
+            if (xPred < rcMv.getHor()
+              || (xPred == rcMv.getHor() && yPred < rcMv.getVer()))
+            {
+              // update the vector.
+              rcMv.set(xPred, yPred);
+            }
+          }
+
+          m_ctuRecord[pu.lumaPos()][pu.lumaSize()].bvRecord[Mv(xPred, yPred)] = sad;
+        }
       }
     }
+#endif
   }
 
   if (!buffered)
   {
+#if JVET_N0329_IBC_SEARCH_IMP
+    Mv        cMvSrchRngLT;
+    Mv        cMvSrchRngRB;
+
+    // assume that intra BV is integer-pel precision
+    xSetIntraSearchRange(pu, pu.lwidth(), pu.lheight(), localSearchRangeX, localSearchRangeY, cMvSrchRngLT, cMvSrchRngRB);
+#else
     Mv        cMvSrchRngLT;
     Mv        cMvSrchRngRB;
 
@@ -1304,9 +1481,9 @@ void InterSearch::xIBCEstimation(PredictionUnit& pu, PelUnitBuf& origBuf,
     m_pcRdCost->getMotionCost(0, pu.cu->transQuantBypass);
     m_pcRdCost->setPredictors(pcMvPred);
     m_pcRdCost->setCostScale(0);
+#endif
 
     //  Do integer search
-
     xIntraPatternSearch(pu, cStruct, rcMv, ruiCost, &cMvSrchRngLT, &cMvSrchRngRB, pcMvPred);
   }
 }
@@ -1469,7 +1646,9 @@ bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner, const
     pu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF;
     pu.mv[REF_PIC_LIST_0].changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
 
+#if !JVET_N0329_IBC_SEARCH_IMP
     m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord[pu.bv] = cost;
+#endif
   }
 
   return true;
@@ -2898,9 +3077,15 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf&
   }
 
   // calc distortion
-
+#if JVET_N0329_IBC_SEARCH_IMP 
+  enum DFunc distFunc = (pu.cu->transQuantBypass || pu.cs->slice->getDisableSATDForRD()) ? DF_SAD : DF_HAD;
+#endif
   uiCost  = m_pcRdCost->getDistPart( origBuf.Y(), predBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y
-    , DF_HAD
+#if JVET_N0329_IBC_SEARCH_IMP 
+    , distFunc
+#else
+    , DF_HAD 
+#endif
   );
   uiCost += m_pcRdCost->getCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum] );
   DTRACE( g_trace_ctx, D_COMMON, " (%d) affineTemplateCost=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCost );
@@ -3686,7 +3871,11 @@ void InterSearch::xPatternSearchIntRefine(PredictionUnit& pu, IntTZSearchStruct&
   CHECK( amvpInfo.mvCand[riMVPIdx] != rcMvPred, "xPatternSearchIntRefine(): MvPred issue.");
 
   const SPS &sps = *pu.cs->sps;
+#if JVET_N0329_IBC_SEARCH_IMP 
+  m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass && !pu.cs->slice->getDisableSATDForRD());
+#else
   m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !pu.cu->transQuantBypass );
+#endif
 
   // input MV rcMV has integer resolution
   // -> shift it to QPEL
@@ -3821,14 +4010,22 @@ void InterSearch::xPatternSearchFracDIF(
     m_pcRdCost->setCostScale(0);
     xExtDIFUpSamplingH(&cPatternRoi);
     rcMvQter = rcMvInt;   rcMvQter <<= 2;    // for mv-cost
+#if JVET_N0329_IBC_SEARCH_IMP 
+    ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded && !pu.cs->slice->getDisableSATDForRD());
+#else
     ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded);
+#endif
     return;
   }
 
 
   if (cStruct.imvShift || (m_useCompositeRef && cStruct.zeroMV))
   {
+#if JVET_N0329_IBC_SEARCH_IMP 
+    m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY + iOffset, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !bIsLosslessCoded && !pu.cs->slice->getDisableSATDForRD());
+#else
     m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY + iOffset, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !bIsLosslessCoded );
+#endif
     ruiCost = m_cDistParam.distFunc( m_cDistParam );
     ruiCost += m_pcRdCost->getCostOfVectorWithPredictor( rcMvInt.getHor(), rcMvInt.getVer(), cStruct.imvShift );
     return;
@@ -3840,7 +4037,11 @@ void InterSearch::xPatternSearchFracDIF(
 
   rcMvHalf = rcMvInt;   rcMvHalf <<= 1;    // for mv-cost
   Mv baseRefMv(0, 0);
+#if JVET_N0329_IBC_SEARCH_IMP 
+  ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, (!bIsLosslessCoded && !pu.cs->slice->getDisableSATDForRD()));
+#else
   ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, !bIsLosslessCoded);
+#endif
 
   //  quarter-pel refinement
   m_pcRdCost->setCostScale( 0 );
@@ -3850,7 +4051,11 @@ void InterSearch::xPatternSearchFracDIF(
 
   rcMvQter = rcMvInt;    rcMvQter <<= 1;    // for mv-cost
   rcMvQter += rcMvHalf;  rcMvQter <<= 1;
+#if JVET_N0329_IBC_SEARCH_IMP
+  ruiCost = xPatternRefinement(cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, (!bIsLosslessCoded && !pu.cs->slice->getDisableSATDForRD()));
+#else
   ruiCost = xPatternRefinement( cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, !bIsLosslessCoded );
+#endif
 }
 
 Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField, int gbiIdx )
@@ -3881,7 +4086,12 @@ Distortion InterSearch::xGetSymmetricCost( PredictionUnit& pu, PelUnitBuf& origB
     bufTmp.Y().addAvg( predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng( COMPONENT_Y ) );
 
   // calc distortion
+#if JVET_N0329_IBC_SEARCH_IMP
+  DFunc distFunc = (!pu.cu->transQuantBypass && !pu.cu->slice->getDisableSATDForRD()) ? DF_HAD : DF_SAD;
+  cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
+#else
   cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD);
+#endif
 
   return(cost);
 }
@@ -4925,6 +5135,9 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
   double        fWeight       = 1.0;
 
   PelUnitBuf  origBufTmp = m_tmpStorageLCU.getBuf( UnitAreaRelative( *pu.cu, pu ) );
+#if JVET_N0329_IBC_SEARCH_IMP 
+  enum DFunc distFunc = (pu.cu->transQuantBypass || pu.cs->slice->getDisableSATDForRD()) ? DF_SAD : DF_HAD;
+#endif
 
   // if Bi, set to ( 2 * Org - ListX )
   if ( bBi )
@@ -5010,7 +5223,11 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
   xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cs->slice->clpRng( COMPONENT_Y ) );
 
   // get error
+#if JVET_N0329_IBC_SEARCH_IMP
+  uiCostBest = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
+#else
   uiCostBest = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD );
+#endif
 
   // get cost with mv
   m_pcRdCost->setCostScale(0);
@@ -5202,7 +5419,11 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
     xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ) );
 
     // get error
+#if JVET_N0329_IBC_SEARCH_IMP 
+    Distortion uiCostTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
+#else
     Distortion uiCostTemp = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD );
+#endif
     DTRACE( g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx,D_COMMON), uiCostTemp );
 
     // get cost with mv
@@ -5235,7 +5456,11 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
   {
     xPredAffineBlk(COMPONENT_Y, pu, refPic, ctrlPtMv, predBuf, false, pu.cu->slice->clpRng(COMPONENT_Y));
     // get error
+#if JVET_N0329_IBC_SEARCH_IMP
+    Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
+#else
     Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD);
+#endif
     // get cost with mv
     m_pcRdCost->setCostScale(0);
     uint32_t bitsTemp = ruiBits;
@@ -5277,7 +5502,11 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu,
               clipMv( acMvTemp[j], pu.cu->lumaPos(), pu.cu->lumaSize(), *pu.cs->sps );
               xPredAffineBlk( COMPONENT_Y, pu, refPic, acMvTemp, predBuf, false, pu.cu->slice->clpRng( COMPONENT_Y ) );
 
+#if JVET_N0329_IBC_SEARCH_IMP
+              Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
+#else
               Distortion costTemp = m_pcRdCost->getDistPart( predBuf.Y(), pBuf->Y(), pu.cs->sps->getBitDepth( CHANNEL_TYPE_LUMA ), COMPONENT_Y, DF_HAD );
+#endif
               uint32_t bitsTemp   = ruiBits;
               bitsTemp += xCalcAffineMVBits( pu, acMvTemp, acMvPred, pu.cu->imv != 1 );
               costTemp = ( Distortion ) ( floor( fWeight * ( double ) costTemp ) + ( double ) m_pcRdCost->getCost( bitsTemp ) );
@@ -7066,7 +7295,12 @@ void InterSearch::symmvdCheckBestMvp(
         bufTmp.Y().addAvg(predBufA.Y(), predBufB.Y(), pu.cu->slice->clpRng(COMPONENT_Y));
 
       // calc distortion
+#if JVET_N0329_IBC_SEARCH_IMP
+      DFunc distFunc = (!pu.cu->transQuantBypass && !pu.cu->slice->getDisableSATDForRD()) ? DF_HAD : DF_SAD;
+      Distortion cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, distFunc);
+#else
       Distortion cost = m_pcRdCost->getDistPart(bufTmp.Y(), origBuf.Y(), pu.cs->sps->getBitDepth(CHANNEL_TYPE_LUMA), COMPONENT_Y, DF_HAD);
+#endif
 
       m_pcRdCost->setPredictor(amvpCur.mvCand[i]);
       uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(curMv.hor, curMv.ver, (pu.cu->imv << 1));
diff --git a/source/Lib/EncoderLib/InterSearch.h b/source/Lib/EncoderLib/InterSearch.h
index d2a4bccb92..acc2861c1f 100644
--- a/source/Lib/EncoderLib/InterSearch.h
+++ b/source/Lib/EncoderLib/InterSearch.h
@@ -118,6 +118,9 @@ private:
   int             m_affMVListMaxSize;
   Distortion      m_hevcCost;
   EncAffineMotion m_affineMotion;
+#if JVET_N0329_IBC_SEARCH_IMP
+  PatentBvCand    m_defaultCachedBvs;
+#endif
 protected:
   // interface to option
   EncCfg*         m_pcEncCfg;
@@ -150,8 +153,14 @@ protected:
   Mv              m_integerMv2Nx2N              [NUM_REF_PIC_LIST_01][MAX_NUM_REF];
 
   bool            m_isInitialized;
+
+#if JVET_N0329_IBC_SEARCH_IMP
+  Mv              m_acBVs[2 * IBC_NUM_CANDIDATES];
+  unsigned int    m_numBVs;
+#else
   unsigned int    m_numBVs, m_numBV16s;
   Mv              m_acBVs[IBC_NUM_CANDIDATES];
+#endif
   bool            m_useCompositeRef;
   Distortion      m_estMinDistSbt[NUMBER_SBT_MODE + 1]; // estimated minimum SSE value of the PU if using a SBT mode
   uint8_t         m_sbtRdoOrder[NUMBER_SBT_MODE];       // order of SBT mode in RDO
@@ -281,7 +290,18 @@ public:
   bool  predIBCSearch           ( CodingUnit& cu, Partitioner& partitioner, const int localSearchRangeX, const int localSearchRangeY, IbcHashMap& ibcHashMap);
   void  xIntraPatternSearch         ( PredictionUnit& pu, IntTZSearchStruct&  cStruct, Mv& rcMv, Distortion&  ruiCost, Mv* cMvSrchRngLT, Mv* cMvSrchRngRB, Mv* pcMvPred);
   void  xSetIntraSearchRange        ( PredictionUnit& pu, int iRoiWidth, int iRoiHeight, const int localSearchRangeX, const int localSearchRangeY, Mv& rcMvSrchRngLT, Mv& rcMvSrchRngRB);
+#if JVET_N0329_IBC_SEARCH_IMP
+  void  resetIbcSearch()
+  {
+    for (int i = 0; i < IBC_NUM_CANDIDATES; i++)
+    {
+      m_defaultCachedBvs.m_bvCands[i].setZero();
+    }
+    m_defaultCachedBvs.currCnt = 0;
+  }
+#else
   void  resetIbcSearch() { m_numBVs = m_numBV16s = 0; }
+#endif
   void  xIBCEstimation   ( PredictionUnit& pu, PelUnitBuf& origBuf, Mv     *pcMvPred, Mv     &rcMv, Distortion &ruiCost, const int localSearchRangeX, const int localSearchRangeY);
   void  xIBCSearchMVCandUpdate  ( Distortion  uiSad, int x, int y, Distortion* uiSadBestCand, Mv* cMVCand);
   int   xIBCSearchMVChromaRefine( PredictionUnit& pu, int iRoiWidth, int iRoiHeight, int cuPelX, int cuPelY, Distortion* uiSadBestCand, Mv*     cMVCand);
diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp
index 1fcaedc19c..5d73a658c8 100644
--- a/source/Lib/EncoderLib/IntraSearch.cpp
+++ b/source/Lib/EncoderLib/IntraSearch.cpp
@@ -678,7 +678,11 @@ void IntraSearch::estIntraPredLumaQT( CodingUnit &cu, Partitioner &partitioner,
     // after this point, don't use numModesForFullRD
 
     // PBINTRA fast
+#if JVET_N0329_IBC_SEARCH_IMP
+    if (m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable && !cs.slice->getDisableSATDForRD())
+#else
     if( m_pcEncCfg->getUsePbIntraFast() && !cs.slice->isIntra() && uiRdModeList.size() < numModesAvailable )
+#endif
     {
       if( CandHadList.size() < 3 || CandHadList[2] > cs.interHad * PBINTRA_RATIO )
       {
@@ -976,7 +980,11 @@ void IntraSearch::estIntraPredChromaQT( CodingUnit &cu, Partitioner &partitioner
       }
 
       DistParam distParam;
+#if JVET_N0329_IBC_SEARCH_IMP
+      const bool useHadamard = !cu.transQuantBypass;
+#else
       const bool useHadamard = true;
+#endif
       pu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation.
 
       initIntraPatternChType(cu, pu.Cb());
-- 
GitLab