From 831167838f60dc77dd786395474448183fa1bbe3 Mon Sep 17 00:00:00 2001
From: Adam Wieckowski <adam.wieckowski@hhi.fraunhofer.de>
Date: Thu, 21 Mar 2019 16:45:20 +0100
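Subject: [PATCH] work in progress on ENABLE_SPLIT_PARALLELISM

* TypeDef.h: switch the default of ENABLE_SPLIT_PARALLELISM from 0 to 1.
* EncCu: remove m_pImvTempCS and the imvCS parameter of
  xEncodeInterResidual; remove m_subMergeBlkSize, m_subMergeBlkNum,
  m_prevPOC, shareParentPos and shareParentSize.
* EncCu::copyState: also copy the merge share-list state (m_shareState,
  m_shareBnd*, the DecCu and InterSearch share states) and pass a separate
  keepPred flag to useSubStructure.
* InterSearch::copyState: also copy m_affineMotion and m_uniMotions.
* CodingStructure: initStructData takes motionLut from the parent;
  useSubStructure checks the chType of the destination structure.
* EncModeCtrlMTnoRQT::isParallelSplit: apply the quantization-group/DQP
  depth restriction only when the PPS uses DQP.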

---
 source/Lib/CommonLib/CodingStructure.cpp |  8 ++-
 source/Lib/CommonLib/InterPrediction.h   |  3 +
 source/Lib/CommonLib/TypeDef.h           |  2 +-
 source/Lib/DecoderLib/DecCu.h            |  3 +
 source/Lib/EncoderLib/EncCu.cpp          | 82 ++++++------------------
 source/Lib/EncoderLib/EncCu.h            | 11 +---
 source/Lib/EncoderLib/EncModeCtrl.cpp    | 10 ++-
 source/Lib/EncoderLib/InterSearch.cpp    |  2 +
 8 files changed, 44 insertions(+), 77 deletions(-)

diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp
index 8778f3567..e79e3c8ec 100644
--- a/source/Lib/CommonLib/CodingStructure.cpp
+++ b/source/Lib/CommonLib/CodingStructure.cpp
@@ -867,9 +867,9 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C
   if( cpyReco ) picture->getRecoBuf( clippedArea ).copyFrom( subRecoBuf );
 
 #if JVET_M0483_IBC
-  if (!subStruct.m_isTuEnc && ((!slice->isIntra() || slice->getSPS()->getIBCFlag()) && subStruct.chType != CHANNEL_TYPE_CHROMA))
+  if (!subStruct.m_isTuEnc && ((!slice->isIntra() || slice->getSPS()->getIBCFlag()) && chType != CHANNEL_TYPE_CHROMA))
 #else
-  if (!subStruct.m_isTuEnc && (!slice->isIntra() && subStruct.chType != CHANNEL_TYPE_CHROMA))
+  if (!subStruct.m_isTuEnc && (!slice->isIntra() && chType != CHANNEL_TYPE_CHROMA))
 #endif
   {
     // copy motion buffer
@@ -1149,6 +1149,10 @@ void CodingStructure::initStructData( const int &QP, const bool &_isLosses, cons
   {
     getMotionBuf()      .memset( 0 );
   }
+  if( parent )
+  {
+    motionLut = parent->motionLut;
+  }
 
   fracBits = 0;
   dist     = 0;
diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h
index e329cc291..824351800 100644
--- a/source/Lib/CommonLib/InterPrediction.h
+++ b/source/Lib/CommonLib/InterPrediction.h
@@ -214,6 +214,9 @@ public:
 #endif
 #if JVET_M0170_MRG_SHARELIST
   void    setShareState(int shareStateIn) {m_shareState = shareStateIn;}
+#if ENABLE_SPLIT_PARALLELISM
+  int     getShareState() const { return m_shareState; }
+#endif
 #endif
 };
 
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 04de41c87..b09ad5b63 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -221,7 +221,7 @@ typedef std::pair<int, int>  TrCost;
 
 #endif
 #ifndef ENABLE_SPLIT_PARALLELISM
-#define ENABLE_SPLIT_PARALLELISM                          0
+#define ENABLE_SPLIT_PARALLELISM                          1
 #endif
 #if ENABLE_SPLIT_PARALLELISM
 #define PARL_SPLIT_MAX_NUM_JOBS                           6                             // number of parallel jobs that can be defined and need memory allocated
diff --git a/source/Lib/DecoderLib/DecCu.h b/source/Lib/DecoderLib/DecCu.h
index 40114ce68..8990dfec0 100644
--- a/source/Lib/DecoderLib/DecCu.h
+++ b/source/Lib/DecoderLib/DecCu.h
@@ -79,6 +79,9 @@ public:
 
 #if JVET_M0170_MRG_SHARELIST
   void setShareStateDec (int shareStateDecIn)  { m_shareStateDec = shareStateDecIn; }
+#if ENABLE_SPLIT_PARALLELISM
+  int  getShareStateDec () const { return m_shareStateDec; }
+#endif
 #endif
   /// reconstruct Ctu information
 protected:
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index 39c6a5e43..b9ba49d91 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -118,9 +118,6 @@ void EncCu::create( EncCfg* encCfg )
     }
   }
 
-  // WIA: only the weight==height case is relevant without QTBT
-  m_pImvTempCS = nullptr;
-
   m_cuChromaQpOffsetIdxPlus1 = 0;
 
   unsigned maxDepth = numWidths + numHeights;
@@ -199,22 +196,6 @@ void EncCu::destroy()
   delete m_modeCtrl;
   m_modeCtrl = nullptr;
 
-  // WIA: only the weight==height case is relevant without QTBT
-  if( m_pImvTempCS )
-  {
-    for( unsigned w = 0; w < numWidths; w++ )
-    {
-      if( m_pImvTempCS[w] )
-      {
-        m_pImvTempCS[w]->destroy();
-        delete[] m_pImvTempCS[w];
-      }
-    }
-
-    delete[] m_pImvTempCS;
-    m_pImvTempCS = nullptr;
-  }
-
   for (unsigned ui = 0; ui < MMVD_MRG_MAX_RD_BUF_NUM; ui++)
   {
     m_acMergeBuffer[ui].destroy();
@@ -281,9 +262,6 @@ void EncCu::init( EncLib* pcEncLib, const SPS& sps PARL_PARAM( const int tId ) )
 #if JVET_M0102_INTRA_SUBPARTITIONS
   m_pcIntraSearch->setModeCtrl( m_modeCtrl );
 #endif
-  ::memset(m_subMergeBlkSize, 0, sizeof(m_subMergeBlkSize));
-  ::memset(m_subMergeBlkNum, 0, sizeof(m_subMergeBlkNum));
-  m_prevPOC = MAX_UINT;
 
 #if  JVET_M0255_FRACMMVD_SWITCH
   if ( ( m_pcEncCfg->getIBCHashSearch() && m_pcEncCfg->getIBCMode() ) || m_pcEncCfg->getAllowDisFracMMVD() )
@@ -653,14 +631,6 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
   const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth()  );
 
   const UnitArea currCsArea = clipArea( CS::getArea( *bestCS, bestCS->area, partitioner.chType ), *tempCS->picture );
-#if JVET_M0483_IBC
-  if (m_pImvTempCS && (!slice.isIntra() || slice.getSPS()->getIBCFlag()))
-#else
-  if( m_pImvTempCS && !slice.isIntra() )
-#endif
-  {
-    tempCS->initSubStructure( *m_pImvTempCS[wIdx], partitioner.chType, partitioner.currArea(), false );
-  }
 
   tempCS->chType = partitioner.chType;
   bestCS->chType = partitioner.chType;
@@ -680,6 +650,7 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par
 #endif
     slsSbt->resetSaveloadSbt( maxSLSize );
 #if ENABLE_SPLIT_PARALLELISM
+    CHECK( tempCS->picture->scheduler.getSplitJobId() != 0, "The SBT search reset need to happen in sequential region." );
     if (m_pcEncCfg->getNumSplitThreads() > 1)
     {
       for (int jId = 1; jId < NUM_RESERVERD_SPLIT_JOBS; jId++)
@@ -1163,8 +1134,13 @@ void EncCu::copyState( EncCu* other, Partitioner& partitioner, const UnitArea& c
           CodingStructure* dst =        m_pBestCS[wIdx][hIdx];
     const CodingStructure* src = other->m_pBestCS[wIdx][hIdx];
     bool keepResi = KEEP_PRED_AND_RESI_SIGNALS;
+#if JVET_M0427_INLOOP_RESHAPER
+    bool keepPred = true;
+#else
+    bool keepPred = KEEP_PRED_AND_RESI_SIGNALS;
+#endif
 
-    dst->useSubStructure( *src, partitioner.chType, currArea, KEEP_PRED_AND_RESI_SIGNALS, true, keepResi, keepResi );
+    dst->useSubStructure( *src, partitioner.chType, currArea, keepPred, true, keepResi, keepResi );
 
     dst->cost           =  src->cost;
     dst->dist           =  src->dist;
@@ -1189,7 +1165,16 @@ void EncCu::copyState( EncCu* other, Partitioner& partitioner, const UnitArea& c
     encReshapeThis->copyState( *encReshapeOther );
   }
 #endif
+#if JVET_M0170_MRG_SHARELIST
+  m_shareState    = other->m_shareState;
+  m_shareBndPosX  = other->m_shareBndPosX;
+  m_shareBndPosY  = other->m_shareBndPosY;
+  m_shareBndSizeW = other->m_shareBndSizeW;
+  m_shareBndSizeH = other->m_shareBndSizeH;
+  setShareStateDec( other->getShareStateDec() );
+  m_pcInterSearch->setShareState( other->m_pcInterSearch->getShareState() );
 
+#endif
   m_CABACEstimator->getCtx() = other->m_CABACEstimator->getCtx();
 }
 #endif
@@ -1993,12 +1978,10 @@ void EncCu::xCheckRDCostHashInter( CodingStructure *&tempCS, CodingStructure *&b
 
 #if JVET_M0464_UNI_MTS
     xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0
-      , m_pImvTempCS ? m_pImvTempCS[wIdx] : NULL
       , 0
       , &equGBiCost
 #else
     xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0
-      , m_pImvTempCS ? m_pImvTempCS[wIdx] : NULL
       , 1
       , 0
       , &equGBiCost
@@ -2701,10 +2684,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *&
       }
 
 #if JVET_M0464_UNI_MTS
-      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL );
+      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL );
 #else
       xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass
-        , NULL
         , 1
         , uiNoResidualPass == 0 ? &candHasNoResidual[uiMrgHADIdx] : NULL);
 #endif
@@ -3063,9 +3045,9 @@ void EncCu::xCheckRDCostMergeTriangle2Nx2N( CodingStructure *&tempCS, CodingStru
         }
 
 #if JVET_M0464_UNI_MTS
-        xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, NULL, ( noResidualPass == 0 ? &trianglecandHasNoResidual[mergeCand] : NULL ) );
+        xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, ( noResidualPass == 0 ? &trianglecandHasNoResidual[mergeCand] : NULL ) );
 #else
-        xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, NULL, true, ( (noResidualPass == 0 ) ? &trianglecandHasNoResidual[mergeCand] : NULL ) );
+        xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, true, ( (noResidualPass == 0 ) ? &trianglecandHasNoResidual[mergeCand] : NULL ) );
 #endif
 
         if (m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip)
@@ -3345,9 +3327,9 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct
       }
 
 #if JVET_M0464_UNI_MTS
-      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, ( uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL ) );
+      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, ( uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL ) );
 #else
-      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, true, ((uiNoResidualPass == 0) ? &candHasNoResidual[uiMergeCand] : NULL) );
+      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, true, ((uiNoResidualPass == 0) ? &candHasNoResidual[uiMergeCand] : NULL) );
 #endif
 
       if ( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip )
@@ -4067,12 +4049,10 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC
 
 #if JVET_M0464_UNI_MTS
   xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0
-                        , m_pImvTempCS ? m_pImvTempCS[wIdx] : NULL
                         , 0
                         , &equGBiCost
 #else
   xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0
-    , m_pImvTempCS ? m_pImvTempCS[wIdx] : NULL
     , 1
     , 0
     , &equGBiCost
@@ -4341,12 +4321,10 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be
 
 #if JVET_M0464_UNI_MTS
   xEncodeInterResidual( tempCS, bestCS, partitioner, encTestModeBase, 0
-                        , NULL
                         , 0
                         , &equGBiCost
 #else
   xEncodeInterResidual( tempCS, bestCS, partitioner, encTestModeBase, 0
-    , NULL
     , true
     , 0
     , &equGBiCost
@@ -4607,7 +4585,6 @@ void EncCu::xEncodeInterResidual(   CodingStructure *&tempCS
                                   , Partitioner &partitioner
                                   , const EncTestMode& encTestMode
                                   , int residualPass
-                                  , CodingStructure* imvCS
                                   , bool* bestHasNonResi
                                   , double* equGBiCost
 #else
@@ -4823,14 +4800,6 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be
 #if !JVET_M0464_UNI_MTS
     double emtFirstPassCost = tempCS->cost;
 #endif
-    if( imvCS && (tempCS->cost < imvCS->cost) )
-    {
-      if( imvCS->cost != MAX_DOUBLE )
-      {
-        imvCS->initStructData( encTestMode.qp, encTestMode.lossless );
-      }
-      imvCS->copyStructure( *tempCS, partitioner.chType );
-    }
     if( NULL != bestHasNonResi && (bestCostInternal > tempCS->cost) )
     {
       bestCostInternal = tempCS->cost;
@@ -5035,15 +5004,6 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be
 
       xCheckDQP( *tempCS, partitioner );
 
-      if( imvCS && ( tempCS->cost < imvCS->cost ) )
-      {
-        if( imvCS->cost != MAX_DOUBLE )
-        {
-          imvCS->initStructData( encTestMode.qp, encTestMode.lossless );
-        }
-        imvCS->copyStructure( *tempCS, partitioner.chType );
-      }
-
       if( NULL != bestHasNonResi && ( bestCostInternal > tempCS->cost ) )
       {
         bestCostInternal = tempCS->cost;
diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h
index 9a8259056..a1938e7b4 100644
--- a/source/Lib/EncoderLib/EncCu.h
+++ b/source/Lib/EncoderLib/EncCu.h
@@ -125,7 +125,6 @@ private:
   CABACWriter*          m_CABACEstimator;
   RateCtrl*             m_pcRateCtrl;
   IbcHashMap            m_ibcHashMap;
-  CodingStructure     **m_pImvTempCS;
   EncModeCtrl          *m_modeCtrl;
 #if JVET_M0170_MRG_SHARELIST
   int                  m_shareState;
@@ -140,9 +139,7 @@ private:
   PelStorage            m_acTriangleWeightedBuffer[TRIANGLE_MAX_NUM_CANDS]; // to store weighted prediction pixles
   double                m_mergeBestSATDCost;
   MotionInfo            m_SubPuMiBuf      [( MAX_CU_SIZE * MAX_CU_SIZE ) >> ( MIN_CU_LOG2 << 1 )];
-  unsigned int          m_subMergeBlkSize[10];
-  unsigned int          m_subMergeBlkNum[10];
-  unsigned int          m_prevPOC;
+
   int                   m_ctuIbcSearchRangeX;
   int                   m_ctuIbcSearchRangeY;
 #if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
@@ -188,10 +185,6 @@ public:
   EncCfg*     getEncCfg()            const { return m_pcEncCfg;          }
 #endif
 
-#if JVET_M0170_MRG_SHARELIST
-  Position shareParentPos;
-  Size     shareParentSize;
-#endif
   ~EncCu();
 
 protected:
@@ -237,13 +230,11 @@ protected:
                              , Partitioner &partitioner
                              , const EncTestMode& encTestMode
                              , int residualPass       = 0
-                             , CodingStructure* imvCS = NULL
                              , bool* bestHasNonResi   = NULL
                              , double* equGBiCost     = NULL
                            );
 #else
   void xEncodeInterResidual   ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass = 0
-    , CodingStructure* imvCS = NULL
     , int emtMode = 1
     , bool* bestHasNonResi = NULL
     , double* equGBiCost = NULL
diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp
index c056b9e29..4d360e9ff 100644
--- a/source/Lib/EncoderLib/EncModeCtrl.cpp
+++ b/source/Lib/EncoderLib/EncModeCtrl.cpp
@@ -1326,7 +1326,7 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru
   xGetMinMaxQP( minQP, maxQP, cs, partitioner, baseQP, *cs.sps, *cs.pps, true );
 #endif
   bool checkIbc = true;
-  if (cs.chType == CHANNEL_TYPE_CHROMA)
+  if (cs.chType == CHANNEL_TYPE_CHROMA)
   {
     IbcLumaCoverage ibcLumaCoverage = cs.getIbcLumaCoverage(cs.area.Cb());
     switch (ibcLumaCoverage)
@@ -2235,13 +2235,17 @@ int EncModeCtrlMTnoRQT::getNumParallelJobs( const CodingStructure &cs, Partition
 bool EncModeCtrlMTnoRQT::isParallelSplit( const CodingStructure &cs, Partitioner& partitioner ) const
 {
   if( partitioner.getImplicitSplit( cs ) != CU_DONT_SPLIT || cs.picture->scheduler.getSplitJobId() != 0 ) return false;
-  if( partitioner.currDepth <= cs.pps->getMaxCuDQPDepth() ) return false;
+#if JVET_M0113_M0188_QG_SIZE
+  if( cs.pps->getUseDQP() && partitioner.currQgEnable() ) return false;
+#else
+  if( cs.pps->getUseDQP() && partitioner.currDepth <= cs.pps->getMaxCuDQPDepth() ) return false;
+#endif
   const int numJobs = getNumParallelJobs( cs, partitioner );
   const int numPxl  = partitioner.currArea().Y().area();
   const int parlAt  = m_pcEncCfg->getNumSplitThreads() <= 3 ? 1024 : 256;
   if(  cs.slice->isIntra() && numJobs > 2 && ( numPxl == parlAt || !partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) ) return true;
   if( !cs.slice->isIntra() && numJobs > 1 && ( numPxl == parlAt || !partitioner.canSplit( CU_QUAD_SPLIT, cs ) ) ) return true;
-  return false;
+  return false;
 }
 
 bool EncModeCtrlMTnoRQT::parallelJobSelector( const EncTestMode& encTestmode, const CodingStructure &cs, Partitioner& partitioner ) const
diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp
index 5126a4637..0d8e96948 100644
--- a/source/Lib/EncoderLib/InterSearch.cpp
+++ b/source/Lib/EncoderLib/InterSearch.cpp
@@ -176,6 +176,8 @@ void InterSearch::setTempBuffers( CodingStructure ****pSplitCS, CodingStructure
 void InterSearch::copyState( const InterSearch& other )
 {
   memcpy( m_aaiAdaptSR, other.m_aaiAdaptSR, sizeof( m_aaiAdaptSR ) );
+  m_affineMotion = other.m_affineMotion;
+  m_uniMotions   = other.m_uniMotions;
 }
 #endif
 
-- 
GitLab