From df873c462549bd90791cd12b4a22d18d58ecfc8f Mon Sep 17 00:00:00 2001
From: Fabrice URBAN <fabrice.urban@interdigital.com>
Date: Tue, 27 Aug 2024 01:31:18 +0000
Subject: [PATCH] Fix input file reading with rescaling (multilayer scalable)
 when using the "SourceScalingRatioHor/Ver" encoder options

---
 source/App/EncoderApp/EncApp.cpp            | 14 +++--
 source/Lib/EncoderLib/EncTemporalFilter.cpp | 64 +++++++++++++++++++--
 source/Lib/EncoderLib/EncTemporalFilter.h   |  6 ++
 3 files changed, 74 insertions(+), 10 deletions(-)

diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 4c3d0570b8..da3bc33375 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -1698,7 +1698,9 @@ void EncApp::createLib( const int layerIdx )
   {
     m_cEncLib.getTemporalFilter().init(m_frameSkip, m_inputBitDepth, m_msbExtendedBitDepth, m_internalBitDepth, m_sourceWidth,
                           sourceHeight, m_sourcePadding, m_clipInputVideoToRec709Range, m_inputFileName,
-                          m_chromaFormatIdc, m_inputColourSpaceConvert, m_iQP, m_gopBasedTemporalFilterStrengths,
+                          m_chromaFormatIdc, m_sourceWidthBeforeScale, m_sourceHeightBeforeScale,
+                          m_horCollocatedChromaFlag, m_verCollocatedChromaFlag,
+                          m_inputColourSpaceConvert, m_iQP, m_gopBasedTemporalFilterStrengths,
                           m_gopBasedTemporalFilterPastRefs, m_gopBasedTemporalFilterFutureRefs, m_firstValidFrame,
                           m_lastValidFrame, m_gopBasedTemporalFilterEnabled, m_cEncLib.getAdaptQPmap(),
                           m_cEncLib.getBIM(), m_ctuSize);
@@ -1707,7 +1709,9 @@ void EncApp::createLib( const int layerIdx )
   {
     m_cEncLib.getTemporalFilterForFG().init(m_frameSkip, m_inputBitDepth, m_msbExtendedBitDepth, m_internalBitDepth, m_sourceWidth,
                                sourceHeight, m_sourcePadding, m_clipInputVideoToRec709Range, m_inputFileName,
-                               m_chromaFormatIdc, m_inputColourSpaceConvert, m_iQP, m_fgcSEITemporalFilterStrengths,
+                               m_chromaFormatIdc, m_sourceWidthBeforeScale, m_sourceHeightBeforeScale,
+                               m_horCollocatedChromaFlag, m_verCollocatedChromaFlag,
+                               m_inputColourSpaceConvert, m_iQP, m_fgcSEITemporalFilterStrengths,
                                m_fgcSEITemporalFilterPastRefs, m_fgcSEITemporalFilterFutureRefs, m_firstValidFrame,
                                m_lastValidFrame, true, m_cEncLib.getAdaptQPmap(), m_cEncLib.getBIM(), m_ctuSize);
   }
@@ -1797,12 +1801,12 @@ bool EncApp::encodePrep( bool& eos )
                                 m_clipInputVideoToRec709Range);
     int w0 = m_sourceWidthBeforeScale;
     int h0 = m_sourceHeightBeforeScale;
-    int w1 = m_orgPic->get(COMPONENT_Y).width - SPS::getWinUnitX(m_chromaFormatIdc) * (m_confWinLeft + m_confWinRight);
-    int h1 = m_orgPic->get(COMPONENT_Y).height - SPS::getWinUnitY(m_chromaFormatIdc) * (m_confWinTop + m_confWinBottom);
+    int w1 = m_orgPic->get(COMPONENT_Y).width - m_sourcePadding[0];
+    int h1 = m_orgPic->get(COMPONENT_Y).height - m_sourcePadding[1];
     int xScale = ((w0 << ScalingRatio::BITS) + (w1 >> 1)) / w1;
     int yScale = ((h0 << ScalingRatio::BITS) + (h1 >> 1)) / h1;
     ScalingRatio scalingRatio = { xScale, yScale };
-    Window       conformanceWindow1(m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom);
+    Window conformanceWindow1(0, m_sourcePadding[0] / SPS::getWinUnitX(m_inputChromaFormatIDC), 0, m_sourcePadding[1] / SPS::getWinUnitY(m_inputChromaFormatIDC));
 
     bool downsampling = (m_sourceWidthBeforeScale > m_sourceWidth) || (m_sourceHeightBeforeScale > m_sourceHeight);
     bool useLumaFilter = downsampling;
diff --git a/source/Lib/EncoderLib/EncTemporalFilter.cpp b/source/Lib/EncoderLib/EncTemporalFilter.cpp
index ce0fb82fa2..31e605e45d 100644
--- a/source/Lib/EncoderLib/EncTemporalFilter.cpp
+++ b/source/Lib/EncoderLib/EncTemporalFilter.cpp
@@ -81,6 +81,8 @@ const int EncTemporalFilter::m_cuTreeThresh[4] =
 EncTemporalFilter::EncTemporalFilter()
   : m_frameSkip(0)
   , m_chromaFormatIdc(ChromaFormat::UNDEFINED)
+  , m_sourceWidthBeforeScale(0)
+  , m_sourceHeightBeforeScale(0)
   , m_sourceWidth(0)
   , m_sourceHeight(0)
   , m_QP(0)
@@ -91,6 +93,8 @@ EncTemporalFilter::EncTemporalFilter()
 void EncTemporalFilter::init(const int frameSkip, const BitDepths &inputBitDepth, const BitDepths &msbExtendedBitDepth,
                              const BitDepths &internalBitDepth, const int width, const int height, const int *pad,
                              const bool rec709, const std::string &filename, const ChromaFormat inputChromaFormatIDC,
+                             const int sourceWidthBeforeScale, const int sourceHeightBeforeScale,
+                             const int sourceHorCollocatedChromaFlag, const int sourceVerCollocatedChromaFlag, 
                              const InputColourSpaceConversion colorSpaceConv, const int qp,
                              const std::map<int, double> &temporalFilterStrengths, const int pastRefs,
                              const int futureRefs, const int firstValidFrame, const int lastValidFrame,
@@ -111,6 +115,10 @@ void EncTemporalFilter::init(const int frameSkip, const BitDepths &inputBitDepth
   m_clipInputVideoToRec709Range = rec709;
   m_inputFileName               = filename;
   m_chromaFormatIdc             = inputChromaFormatIDC;
+  m_sourceWidthBeforeScale = sourceWidthBeforeScale;
+  m_sourceHeightBeforeScale = sourceHeightBeforeScale;
+  m_sourceHorCollocatedChromaFlag = sourceHorCollocatedChromaFlag;
+  m_sourceVerCollocatedChromaFlag = sourceVerCollocatedChromaFlag;
   m_inputColourSpaceConvert = colorSpaceConv;
   m_area = Area(0, 0, width, height);
   m_QP   = qp;
@@ -155,7 +163,14 @@ bool EncTemporalFilter::filter(PelStorage *orgPic, int receivedPoc)
     const int  lastFrame      = std::min(currentFilePoc + m_futureRefs, m_lastValidFrame);
     VideoIOYuv yuvFrames;
     yuvFrames.open(m_inputFileName, false, m_inputBitDepth, m_msbExtendedBitDepth, m_internalBitDepth);
-    yuvFrames.skipFrames(firstFrame, m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIdc);
+    if (m_sourceWidthBeforeScale != 0 && m_sourceHeightBeforeScale != 0)
+    {
+      yuvFrames.skipFrames(firstFrame, m_sourceWidthBeforeScale, m_sourceHeightBeforeScale, m_chromaFormatIdc);
+    }
+    else
+    {
+      yuvFrames.skipFrames(firstFrame, m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIdc);
+    }
 
     std::deque<TemporalFilterSourcePicInfo> srcFrameInfo;
 
@@ -177,7 +192,14 @@ bool EncTemporalFilter::filter(PelStorage *orgPic, int receivedPoc)
     {
       if (poc == currentFilePoc)
       { // hop over frame that will be filtered
-        yuvFrames.skipFrames(1, m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIdc);
+        if (m_sourceWidthBeforeScale != 0 && m_sourceHeightBeforeScale != 0)
+        {
+          yuvFrames.skipFrames(1, m_sourceWidthBeforeScale, m_sourceHeightBeforeScale, m_chromaFormatIdc);
+        }
+        else
+        {
+          yuvFrames.skipFrames(1, m_sourceWidth - m_pad[0], m_sourceHeight - m_pad[1], m_chromaFormatIdc);
+        }
         continue;
       }
       srcFrameInfo.push_back(TemporalFilterSourcePicInfo());
@@ -185,9 +207,41 @@ bool EncTemporalFilter::filter(PelStorage *orgPic, int receivedPoc)
 
       PelStorage dummyPicBufferTO; // Only used temporary in yuvFrames.read
       srcPic.picBuffer.create(m_chromaFormatIdc, m_area, 0, m_padding);
-      dummyPicBufferTO.create(m_chromaFormatIdc, m_area, 0, m_padding);
-      if (!yuvFrames.read(srcPic.picBuffer, dummyPicBufferTO, m_inputColourSpaceConvert, m_pad, m_chromaFormatIdc,
-                          m_clipInputVideoToRec709Range))
+      bool readOk = false;
+      if (m_sourceWidthBeforeScale != 0 && m_sourceHeightBeforeScale != 0)
+      {
+        Area areaPrescale(0, 0, m_sourceWidthBeforeScale, m_sourceHeightBeforeScale);
+        PelStorage m_orgPicBeforeScale;
+        m_orgPicBeforeScale.create(m_chromaFormatIdc, areaPrescale, 0, m_padding);
+        dummyPicBufferTO.create(m_chromaFormatIdc, areaPrescale, 0, m_padding);
+        readOk = yuvFrames.read(m_orgPicBeforeScale, dummyPicBufferTO, m_inputColourSpaceConvert, m_pad, m_chromaFormatIdc,
+          m_clipInputVideoToRec709Range);
+        if (readOk)
+        {
+          int w0 = m_sourceWidthBeforeScale;
+          int h0 = m_sourceHeightBeforeScale;
+          int w1 = m_sourceWidth - m_pad[0];
+          int h1 = m_sourceHeight - m_pad[1];
+          int xScale = ((w0 << ScalingRatio::BITS) + (w1 >> 1)) / w1;
+          int yScale = ((h0 << ScalingRatio::BITS) + (h1 >> 1)) / h1;
+          ScalingRatio scalingRatio = { xScale, yScale };
+          Window conformanceWindow1(0, m_pad[0] / SPS::getWinUnitX(m_chromaFormatIdc), 0, m_pad[1] / SPS::getWinUnitY(m_chromaFormatIdc));
+
+          bool downsampling = (m_sourceWidthBeforeScale > m_sourceWidth) || (m_sourceHeightBeforeScale > m_sourceHeight);
+          bool useLumaFilter = downsampling;
+          Picture::rescalePicture(scalingRatio, m_orgPicBeforeScale, Window(), srcPic.picBuffer, conformanceWindow1,
+            m_chromaFormatIdc, m_internalBitDepth, useLumaFilter, downsampling,
+            m_sourceHorCollocatedChromaFlag != 0, m_sourceVerCollocatedChromaFlag != 0);
+        }
+      }
+      else
+      {
+        dummyPicBufferTO.create(m_chromaFormatIdc, m_area, 0, m_padding);
+        readOk = yuvFrames.read(srcPic.picBuffer, dummyPicBufferTO, m_inputColourSpaceConvert, m_pad, m_chromaFormatIdc,
+          m_clipInputVideoToRec709Range);
+      }
+
+      if(!readOk)
       {
         // eof or read fail
         srcPic.picBuffer.destroy();
diff --git a/source/Lib/EncoderLib/EncTemporalFilter.h b/source/Lib/EncoderLib/EncTemporalFilter.h
index d6ac28416d..241580d7b7 100644
--- a/source/Lib/EncoderLib/EncTemporalFilter.h
+++ b/source/Lib/EncoderLib/EncTemporalFilter.h
@@ -110,6 +110,8 @@ public:
   void init(const int frameSkip, const BitDepths &inputBitDepth, const BitDepths &msbExtendedBitDepth,
             const BitDepths &internalBitDepth, const int width, const int height, const int *pad, const bool rec709,
             const std::string &filename, const ChromaFormat inputChroma,
+            const int sourceWidthBeforeScale, const int sourceHeightBeforeScale,
+            const int sourceHorCollocatedChromaFlag, const int sourceVerCollocatedChromaFlag,
             const InputColourSpaceConversion colorSpaceConv, const int qp,
             const std::map<int, double> &temporalFilterStrengths, const int pastRefs, const int futureRefs,
             const int firstValidFrame, const int lastValidFrame, const bool bMCTFenabled,
@@ -137,6 +139,10 @@ private:
   BitDepths m_internalBitDepth;
 
   ChromaFormat m_chromaFormatIdc;
+  int m_sourceWidthBeforeScale;
+  int m_sourceHeightBeforeScale;
+  int m_sourceHorCollocatedChromaFlag;
+  int m_sourceVerCollocatedChromaFlag;
   int m_sourceWidth;
   int m_sourceHeight;
   int m_QP;
-- 
GitLab