From 6d246424e1aa96f88c33814c1fc0159438accd6a Mon Sep 17 00:00:00 2001
From: Philip Cowan <cowanp@sharplabs.com>
Date: Wed, 14 Dec 2022 01:07:56 +0000
Subject: [PATCH] JVET-AB0058: signal frame upsampling in neural network
 post-filter characteristics

---
 doc/software-manual.tex              |  9 +++++++++
 source/App/EncoderApp/EncApp.cpp     |  7 +++++++
 source/App/EncoderApp/EncAppCfg.cpp  | 30 ++++++++++++++++++++++++++++
 source/App/EncoderApp/EncAppCfg.h    |  4 ++++
 source/Lib/CommonLib/SEI.h           |  4 ++++
 source/Lib/CommonLib/TypeDef.h       | 13 ++++++++++++
 source/Lib/DecoderLib/SEIread.cpp    | 14 +++++++++++++
 source/Lib/EncoderLib/EncCfg.h       | 10 ++++++++++
 source/Lib/EncoderLib/SEIEncoder.cpp |  7 +++++++
 source/Lib/EncoderLib/SEIwrite.cpp   | 10 ++++++++++
 10 files changed, 108 insertions(+)

diff --git a/doc/software-manual.tex b/doc/software-manual.tex
index ff15194fb..02f02eb24 100644
--- a/doc/software-manual.tex
+++ b/doc/software-manual.tex
@@ -5584,6 +5584,7 @@ Specifies sii_num_units_in_shutter_interval for single entry.If multiple entries
     2 & Chroma upsampling from the 4:2:0 chroma format to the 4:2:2 or 4:4:4 chroma format, or from the 4:2:2 chroma format to the 4:4:4 chroma format \\
     3 & Increasing the width or height of the cropped decoded output picture without changing the chroma format \\
     4 & Increasing the width or height of the cropped decoded output picture and upsampling the chroma format \\
+    5 & Frame rate upsampling \\
   \end{tabular}
   \\
   \Option{SEINNPostFilterCharacteristicsOutSubWidthCFlag\emph{i}} &
@@ -5782,6 +5783,14 @@ Specifies sii_num_units_in_shutter_interval for single entry.If multiple entries
   \Default{""} &
   Specifies the NNR bitstream of the \emph{i}-th neural network post-filter.
   \\
+  \Option{SEINNPostFilterCharacteristicsNumberInputDecodedPicsMinusTwo\emph{i}} &
+  \Default{0} &
+  Specifies the number of decoded output pictures minus 2 used as input for the \emph{i}-th neural network post-filter.
+  \\
+  \Option{SEINNPostFilterCharacteristicsNumberInterpolatedPics\emph{i}} &
+  \Default{0} &
+  Specifies a list whose \emph{j}-th entry is the number of interpolated pictures generated by the \emph{i}-th neural network post-filter between the \emph{j}-th and (\emph{j}+1)-th pictures used as input for the post-processing filter.
+  \\
 \end{OptionTableNoShorthand}
 
 \begin{OptionTableNoShorthand}{Neural network post-filter characteristics}{tab:sei-nn-post-filter-activation}
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index cdef6f64c..38f1f399c 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -1175,6 +1175,13 @@ void EncApp::xInitLibCfg( int layerIdx )
         m_cEncLib.setNNPostFilterSEICharacteristicsPicWidthInLumaSamples   (m_nnPostFilterSEICharacteristicsPicWidthInLumaSamples[i], i);
         m_cEncLib.setNNPostFilterSEICharacteristicsPicHeightInLumaSamples  (m_nnPostFilterSEICharacteristicsPicHeightInLumaSamples[i], i);
       }
+#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING
+      if (m_cEncLib.getNNPostFilterSEICharacteristicsPurpose(i) == NNPC_PurposeType::FRANE_RATE_UPSAMPLING)   // named purpose (value 5) instead of a magic number; these two settings are forwarded only for frame rate upsampling
+      {
+        m_cEncLib.setNNPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2(m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[i], i);
+        m_cEncLib.setNNPostFilterSEICharacteristicsNumberInterpolatedPictures(m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[i], i);
+      }
+#endif
       m_cEncLib.setNNPostFilterSEICharacteristicsComponentLastFlag       (m_nnPostFilterSEICharacteristicsComponentLastFlag[i], i);
 #if M60678_BALLOT_COMMENTS_OF_FI_03
       m_cEncLib.setNNPostFilterSEICharacteristicsInpFormatIdc            (m_nnPostFilterSEICharacteristicsInpFormatIdc[i], i);
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index 4f13e1222..1da5d0cde 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -738,6 +738,14 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   SMultiValueInput<uint16_t>   cfg_poSEIProcessingOrder (0, 255, 0, 256);
 #endif
 
+#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING
+  std::vector<SMultiValueInput<uint32_t>>   cfg_nnPostFilterSEICharacteristicsInterpolatedPicturesList;   // one multi-value option holder per NN post-filter index
+  for (int i = 0; i < MAX_NUM_NN_POST_FILTERS; i++)
+  {
+    cfg_nnPostFilterSEICharacteristicsInterpolatedPicturesList.push_back(SMultiValueInput<uint32_t>(0, std::numeric_limits<uint32_t>::max(), 1, 0));   // NOTE(review): presumably (min value, max value, min count, max count) - confirm against SMultiValueInput
+  }
+#endif
+
 #if ENABLE_TRACING
   string sTracingRule;
   string sTracingFile;
@@ -1871,6 +1879,16 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
     payloadFilename << "SEINNPostFilterCharacteristicsPayloadFilename" << i;
     opts.addOptions()(payloadFilename.str(), m_nnPostFilterSEICharacteristicsPayloadFilename[i], string(""), "Specifies the NNR bitstream in the Neural Network Post Filter Characteristics SEI message");
 
+#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING
+    std::ostringstream numberDecodedInputPics;   // option name carries the filter index i as a suffix
+    numberDecodedInputPics << "SEINNPostFilterCharacteristicsNumberInputDecodedPicsMinusTwo" << i;
+    opts.addOptions()(numberDecodedInputPics.str(), m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[i], 0u, "Specifies the number of decoded output pictures used as input for the post processing filter, minus 2");
+
+    std::ostringstream numberInterpolatedPics;   // list-valued option; parsed values are copied out of the holder after option parsing
+    numberInterpolatedPics << "SEINNPostFilterCharacteristicsNumberInterpolatedPics" << i;
+    opts.addOptions()(numberInterpolatedPics.str(), cfg_nnPostFilterSEICharacteristicsInterpolatedPicturesList[i], cfg_nnPostFilterSEICharacteristicsInterpolatedPicturesList[i], "Number of pictures to interpolate");
+#endif
+
     opts.addOptions()("SEINNPostFilterActivationEnabled", m_nnPostFilterSEIActivationEnabled, false, "Control use of the Neural Network Post Filter SEI on current picture");
     opts.addOptions()("SEINNPostFilterActivationId", m_nnPostFilterSEIActivationId , 0u,        "Id of the Neural Network Post Filter on current picture");
   }
@@ -2375,6 +2393,18 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   m_ext360.processOptions(ext360CfgContext);
 #endif
 
+#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING
+  for (int i = 0; i < MAX_NUM_NN_POST_FILTERS; ++i)   // copy each parsed list into the config member and validate its length
+  {
+    m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[i] = cfg_nnPostFilterSEICharacteristicsInterpolatedPicturesList[i].values;
+    if (m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[i].empty())   // no list given: fall back to a single entry of 0
+    {
+      m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[i].push_back(0);
+    }
+    CHECK(m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[i].size() <= m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[i], "Number Interpolated Pictures List must contain at least NumberInputDecodedPicsMinusTwo + 1 entries");   // the SEI writer indexes entries 0..minus2 inclusive, so minus2 + 1 entries are required
+  }
+#endif
+
   if (isY4mFileExt(m_inputFileName))
   {
     int          width = 0, height = 0, frameRate = 0, inputBitDepth = 0;
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index 5e1e9f09d..5dcf14af8 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -771,6 +771,10 @@ protected:
 
   bool                  m_nnPostFilterSEIActivationEnabled;
   uint32_t              m_nnPostFilterSEIActivationId;
+#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING
+  uint32_t              m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[MAX_NUM_NN_POST_FILTERS];   // per filter: value of option SEINNPostFilterCharacteristicsNumberInputDecodedPicsMinusTwo<i>
+  std::vector<uint32_t> m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[MAX_NUM_NN_POST_FILTERS];   // per filter: list parsed from option SEINNPostFilterCharacteristicsNumberInterpolatedPics<i>
+#endif
 
   bool                  m_poSEIEnabled;
   std::vector<uint16_t> m_poSEIPayloadType;
diff --git a/source/Lib/CommonLib/SEI.h b/source/Lib/CommonLib/SEI.h
index 995cb008b..6e175d2ba 100644
--- a/source/Lib/CommonLib/SEI.h
+++ b/source/Lib/CommonLib/SEI.h
@@ -1285,6 +1285,10 @@ public:
 #if JVET_AB0135_NN_SEI_COMPLEXITY_MOD
   uint32_t       m_totalKilobyteSize;
 #endif
+#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING
+  uint32_t       m_numberInputDecodedPicturesMinus2;   // nnpfc_number_of_input_pictures_minus2; set by the SEI reader and SEI encoder only when m_purpose is FRANE_RATE_UPSAMPLING
+  std::vector<uint32_t> m_numberInterpolatedPictures;   // nnpfc_interpolated_pictures values; the reader sizes this to m_numberInputDecodedPicturesMinus2 + 1
+#endif
 };
 
 class SEINeuralNetworkPostFilterActivation : public SEI
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 658702893..72ec152e8 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -63,6 +63,7 @@
 #define JVET_AB0080_CHROMA_QP_FIX                         1 // fix to align chroma QP changes with luma QP changes
 #endif
 #define JVET_AB0081                                       1 // JVET-AB0081: Increased length of filters used for upscaling reconstructed pictures in VTM
+#define JVET_AB0058_NN_FRAME_RATE_UPSAMPLING              1 // JVET-AB0058: signal frame rate upsampling in the neural network post-filter characteristics SEI
 
 //########### place macros to be removed in next cycle below this line ###############
 #define JVET_AB0047_MOVE_GATED_SYNTAX_OF_NNPFC_URIS_AFTER_NNPFC_MODEIDC 1
@@ -758,6 +759,18 @@ enum NNPC_PaddingType
   FIXED_PADDING = 4
 };
 
+#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING
+enum NNPC_PurposeType   // purpose values of the neural network post-filter characteristics SEI (compared against m_purpose)
+{
+  UNKONWN = 0,   // NOTE(review): identifier is misspelled ("UNKNOWN"); left unchanged because a rename would need every user updated in the same change
+  VISUAL_QUALITY_IMPROVEMENT = 1,
+  CHROMA_UPSAMPLING = 2,   // chroma format upsampling (4:2:0 -> 4:2:2/4:4:4, or 4:2:2 -> 4:4:4)
+  INCREASE_PICT_DIMENSION_WITHOUT_CHROMA_UPSAMPLING = 3,   // larger output width/height, chroma format unchanged
+  INCREASE_PICT_DIMENSION_WITH_CHROMA_UPSMAPLING = 4,   // larger output width/height plus chroma upsampling; NOTE(review): "UPSMAPLING" is misspelled
+  FRANE_RATE_UPSAMPLING = 5   // frame rate upsampling; NOTE(review): "FRANE" is misspelled but the SEI reader and SEI encoder reference this exact name
+};
+#endif
+
 enum POST_FILTER_MODE
 {
   EXTERNAL = 0,
diff --git a/source/Lib/DecoderLib/SEIread.cpp b/source/Lib/DecoderLib/SEIread.cpp
index 50a8f6d2d..250200eb8 100644
--- a/source/Lib/DecoderLib/SEIread.cpp
+++ b/source/Lib/DecoderLib/SEIread.cpp
@@ -2711,6 +2711,20 @@ void SEIReader::xParseSEINNPostFilterCharacteristics(SEINeuralNetworkPostFilterC
       sei_read_flag(pDecodedMessageOutputStream, val, "nnpfc_pic_height_in_luma_samples");
       sei.m_picHeightInLumaSamples = val;
     }
+#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING
+    // Frame rate upsampling: parse the input picture count (minus 2), then minus2 + 1 interpolated-picture counts.
+    if (sei.m_purpose == NNPC_PurposeType::FRANE_RATE_UPSAMPLING)
+    {
+      sei_read_uvlc(pDecodedMessageOutputStream, val, "nnpfc_number_of_input_pictures_minus2");
+      sei.m_numberInputDecodedPicturesMinus2 = val;
+      sei.m_numberInterpolatedPictures.resize(sei.m_numberInputDecodedPicturesMinus2 + 1);   // NOTE(review): size is taken directly from the bitstream without a range check - confirm an upper bound from the specification
+      for (size_t i = 0; i < sei.m_numberInterpolatedPictures.size(); i++)   // unsigned index avoids a signed/unsigned comparison with size()
+      {
+        sei_read_uvlc(pDecodedMessageOutputStream, val, "nnpfc_interpolated_pictures");
+        sei.m_numberInterpolatedPictures[i] = val;
+      }
+    }
+#endif
 
     sei_read_flag(pDecodedMessageOutputStream, val, "nnpfc_component_last_flag");
     sei.m_componentLastFlag = val;
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index 0bb21b260..1a82b4472 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -729,6 +729,10 @@ protected:
 #if JVET_AB0135_NN_SEI_COMPLEXITY_MOD
   uint32_t                m_nnPostFilterSEICharacteristicsTotalKilobyteSize[MAX_NUM_NN_POST_FILTERS];
 #endif
+#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING
+  uint32_t                m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[MAX_NUM_NN_POST_FILTERS];   // per filter; assigned through setNNPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2()
+  std::vector<uint32_t>   m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[MAX_NUM_NN_POST_FILTERS];   // per filter; assigned through setNNPostFilterSEICharacteristicsNumberInterpolatedPictures()
+#endif
 
   bool                    m_nnPostFilterSEIActivationEnabled;
   uint32_t                m_nnPostFilterSEIActivationId;
@@ -1962,6 +1966,12 @@ public:
 
   void        setNNPostFilterSEICharacteristicsPayloadFilename(std::string payloadFilename, int filterIdx)              { m_nnPostFilterSEICharacteristicsPayloadFilename[filterIdx] = payloadFilename; }
   std::string getNNPostFilterSEICharacteristicsPayloadFilename(int filterIdx) const                                     { return m_nnPostFilterSEICharacteristicsPayloadFilename[filterIdx]; }
+#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING
+  void        setNNPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2(uint32_t value, int filterIdx)          { m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[filterIdx] = value; }
+  uint32_t    getNNPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2(int filterIdx) const                    { return m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[filterIdx]; }
+  void        setNNPostFilterSEICharacteristicsNumberInterpolatedPictures(const std::vector<uint32_t>& value, int filterIdx) { m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[filterIdx] = value; }   // const reference: avoids copying the list twice
+  const std::vector<uint32_t>& getNNPostFilterSEICharacteristicsNumberInterpolatedPictures(int filterIdx) const         { return m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[filterIdx]; }   // const-qualified like the other getters
+#endif
   void        setNnPostFilterSEIActivationEnabled(bool enabledFlag)                                                     { m_nnPostFilterSEIActivationEnabled = enabledFlag; }
   bool        getNnPostFilterSEIActivationEnabled() const                                                               { return m_nnPostFilterSEIActivationEnabled; }
   void        setNnPostFilterSEIActivationId(uint32_t id)                                                               { m_nnPostFilterSEIActivationId = id; }
diff --git a/source/Lib/EncoderLib/SEIEncoder.cpp b/source/Lib/EncoderLib/SEIEncoder.cpp
index 2a0d5584e..7a4cf5019 100644
--- a/source/Lib/EncoderLib/SEIEncoder.cpp
+++ b/source/Lib/EncoderLib/SEIEncoder.cpp
@@ -1296,6 +1296,13 @@ void SEIEncoder::initSEINeuralNetworkPostFilterCharacteristics(SEINeuralNetworkP
       sei->m_picWidthInLumaSamples = m_pcCfg->getNNPostFilterSEICharacteristicsPicWidthInLumaSamples(filterIdx);
       sei->m_picHeightInLumaSamples = m_pcCfg->getNNPostFilterSEICharacteristicsPicHeightInLumaSamples(filterIdx);
     }
+#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING
+    if (sei->m_purpose == NNPC_PurposeType::FRANE_RATE_UPSAMPLING)   // frame rate upsampling fields are copied from the encoder configuration only for this purpose
+    {
+      sei->m_numberInputDecodedPicturesMinus2 = m_pcCfg->getNNPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2(filterIdx);
+      sei->m_numberInterpolatedPictures = m_pcCfg->getNNPostFilterSEICharacteristicsNumberInterpolatedPictures(filterIdx);   // copies the whole per-filter list into the SEI
+    }
+#endif
 
     sei->m_componentLastFlag = m_pcCfg->getNNPostFilterSEICharacteristicsComponentLastFlag(filterIdx);
 #if M60678_BALLOT_COMMENTS_OF_FI_03
diff --git a/source/Lib/EncoderLib/SEIwrite.cpp b/source/Lib/EncoderLib/SEIwrite.cpp
index ac9b2ddbe..74b554962 100644
--- a/source/Lib/EncoderLib/SEIwrite.cpp
+++ b/source/Lib/EncoderLib/SEIwrite.cpp
@@ -1771,6 +1771,16 @@ void SEIWriter::xWriteSEINeuralNetworkPostFilterCharacteristics(const SEINeuralN
       WRITE_UVLC(sei.m_picWidthInLumaSamples, "nnpfc_pic_width_in_luma_samples");
       WRITE_UVLC(sei.m_picHeightInLumaSamples, "nnpfc_pic_height_in_luma_samples");
     }
+#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING
+    if (sei.m_purpose == NNPC_PurposeType::FRANE_RATE_UPSAMPLING)   // must match the condition used by the SEI reader and the SEI encoder init, otherwise the bitstream desynchronises
+    {
+      WRITE_UVLC(sei.m_numberInputDecodedPicturesMinus2, "nnpfc_number_of_input_pictures_minus2");
+      for (uint32_t i = 0; i <= sei.m_numberInputDecodedPicturesMinus2; ++i)   // minus2 + 1 entries, the count the reader parses; unsigned index matches the bound's type
+      {
+        WRITE_UVLC(sei.m_numberInterpolatedPictures[i], "nnpfc_interpolated_pictures");
+      }
+    }
+#endif
 
     WRITE_FLAG(sei.m_componentLastFlag, "nnpfc_component_last_flag");
 #if M60678_BALLOT_COMMENTS_OF_FI_03
-- 
GitLab