From 6d246424e1aa96f88c33814c1fc0159438accd6a Mon Sep 17 00:00:00 2001 From: Philip Cowan <cowanp@sharplabs.com> Date: Wed, 14 Dec 2022 01:07:56 +0000 Subject: [PATCH] JVET-AB0058: signal frame upsampling in neural network post-filter characteristics --- doc/software-manual.tex | 9 +++++++++ source/App/EncoderApp/EncApp.cpp | 7 +++++++ source/App/EncoderApp/EncAppCfg.cpp | 30 ++++++++++++++++++++++++++++ source/App/EncoderApp/EncAppCfg.h | 4 ++++ source/Lib/CommonLib/SEI.h | 4 ++++ source/Lib/CommonLib/TypeDef.h | 13 ++++++++++++ source/Lib/DecoderLib/SEIread.cpp | 14 +++++++++++++ source/Lib/EncoderLib/EncCfg.h | 10 ++++++++++ source/Lib/EncoderLib/SEIEncoder.cpp | 7 +++++++ source/Lib/EncoderLib/SEIwrite.cpp | 10 ++++++++++ 10 files changed, 108 insertions(+) diff --git a/doc/software-manual.tex b/doc/software-manual.tex index ff15194fb..02f02eb24 100644 --- a/doc/software-manual.tex +++ b/doc/software-manual.tex @@ -5584,6 +5584,7 @@ Specifies sii_num_units_in_shutter_interval for single entry.If multiple entries 2 & Chroma upsampling from the 4:2:0 chroma format to the 4:2:2 or 4:4:4 chroma format, or from the 4:2:2 chroma format to the 4:4:4 chroma format \\ 3 & Increasing the width or height of the cropped decoded output picture without changing the chroma format \\ 4 & Increasing the width or height of the cropped decoded output picture and upsampling the chroma format \\ + 5 & Frame rate upsampling \\ \end{tabular} \\ \Option{SEINNPostFilterCharacteristicsOutSubWidthCFlag\emph{i}} & @@ -5782,6 +5783,14 @@ Specifies sii_num_units_in_shutter_interval for single entry.If multiple entries \Default{""} & Specifies the NNR bitstream of the \emph{i}-th neural network post-filter. \\ + \Option{SEINNPostFilterCharacteristicsNumberInputDecodedPicsMinusTwo\emph{i}} & + \Default{0} & + Specifies the number of decoded output pictures minus 2 used as input for the \emph{i}-th neural network post-filter. 
+ \\ + \Option{SEINNPostFilterCharacteristicsNumberInterpolatedPics\emph{i}} & + \Default{0} & + Specifies, for the \emph{i}-th neural network post-filter, the list of numbers of interpolated pictures generated between each pair of consecutive decoded output pictures used as input for the post-processing filter; the list holds one value per pair, i.e. SEINNPostFilterCharacteristicsNumberInputDecodedPicsMinusTwo\emph{i} plus one values. + \\ \end{OptionTableNoShorthand} \begin{OptionTableNoShorthand}{Neural network post-filter characteristics}{tab:sei-nn-post-filter-activation} diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index cdef6f64c..38f1f399c 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -1175,6 +1175,13 @@ void EncApp::xInitLibCfg( int layerIdx ) m_cEncLib.setNNPostFilterSEICharacteristicsPicWidthInLumaSamples (m_nnPostFilterSEICharacteristicsPicWidthInLumaSamples[i], i); m_cEncLib.setNNPostFilterSEICharacteristicsPicHeightInLumaSamples (m_nnPostFilterSEICharacteristicsPicHeightInLumaSamples[i], i); } +#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING + if (m_cEncLib.getNNPostFilterSEICharacteristicsPurpose(i) == NNPC_PurposeType::FRANE_RATE_UPSAMPLING) // named purpose instead of the literal 5, matching the SEI encoder and reader + { + m_cEncLib.setNNPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2(m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[i], i); + m_cEncLib.setNNPostFilterSEICharacteristicsNumberInterpolatedPictures( m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[i], i); + } +#endif m_cEncLib.setNNPostFilterSEICharacteristicsComponentLastFlag (m_nnPostFilterSEICharacteristicsComponentLastFlag[i], i); #if M60678_BALLOT_COMMENTS_OF_FI_03 m_cEncLib.setNNPostFilterSEICharacteristicsInpFormatIdc (m_nnPostFilterSEICharacteristicsInpFormatIdc[i], i); diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index 4f13e1222..1da5d0cde 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -738,6 +738,14 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) SMultiValueInput<uint16_t> cfg_poSEIProcessingOrder (0, 255, 0, 256); #endif +#if
JVET_AB0058_NN_FRAME_RATE_UPSAMPLING + std::vector<SMultiValueInput<uint32_t>> cfg_nnPostFilterSEICharacteristicsInterpolatedPicturesList; + for (int i = 0; i < MAX_NUM_NN_POST_FILTERS; i++) + { + cfg_nnPostFilterSEICharacteristicsInterpolatedPicturesList.push_back(SMultiValueInput<uint32_t>(0, std::numeric_limits<uint32_t>::max(), 1, 0)); + } +#endif + #if ENABLE_TRACING string sTracingRule; string sTracingFile; @@ -1871,6 +1879,16 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) payloadFilename << "SEINNPostFilterCharacteristicsPayloadFilename" << i; opts.addOptions()(payloadFilename.str(), m_nnPostFilterSEICharacteristicsPayloadFilename[i], string(""), "Specifies the NNR bitstream in the Neural Network Post Filter Characteristics SEI message"); +#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING + std::ostringstream numberDecodedInputPics; + numberDecodedInputPics << "SEINNPostFilterCharacteristicsNumberInputDecodedPicsMinusTwo" << i; + opts.addOptions()(numberDecodedInputPics.str(), m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[i], 0u, "Specifies the number of decoded output pictures used as input for the post processing filter"); + + std::ostringstream numberInterpolatedPics; + numberInterpolatedPics << "SEINNPostFilterCharacteristicsNumberInterpolatedPics" << i; + opts.addOptions()(numberInterpolatedPics.str(), cfg_nnPostFilterSEICharacteristicsInterpolatedPicturesList[i], cfg_nnPostFilterSEICharacteristicsInterpolatedPicturesList[i], "Number of pictures to interpolate"); +#endif + opts.addOptions()("SEINNPostFilterActivationEnabled", m_nnPostFilterSEIActivationEnabled, false, "Control use of the Neural Network Post Filter SEI on current picture"); opts.addOptions()("SEINNPostFilterActivationId", m_nnPostFilterSEIActivationId , 0u, "Id of the Neural Network Post Filter on current picture"); } @@ -2375,6 +2393,18 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) m_ext360.processOptions(ext360CfgContext); #endif +#if 
JVET_AB0058_NN_FRAME_RATE_UPSAMPLING + for (int i = 0; i < MAX_NUM_NN_POST_FILTERS; ++i) + { + m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[i] = cfg_nnPostFilterSEICharacteristicsInterpolatedPicturesList[i].values; + if (m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[i].size() == 0) + { + m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[i].push_back(0); + } + CHECK(m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[i].size() <= m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[i], "Number Interpolated Pictures List must be greater than number of decoder pictures list"); // the SEI writer emits one entry per index 0..Minus2, so the list must hold more than Minus2 values + } +#endif + if (isY4mFileExt(m_inputFileName)) { int width = 0, height = 0, frameRate = 0, inputBitDepth = 0; diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index 5e1e9f09d..5dcf14af8 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -771,6 +771,10 @@ protected: bool m_nnPostFilterSEIActivationEnabled; uint32_t m_nnPostFilterSEIActivationId; +#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING + uint32_t m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[MAX_NUM_NN_POST_FILTERS]; + std::vector<uint32_t> m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[MAX_NUM_NN_POST_FILTERS]; +#endif bool m_poSEIEnabled; std::vector<uint16_t> m_poSEIPayloadType; diff --git a/source/Lib/CommonLib/SEI.h b/source/Lib/CommonLib/SEI.h index 995cb008b..6e175d2ba 100644 --- a/source/Lib/CommonLib/SEI.h +++ b/source/Lib/CommonLib/SEI.h @@ -1285,6 +1285,10 @@ public: #if JVET_AB0135_NN_SEI_COMPLEXITY_MOD uint32_t m_totalKilobyteSize; #endif +#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING + uint32_t m_numberInputDecodedPicturesMinus2; + std::vector<uint32_t> m_numberInterpolatedPictures; +#endif }; class SEINeuralNetworkPostFilterActivation : public SEI diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 658702893..72ec152e8 100644 ---
a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -63,6 +63,7 @@ #define JVET_AB0080_CHROMA_QP_FIX 1 // fix to align chroma QP changes with luma QP changes #endif #define JVET_AB0081 1 // JVET-AB0081: Increased length of filters used for upscaling reconstructed pictures in VTM +#define JVET_AB0058_NN_FRAME_RATE_UPSAMPLING 1 // JVET-AB0058: Allow for signalling nn frame rate upsampling //########### place macros to be removed in next cycle below this line ############### #define JVET_AB0047_MOVE_GATED_SYNTAX_OF_NNPFC_URIS_AFTER_NNPFC_MODEIDC 1 @@ -758,6 +759,18 @@ enum NNPC_PaddingType FIXED_PADDING = 4 }; +#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING +enum NNPC_PurposeType +{ + UNKONWN = 0, + VISUAL_QUALITY_IMPROVEMENT = 1, + CHROMA_UPSAMPLING = 2, + INCREASE_PICT_DIMENSION_WITHOUT_CHROMA_UPSAMPLING = 3, + INCREASE_PICT_DIMENSION_WITH_CHROMA_UPSMAPLING = 4, + FRANE_RATE_UPSAMPLING = 5 +}; +#endif + enum POST_FILTER_MODE { EXTERNAL = 0, diff --git a/source/Lib/DecoderLib/SEIread.cpp b/source/Lib/DecoderLib/SEIread.cpp index 50a8f6d2d..250200eb8 100644 --- a/source/Lib/DecoderLib/SEIread.cpp +++ b/source/Lib/DecoderLib/SEIread.cpp @@ -2711,6 +2711,20 @@ void SEIReader::xParseSEINNPostFilterCharacteristics(SEINeuralNetworkPostFilterC sei_read_flag(pDecodedMessageOutputStream, val, "nnpfc_pic_height_in_luma_samples"); sei.m_picHeightInLumaSamples = val; } +#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING + + if (sei.m_purpose == NNPC_PurposeType::FRANE_RATE_UPSAMPLING) + { + sei_read_uvlc(pDecodedMessageOutputStream, val, "nnpfc_number_of_input_pictures_minus2"); + sei.m_numberInputDecodedPicturesMinus2 = val; + sei.m_numberInterpolatedPictures.resize(sei.m_numberInputDecodedPicturesMinus2 + 1); + for (int i = 0; i < sei.m_numberInterpolatedPictures.size(); i++) + { + sei_read_uvlc(pDecodedMessageOutputStream, val, "nnpfc_interpolated_pictures"); + sei.m_numberInterpolatedPictures[i] = val; + } + } +#endif sei_read_flag(pDecodedMessageOutputStream, val, 
"nnpfc_component_last_flag"); sei.m_componentLastFlag = val; diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 0bb21b260..1a82b4472 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -729,6 +729,10 @@ protected: #if JVET_AB0135_NN_SEI_COMPLEXITY_MOD uint32_t m_nnPostFilterSEICharacteristicsTotalKilobyteSize[MAX_NUM_NN_POST_FILTERS]; #endif +#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING + uint32_t m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[MAX_NUM_NN_POST_FILTERS]; + std::vector<uint32_t> m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[MAX_NUM_NN_POST_FILTERS]; +#endif bool m_nnPostFilterSEIActivationEnabled; uint32_t m_nnPostFilterSEIActivationId; @@ -1962,6 +1966,12 @@ public: void setNNPostFilterSEICharacteristicsPayloadFilename(std::string payloadFilename, int filterIdx) { m_nnPostFilterSEICharacteristicsPayloadFilename[filterIdx] = payloadFilename; } std::string getNNPostFilterSEICharacteristicsPayloadFilename(int filterIdx) const { return m_nnPostFilterSEICharacteristicsPayloadFilename[filterIdx]; } +#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING + void setNNPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2(uint32_t value, int filterIdx) { m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[filterIdx] = value; } + uint32_t getNNPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2(int filterIdx) const { return m_nnPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2[filterIdx]; } + void setNNPostFilterSEICharacteristicsNumberInterpolatedPictures(std::vector<uint32_t> value, int filterIdx) { m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[filterIdx] = value; } + const std::vector<uint32_t>& getNNPostFilterSEICharacteristicsNumberInterpolatedPictures(int filterIdx) { return m_nnPostFilterSEICharacteristicsNumberInterpolatedPictures[filterIdx]; } +#endif void setNnPostFilterSEIActivationEnabled(bool enabledFlag) { 
m_nnPostFilterSEIActivationEnabled = enabledFlag; } bool getNnPostFilterSEIActivationEnabled() const { return m_nnPostFilterSEIActivationEnabled; } void setNnPostFilterSEIActivationId(uint32_t id) { m_nnPostFilterSEIActivationId = id; } diff --git a/source/Lib/EncoderLib/SEIEncoder.cpp b/source/Lib/EncoderLib/SEIEncoder.cpp index 2a0d5584e..7a4cf5019 100644 --- a/source/Lib/EncoderLib/SEIEncoder.cpp +++ b/source/Lib/EncoderLib/SEIEncoder.cpp @@ -1296,6 +1296,13 @@ void SEIEncoder::initSEINeuralNetworkPostFilterCharacteristics(SEINeuralNetworkP sei->m_picWidthInLumaSamples = m_pcCfg->getNNPostFilterSEICharacteristicsPicWidthInLumaSamples(filterIdx); sei->m_picHeightInLumaSamples = m_pcCfg->getNNPostFilterSEICharacteristicsPicHeightInLumaSamples(filterIdx); } +#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING + if (sei->m_purpose == NNPC_PurposeType::FRANE_RATE_UPSAMPLING) + { + sei->m_numberInputDecodedPicturesMinus2 = m_pcCfg->getNNPostFilterSEICharacteristicsNumberInputDecodedPicturesMinus2(filterIdx); + sei->m_numberInterpolatedPictures = m_pcCfg->getNNPostFilterSEICharacteristicsNumberInterpolatedPictures(filterIdx); + } +#endif sei->m_componentLastFlag = m_pcCfg->getNNPostFilterSEICharacteristicsComponentLastFlag(filterIdx); #if M60678_BALLOT_COMMENTS_OF_FI_03 diff --git a/source/Lib/EncoderLib/SEIwrite.cpp b/source/Lib/EncoderLib/SEIwrite.cpp index ac9b2ddbe..74b554962 100644 --- a/source/Lib/EncoderLib/SEIwrite.cpp +++ b/source/Lib/EncoderLib/SEIwrite.cpp @@ -1771,6 +1771,16 @@ void SEIWriter::xWriteSEINeuralNetworkPostFilterCharacteristics(const SEINeuralN WRITE_UVLC(sei.m_picWidthInLumaSamples, "nnpfc_pic_width_in_luma_samples"); WRITE_UVLC(sei.m_picHeightInLumaSamples, "nnpfc_pic_height_in_luma_samples"); } +#if JVET_AB0058_NN_FRAME_RATE_UPSAMPLING + if (sei.m_purpose == NNPC_PurposeType::FRANE_RATE_UPSAMPLING) // must be the same gate SEIReader uses for these fields, otherwise writer and parser desynchronise + { + WRITE_UVLC(sei.m_numberInputDecodedPicturesMinus2, "nnpfc_number_of_input_pictures_minus2"); + for (int i = 0; i <=
sei.m_numberInputDecodedPicturesMinus2; ++i) + { + WRITE_UVLC(sei.m_numberInterpolatedPictures[i], "nnpfc_interpolated_pictures"); + } + } +#endif WRITE_FLAG(sei.m_componentLastFlag, "nnpfc_component_last_flag"); #if M60678_BALLOT_COMMENTS_OF_FI_03 -- GitLab