From 2ee8b0289b941f964012e32b874a7a1492131959 Mon Sep 17 00:00:00 2001
From: Xiang Li <xlxiangli@google.com>
Date: Tue, 21 Jun 2022 10:24:25 -0700
Subject: [PATCH] Y4M support at both encoder and decoder

---
 .clang-format                       |   8 +-
 source/App/DecoderApp/DecApp.cpp    |   8 ++
 source/App/DecoderApp/DecAppCfg.cpp |   8 ++
 source/App/DecoderApp/DecAppCfg.h   |   1 +
 source/App/EncoderApp/EncApp.cpp    |  24 ++++
 source/Lib/Utilities/VideoIOYuv.cpp | 167 ++++++++++++++++++++++++++++
 source/Lib/Utilities/VideoIOYuv.h   |  19 +++-
 7 files changed, 229 insertions(+), 6 deletions(-)

diff --git a/.clang-format b/.clang-format
index 46c86d74bc..b5ac8af9d2 100644
--- a/.clang-format
+++ b/.clang-format
@@ -79,7 +79,7 @@ MacroBlockBegin: ''
 MacroBlockEnd:   ''
 MaxEmptyLinesToKeep: 1
 NamespaceIndentation: All
-ObjCBinPackProtocolList: Auto
+# ObjCBinPackProtocolList: Auto
 ObjCBlockIndentWidth: 2
 ObjCSpaceAfterProperty: false
 ObjCSpaceBeforeProtocolList: true
@@ -97,10 +97,10 @@ SortUsingDeclarations: true
 SpaceAfterCStyleCast: true
 SpaceAfterTemplateKeyword: false
 SpaceBeforeAssignmentOperators: true
-SpaceBeforeCtorInitializerColon: true
-SpaceBeforeInheritanceColon: true
+# SpaceBeforeCtorInitializerColon: true
+# SpaceBeforeInheritanceColon: true
 SpaceBeforeParens: ControlStatements
-SpaceBeforeRangeBasedForLoopColon: false
+# SpaceBeforeRangeBasedForLoopColon: false
 SpaceInEmptyParentheses: false
 SpacesBeforeTrailingComments: 3
 SpacesInAngles:  false
diff --git a/source/App/DecoderApp/DecApp.cpp b/source/App/DecoderApp/DecApp.cpp
index 30c5a825d1..62364e84aa 100644
--- a/source/App/DecoderApp/DecApp.cpp
+++ b/source/App/DecoderApp/DecApp.cpp
@@ -418,6 +418,14 @@ uint32_t DecApp::decode()
         }
         if( ( m_cDecLib.getVPS() != nullptr && ( m_cDecLib.getVPS()->getMaxLayers() == 1 || xIsNaluWithinTargetOutputLayerIdSet( &nalu ) ) ) || m_cDecLib.getVPS() == nullptr )
         {
+          if (isY4mFileExt(reconFileName))
+          {
+            const auto sps = pcListPic->front()->cs->sps;
+            const auto pps = pcListPic->front()->cs->pps;
+            m_cVideoIOYuvReconFile[nalu.m_nuhLayerId].setOutputY4mInfo(
+              pps->getPicWidthInLumaSamples(), pps->getPicHeightInLumaSamples(), m_outputFrameRate, m_outputBitDepth[0],
+              sps->getChromaFormatIdc());
+          }
           m_cVideoIOYuvReconFile[nalu.m_nuhLayerId].open( reconFileName, true, m_outputBitDepth, m_outputBitDepth, bitDepths.recon ); // write mode
         }
       }
diff --git a/source/App/DecoderApp/DecAppCfg.cpp b/source/App/DecoderApp/DecAppCfg.cpp
index 8988da1f34..cf310da048 100644
--- a/source/App/DecoderApp/DecAppCfg.cpp
+++ b/source/App/DecoderApp/DecAppCfg.cpp
@@ -40,6 +40,7 @@
 #include <string>
 #include "DecAppCfg.h"
 #include "Utilities/program_options_lite.h"
+#include "Utilities/VideoIOYuv.h"
 #include "CommonLib/ChromaFormat.h"
 #include "CommonLib/dtrace_next.h"
 
@@ -87,6 +88,7 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
   ("SkipFrames,s",              m_iSkipFrame,                          0,          "number of frames to skip before random access")
   ("OutputBitDepth,d",          m_outputBitDepth[CHANNEL_TYPE_LUMA],   0,          "bit depth of YUV output luma component (default: use 0 for native depth)")
   ("OutputBitDepthC,d",         m_outputBitDepth[CHANNEL_TYPE_CHROMA], 0,          "bit depth of YUV output chroma component (default: use luma output bit-depth)")
+  ("OutputFrameRate",           m_outputFrameRate,                     0,          "output frame rate, used to generate decoded Y4M")
   ("OutputColourSpaceConvert",  outputColourSpaceConvert,              string(""), "Colour space conversion to apply to input 444 video. Permitted values are (empty string=UNCHANGED) " + getListOfColourSpaceConverts(false))
   ("MaxTemporalLayer,t",        m_iMaxTemporalLayer,                   500,    "Maximum Temporal Layer to be decoded. -1 to decode all layers")
   ("TargetOutputLayerSet,p",    m_targetOlsIdx,                        500,    "Target output layer set index")
@@ -245,6 +247,12 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
   {
     m_targetOlsIdx = -1;
   }
+
+  if (m_outputFrameRate == 0 && isY4mFileExt(m_reconFileName))
+  {
+    msg(ERROR, "OutputFrameRate needs to be set when outputting to Y4M file\n");
+    return false;
+  }
   return true;
 }
 
diff --git a/source/App/DecoderApp/DecAppCfg.h b/source/App/DecoderApp/DecAppCfg.h
index f9af97fb68..e5ae2ddc17 100644
--- a/source/App/DecoderApp/DecAppCfg.h
+++ b/source/App/DecoderApp/DecAppCfg.h
@@ -63,6 +63,7 @@ protected:
 
   int           m_iSkipFrame;                           ///< counter for frames prior to the random access point to skip
   int           m_outputBitDepth[MAX_NUM_CHANNEL_TYPE]; ///< bit depth used for writing output
+  int           m_outputFrameRate = 0;                  ///< used to generate decoded Y4M
   InputColourSpaceConversion m_outputColourSpaceConvert;
   int           m_targetOlsIdx;                       ///< target output layer set
   std::vector<int> m_targetOutputLayerIdSet;          ///< set of LayerIds to be outputted
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index a0e4ab3380..c3a2ad1c5b 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -1334,6 +1334,11 @@ void EncApp::xCreateLib( std::list<PelUnitBuf*>& recBufList, const int layerId )
         reconFileName.append( std::to_string( layerId ) );
       }
     }
+    if (isY4mFileExt(reconFileName))
+    {
+      m_cVideoIOYuvReconFile.setOutputY4mInfo(m_sourceWidth, m_sourceHeight, m_iFrameRate, m_internalBitDepth[0],
+                                              m_chromaFormatIDC);
+    }
     m_cVideoIOYuvReconFile.open( reconFileName, true, m_outputBitDepth, m_outputBitDepth, m_internalBitDepth );  // write mode
   }
 
@@ -1380,6 +1385,25 @@ void EncApp::xInitLib()
 
 void EncApp::createLib( const int layerIdx )
 {
+  if (isY4mFileExt(m_inputFileName))
+  {
+    int          width = 0, height = 0, frameRate = 0, inputBitDepth = 0;
+    ChromaFormat chromaFormat = CHROMA_420;
+    m_cVideoIOYuvInputFile.parseY4mFileHeader(m_inputFileName, width, height, frameRate, inputBitDepth, chromaFormat);
+    if (width != m_sourceWidth || height != m_sourceHeight || frameRate != m_iFrameRate
+        || inputBitDepth != m_inputBitDepth[0] || chromaFormat != m_chromaFormatIDC)
+    {
+      printf("Warning: Y4M file info is different from input.\n");
+      m_sourceWidth            = width;
+      m_sourceHeight           = height;
+      m_iFrameRate             = frameRate;
+      m_inputBitDepth[0]       = inputBitDepth;
+      m_inputBitDepth[1]       = inputBitDepth;
+      m_chromaFormatIDC        = chromaFormat;
+      m_MSBExtendedBitDepth[0] = m_inputBitDepth[0];
+      m_MSBExtendedBitDepth[1] = m_inputBitDepth[1];
+    }
+  }
   const int sourceHeight = m_isField ? m_iSourceHeightOrg : m_sourceHeight;
   UnitArea unitArea( m_chromaFormatIDC, Area( 0, 0, m_sourceWidth, sourceHeight ) );
 
diff --git a/source/Lib/Utilities/VideoIOYuv.cpp b/source/Lib/Utilities/VideoIOYuv.cpp
index 9b98e092d1..836c87c308 100644
--- a/source/Lib/Utilities/VideoIOYuv.cpp
+++ b/source/Lib/Utilities/VideoIOYuv.cpp
@@ -50,6 +50,12 @@ using namespace std;
 
 #define FLIP_PIC 0
 
+constexpr int Y4M_SIGNATURE_LENGTH    = 10;   // number of signature characters compared when parsing a Y4M header
+const char    y4mSignature[]          = "YUV4MPEG2 ";   // text that starts every Y4M file header read or written here
+constexpr int Y4M_MAX_HEADER_LENGTH   = 128;   // number of bytes read from the file when parsing the Y4M header
+constexpr int Y4M_FRAME_HEADER_LENGTH = 6;   // basic Y4M frame header, "FRAME" + '\n'
+const char    y4mFrameHeader[]        = { 'F', 'R', 'A', 'M', 'E', '\n' };   // written in front of each Y4M output frame
+
 // ====================================================================================================================
 // Local Functions
 // ====================================================================================================================
@@ -154,20 +160,158 @@ void VideoIOYuv::open( const std::string &fileName, bool bWriteMode, const int f
     {
       EXIT( "Failed to write reconstructed YUV file: " << fileName.c_str() );
     }
+    if (isY4mFileExt(fileName)) 
+    {
+      writeY4mFileHeader();
+      m_outY4m = true;
+    }
   }
   else
   {
+    if (isY4mFileExt(fileName))
+    {
+      if (m_inY4mFileHeaderLength == 0)
+      {
+        int          dummyWidth = 0, dummyHeight = 0, dummyFrameRate = 0, dummyBitDepth = 0;
+        ChromaFormat dummyChromaFormat = CHROMA_420;
+        parseY4mFileHeader(fileName, dummyWidth, dummyHeight, dummyFrameRate, dummyBitDepth, dummyChromaFormat);
+      }
+    }
     m_cHandle.open( fileName.c_str(), ios::binary | ios::in );
 
     if( m_cHandle.fail() )
     {
       EXIT( "Failed to open input YUV file: " << fileName.c_str() );
     }
+
+    if (m_inY4mFileHeaderLength)
+    {
+      m_cHandle.seekg(m_inY4mFileHeaderLength, ios::cur);
+    }
   }
 
   return;
 }
 
+// Opens Y4M file `fileName`, parses its stream header and closes the file again. The W, H, F (rounded to an integer)
+// and C parameters are returned through the reference arguments, which stay unchanged when the parameter is absent.
+// The header size in bytes is kept in m_inY4mFileHeaderLength; a malformed or interlaced header fails a CHECK.
+void VideoIOYuv::parseY4mFileHeader(const std::string &fileName, int &width, int &height, int &frameRate, int &bitDepth,
+                               ChromaFormat &chromaFormat)
+{
+  m_cHandle.open(fileName.c_str(), ios::binary | ios::in);
+  CHECK(m_cHandle.fail(), "File open failed.")
+
+  // One extra zero byte keeps the buffer NUL-terminated for the string functions below, even after a short read.
+  char header[Y4M_MAX_HEADER_LENGTH + 1] = {};
+  m_cHandle.read(header, Y4M_MAX_HEADER_LENGTH);
+  CHECK(strncmp(header, y4mSignature, Y4M_SIGNATURE_LENGTH), "The input is not a Y4M file!");
+
+  // locate the end of the header; fail instead of silently treating the file as if it had no header
+  char *const headerEnd = strchr(header + Y4M_SIGNATURE_LENGTH + 1, '\n');
+  CHECK(headerEnd == nullptr, "Wrong Y4M file header!");
+  *headerEnd              = ' ';   // space is used as token end later
+  m_inY4mFileHeaderLength = (int) (headerEnd - header) + 1;
+
+  // parse Y4M header info
+  for (int i = Y4M_SIGNATURE_LENGTH; i < m_inY4mFileHeaderLength; i++)
+  {
+    int numerator = 0, denominator = 0, pos = 0;
+    switch (header[i])
+    {
+    case 'W': sscanf(header + i + 1, "%d", &width); break;
+    case 'H': sscanf(header + i + 1, "%d", &height); break;
+    case 'C':
+      if (strncmp(&header[i + 1], "mono", 4) == 0)
+      {
+        chromaFormat = CHROMA_400;
+        pos          = i + 5;
+      }
+      else if (strncmp(&header[i + 1], "420", 3) == 0)
+      {
+        chromaFormat = CHROMA_420;
+        pos          = i + 4;
+        if (strncmp(&header[pos], "jpeg", 4) == 0)
+        {
+          pos += 4;
+        }
+        else if (strncmp(&header[pos], "paldv", 5) == 0)
+        {
+          pos += 5;
+        }
+      }
+      else if (strncmp(&header[i + 1], "422", 3) == 0)
+      {
+        chromaFormat = CHROMA_422;
+        pos          = i + 4;
+      }
+      else if (strncmp(&header[i + 1], "444", 3) == 0)
+      {
+        chromaFormat = CHROMA_444;
+        pos          = i + 4;
+      }
+      bitDepth = 8;   // used unless the colour space tag carries an explicit "p<depth>" suffix
+      if (header[pos] == 'p')
+      {
+        sscanf(&header[pos + 1], "%d", &bitDepth);
+      }
+      break;
+    case 'F':
+      if (sscanf(header + i + 1, "%d:%d", &numerator, &denominator) == 2 && denominator != 0)
+      {
+        frameRate = (int) (1.0 * numerator / denominator + 0.5);   // round the rational rate to the nearest integer
+      }
+      break;
+    case 'I': CHECK(header[i + 1] != 'p', "Interlaced Y4M is not supported yet");   // then falls through to break
+    case 'A':   // not supported, ignored
+    case 'X':   // not supported, ignored
+      break;
+    default: CHECK(true, "Wrong Y4M file header!")
+    }
+    // jump to the space that ends the current token; the loop increment then moves on to the next parameter
+    const char *const tokenEnd = strchr(header + i + 1, ' ');
+    CHECK(tokenEnd == nullptr, "Wrong Y4M file header!");
+    i = (int) (tokenEnd - header);
+  }
+
+  m_cHandle.close();
+}
+
+void VideoIOYuv::setOutputY4mInfo(int width, int height, int frameRate, int bitDepth, ChromaFormat chromaFormat)
+{
+  m_outChromaFormat = chromaFormat;   // selects the "C..." tag in writeY4mFileHeader()
+  m_outFrameRate    = frameRate;      // emitted as "F<rate>:1"
+  m_outBitDepth     = bitDepth;       // appended as "p<depth>" when larger than 8
+  m_outPicHeight    = height;         // emitted as "H<height>"
+  m_outPicWidth     = width;          // emitted as "W<width>"
+}
+
+// Builds the Y4M stream header from the values stored by setOutputY4mInfo() and writes it to m_cHandle.
+void VideoIOYuv::writeY4mFileHeader()
+{
+  CHECK(m_outPicWidth == 0 || m_outPicHeight == 0 || m_outBitDepth == 0 || m_outFrameRate == 0,
+        "Output Y4M file info has not been set");
+  std::string header = y4mSignature;
+  header += "W" + std::to_string(m_outPicWidth) + " ";
+  header += "H" + std::to_string(m_outPicHeight) + " ";
+  header += "F" + std::to_string(m_outFrameRate) + ":1 ";
+  header += "Ip A0:0 ";   // fixed interlacing and aspect-ratio parameters
+  switch (m_outChromaFormat)
+  {
+  case CHROMA_400: header += "Cmono"; break;
+  case CHROMA_420: header += "C420"; break;
+  case CHROMA_422: header += "C422"; break;
+  case CHROMA_444: header += "C444"; break;
+  default: CHECK(true, "Unsupported chroma format for Y4M output"); break;   // never emit a header without a C tag
+  }
+  if (m_outBitDepth > 8)
+  {
+    header += "p" + std::to_string(m_outBitDepth);
+  }
+  header += "\n";   // extension/comment parameters are not written
+  m_cHandle.write(header.c_str(), header.length());
+}
+
 void VideoIOYuv::close()
 {
   m_cHandle.close();
@@ -215,6 +359,10 @@ void VideoIOYuv::skipFrames(uint32_t numFrames, uint32_t width, uint32_t height,
   }
   frameSize *= wordsize;
   //------------------
+  if (m_inY4mFileHeaderLength)
+  {
+    frameSize += Y4M_FRAME_HEADER_LENGTH;
+  }
 
   const streamoff offset = frameSize * numFrames;
 
@@ -912,6 +1060,13 @@ bool VideoIOYuv::read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSp
     }
   }
 
+  if (m_inY4mFileHeaderLength)
+  {
+    char y4mFrameHeader[Y4M_FRAME_HEADER_LENGTH];
+    m_cHandle.read(y4mFrameHeader, Y4M_FRAME_HEADER_LENGTH);
+    CHECK(strncmp(y4mFrameHeader, "FRAME", Y4M_FRAME_HEADER_LENGTH - 1), "Wrong Y4M frame header!");
+  }
+
   const PelBuf areaBufY = picOrg.get(COMPONENT_Y);
 #if !EXTENSION_360_VIDEO
   const uint32_t stride444      = areaBufY.stride;
@@ -1060,6 +1215,11 @@ bool VideoIOYuv::write( uint32_t orgWidth, uint32_t orgHeight, const CPelUnitBuf
     msg( WARNING, "\nWarning: writing %d x %d luma sample output picture!", width444, height444);
   }
 
+  if (m_outY4m)
+  {
+    m_cHandle.write(y4mFrameHeader, Y4M_FRAME_HEADER_LENGTH);
+  }
+
   for(uint32_t comp=0; retval && comp < ::getNumberValidComponents(format); comp++)
   {
     const ComponentID compID      = ComponentID(comp);
@@ -1325,3 +1485,10 @@ bool VideoIOYuv::writeUpscaledPicture( const SPS& sps, const PPS& pps, const CPe
 
   return ret;
 }
+
+bool isY4mFileExt(const std::string &fileName)
+{
+  // True only if fileName ends with ".y4m" (case-sensitive); rfind() so an earlier ".y4m" cannot mask the suffix.
+  const auto pos = fileName.rfind(".y4m");
+  return pos != std::string::npos && pos + 4 == fileName.length();
+}
diff --git a/source/Lib/Utilities/VideoIOYuv.h b/source/Lib/Utilities/VideoIOYuv.h
index b18d1f8eff..48697fda1f 100644
--- a/source/Lib/Utilities/VideoIOYuv.h
+++ b/source/Lib/Utilities/VideoIOYuv.h
@@ -61,13 +61,26 @@ private:
   int       m_fileBitdepth[MAX_NUM_CHANNEL_TYPE]; ///< bitdepth of input/output video file
   int       m_MSBExtendedBitDepth[MAX_NUM_CHANNEL_TYPE];  ///< bitdepth after addition of MSBs (with value 0)
   int       m_bitdepthShift[MAX_NUM_CHANNEL_TYPE];  ///< number of bits to increase or decrease image by before/after write/read
+  int          m_inY4mFileHeaderLength = 0;
+  int          m_outPicWidth           = 0;
+  int          m_outPicHeight          = 0;
+  int          m_outBitDepth           = 0;
+  int          m_outFrameRate          = 0;
+  ChromaFormat m_outChromaFormat       = CHROMA_420;
+  bool         m_outY4m                = false;
 
 public:
   VideoIOYuv()           {}
   virtual ~VideoIOYuv()  {}
 
-  void  open  ( const std::string &fileName, bool bWriteMode, const int fileBitDepth[MAX_NUM_CHANNEL_TYPE], const int MSBExtendedBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] ); ///< open or create file
-  void  close ();                                           ///< close file
+  void parseY4mFileHeader(const std::string &fileName, int &width, int &height, int &frameRate, int &bitDepth,
+                          ChromaFormat &chromaFormat);
+  void setOutputY4mInfo(int width, int height, int frameRate, int bitDepth, ChromaFormat chromaFormat);
+  void writeY4mFileHeader();
+  void open(const std::string &fileName, bool bWriteMode, const int fileBitDepth[MAX_NUM_CHANNEL_TYPE],
+            const int MSBExtendedBitDepth[MAX_NUM_CHANNEL_TYPE],
+            const int internalBitDepth[MAX_NUM_CHANNEL_TYPE]);   ///< open or create file
+  void close();                                                  ///< close file
 #if EXTENSION_360_VIDEO
   void skipFrames(int numFrames, uint32_t width, uint32_t height, ChromaFormat format);
 #else
@@ -105,5 +118,7 @@ public:
 
 };
 
+bool isY4mFileExt(const std::string &fileName);
+
 #endif // __VIDEOIOYUV__
 
-- 
GitLab