From 2ee8b0289b941f964012e32b874a7a1492131959 Mon Sep 17 00:00:00 2001 From: Xiang Li <xlxiangli@google.com> Date: Tue, 21 Jun 2022 10:24:25 -0700 Subject: [PATCH] Y4M support at both encoder and decoder --- .clang-format | 8 +- source/App/DecoderApp/DecApp.cpp | 8 ++ source/App/DecoderApp/DecAppCfg.cpp | 8 ++ source/App/DecoderApp/DecAppCfg.h | 1 + source/App/EncoderApp/EncApp.cpp | 24 ++++ source/Lib/Utilities/VideoIOYuv.cpp | 167 ++++++++++++++++++++++++++++ source/Lib/Utilities/VideoIOYuv.h | 19 +++- 7 files changed, 229 insertions(+), 6 deletions(-) diff --git a/.clang-format b/.clang-format index 46c86d74bc..b5ac8af9d2 100644 --- a/.clang-format +++ b/.clang-format @@ -79,7 +79,7 @@ MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: All -ObjCBinPackProtocolList: Auto +# ObjCBinPackProtocolList: Auto ObjCBlockIndentWidth: 2 ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: true @@ -97,10 +97,10 @@ SortUsingDeclarations: true SpaceAfterCStyleCast: true SpaceAfterTemplateKeyword: false SpaceBeforeAssignmentOperators: true -SpaceBeforeCtorInitializerColon: true -SpaceBeforeInheritanceColon: true +# SpaceBeforeCtorInitializerColon: true +# SpaceBeforeInheritanceColon: true SpaceBeforeParens: ControlStatements -SpaceBeforeRangeBasedForLoopColon: false +# SpaceBeforeRangeBasedForLoopColon: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 3 SpacesInAngles: false diff --git a/source/App/DecoderApp/DecApp.cpp b/source/App/DecoderApp/DecApp.cpp index 30c5a825d1..62364e84aa 100644 --- a/source/App/DecoderApp/DecApp.cpp +++ b/source/App/DecoderApp/DecApp.cpp @@ -418,6 +418,14 @@ uint32_t DecApp::decode() } if( ( m_cDecLib.getVPS() != nullptr && ( m_cDecLib.getVPS()->getMaxLayers() == 1 || xIsNaluWithinTargetOutputLayerIdSet( &nalu ) ) ) || m_cDecLib.getVPS() == nullptr ) { + if (isY4mFileExt(reconFileName)) + { + const auto sps = pcListPic->front()->cs->sps; + const auto pps = pcListPic->front()->cs->pps; + m_cVideoIOYuvReconFile[nalu.m_nuhLayerId].setOutputY4mInfo( + pps->getPicWidthInLumaSamples(), pps->getPicHeightInLumaSamples(), m_outputFrameRate, m_outputBitDepth[0], + sps->getChromaFormatIdc()); + } m_cVideoIOYuvReconFile[nalu.m_nuhLayerId].open( reconFileName, true, m_outputBitDepth, m_outputBitDepth, bitDepths.recon ); // write mode } } diff --git a/source/App/DecoderApp/DecAppCfg.cpp b/source/App/DecoderApp/DecAppCfg.cpp index 8988da1f34..cf310da048 100644 --- a/source/App/DecoderApp/DecAppCfg.cpp +++ b/source/App/DecoderApp/DecAppCfg.cpp @@ -40,6 +40,7 @@ #include <string> #include "DecAppCfg.h" #include "Utilities/program_options_lite.h" +#include "Utilities/VideoIOYuv.h" #include "CommonLib/ChromaFormat.h" #include "CommonLib/dtrace_next.h" @@ -87,6 +88,7 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] ) ("SkipFrames,s", m_iSkipFrame, 0, "number of frames to skip before random access") ("OutputBitDepth,d", m_outputBitDepth[CHANNEL_TYPE_LUMA], 0, "bit depth of YUV output luma component (default: use 0 for native depth)") ("OutputBitDepthC,d", m_outputBitDepth[CHANNEL_TYPE_CHROMA], 0, "bit depth of YUV output chroma component (default: use luma output bit-depth)") + ("OutputFrameRate", m_outputFrameRate, 0, "output frame rate, used to generate decoded Y4M") ("OutputColourSpaceConvert", outputColourSpaceConvert, string(""), "Colour space conversion to apply to input 444 video. Permitted values are (empty string=UNCHANGED) " + getListOfColourSpaceConverts(false)) ("MaxTemporalLayer,t", m_iMaxTemporalLayer, 500, "Maximum Temporal Layer to be decoded. -1 to decode all layers") ("TargetOutputLayerSet,p", m_targetOlsIdx, 500, "Target output layer set index") @@ -245,6 +247,12 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] ) { m_targetOlsIdx = -1; } + + if (m_outputFrameRate == 0 && isY4mFileExt(m_reconFileName)) + { + msg(ERROR, "OutputFrameRate needs to be set when outputting to Y4M file\n"); + return false; + } return true; } diff --git a/source/App/DecoderApp/DecAppCfg.h b/source/App/DecoderApp/DecAppCfg.h index f9af97fb68..e5ae2ddc17 100644 --- a/source/App/DecoderApp/DecAppCfg.h +++ b/source/App/DecoderApp/DecAppCfg.h @@ -63,6 +63,7 @@ protected: int m_iSkipFrame; ///< counter for frames prior to the random access point to skip int m_outputBitDepth[MAX_NUM_CHANNEL_TYPE]; ///< bit depth used for writing output + int m_outputFrameRate = 0; ///< used to generate decoded Y4M InputColourSpaceConversion m_outputColourSpaceConvert; int m_targetOlsIdx; ///< target output layer set std::vector<int> m_targetOutputLayerIdSet; ///< set of LayerIds to be outputted diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index a0e4ab3380..c3a2ad1c5b 100644 --- a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -1334,6 +1334,11 @@ void EncApp::xCreateLib( std::list<PelUnitBuf*>& recBufList, const int layerId ) reconFileName.append( std::to_string( layerId ) ); } } + if (isY4mFileExt(reconFileName)) + { + m_cVideoIOYuvReconFile.setOutputY4mInfo(m_sourceWidth, m_sourceHeight, m_iFrameRate, m_internalBitDepth[0], + m_chromaFormatIDC); + } m_cVideoIOYuvReconFile.open( reconFileName, true, m_outputBitDepth, m_outputBitDepth, m_internalBitDepth ); // write mode } @@ -1380,6 +1385,25 @@ void EncApp::xInitLib() void EncApp::createLib( const int layerIdx ) { + if (isY4mFileExt(m_inputFileName)) + { + int width = 0, height = 0, frameRate = 0, inputBitDepth = 0; + ChromaFormat chromaFormat = CHROMA_420; + m_cVideoIOYuvInputFile.parseY4mFileHeader(m_inputFileName, width, height, frameRate, inputBitDepth, chromaFormat); + if (width != m_sourceWidth || height != m_sourceHeight || frameRate != m_iFrameRate + || inputBitDepth != m_inputBitDepth[0] || chromaFormat != m_chromaFormatIDC) + { + printf("Warning: Y4M file info is different from input.\n"); + m_sourceWidth = width; + m_sourceHeight = height; + m_iFrameRate = frameRate; + m_inputBitDepth[0] = inputBitDepth; + m_inputBitDepth[1] = inputBitDepth; + m_chromaFormatIDC = chromaFormat; + m_MSBExtendedBitDepth[0] = m_inputBitDepth[0]; + m_MSBExtendedBitDepth[1] = m_inputBitDepth[1]; + } + } const int sourceHeight = m_isField ? m_iSourceHeightOrg : m_sourceHeight; UnitArea unitArea( m_chromaFormatIDC, Area( 0, 0, m_sourceWidth, sourceHeight ) ); diff --git a/source/Lib/Utilities/VideoIOYuv.cpp b/source/Lib/Utilities/VideoIOYuv.cpp index 9b98e092d1..836c87c308 100644 --- a/source/Lib/Utilities/VideoIOYuv.cpp +++ b/source/Lib/Utilities/VideoIOYuv.cpp @@ -50,6 +50,12 @@ using namespace std; #define FLIP_PIC 0 +constexpr int Y4M_SIGNATURE_LENGTH = 10; +const char y4mSignature[] = "YUV4MPEG2 "; +constexpr int Y4M_MAX_HEADER_LENGTH = 128; +constexpr int Y4M_FRAME_HEADER_LENGTH = 6; // basic Y4m frame header, "FRAME" + '\n' +const char y4mFrameHeader[] = { 'F', 'R', 'A', 'M', 'E', '\n' }; + // ==================================================================================================================== // Local Functions // ==================================================================================================================== @@ -154,20 +160,158 @@ void VideoIOYuv::open( const std::string &fileName, bool bWriteMode, const int f { EXIT( "Failed to write reconstructed YUV file: " << fileName.c_str() ); } + if (isY4mFileExt(fileName)) + { + writeY4mFileHeader(); + m_outY4m = true; + } } else { + if (isY4mFileExt(fileName)) + { + if (m_inY4mFileHeaderLength == 0) + { + int dummyWidth = 0, dummyHeight = 0, dummyFrameRate = 0, dummyBitDepth = 0; + ChromaFormat dummyChromaFormat = CHROMA_420; + parseY4mFileHeader(fileName, dummyWidth, dummyHeight, dummyFrameRate, dummyBitDepth, dummyChromaFormat); + } + } m_cHandle.open( fileName.c_str(), ios::binary | ios::in ); if( m_cHandle.fail() ) { EXIT( "Failed to open input YUV file: " << fileName.c_str() ); } + + if (m_inY4mFileHeaderLength) + { + m_cHandle.seekg(m_inY4mFileHeaderLength, ios::cur); + } } return; } +void VideoIOYuv::parseY4mFileHeader(const std::string &fileName, int &width, int &height, int &frameRate, int &bitDepth, + ChromaFormat &chromaFormat) +{ + m_cHandle.open(fileName.c_str(), ios::binary | ios::in); + CHECK(m_cHandle.fail(), "File open failed.") + + char header[Y4M_MAX_HEADER_LENGTH]; + m_cHandle.read(header, sizeof(header)); + CHECK(strncmp(header, y4mSignature, Y4M_SIGNATURE_LENGTH), "The input is not a Y4M file!"); + + // locate the end of the header + for (int i = Y4M_SIGNATURE_LENGTH + 1; i < Y4M_MAX_HEADER_LENGTH; i++) + { + if (header[i] == '\n') + { + header[i] = ' '; // space is used as token end later + m_inY4mFileHeaderLength = i + 1; + break; + } + } + // parse Y4M header info + for (int i = Y4M_SIGNATURE_LENGTH; i < m_inY4mFileHeaderLength; i++) + { + int numerator = 0, denominator = 0, pos = 0; + switch (header[i]) + { + case 'W': sscanf(header + i + 1, "%d", &width); break; + case 'H': sscanf(header + i + 1, "%d", &height); break; + case 'C': + if (strncmp(&header[i + 1], "mono", 4) == 0) + { + chromaFormat = CHROMA_400; + pos = i + 5; + } + else if (strncmp(&header[i + 1], "420", 3) == 0) + { + chromaFormat = CHROMA_420; + pos = i + 4; + if (strncmp(&header[pos], "jpeg", 4) == 0) + { + pos += 4; + } + else if (strncmp(&header[pos], "paldv", 5) == 0) + { + pos += 5; + } + } + else if (strncmp(&header[i + 1], "422", 3) == 0) + { + chromaFormat = CHROMA_422; + pos = i + 4; + } + else if (strncmp(&header[i + 1], "444", 3) == 0) + { + chromaFormat = CHROMA_444; + pos = i + 4; + } + bitDepth = 8; + if (header[pos] == 'p') + { + sscanf(&header[pos + 1], "%d", &bitDepth); + } + break; + case 'F': + if (sscanf(header + i + 1, "%d:%d", &numerator, &denominator) == 2) + { + if (denominator != 0) + { + frameRate = (int) (1.0 * numerator / denominator + 0.5); + } + } + break; + case 'I': CHECK(header[i + 1] != 'p', "Interlaced Y4M is not supported yet"); + case 'A': // not support, ignore + case 'X': // not support, ignore + break; + default: CHECK(true, "Wrong Y4M file header!") + } + i = (int) (strchr(header + i + 1, ' ') - header); + } + + m_cHandle.close(); +} + +void VideoIOYuv::setOutputY4mInfo(int width, int height, int frameRate, int bitDepth, ChromaFormat chromaFormat) +{ + m_outPicWidth = width; + m_outPicHeight = height; + m_outBitDepth = bitDepth; + m_outFrameRate = frameRate; + m_outChromaFormat = chromaFormat; +} + +void VideoIOYuv::writeY4mFileHeader() +{ + CHECK(m_outPicWidth == 0 || m_outPicHeight == 0 || m_outBitDepth == 0 || m_outFrameRate == 0, + "Output Y4M file into has not been set"); + std::string header = y4mSignature; + header += "W" + std::to_string(m_outPicWidth) + " "; + header += "H" + std::to_string(m_outPicHeight) + " "; + header += "F" + std::to_string(m_outFrameRate) + ":1 "; + header += "Ip A0:0 "; + switch (m_outChromaFormat) + { + case CHROMA_400: header += "Cmono"; break; + case CHROMA_420: header += "C420"; break; + case CHROMA_422: header += "C422"; break; + case CHROMA_444: header += "C444"; break; + } + if (m_outBitDepth > 8) + { + header += "p" + std::to_string(m_outBitDepth); + } + header += "\n"; + // not write extension/comment + + m_cHandle.write(header.c_str(), header.length()); +} + void VideoIOYuv::close() { m_cHandle.close(); @@ -215,6 +359,10 @@ void VideoIOYuv::skipFrames(uint32_t numFrames, uint32_t width, uint32_t height, } frameSize *= wordsize; //------------------ + if (m_inY4mFileHeaderLength) + { + frameSize += Y4M_FRAME_HEADER_LENGTH; + } const streamoff offset = frameSize * numFrames; @@ -912,6 +1060,13 @@ bool VideoIOYuv::read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSp } } + if (m_inY4mFileHeaderLength) + { + char y4mFrameHeader[Y4M_FRAME_HEADER_LENGTH]; + m_cHandle.read(y4mFrameHeader, Y4M_FRAME_HEADER_LENGTH); + CHECK(strncmp(y4mFrameHeader, "FRAME", Y4M_FRAME_HEADER_LENGTH - 1), "Wrong Y4M frame header!"); + } + const PelBuf areaBufY = picOrg.get(COMPONENT_Y); #if !EXTENSION_360_VIDEO const uint32_t stride444 = areaBufY.stride; @@ -1060,6 +1215,11 @@ bool VideoIOYuv::write( uint32_t orgWidth, uint32_t orgHeight, const CPelUnitBuf msg( WARNING, "\nWarning: writing %d x %d luma sample output picture!", width444, height444); } + if (m_outY4m) + { + m_cHandle.write(y4mFrameHeader, Y4M_FRAME_HEADER_LENGTH); + } + for(uint32_t comp=0; retval && comp < ::getNumberValidComponents(format); comp++) { const ComponentID compID = ComponentID(comp); @@ -1325,3 +1485,10 @@ bool VideoIOYuv::writeUpscaledPicture( const SPS& sps, const PPS& pps, const CPe return ret; } + +bool isY4mFileExt(const std::string &fileName) +{ + auto pos = fileName.find(".y4m"); + // ".y4m" must be at the end of the file name + return (pos != std::string::npos && pos + 4 == fileName.length()); +} diff --git a/source/Lib/Utilities/VideoIOYuv.h b/source/Lib/Utilities/VideoIOYuv.h index b18d1f8eff..48697fda1f 100644 --- a/source/Lib/Utilities/VideoIOYuv.h +++ b/source/Lib/Utilities/VideoIOYuv.h @@ -61,13 +61,26 @@ private: int m_fileBitdepth[MAX_NUM_CHANNEL_TYPE]; ///< bitdepth of input/output video file int m_MSBExtendedBitDepth[MAX_NUM_CHANNEL_TYPE]; ///< bitdepth after addition of MSBs (with value 0) int m_bitdepthShift[MAX_NUM_CHANNEL_TYPE]; ///< number of bits to increase or decrease image by before/after write/read + int m_inY4mFileHeaderLength = 0; + int m_outPicWidth = 0; + int m_outPicHeight = 0; + int m_outBitDepth = 0; + int m_outFrameRate = 0; + ChromaFormat m_outChromaFormat = CHROMA_420; + bool m_outY4m = false; public: VideoIOYuv() {} virtual ~VideoIOYuv() {} - void open ( const std::string &fileName, bool bWriteMode, const int fileBitDepth[MAX_NUM_CHANNEL_TYPE], const int MSBExtendedBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] ); ///< open or create file - void close (); ///< close file + void parseY4mFileHeader(const std::string &fileName, int &width, int &height, int &frameRate, int &bitDepth, + ChromaFormat &chromaFormat); + void setOutputY4mInfo(int width, int height, int frameRate, int bitDepth, ChromaFormat chromaFormat); + void writeY4mFileHeader(); + void open(const std::string &fileName, bool bWriteMode, const int fileBitDepth[MAX_NUM_CHANNEL_TYPE], + const int MSBExtendedBitDepth[MAX_NUM_CHANNEL_TYPE], + const int internalBitDepth[MAX_NUM_CHANNEL_TYPE]); ///< open or create file + void close(); ///< close file #if EXTENSION_360_VIDEO void skipFrames(int numFrames, uint32_t width, uint32_t height, ChromaFormat format); #else @@ -105,5 +118,7 @@ public: }; +bool isY4mFileExt(const std::string &fileName); + #endif // __VIDEOIOYUV__ -- GitLab