diff --git a/CMakeLists.txt b/CMakeLists.txt index 282de51154e9e96256d775b965180f65a940da2f..f052c0828bbe80df7f7cf0cded99ecc16e742924 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,9 +70,11 @@ if( OpenMP_FOUND ) endif() # Enable warnings for some generators and toolsets. -bb_enable_warnings( gcc warnings-as-errors -Wno-sign-compare ) +# bb_enable_warnings( gcc warnings-as-errors -Wno-sign-compare ) # bb_enable_warnings( gcc -Wno-unused-variable ) # bb_enable_warnings( gcc-4.8 warnings-as-errors -Wno-unused-variable ) +# for gcc 8.2: +bb_enable_warnings( gcc warnings-as-errors -Wno-sign-compare -Wno-class-memaccess) if( XCODE ) bb_enable_warnings( clang warnings-as-errors @@ -104,6 +106,7 @@ endif() if( MSVC ) add_compile_options( "/MP" ) add_compile_options( "/EHsc" ) + add_compile_options( "/MT" ) endif() # set address sanitizer compiler arguments diff --git a/cfg/encoder_intra_vtm.cfg b/cfg/encoder_intra_vtm.cfg index f6665ccda58e9a5830521de0c992eecac99758eb..9ebe108b223591b97bdf19d0aa9fedf453d9ec95 100644 --- a/cfg/encoder_intra_vtm.cfg +++ b/cfg/encoder_intra_vtm.cfg @@ -106,13 +106,11 @@ EMT : 1 EMTFast : 1 Affine : 1 HighPrecMv : 1 - SubPuMvp : 1 MaxNumMergeCand : 6 LMChroma : 1 # use CCLM only DepQuant : 1 IMV : 2 - ALF : 1 # Fast tools diff --git a/cfg/encoder_lowdelay_P_vtm.cfg b/cfg/encoder_lowdelay_P_vtm.cfg index 5f588af822131c8a0c58e7658c4006f23bd51aa0..d83bed2d615bbe62a8bed6139c9daf01129f36f8 100644 --- a/cfg/encoder_lowdelay_P_vtm.cfg +++ b/cfg/encoder_lowdelay_P_vtm.cfg @@ -122,13 +122,11 @@ EMT : 1 EMTFast : 1 Affine : 1 HighPrecMv : 1 - SubPuMvp : 1 MaxNumMergeCand : 6 LMChroma : 1 # use CCLM only DepQuant : 1 IMV : 2 - ALF : 1 # Fast tools diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg index 281ec093f234ae30f099c1059bb7cd2e3e2851b2..3bcde02d4138a1f6e484d528c77ba67f77d138b4 100644 --- a/cfg/encoder_lowdelay_vtm.cfg +++ b/cfg/encoder_lowdelay_vtm.cfg @@ -122,13 +122,11 @@ EMT : 1 EMTFast : 1 Affine : 1 HighPrecMv 
: 1 - SubPuMvp : 1 MaxNumMergeCand : 6 LMChroma : 1 # use CCLM only DepQuant : 1 IMV : 2 - ALF : 1 # Fast tools diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg index 7e391142bb295f82824e5364bc8ffb143f424218..46d02af14ed62aa539294bc39e7b69ca46d7018a 100644 --- a/cfg/encoder_randomaccess_vtm.cfg +++ b/cfg/encoder_randomaccess_vtm.cfg @@ -136,13 +136,11 @@ EMT : 1 EMTFast : 1 Affine : 1 HighPrecMv : 1 - SubPuMvp : 1 MaxNumMergeCand : 6 LMChroma : 1 # use CCLM only DepQuant : 1 IMV : 2 - ALF : 1 # Fast tools @@ -150,7 +148,6 @@ PBIntraFast : 1 FastMrg : 1 AMaxBT : 1 - ### DO NOT ADD ANYTHING BELOW THIS LINE ### ### DO NOT DELETE THE EMPTY LINE BELOW ### diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..d58111897c8449a88a7e83a74f74c44a8ede7055 --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1,8 @@ +# ignore these Latex files +jctvcdoc.cls +*.aux +*.log +*.lot +*.out +*.toc +*~ diff --git a/doc/pyuv_format.pdf b/doc/pyuv_format.pdf new file mode 100644 index 0000000000000000000000000000000000000000..62c669e0bb4c52d5756b428ce0838eeb57a6d06f Binary files /dev/null and b/doc/pyuv_format.pdf differ diff --git a/doc/software-manual.pdf b/doc/software-manual.pdf index 91bcc5a9f172ec570b4312133e74d3b867e71e52..0b4925cf32a94097c096cdc47707a5dbbebe3637 100644 Binary files a/doc/software-manual.pdf and b/doc/software-manual.pdf differ diff --git a/doc/software-manual.tex b/doc/software-manual.tex index ceb6b39c5d5b4736d3d7a96e94ee224d2d847cc2..0cd31316c6225c0c1924a52b909b8e5b26588341 100644 --- a/doc/software-manual.tex +++ b/doc/software-manual.tex @@ -3021,4 +3021,226 @@ If the decoder is compiled with the macro RExt__DECODER_DEBUG_BIT_STATISTICS def The Linux makefile will compile both the analyser and standard version when the `all' or `everything' target is used (where the latter will also build high-bit-depth executables). 
+ +\section{Block statistics extension} +\label{sec:block-stat-extens} + +The block statistics extension enables straightforward visualization and statistical analysis of coding tool +usage in encoded bitstreams. The extension enables the reference +software encoder and decoder to write out statistics files in a configurable +way, which in turn can be loaded into a suitable YUV player for overlay of the +reconstructed YUV sequence, or can be used for statistical analysis at a +selectable scope (e.g. block/picture/sequence level). An example implementation +for such visualization is available with the open-source YUView player +(https://github.com/IENT/YUView). + + +\subsection{Usage} +\label{sec:usage} + +The software has to be compiled with the macros ENABLE_TRACING and +K0149_BLOCK_STATISTICS defined as 1. The statistics can be written by either +encoder or decoder. + +The extension adds additional trace channels to the ``dtrace'' functionality of +the software. The following trace channels were added: +\begin{description} +\item[D_BLOCK_STATISTICS_ALL] All syntax elements are written, no matter whether + they are actually encoded or derived. +\item[D_BLOCK_STATISTICS_CODED] Tries to write only those syntax elements that have + actually been encoded. +\end{description} + +The following additional options are available (part of ``dtrace''). See +the file dtrace_next.h for more details. + +\begin{OptionTableNoShorthand}{Decoder options}{tab:decoder-block-statistics} +\Option{TraceFile} & +%\ShortOption{\None} & +\Default{\None} & +File name of the produced trace file. +\\ + +\Option{TraceRule} & +%\ShortOption{-b} & +\Default{\NotSet} & +Specifies which traces should be saved, and for which POCs. 
+\\ + +\end{OptionTableNoShorthand} + +Concrete examples of calls for generating a block statistics file are: +\begin{verbatim} +bin/DecoderAppStatic -b str/BasketballDrive_1920x1080_QP37.vvc \ + --TraceFile="stats/BasketballDrive_1920x1080_QP37_coded.vtmbmsstats" \ + --TraceRule="D_BLOCK_STATISTICS_CODED:poc>=0" + +bin/DecoderAppStatic -b str/BasketballDrive_1920x1080_QP37.vvc \ + --TraceFile="stats/BasketballDrive_1920x1080_QP37_all.vtmbmsstats" \ + --TraceRule="D_BLOCK_STATISTICS_ALL:poc>=0" +\end{verbatim} + + +\subsection{Block statistics file formats} +\label{sec:block-stat-file} +The trace file will contain a header listing information on all available block +statistics. For each statistic it lists a type and a scale for vectors or a range +for integers if applicable: +\begin{verbatim} +# VTMBMS Block Statistics +# Sequence size: [832x 480] +# Block Statistic Type: PredMode; Flag; +# Block Statistic Type: MergeFlag; Flag; +# Block Statistic Type: MVL0; Vector; Scale: 4 +# Block Statistic Type: MVL1; Vector; Scale: 4 +# Block Statistic Type: IPCM; Flag; +# Block Statistic Type: Y_IntraMode; Integer; [0, 73] +# Block Statistic Type: Cb_IntraMode; Integer; [0, 73] +\end{verbatim} + +Two formats are available for the statistics for each block, a human readable +format and a CSV based format. The header remains the same for both cases. + +For both formats each row contains the information for one block statistic. The +order of the data is: picture order count (POC), location of top left corner of +the block, size of the block, name of the statistic, and value of the +statistic. +The macro BLOCK_STATS_AS_CSV is available in order to choose the required format. +The human readable format can also be easily processed with other software, for +example YUView, using regular expressions. The CSV based format provides the +universal interface required by spreadsheet applications. + +The human readable format is based on the format used for the other dtrace +statistics. 
Some examples for this format are: +\begin{verbatim} +BlockStat: POC 16 @( 112, 0) [ 8x 8] SkipFlag=1 +BlockStat: POC 16 @( 112, 0) [ 8x 8] InterDir=1 +BlockStat: POC 16 @( 112, 0) [ 8x 8] MergeFlag=1 +BlockStat: POC 16 @( 112, 0) [ 8x 8] MergeIdx=0 +BlockStat: POC 16 @( 112, 0) [ 8x 8] MergeType=0 +BlockStat: POC 16 @( 112, 0) [ 8x 8] MVPIdxL0=255 +BlockStat: POC 16 @( 112, 0) [ 8x 8] MVPNumL0=255 +BlockStat: POC 16 @( 112, 0) [ 8x 8] RefIdxL0=0 +BlockStat: POC 16 @( 112, 0) [ 8x 8] MVDL0={ 0, 0} +BlockStat: POC 16 @( 112, 0) [ 8x 8] MVL0={ -70, 18} +BlockStat: POC 16 @( 112, 8) [ 8x 8] PredMode=0 +BlockStat: POC 16 @( 112, 8) [ 8x 8] PartSize=0 +\end{verbatim} + +Some examples of the CSV based format are: +\begin{verbatim} +BlockStat;16; 112; 0; 8; 8;SkipFlag;1 +BlockStat;16; 112; 0; 8; 8;InterDir;1 +BlockStat;16; 112; 0; 8; 8;MergeFlag;1 +BlockStat;16; 112; 0; 8; 8;MergeIdx;0 +BlockStat;16; 112; 0; 8; 8;MergeType;0 +BlockStat;16; 112; 0; 8; 8;MVPIdxL0;255 +BlockStat;16; 112; 0; 8; 8;MVPNumL0;255 +BlockStat;16; 112; 0; 8; 8;RefIdxL0;0 +BlockStat;16; 112; 0; 8; 8;MVDL0; 0; 0 +BlockStat;16; 112; 0; 8; 8;MVL0; -70; 18 +BlockStat;16; 112; 8; 8; 8;PredMode;0 +BlockStat;16; 112; 8; 8; 8;PartSize;0 +\end{verbatim} + +\subsection{Visualization} +\label{sec:visualization} + +The block statistics can be viewed with YUView, which is freely available under +GPLv3: \url{https://github.com/IENT/YUView}. The latest releases and the master +branch have the functionality required for viewing the block statistics. YUView +assumes that the file extension of a block statistics file is +``.vtmbmsstats''. However, if a file is not recognized you can choose from a list +of supported file formats. + + +Statistics can be overlaid with YUV sequences. 
Some example snapshots are: + +\begin{figure}[htpb] + \centering + \includegraphics[width=0.8\linewidth]{figures/YUView} + \caption{YUView} + \label{fig:yuview} +\end{figure} + +\begin{figure}[htpb] + \centering + \includegraphics[width=0.5\linewidth]{figures/raceHorsesShot2MotionVectors} + \caption{Motion vectors} + \label{fig:motion-vectors} +\end{figure} + + +\begin{figure}[htpb] + \centering + \includegraphics[width=0.5\linewidth]{figures/raceHorsesShot3SkipFlag} + \caption{Skip flag} + \label{fig:skip-flag} +\end{figure} + +\subsection{Adding statistics} +\label{sec:adding-statistics} + +In order to add further block statistics, do the following: + + +\begin{description} +\item[source/Lib/CommonLib/dtrace_blockstatistics.h] + Add your statistic to the BlockStatistic enum: +\begin{verbatim} +enum class BlockStatistic { + // general + PredMode, + PartSize, + Depth, +\end{verbatim} + +Further, add your statistic to the map blockstatistic2description: +\begin{verbatim} +static const std::map<BlockStatistic, + std::tuple<std::string, BlockStatisticType, std::string>> + blockstatistic2description = +{ + { BlockStatistic::PredMode, + std::tuple<std::string, BlockStatisticType, std::string> + {"PredMode", BlockStatisticType::Flag, ""}}, + { BlockStatistic::MergeFlag, + std::tuple<std::string, BlockStatisticType, std::string> + {"MergeFlag", BlockStatisticType::Flag, ""}}, + { BlockStatistic::MVL0, + std::tuple<std::string, BlockStatisticType, std::string> + {"MVL0", BlockStatisticType::Vector, "Scale: 4"}}, + YOURS +\end{verbatim} + + +\item[source/Lib/CommonLib/dtrace_blockstatistics.cpp] All code for + writing syntax elements is kept in this file in + getAndStoreBlockStatistics. This function is called once for each + CTU, after it has been en/decoded. 
The following macros have been + defined to facilitate writing of block statistics: +\begin{verbatim} +DTRACE_BLOCK_SCALAR(ctx,channel,cs_cu_pu,stat_type,val) +DTRACE_BLOCK_SCALAR_CHROMA(ctx,channel,cs_cu_pu,stat_type,val) +DTRACE_BLOCK_VECTOR(ctx,channel,cu_pu,stat_type,v_x,v_y) +DTRACE_BLOCK_AFFINETF(ctx,channel,pu,stat_type,v_x0,v_y0,v_x1,v_y1,v_x2,v_y2) +\end{verbatim} + +An example: +\begin{verbatim} +DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, + cu, GetBlockStatisticName(BlockStatistic::PredMode), cu.predMode); +\end{verbatim} + + +\item[Block statistics for debugging] The statistics can also be used + to write out other data, not just syntax elements. Add your + statistics to dtrace_blockstatistics.h. Where it should be used the + following headers have to be included: +\begin{verbatim} +#include "dtrace_next.h" +#include "dtrace_blockstatistics.h" +\end{verbatim} +\end{description} + \end{document} diff --git a/source/App/DecoderAnalyserApp/CMakeLists.txt b/source/App/DecoderAnalyserApp/CMakeLists.txt index 46f0e44363824ac28b541c4e8576272936c854c6..bd4975afa64248238e18513ebcd6de594c42093c 100644 --- a/source/App/DecoderAnalyserApp/CMakeLists.txt +++ b/source/App/DecoderAnalyserApp/CMakeLists.txt @@ -29,7 +29,7 @@ target_compile_definitions( ${EXE_NAME} PUBLIC RExt__DECODER_DEBUG_BIT_STATISTIC target_compile_definitions( ${EXE_NAME} PUBLIC RExt__DECODER_DEBUG_TOOL_STATISTICS=1 ) if( ENABLE_VTM ) - target_compile_definitions( ${EXE_NAME} PUBLIC JEM_TOOLS=0 ) + target_compile_definitions( ${EXE_NAME} PUBLIC BMS_TOOLS=0 ) endif() if( SET_ENABLE_TRACING ) diff --git a/source/App/DecoderApp/CMakeLists.txt b/source/App/DecoderApp/CMakeLists.txt index 8bd727935494d1eb74379616cd882ebf75986065..dec7c6e1ec22c9734e8b4cb33cae2c3862c0f8f5 100644 --- a/source/App/DecoderApp/CMakeLists.txt +++ b/source/App/DecoderApp/CMakeLists.txt @@ -27,7 +27,7 @@ add_executable( ${EXE_NAME} ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} ${CMAKE_CU 
include_directories(${CMAKE_CURRENT_BINARY_DIR}) if( ENABLE_VTM ) - target_compile_definitions( ${EXE_NAME} PUBLIC JEM_TOOLS=0 ) + target_compile_definitions( ${EXE_NAME} PUBLIC BMS_TOOLS=0 ) endif() diff --git a/source/App/DecoderApp/DecApp.cpp b/source/App/DecoderApp/DecApp.cpp index a7f26e83303ff5b107c9152fc424d4d427f04b93..3c2f719c0959159f7a4280c15e362513d2b0fe7e 100644 --- a/source/App/DecoderApp/DecApp.cpp +++ b/source/App/DecoderApp/DecApp.cpp @@ -121,6 +121,10 @@ uint32_t DecApp::decode() * requires the DecApp::decode() method to be called again with the same * nal unit. */ #if RExt__DECODER_DEBUG_STATISTICS + CodingStatistics& stat = CodingStatistics::GetSingletonInstance(); + CHECK(m_statMode < STATS__MODE_NONE || m_statMode > STATS__MODE_ALL, "Wrong coding statistics output mode"); + stat.m_mode = m_statMode; + CodingStatistics::CodingStatisticsData* backupStats = new CodingStatistics::CodingStatisticsData(CodingStatistics::GetStatistics()); #endif @@ -210,6 +214,11 @@ uint32_t DecApp::decode() } } + if (m_packedYUVMode && (m_outputBitDepth[CH_L] != 10 && m_outputBitDepth[CH_L] != 12)) + { + EXIT ("Invalid output bit-depth for packed YUV output, aborting\n"); + } + m_cVideoIOYuvReconFile.open( m_reconFileName, true, m_outputBitDepth, m_outputBitDepth, bitDepths.recon ); // write mode openedReconFile = true; } @@ -279,9 +288,9 @@ void DecApp::xCreateDecLib() m_cDecLib.create(); // initialize decoder class - m_cDecLib.init( + m_cDecLib.init( #if JVET_J0090_MEMORY_BANDWITH_MEASURE - m_cacheCfgFile + m_cacheCfgFile #endif ); m_cDecLib.setDecodedPictureHashSEIEnabled(m_decodedPictureHashSEIEnabled); @@ -394,11 +403,13 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId ) if (display) { m_cVideoIOYuvReconFile.write( pcPicTop->getRecoBuf(), pcPicBottom->getRecoBuf(), - m_outputColourSpaceConvert, - conf.getWindowLeftOffset() + defDisp.getWindowLeftOffset(), - conf.getWindowRightOffset() + defDisp.getWindowRightOffset(), - conf.getWindowTopOffset() + 
defDisp.getWindowTopOffset(), - conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(), NUM_CHROMA_FORMAT, isTff ); + m_outputColourSpaceConvert, + false, // TODO: m_packedYUVMode, + conf.getWindowLeftOffset() + defDisp.getWindowLeftOffset(), + conf.getWindowRightOffset() + defDisp.getWindowRightOffset(), + conf.getWindowTopOffset() + defDisp.getWindowTopOffset(), + conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(), + NUM_CHROMA_FORMAT, isTff ); } } @@ -445,6 +456,7 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId ) m_cVideoIOYuvReconFile.write( pcPic->getRecoBuf(), m_outputColourSpaceConvert, + m_packedYUVMode, conf.getWindowLeftOffset() + defDisp.getWindowLeftOffset(), conf.getWindowRightOffset() + defDisp.getWindowRightOffset(), conf.getWindowTopOffset() + defDisp.getWindowTopOffset(), @@ -502,15 +514,18 @@ void DecApp::xFlushOutput( PicList* pcListPic ) // write to file if ( !m_reconFileName.empty() ) { - const Window &conf = pcPicTop->cs->sps->getConformanceWindow(); + const Window &conf = pcPicTop->cs->sps->getConformanceWindow(); const Window defDisp = (m_respectDefDispWindow && pcPicTop->cs->sps->getVuiParametersPresentFlag()) ? 
pcPicTop->cs->sps->getVuiParameters()->getDefaultDisplayWindow() : Window(); - const bool isTff = pcPicTop->topField; + const bool isTff = pcPicTop->topField; + m_cVideoIOYuvReconFile.write( pcPicTop->getRecoBuf(), pcPicBottom->getRecoBuf(), - m_outputColourSpaceConvert, - conf.getWindowLeftOffset() + defDisp.getWindowLeftOffset(), - conf.getWindowRightOffset() + defDisp.getWindowRightOffset(), - conf.getWindowTopOffset() + defDisp.getWindowTopOffset(), - conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(), NUM_CHROMA_FORMAT, isTff ); + m_outputColourSpaceConvert, + false, // TODO: m_packedYUVMode, + conf.getWindowLeftOffset() + defDisp.getWindowLeftOffset(), + conf.getWindowRightOffset() + defDisp.getWindowRightOffset(), + conf.getWindowTopOffset() + defDisp.getWindowTopOffset(), + conf.getWindowBottomOffset() + defDisp.getWindowBottomOffset(), + NUM_CHROMA_FORMAT, isTff ); } // update POC of display order @@ -560,6 +575,7 @@ void DecApp::xFlushOutput( PicList* pcListPic ) m_cVideoIOYuvReconFile.write( pcPic->getRecoBuf(), m_outputColourSpaceConvert, + m_packedYUVMode, conf.getWindowLeftOffset() + defDisp.getWindowLeftOffset(), conf.getWindowRightOffset() + defDisp.getWindowRightOffset(), conf.getWindowTopOffset() + defDisp.getWindowTopOffset(), diff --git a/source/App/DecoderApp/DecAppCfg.cpp b/source/App/DecoderApp/DecAppCfg.cpp index 6e520b49eeca61d15991ddc1552d89fa9e637276..ca55e2493a7e9f28c0d619666abed5c477cfde9f 100644 --- a/source/App/DecoderApp/DecAppCfg.cpp +++ b/source/App/DecoderApp/DecAppCfg.cpp @@ -96,6 +96,7 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] ) ("SEIColourRemappingInfoFilename", m_colourRemapSEIFileName, string(""), "Colour Remapping YUV output file name. If empty, no remapping is applied (ignore SEI message)\n") ("OutputDecodedSEIMessagesFilename", m_outputDecodedSEIMessagesFilename, string(""), "When non empty, output decoded SEI messages to the indicated file. 
If file is '-', then output to stdout\n") ("ClipOutputVideoToRec709Range", m_bClipOutputVideoToRec709Range, false, "If true then clip output video to the Rec. 709 Range on saving") + ("PYUV", m_packedYUVMode, false, "If true then output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data. Ignored for interlaced output.") #if ENABLE_TRACING ("TraceChannelsList", bTracingChannelsList, false, "List all available tracing channels" ) ("TraceRule", sTracingRule, string( "" ), "Tracing rule (ex: \"D_CABAC:poc==8\" or \"D_REC_CB_LUMA:poc==8\")" ) @@ -103,6 +104,13 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] ) #endif #if JVET_J0090_MEMORY_BANDWITH_MEASURE ("CacheCfg", m_cacheCfgFile, string( "" ), "CacheCfg File" ) +#endif +#if RExt__DECODER_DEBUG_STATISTICS + ("Stats", m_statMode, 3, "Control decoder debugging statistic output mode\n" + "\t0: disable statistic\n" + "\t1: enable bit statistic\n" + "\t2: enable tool statistic\n" + "\t3: enable bit and tool statistic\n") #endif ; @@ -220,6 +228,8 @@ DecAppCfg::DecAppCfg() , m_respectDefDispWindow(0) , m_outputDecodedSEIMessagesFilename() , m_bClipOutputVideoToRec709Range(false) +, m_packedYUVMode(false) +, m_statMode(0) { for (uint32_t channelTypeIndex = 0; channelTypeIndex < MAX_NUM_CHANNEL_TYPE; channelTypeIndex++) { diff --git a/source/App/DecoderApp/DecAppCfg.h b/source/App/DecoderApp/DecAppCfg.h index b721849a26366a6fe5b6209036b4db922ff77b92..7b370eb83804f11b678afaa513226d2a10d512da 100644 --- a/source/App/DecoderApp/DecAppCfg.h +++ b/source/App/DecoderApp/DecAppCfg.h @@ -70,7 +70,9 @@ protected: int m_respectDefDispWindow; ///< Only output content inside the default display window std::string m_outputDecodedSEIMessagesFilename; ///< filename to output decoded SEI messages to. If '-', then use stdout. If empty, do not output details. bool m_bClipOutputVideoToRec709Range; ///< If true, clip the output video to the Rec 709 range on saving. 
+ bool m_packedYUVMode; ///< If true, output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data std::string m_cacheCfgFile; ///< Config file of cache model + int m_statMode; ///< Config statistic mode (0 - disabled, 1 - bit stat, 2 - tool stat, 3 - both) public: DecAppCfg(); diff --git a/source/App/DecoderApp/decmain.cpp b/source/App/DecoderApp/decmain.cpp index 73c44ea0223c122f58fd44720751b6edefcf79d3..61db2a5e36a52e5465b5e265000d069771859d49 100644 --- a/source/App/DecoderApp/decmain.cpp +++ b/source/App/DecoderApp/decmain.cpp @@ -57,9 +57,9 @@ int main(int argc, char* argv[]) // print information fprintf( stdout, "\n" ); #ifdef SVNREVISION - fprintf( stdout, "VVCSoftware: VTM Decoder Version %s (%s@r%s) ", NEXT_SOFTWARE_VERSION, SVNRELATIVEURL, SVNREVISION /*NV_VERSION*/ ); + fprintf( stdout, "VVCSoftware: BMS Decoder Version %s (%s@r%s) ", NEXT_SOFTWARE_VERSION, SVNRELATIVEURL, SVNREVISION /*NV_VERSION*/ ); #else - fprintf( stdout, "VVCSoftware: VTM Decoder Version %s ", NEXT_SOFTWARE_VERSION /*NV_VERSION*/ ); + fprintf( stdout, "VVCSoftware: BMS Decoder Version %s ", NEXT_SOFTWARE_VERSION /*NV_VERSION*/ ); #endif fprintf( stdout, NVM_ONOS ); fprintf( stdout, NVM_COMPILEDBY ); diff --git a/source/App/EncoderApp/CMakeLists.txt b/source/App/EncoderApp/CMakeLists.txt index 49ba1259f7bd21fa18ce71ceec10798eb3eb1ada..d5322a6b6f7740b35c7666be2ec0847f52e50709 100644 --- a/source/App/EncoderApp/CMakeLists.txt +++ b/source/App/EncoderApp/CMakeLists.txt @@ -29,7 +29,7 @@ add_executable( ${EXE_NAME} ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} ${CMAKE_CU include_directories(${CMAKE_CURRENT_BINARY_DIR}) if( ENABLE_VTM ) - target_compile_definitions( ${EXE_NAME} PUBLIC JEM_TOOLS=0 ) + target_compile_definitions( ${EXE_NAME} PUBLIC BMS_TOOLS=0 ) endif() if( SET_ENABLE_TRACING ) diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp index b48d190874b0fa1066265e4e19e957b232ce894c..59a2e070d57c2a80b45449b634ea5991a5f11768 100644 --- 
a/source/App/EncoderApp/EncApp.cpp +++ b/source/App/EncoderApp/EncApp.cpp @@ -247,6 +247,9 @@ void EncApp::xInitLibCfg() m_cEncLib.setFastIntraEMT ( m_FastEMT & m_EMT & 1 ); m_cEncLib.setInterEMT ( ( m_EMT >> 1 ) & 1 ); m_cEncLib.setFastInterEMT ( ( m_FastEMT >> 1 ) & ( m_EMT >> 1 ) & 1 ); +#endif +#if JVET_K0157 + m_cEncLib.setUseCompositeRef ( m_compositeRefEnabled ); #endif // ADD_NEW_TOOL : (encoder app) add setting of tool enabling flags and associated parameters here @@ -549,6 +552,17 @@ void EncApp::xCreateLib( std::list<PelUnitBuf*>& recBufList #endif if (!m_reconFileName.empty()) { + if (m_packedYUVMode && ((m_outputBitDepth[CH_L] != 10 && m_outputBitDepth[CH_L] != 12) + || ((m_iSourceWidth & (1 + (m_outputBitDepth[CH_L] & 3))) != 0))) + { + EXIT ("Invalid output bit-depth or image width for packed YUV output, aborting\n"); + } + if (m_packedYUVMode && (m_chromaFormatIDC != CHROMA_400) && ((m_outputBitDepth[CH_C] != 10 && m_outputBitDepth[CH_C] != 12) + || (((m_iSourceWidth / SPS::getWinUnitX (m_chromaFormatIDC)) & (1 + (m_outputBitDepth[CH_C] & 3))) != 0))) + { + EXIT ("Invalid chroma output bit-depth or image width for packed YUV output, aborting\n"); + } + m_cVideoIOYuvReconFile.open(m_reconFileName, true, m_outputBitDepth, m_outputBitDepth, m_internalBitDepth); // write mode } @@ -737,7 +751,10 @@ void EncApp::xWriteOutput( int iNumEncoded, std::list<PelUnitBuf*>& recBufList if (!m_reconFileName.empty()) { - m_cVideoIOYuvReconFile.write( *pcPicYuvRecTop, *pcPicYuvRecBottom, ipCSC, m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom, NUM_CHROMA_FORMAT, m_isTopFieldFirst ); + m_cVideoIOYuvReconFile.write( *pcPicYuvRecTop, *pcPicYuvRecBottom, + ipCSC, + false, // TODO: m_packedYUVMode, + m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom, NUM_CHROMA_FORMAT, m_isTopFieldFirst ); } } } @@ -749,7 +766,9 @@ void EncApp::xWriteOutput( int iNumEncoded, std::list<PelUnitBuf*>& recBufList if (!m_reconFileName.empty()) { 
m_cVideoIOYuvReconFile.write( *pcPicYuvRec, - ipCSC, m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range ); + ipCSC, + m_packedYUVMode, + m_confWinLeft, m_confWinRight, m_confWinTop, m_confWinBottom, NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range ); } } } diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp index 4070fcc13a3ca88005a324ed4cc6d256df767ed3..5149ada2620ed43d54ee65ffd6e1c3f57547ef8b 100644 --- a/source/App/EncoderApp/EncAppCfg.cpp +++ b/source/App/EncoderApp/EncAppCfg.cpp @@ -107,6 +107,7 @@ EncAppCfg::EncAppCfg() : m_inputColourSpaceConvert(IPCOLOURSPACE_UNCHANGED) , m_snrInternalColourSpace(false) , m_outputInternalColourSpace(false) +, m_packedYUVMode(false) #if EXTENSION_360_VIDEO , m_ext360(*this) #endif @@ -766,6 +767,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) ("FramesToBeEncoded,f", m_framesToBeEncoded, 0, "Number of frames to be encoded (default=all)") ("ClipInputVideoToRec709Range", m_bClipInputVideoToRec709Range, false, "If true then clip input video to the Rec. 709 Range on loading when InternalBitDepth is less than MSBExtendedBitDepth") ("ClipOutputVideoToRec709Range", m_bClipOutputVideoToRec709Range, false, "If true then clip output video to the Rec. 709 Range on saving when OutputBitDepth is less than InternalBitDepth") + ("PYUV", m_packedYUVMode, false, "If true then output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data. Ignored for interlaced output.") ("SummaryOutFilename", m_summaryOutFilename, string(), "Filename to use for producing summary output file. If empty, do not produce a file.") ("SummaryPicFilenameBase", m_summaryPicFilenameBase, string(), "Base filename to use for producing summary picture output files. The actual filenames used will have I.txt, P.txt and B.txt appended. 
If empty, do not produce a file.") ("SummaryVerboseness", m_summaryVerboseness, 0u, "Specifies the level of the verboseness of the text output") @@ -849,6 +851,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) "\t1: Enable fast methods only for Intra EMT\n" "\t2: Enable fast methods only for Inter EMT\n" "\t3: Enable fast methods for both Intra & Inter EMT\n") +#endif +#if JVET_K0157 + ("CompositeLTReference", m_compositeRefEnabled, false, "Enable Composite Long Term Reference Frame") #endif // ADD_NEW_TOOL : (encoder app) add parsing parameters here @@ -912,7 +917,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) /* Quantization parameters */ #if QP_SWITCHING_FOR_PARALLEL ("QP,q", m_iQP, 30, "Qp value") - ("QPIncrementFrame,-qpif", m_qpIncrementAtSourceFrame, OptionalValue<uint32_t>(), "If a source file frame number is specified, the internal QP will be incremented for all POCs associated with source frames >= frame number. If empty, do not increment.") + ("QPIncrementFrame,-qpif", m_qpIncrementAtSourceFrame, OptionalValue<uint32_t>(), "If a source file frame number is specified, the internal QP will be incremented for all POCs associated with source frames >= frame number. 
If empty, do not increment.") #else ("QP,q", m_fQP, 30.0, "Qp value, if value is float, QP is switched once during encoding") #endif @@ -1291,6 +1296,21 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) po::ErrorReporter err; const list<const char*>& argv_unhandled = po::scanArgv(opts, argc, (const char**) argv, err); +#if JVET_K0157 + if (m_compositeRefEnabled) + { + for (int i = 0; i < m_iGOPSize; i++) + { + m_GOPList[i].m_POC *= 2; + m_GOPList[i].m_deltaRPS *= 2; + for (int j = 0; j < m_GOPList[i].m_numRefPics; j++) + { + m_GOPList[i].m_referencePics[j] *= 2; + } + } + } +#endif + for (list<const char*>::const_iterator it = argv_unhandled.begin(); it != argv_unhandled.end(); it++) { msg( ERROR, "Unhandled argument ignored: `%s'\n", *it); @@ -1797,14 +1817,25 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] ) #endif #if ENABLE_QPA - if( m_LargeCTU && m_bUsePerceptQPA && !m_bUseAdaptiveQP && ( m_iSourceHeight <= 1280 ) && ( m_iSourceWidth <= 2048 ) ) + if (m_bUsePerceptQPA && !m_bUseAdaptiveQP && m_dualTree && (m_cbQpOffsetDualTree != 0 || m_crQpOffsetDualTree != 0)) + { + msg( WARNING, "*************************************************************************\n" ); + msg( WARNING, "* WARNING: chroma QPA on, ignoring nonzero dual-tree chroma QP offsets! *\n" ); + msg( WARNING, "*************************************************************************\n" ); + } + + #if QP_SWITCHING_FOR_PARALLEL + if( m_LargeCTU && ( m_iQP < 38 ) && ( m_iGOPSize > 4 ) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && ( m_iSourceHeight <= 1280 ) && ( m_iSourceWidth <= 2048 ) ) + #else + if( m_LargeCTU && ( ( int ) m_fQP < 38 ) && ( m_iGOPSize > 4 ) && m_bUsePerceptQPA && !m_bUseAdaptiveQP && ( m_iSourceHeight <= 1280 ) && ( m_iSourceWidth <= 2048 ) ) + #endif #else if( false ) #endif { - msg( WARNING, "***************************************************************************\n" ); - msg( WARNING, "* WARNING: QPA on with LargeCTU for incompatible size, limiting CTU size! 
*\n" ); - msg( WARNING, "***************************************************************************\n" ); + msg( WARNING, "*************************************************************************\n" ); + msg( WARNING, "* WARNING: QPA on with large CTU for <=HD sequences, limiting CTU size! *\n" ); + msg( WARNING, "*************************************************************************\n" ); m_uiCTUSize = m_uiMaxCUWidth; if( ( 1u << m_quadtreeTULog2MaxSize ) > m_uiCTUSize ) m_quadtreeTULog2MaxSize--; @@ -1924,6 +1955,9 @@ bool EncAppCfg::xCheckParameter() #if JVET_K1000_SIMPLIFIED_EMT xConfirmPara( m_EMT, "EMT only allowed with NEXT profile" ); xConfirmPara( m_FastEMT, "EMT only allowed with NEXT profile" ); +#endif +#if JVET_K0157 + xConfirmPara(m_compositeRefEnabled, "Composite Reference Frame is only allowed with NEXT profile"); #endif // ADD_NEW_TOOL : (parameter check) add a check for next tools here } @@ -2217,6 +2251,9 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( m_bipredSearchRange < 0 , "Bi-prediction refinement search range must be more than 0" ); xConfirmPara( m_minSearchWindow < 0, "Minimum motion search window size for the adaptive window ME must be greater than or equal to 0" ); xConfirmPara( m_iMaxDeltaQP > MAX_DELTA_QP, "Absolute Delta QP exceeds supported range (0 to 7)" ); +#if ENABLE_QPA + xConfirmPara( m_bUsePerceptQPA && m_uiDeltaQpRD > 0, "Perceptual QPA cannot be used together with slice-level multiple-QP optimization" ); +#endif #if SHARP_LUMA_DELTA_QP xConfirmPara( m_lumaLevelToDeltaQPMapping.mode && m_uiDeltaQpRD > 0, "Luma-level-based Delta QP cannot be used together with slice level multiple-QP optimization\n" ); #endif @@ -2391,6 +2428,9 @@ bool EncAppCfg::xCheckParameter() xConfirmPara( m_intraConstraintFlag, "IntraConstraintFlag cannot be 1 for inter sequences"); } +#if JVET_K0157 + int multipleFactor = m_compositeRefEnabled ? 
2 : 1; +#endif bool verifiedGOP=false; bool errorGOP=false; int checkGOP=1; @@ -2411,7 +2451,11 @@ bool EncAppCfg::xCheckParameter() for(int i=0; i<m_iGOPSize; i++) { +#if JVET_K0157 + if (m_GOPList[i].m_POC == m_iGOPSize * multipleFactor) +#else if(m_GOPList[i].m_POC==m_iGOPSize) +#endif { xConfirmPara( m_GOPList[i].m_temporalId!=0 , "The last frame in each GOP must have temporal ID = 0 " ); } @@ -2445,7 +2489,11 @@ bool EncAppCfg::xCheckParameter() while(!verifiedGOP&&!errorGOP) { int curGOP = (checkGOP-1)%m_iGOPSize; +#if JVET_K0157 + int curPOC = ((checkGOP - 1) / m_iGOPSize)*m_iGOPSize * multipleFactor + m_GOPList[curGOP].m_POC; +#else int curPOC = ((checkGOP-1)/m_iGOPSize)*m_iGOPSize + m_GOPList[curGOP].m_POC; +#endif if(m_GOPList[curGOP].m_POC<0) { msg( WARNING, "\nError: found fewer Reference Picture Sets than GOPSize\n"); @@ -2472,7 +2520,11 @@ bool EncAppCfg::xCheckParameter() found=true; for(int k=0; k<m_iGOPSize; k++) { +#if JVET_K0157 + if (absPOC % (m_iGOPSize * multipleFactor) == m_GOPList[k].m_POC % (m_iGOPSize * multipleFactor)) +#else if(absPOC%m_iGOPSize == m_GOPList[k].m_POC%m_iGOPSize) +#endif { if(m_GOPList[k].m_temporalId==m_GOPList[curGOP].m_temporalId) { @@ -2524,7 +2576,11 @@ bool EncAppCfg::xCheckParameter() { //step backwards in coding order and include any extra available pictures we might find useful to replace the ones with POC < 0. 
int offGOP = (checkGOP-1+offset)%m_iGOPSize; +#if JVET_K0157 + int offPOC = ((checkGOP - 1 + offset) / m_iGOPSize)*(m_iGOPSize * multipleFactor) + m_GOPList[offGOP].m_POC; +#else int offPOC = ((checkGOP-1+offset)/m_iGOPSize)*m_iGOPSize + m_GOPList[offGOP].m_POC; +#endif if(offPOC>=0&&m_GOPList[offGOP].m_temporalId<=m_GOPList[curGOP].m_temporalId) { bool newRef=false; @@ -2926,7 +2982,7 @@ void EncAppCfg::xPrintParameter() msg( DETAILS, "Real Format : %dx%d %gHz\n", m_iSourceWidth - m_confWinLeft - m_confWinRight, m_iSourceHeight - m_confWinTop - m_confWinBottom, (double)m_iFrameRate / m_temporalSubsampleRatio ); msg( DETAILS, "Internal Format : %dx%d %gHz\n", m_iSourceWidth, m_iSourceHeight, (double)m_iFrameRate / m_temporalSubsampleRatio ); msg( DETAILS, "Sequence PSNR output : %s\n", ( m_printMSEBasedSequencePSNR ? "Linear average, MSE-based" : "Linear average only" ) ); - msg(DETAILS, "Hexadecimal PSNR output : %s\n", ( m_printHexPsnr ? "Enabled" : "Disabled" ) ); + msg( DETAILS, "Hexadecimal PSNR output : %s\n", ( m_printHexPsnr ? "Enabled" : "Disabled" ) ); msg( DETAILS, "Sequence MSE output : %s\n", ( m_printSequenceMSE ? "Enabled" : "Disabled" ) ); msg( DETAILS, "Frame MSE output : %s\n", ( m_printFrameMSE ? "Enabled" : "Disabled" ) ); msg( DETAILS, "Cabac-zero-word-padding : %s\n", ( m_cabacZeroWordPaddingEnabled ? 
"Enabled" : "Disabled" ) ); @@ -3164,6 +3220,9 @@ void EncAppCfg::xPrintParameter() #endif #if JVET_K1000_SIMPLIFIED_EMT msg( VERBOSE, "EMT: %1d(intra) %1d(inter) ", m_EMT & 1, ( m_EMT >> 1 ) & 1 ); +#endif +#if JVET_K0157 + msg(VERBOSE, "CompositeLTReference:%d ", m_compositeRefEnabled); #endif } // ADD_NEW_TOOL (add some output indicating the usage of tools) diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h index 10a0554a5566dda4e2157f8e84a4f11b136eb0a1..2ec9c86e06aa867ee60c1bc34e0936f64a488404 100644 --- a/source/App/EncoderApp/EncAppCfg.h +++ b/source/App/EncoderApp/EncAppCfg.h @@ -115,6 +115,7 @@ protected: bool m_cabacZeroWordPaddingEnabled; bool m_bClipInputVideoToRec709Range; bool m_bClipOutputVideoToRec709Range; + bool m_packedYUVMode; ///< If true, output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data // profile/level Profile::Name m_profile; @@ -229,6 +230,9 @@ protected: int m_FastEMT; ///< XZ: Fast Methods of Enhanced Multiple Transform #endif +#if JVET_K0157 + bool m_compositeRefEnabled; +#endif // ADD_NEW_TOOL : (encoder app) add tool enabling flags and associated parameters here unsigned m_uiMaxCUWidth; ///< max. 
CU width in pixel @@ -285,7 +289,7 @@ protected: int m_maxNumOffsetsPerPic; ///< SAO maximun number of offset per picture bool m_saoCtuBoundary; ///< SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas #if K0238_SAO_GREEDY_MERGE_ENCODING - bool m_saoGreedyMergeEnc; ///< SAO greedy merge encoding algorithm + bool m_saoGreedyMergeEnc; ///< SAO greedy merge encoding algorithm #endif // coding tools (loop filter) bool m_bLoopFilterDisable; ///< flag for using deblocking filter diff --git a/source/App/EncoderApp/encmain.cpp b/source/App/EncoderApp/encmain.cpp index 7c34b5d14e793dab2ce557f07cfc6dbbb090e9f4..6d5bdf109d5ebcfee1e85c19cb14b0b4601d0597 100644 --- a/source/App/EncoderApp/encmain.cpp +++ b/source/App/EncoderApp/encmain.cpp @@ -87,9 +87,9 @@ int main(int argc, char* argv[]) // print information fprintf( stdout, "\n" ); #ifdef SVNREVISION - fprintf( stdout, "VVCSoftware: VTM Encoder Version %s (%s@r%s) ", NEXT_SOFTWARE_VERSION, SVNRELATIVEURL, SVNREVISION /*NV_VERSION*/ ); + fprintf( stdout, "VVCSoftware: BMS Encoder Version %s (%s@r%s) ", NEXT_SOFTWARE_VERSION, SVNRELATIVEURL, SVNREVISION /*NV_VERSION*/ ); #else - fprintf( stdout, "VVCSoftware: VTM Encoder Version %s ", NEXT_SOFTWARE_VERSION /*NV_VERSION*/ ); + fprintf( stdout, "VVCSoftware: BMS Encoder Version %s ", NEXT_SOFTWARE_VERSION /*NV_VERSION*/ ); #endif fprintf( stdout, NVM_ONOS ); fprintf( stdout, NVM_COMPILEDBY ); diff --git a/source/App/SEIRemovalApp/seiremovalmain.cpp b/source/App/SEIRemovalApp/seiremovalmain.cpp index 4a4d79fa62554f374ca7b3359896630432756987..96eb667d378cd6bca665dd9f589b8d96e2b4f37b 100644 --- a/source/App/SEIRemovalApp/seiremovalmain.cpp +++ b/source/App/SEIRemovalApp/seiremovalmain.cpp @@ -57,9 +57,9 @@ int main(int argc, char* argv[]) // print information fprintf( stdout, "\n" ); #ifdef SVNREVISION - fprintf( stdout, "VVCSoftware: VTM Decoder Version %s (%s@r%s) ", NEXT_SOFTWARE_VERSION, SVNRELATIVEURL, SVNREVISION /*NV_VERSION*/ ); + 
fprintf( stdout, "VVCSoftware: BMS Decoder Version %s (%s@r%s) ", NEXT_SOFTWARE_VERSION, SVNRELATIVEURL, SVNREVISION /*NV_VERSION*/ ); #else - fprintf( stdout, "VVCSoftware: VTM Decoder Version %s ", NEXT_SOFTWARE_VERSION /*NV_VERSION*/ ); + fprintf( stdout, "VVCSoftware: BMS Decoder Version %s ", NEXT_SOFTWARE_VERSION /*NV_VERSION*/ ); #endif fprintf( stdout, NVM_ONOS ); fprintf( stdout, NVM_COMPILEDBY ); diff --git a/source/Lib/CommonAnalyserLib/CMakeLists.txt b/source/Lib/CommonAnalyserLib/CMakeLists.txt index ff996a3391d016844131439ed8d495671ec2982c..e0cdf5fa1e65f7bf849ba5296d9a0cd897260f68 100644 --- a/source/Lib/CommonAnalyserLib/CMakeLists.txt +++ b/source/Lib/CommonAnalyserLib/CMakeLists.txt @@ -27,6 +27,9 @@ file( GLOB AVX2_SRC_FILES "../CommonLib/x86/avx2/*.cpp" ) # get sse4.1 source files file( GLOB SSE41_SRC_FILES "../CommonLib/x86/sse41/*.cpp" ) +# get sse4.2 source files +file( GLOB SSE42_SRC_FILES "../CommonLib/x86/sse42/*.cpp" ) + # get libmd5 source files file( GLOB MD5_SRC_FILES "../libmd5/*.cpp" ) @@ -35,7 +38,7 @@ file( GLOB MD5_INC_FILES "../libmd5/*.h" ) # get all source files -set( SRC_FILES ${BASE_SRC_FILES} ${X86_SRC_FILES} ${SSE41_SRC_FILES} ${AVX_SRC_FILES} ${AVX2_SRC_FILES} ${MD5_SRC_FILES} ) +set( SRC_FILES ${BASE_SRC_FILES} ${X86_SRC_FILES} ${SSE41_SRC_FILES} ${SSE42_SRC_FILES} ${AVX_SRC_FILES} ${AVX2_SRC_FILES} ${MD5_SRC_FILES} ) # get all include files set( INC_FILES ${BASE_INC_FILES} ${X86_INC_FILES} ${MD5_INC_FILES} ) @@ -46,7 +49,7 @@ add_library( ${LIB_NAME} STATIC ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} ) target_compile_definitions( ${LIB_NAME} PUBLIC RExt__DECODER_DEBUG_TOOL_STATISTICS=1 ) if( ENABLE_VTM ) - target_compile_definitions( ${LIB_NAME} PUBLIC JEM_TOOLS=0 ) + target_compile_definitions( ${LIB_NAME} PUBLIC BMS_TOOLS=0 ) endif() if( EXTENSION_360_VIDEO ) @@ -86,6 +89,7 @@ target_link_libraries( ${LIB_NAME} Threads::Threads ) # set needed compile definitions set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY 
COMPILE_DEFINITIONS USE_SSE41 ) +set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE42 ) set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX ) set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX2 ) # set needed compile flags @@ -94,6 +98,7 @@ if( MSVC ) set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2" ) elseif( UNIX ) set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" ) + set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.2" ) set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-mavx" ) set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-mavx2" ) endif() diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp index 8d5420fefb0c314ef8ead60648be042b9f1a4cad..d00fd667beaf955fb4fcfc7cb3f3a7986528419f 100644 --- a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp +++ b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp @@ -217,19 +217,25 @@ void AdaptiveLoopFilter::create( const int picWidth, const int picHeight, const // Laplacian based activity for( int i = 0; i < NUM_DIRECTIONS; i++ ) { - m_laplacian[i] = new int*[m_CLASSIFICATION_BLK_SIZE + 5]; - - for( int y = 0; y < m_CLASSIFICATION_BLK_SIZE + 5; y++ ) + if ( m_laplacian[i] == nullptr ) { - m_laplacian[i][y] = new int[m_CLASSIFICATION_BLK_SIZE + 5]; + m_laplacian[i] = new int*[m_CLASSIFICATION_BLK_SIZE + 5]; + + for( int y = 0; y < m_CLASSIFICATION_BLK_SIZE + 5; y++ ) + { + m_laplacian[i][y] = new int[m_CLASSIFICATION_BLK_SIZE + 5]; + } } } // Classification - m_classifier = new AlfClassifier*[picHeight]; - for( int i = 0; i < picHeight; i++ ) + if ( m_classifier == nullptr ) { - m_classifier[i] = new AlfClassifier[picWidth]; + m_classifier = new AlfClassifier*[picHeight]; + for( int i = 0; i < picHeight; i++ ) + { + m_classifier[i] = new AlfClassifier[picWidth]; + 
} } } diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h index 565a121fc4bf2392057cc8b2295cd2c7e90886cc..1763242e3dc0af2fd30165b09045a5028c460863 100644 --- a/source/Lib/CommonLib/Buffer.h +++ b/source/Lib/CommonLib/Buffer.h @@ -366,7 +366,6 @@ void AreaBuf<T>::addAvg( const AreaBuf<const T> &other1, const AreaBuf<const T> template<> void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng ); - template<typename T> void AreaBuf<T>::linearTransform( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng ) { @@ -385,6 +384,7 @@ void AreaBuf<T>::toLast( const ClpRng& clpRng ) template<> void AreaBuf<Pel>::toLast( const ClpRng& clpRng ); + template<typename T> void AreaBuf<T>::removeHighFreq( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng ) { @@ -579,7 +579,8 @@ struct UnitBuf void addAvg ( const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const bool chromaOnly = false, const bool lumaOnly = false); void extendSingleBorderPel(); void extendBorderPel ( unsigned margin ); - void removeHighFreq ( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs); + void removeHighFreq ( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs + ); UnitBuf< T> subBuf (const UnitArea& subArea); const UnitBuf<const T> subBuf (const UnitArea& subArea) const; @@ -648,6 +649,7 @@ void UnitBuf<T>::reconstruct(const UnitBuf<const T> &pred, const UnitBuf<const T } } + template<typename T> void UnitBuf<T>::addAvg(const UnitBuf<const T> &other1, const UnitBuf<const T> &other2, const ClpRngs& clpRngs, const bool chromaOnly /* = false */, const bool lumaOnly /* = false */) { @@ -681,7 +683,8 @@ void UnitBuf<T>::extendBorderPel( unsigned margin ) } template<typename T> -void UnitBuf<T>::removeHighFreq( const UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs) +void UnitBuf<T>::removeHighFreq( const 
UnitBuf<T>& other, const bool bClip, const ClpRngs& clpRngs + ) { for( unsigned i = 0; i < bufs.size(); i++ ) { diff --git a/source/Lib/CommonLib/CMakeLists.txt b/source/Lib/CommonLib/CMakeLists.txt index c633fec92feffc7661ac904de1c2632a7b4e3224..7a91672c034457ed88f3bf059fed4a539d1e7a7e 100644 --- a/source/Lib/CommonLib/CMakeLists.txt +++ b/source/Lib/CommonLib/CMakeLists.txt @@ -24,6 +24,9 @@ file( GLOB AVX_SRC_FILES "x86/avx/*.cpp" ) # get avx2 source files file( GLOB AVX2_SRC_FILES "x86/avx2/*.cpp" ) +# get sse4.2 source files +file( GLOB SSE42_SRC_FILES "x86/sse42/*.cpp" ) + # get sse4.1 source files file( GLOB SSE41_SRC_FILES "x86/sse41/*.cpp" ) @@ -35,7 +38,7 @@ file( GLOB MD5_INC_FILES "../libmd5/*.h" ) # get all source files -set( SRC_FILES ${BASE_SRC_FILES} ${X86_SRC_FILES} ${SSE41_SRC_FILES} ${AVX_SRC_FILES} ${AVX2_SRC_FILES} ${MD5_SRC_FILES} ) +set( SRC_FILES ${BASE_SRC_FILES} ${X86_SRC_FILES} ${SSE41_SRC_FILES} ${SSE42_SRC_FILES} ${AVX_SRC_FILES} ${AVX2_SRC_FILES} ${MD5_SRC_FILES} ) # get all include files set( INC_FILES ${BASE_INC_FILES} ${X86_INC_FILES} ${MD5_INC_FILES} ) @@ -45,7 +48,7 @@ set( INC_FILES ${BASE_INC_FILES} ${X86_INC_FILES} ${MD5_INC_FILES} ) add_library( ${LIB_NAME} STATIC ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} ) if( ENABLE_VTM ) - target_compile_definitions( ${LIB_NAME} PUBLIC JEM_TOOLS=0 ) + target_compile_definitions( ${LIB_NAME} PUBLIC BMS_TOOLS=0 ) endif() if( EXTENSION_360_VIDEO ) @@ -85,6 +88,7 @@ target_link_libraries( ${LIB_NAME} Threads::Threads ) # set needed compile definitions set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE41 ) +set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_SSE42 ) set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX ) set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_DEFINITIONS USE_AVX2 ) # set needed compile flags @@ -93,6 +97,7 @@ if( MSVC ) set_property( SOURCE ${AVX2_SRC_FILES} APPEND 
PROPERTY COMPILE_FLAGS "/arch:AVX2" ) elseif( UNIX ) set_property( SOURCE ${SSE41_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.1" ) + set_property( SOURCE ${SSE42_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-msse4.2" ) set_property( SOURCE ${AVX_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-mavx" ) set_property( SOURCE ${AVX2_SRC_FILES} APPEND PROPERTY COMPILE_FLAGS "-mavx2" ) endif() diff --git a/source/Lib/CommonLib/CodingStatistics.h b/source/Lib/CommonLib/CodingStatistics.h index 619a896553eacc8df47fd3308ea91e1dba6a5e87..43496c0356fad85d3ef477cd0dcc62cd4dd5b41b 100644 --- a/source/Lib/CommonLib/CodingStatistics.h +++ b/source/Lib/CommonLib/CodingStatistics.h @@ -66,12 +66,6 @@ enum CodingStatisticsType STATS__CABAC_BITS__REF_FRM_IDX, STATS__CABAC_BITS__MVD, STATS__CABAC_BITS__MVD_EP, -#if JVET_K_AFFINE - STATS__CABAC_BITS__AFFINE_FLAG, -#if JVET_K0337_AFFINE_6PARA - STATS__CABAC_BITS__AFFINE_TYPE, -#endif -#endif STATS__CABAC_BITS__TRANSFORM_SUBDIV_FLAG, STATS__CABAC_BITS__QT_ROOT_CBF, STATS__CABAC_BITS__DELTA_QP_EP, @@ -79,21 +73,14 @@ enum CodingStatisticsType STATS__CABAC_BITS__QT_CBF, STATS__CABAC_BITS__CROSS_COMPONENT_PREDICTION, STATS__CABAC_BITS__TRANSFORM_SKIP_FLAGS, - STATS__CABAC_BITS__LAST_SIG_X_Y, STATS__CABAC_BITS__SIG_COEFF_GROUP_FLAG, STATS__CABAC_BITS__SIG_COEFF_MAP_FLAG, -#if JVET_K0072 - STATS__CABAC_BITS__PAR_FLAG, -#endif STATS__CABAC_BITS__GT1_FLAG, STATS__CABAC_BITS__GT2_FLAG, STATS__CABAC_BITS__SIGN_BIT, STATS__CABAC_BITS__ESCAPE_BITS, STATS__CABAC_BITS__SAO, -#if JVET_K0371_ALF - STATS__CABAC_BITS__ALF, -#endif STATS__CABAC_TRM_BITS, STATS__CABAC_FIXED_BITS, STATS__CABAC_PCM_ALIGN_BITS, @@ -104,6 +91,22 @@ enum CodingStatisticsType STATS__CABAC_EP_BIT_ALIGNMENT, STATS__CABAC_BITS__ALIGNED_SIGN_BIT, STATS__CABAC_BITS__ALIGNED_ESCAPE_BITS, + STATS__CABAC_BITS__OTHER, + STATS__CABAC_BITS__INVALID, + STATS__TOOL_TOTAL_FRAME,// This is a special case and is not included in the report. 
+#if JVET_K_AFFINE + STATS__CABAC_BITS__AFFINE_FLAG, + STATS__TOOL_AFF, +#if JVET_K0337_AFFINE_6PARA + STATS__CABAC_BITS__AFFINE_TYPE, +#endif +#endif +#if JVET_K0072 + STATS__CABAC_BITS__PAR_FLAG, +#endif +#if JVET_K0371_ALF + STATS__CABAC_BITS__ALF, +#endif #if JVET_K0357_AMVR STATS__CABAC_BITS__IMV_FLAG, #endif @@ -111,13 +114,23 @@ enum CodingStatisticsType STATS__CABAC_BITS__EMT_CU_FLAG, STATS__CABAC_BITS__EMT_TU_INDEX, #endif - STATS__CABAC_BITS__OTHER, - STATS__CABAC_BITS__INVALID, - STATS__TOOL_TOTAL_FRAME,// This is a special case and is not included in the report. +#if JVET_K1000_SIMPLIFIED_EMT + STATS__TOOL_EMT, +#endif STATS__TOOL_TOTAL, STATS__NUM_STATS }; +enum CodingStatisticsMode +{ + STATS__MODE_NONE = 0, + STATS__MODE_BITS = 1, + STATS__MODE_TOOLS = 2, + + STATS__MODE_ALL = + STATS__MODE_BITS | STATS__MODE_TOOLS +}; + static inline const char* getName(CodingStatisticsType name) { static const char *statNames[]= @@ -326,11 +339,13 @@ public: friend class CodingStatistics; }; + int m_mode; + private: CodingStatisticsData data; - CodingStatistics() : data() + CodingStatistics() : m_mode(STATS__MODE_ALL), data() { } @@ -408,11 +423,8 @@ private: printf( "\n" ); } -public: - - ~CodingStatistics() + void OutputBitStats() { -#if RExt__DECODER_DEBUG_BIT_STATISTICS const int64_t es = CODINGSTATISTICS_ENTROPYSCALE; int64_t countTotal = 0; @@ -594,9 +606,82 @@ public: OutputDashedLine( "GRAND TOTAL" ); epTotalBits += cavlcTotalBits; OutputLine ( "TOTAL", '~', "~~GT~~", "~~GT~~", "~~GT~~", cabacTotalBits, epTotalBits ); + } + + void OutputToolStats() + { + printf("\n"); + printf( " %-45s- Width Height Type Count Impacted pixels %% Impacted pixels\n", "Tools statistics" ); + OutputDashedLine( "" ); + + const uint64_t toolCount = STATS__TOOL_TOTAL - (STATS__TOOL_TOTAL_FRAME + 1); + StatTool subTotalTool[toolCount]; + StatTool statTotalTool[toolCount][CODING_STATS_NUM_SUBCLASSES]; + uint64_t totalPixels = GetStatisticTool( STATS__TOOL_TOTAL_FRAME ).pixels; + 
for( int i = 0; i < toolCount; i++ ) + { + const int type = i + (STATS__TOOL_TOTAL_FRAME + 1); + const char *pName = getName( CodingStatisticsType( type ) ); + + for( uint32_t c = 0; c < CODING_STATS_NUM_SUBCLASSES; c++ ) + { + StatTool &sTool = data.statistics_tool[type][c]; + if( sTool.count == 0 ) + { + continue; + } + + uint32_t wIdx = CodingStatisticsClassType::GetSubClassWidth( c ); + uint32_t hIdx = CodingStatisticsClassType::GetSubClassHeight( c ); + OutputLine( pName, ':', wIdx, hIdx, CodingStatisticsClassType::GetSubClassString( c ), sTool, totalPixels ); + + statTotalTool[i][c] += sTool; + subTotalTool[i] += sTool; + } + + if (subTotalTool[i].count != 0) + { + OutputLine( pName, '~', "~~ST~~", "~~ST~~", "~~ST~~", subTotalTool[i], totalPixels ); + } + } + + for( int i = 0; i < toolCount; i++ ) + { + const int type = i + (STATS__TOOL_TOTAL_FRAME + 1); + const char *pName = getName( CodingStatisticsType( type ) ); + + if (subTotalTool[i].count != 0) + OutputDashedLine( "Break down by tool/Channel type" ); + + for( uint32_t c = 0; c < CODING_STATS_NUM_SUBCLASSES; c += CODING_STATS_NUM_SIZES ) + { + StatTool typeTotalTool; + for( uint32_t w = 0; w < CODING_STATS_NUM_WIDTHS; w++ ) + { + for( uint32_t h = 0; h < CODING_STATS_NUM_HEIGHTS; h++ ) + typeTotalTool += statTotalTool[i][c + h * CODING_STATS_NUM_WIDTHS + w]; + } + + if( typeTotalTool.count != 0 ) + { + OutputLine( pName, '=', "-", "-", CodingStatisticsClassType::GetSubClassString( c ), typeTotalTool, totalPixels ); + } + } + } + } + +public: + + ~CodingStatistics() + { +#if RExt__DECODER_DEBUG_BIT_STATISTICS + if (m_mode & STATS__MODE_BITS) + OutputBitStats(); #endif //RExt__DECODER_DEBUG_BIT_STATISTICS #ifdef RExt__DECODER_DEBUG_TOOL_STATISTICS + if (m_mode & STATS__MODE_TOOLS) + OutputToolStats(); #endif //RExt__DECODER_DEBUG_TOOL_STATISTICS } diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp index 
b6475c12c99705eab6e18a262dbe20964db70fe9..55c7eebec481a2c82e238f989bd3af1d8886681c 100644 --- a/source/Lib/CommonLib/CodingStructure.cpp +++ b/source/Lib/CommonLib/CodingStructure.cpp @@ -557,7 +557,9 @@ cTUTraverser CodingStructure::traverseTUs( const UnitArea& unit, const ChannelTy void CodingStructure::allocateVectorsAtPicLevel() { - const int twice = ( !pcv->ISingleTree && slice->isIntra() && pcv->chrFormat != CHROMA_400 ) ? 2 : 1; + const int twice = ( + !pcv->ISingleTree && slice->isIntra() + && pcv->chrFormat != CHROMA_400 ) ? 2 : 1; size_t allocSize = twice * unitScale[0].scale( area.blocks[0].size() ).area(); cus.reserve( allocSize ); diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h index 15e5ea4dfd717f85164fc88ded69d70ce83ff0cf..df7337a1cad487139cf6ab0b2dfeef57cb619028 100644 --- a/source/Lib/CommonLib/CodingStructure.h +++ b/source/Lib/CommonLib/CodingStructure.h @@ -58,7 +58,6 @@ enum PictureType PIC_ORG_RESI, NUM_PIC_TYPES }; - extern XUCache g_globalUnitCache; // --------------------------------------------------------------------------- @@ -147,7 +146,6 @@ public: cCUTraverser traverseCUs(const UnitArea& _unit, const ChannelType _chType) const; cPUTraverser traversePUs(const UnitArea& _unit, const ChannelType _chType) const; cTUTraverser traverseTUs(const UnitArea& _unit, const ChannelType _chType) const; - // --------------------------------------------------------------------------- // encoding search utilities // --------------------------------------------------------------------------- diff --git a/source/Lib/CommonLib/Common.h b/source/Lib/CommonLib/Common.h index a10194a5dffab672a5a45d59c80531d0d813737d..ad0e5ec79adb4872eef350b34eac84e9ce427bbc 100644 --- a/source/Lib/CommonLib/Common.h +++ b/source/Lib/CommonLib/Common.h @@ -114,7 +114,6 @@ struct UnitScale Size scale( const Size &size ) const { return { size.width >> posx, size.height >> posy }; } Area scale( const Area &_area ) const { return 
Area( scale( _area.pos() ), scale( _area.size() ) ); } }; - inline size_t rsAddr(const Position &pos, const uint32_t stride, const UnitScale &unitScale ) { return (size_t)(stride >> unitScale.posx) * (size_t)(pos.y >> unitScale.posy) + (size_t)(pos.x >> unitScale.posx); diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h index 7c8e0c1d131dfdf15e61305bf6aa46907fcd0417..d878f43d5bd1a0489a65f8b5aafc010739d7bd89 100644 --- a/source/Lib/CommonLib/CommonDef.h +++ b/source/Lib/CommonLib/CommonDef.h @@ -331,6 +331,7 @@ static const int AFFINE_MAX_NUM_COMB = 12; ///< max static const int AFFINE_MIN_BLOCK_SIZE = 4; ///< Minimum affine MC block size #endif + #if W0038_DB_OPT static const int MAX_ENCODER_DEBLOCKING_QUALITY_LAYERS = 8 ; #endif @@ -401,7 +402,6 @@ static const unsigned C806_ALF_TEMPPRED_NUM = 6; static const int NTAPS_LUMA = 8; ///< Number of taps for luma static const int NTAPS_CHROMA = 4; ///< Number of taps for chroma - // ==================================================================================================================== // Macro functions // ==================================================================================================================== diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp index d7d21d2fce751fc8d80eb670e6cec8687021e779..b387c12caabc004df24bc3a9bb36e0313f6cf210 100644 --- a/source/Lib/CommonLib/Contexts.cpp +++ b/source/Lib/CommonLib/Contexts.cpp @@ -383,6 +383,7 @@ const CtxSet ContextSetCfg::AffineType = ContextSetCfg::addCtxSet #endif #endif + const CtxSet ContextSetCfg::Mvd = ContextSetCfg::addCtxSet ({ { 169, 198,}, @@ -804,7 +805,7 @@ void CtxStore<BinProbModel>::init( int qp, int initId ) const std::vector<uint8_t>& initTable = ContextSetCfg::getInitTable( initId ); CHECK( m_CtxBuffer.size() != initTable.size(), "Size of init table (" << initTable.size() << ") does not match size of context buffer (" << m_CtxBuffer.size() << ")." 
); - int clippedQP = std::min( std::max( 0, qp ), MAX_QP ); + int clippedQP = Clip3( 0, MAX_QP, qp ); for( std::size_t k = 0; k < m_CtxBuffer.size(); k++ ) { m_CtxBuffer[k].init( clippedQP, initTable[k] ); diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index b5e4bc9dd77d8e79b3e7d0776214029da55155a3..89899afb677fe2f02a92f5037ae8298172c68421 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -109,7 +109,6 @@ void InterPrediction::destroy() m_filteredBlockTmp[i][c] = nullptr; } } - } void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC ) @@ -124,7 +123,6 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC ) } m_currChromaFormat = chromaFormatIDC; - if( m_acYuvPred[REF_PIC_LIST_0][COMPONENT_Y] == nullptr ) // check if first is null (in which case, nothing initialised yet) { for( uint32_t c = 0; c < MAX_NUM_COMPONENT; c++ ) @@ -148,6 +146,7 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC ) } } + m_iRefListIdx = -1; } @@ -365,7 +364,9 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R } #endif -void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi ) + +void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi +) { const SPS &sps = *pu.cs->sps; @@ -400,7 +401,6 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& for( uint32_t comp = COMPONENT_Y; comp < pcYuvPred.bufs.size() && comp <= m_maxCompIDToPred; comp++ ) { const ComponentID compID = ComponentID( comp ); - #if JVET_K_AFFINE if ( pu.cu->affine ) { @@ -526,7 +526,6 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, 
pu.blocks[compID].size() ) ); } - if( yFrac == 0 ) { m_if.filterHor(compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, width, height, xFrac, rndRes, chFmt, clpRng); @@ -537,7 +536,7 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio } else { - PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]); + PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]); int vFilterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA; m_if.filterHor(compID, (Pel*) refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, width, height + vFilterSize - 1, xFrac, false, chFmt, clpRng); @@ -545,7 +544,6 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio m_if.filterVer(compID, (Pel*) tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, width, height, yFrac, false, rndRes, chFmt, clpRng); JVET_J0090_SET_CACHE_ENABLE( true ); } - } #if JVET_K_AFFINE @@ -761,7 +759,8 @@ void InterPrediction::xWeightedAverage( const PredictionUnit& pu, const CPelUnit } } -void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList ) +void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBuf, const RefPicList &eRefPicList +) { CodingStructure &cs = *pu.cs; const PPS &pps = *cs.pps; @@ -801,7 +800,8 @@ void InterPrediction::motionCompensation( PredictionUnit &pu, PelUnitBuf &predBu return; } -void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRefPicList ) +void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRefPicList +) { for( auto &pu : CU::traversePUs( cu ) ) { @@ -810,10 +810,12 @@ void InterPrediction::motionCompensation( CodingUnit &cu, const RefPicList &eRef } } -void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/ ) 
+void InterPrediction::motionCompensation( PredictionUnit &pu, const RefPicList &eRefPicList /*= REF_PIC_LIST_X*/ +) { PelUnitBuf predBuf = pu.cs->getPredBuf( pu ); - motionCompensation( pu, predBuf, eRefPicList ); + motionCompensation( pu, predBuf, eRefPicList + ); } diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h index a895322cead55ccc2ae4e85a53d3fe50bca856ee..cbe87a5165bd8774d59e4c1d92a3971ba862a6a4 100644 --- a/source/Lib/CommonLib/InterPrediction.h +++ b/source/Lib/CommonLib/InterPrediction.h @@ -49,7 +49,6 @@ #include "RdCost.h" #include "ContextModelling.h" - // forward declaration class Mv; @@ -61,7 +60,6 @@ class Mv; // Class definition // ==================================================================================================================== - class InterPrediction : public WeightPrediction { private: @@ -84,7 +82,8 @@ protected: int m_iRefListIdx; - void xPredInterUni ( const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi ); + void xPredInterUni ( const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi + ); void xPredInterBi ( PredictionUnit& pu, PelUnitBuf &pcYuvPred ); void xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng ); @@ -99,7 +98,6 @@ protected: #if JVET_K0346 void xSubPuMC(PredictionUnit& pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X); #endif - void destroy(); @@ -114,9 +112,12 @@ public: void init (RdCost* pcRdCost, ChromaFormat chromaFormatIDC); // inter - void motionCompensation (PredictionUnit &pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X); - void motionCompensation (PredictionUnit &pu, const RefPicList &eRefPicList = REF_PIC_LIST_X); - void motionCompensation (CodingUnit &cu, const RefPicList &eRefPicList = REF_PIC_LIST_X); + void 
motionCompensation (PredictionUnit &pu, PelUnitBuf& predBuf, const RefPicList &eRefPicList = REF_PIC_LIST_X + ); + void motionCompensation (PredictionUnit &pu, const RefPicList &eRefPicList = REF_PIC_LIST_X + ); + void motionCompensation (CodingUnit &cu, const RefPicList &eRefPicList = REF_PIC_LIST_X + ); #if JVET_J0090_MEMORY_BANDWITH_MEASURE void cacheAssign( CacheModel *cache ); diff --git a/source/Lib/CommonLib/LoopFilter.cpp b/source/Lib/CommonLib/LoopFilter.cpp index 96a0dda5b7157dd6be99d3d00bb6f53af05cfcb4..926240e1e5253f7daf95a2ce8d5b62853d35682d 100644 --- a/source/Lib/CommonLib/LoopFilter.cpp +++ b/source/Lib/CommonLib/LoopFilter.cpp @@ -421,7 +421,6 @@ unsigned LoopFilter::xGetBoundaryStrengthSingle ( const CodingUnit& cu, const De const Slice& sliceQ = *cu.slice; const Position& cuPosLuma = cu.lumaPos(); - const Position& posQ = localPos; const Position posP = ( edgeDir == EDGE_VER ) ? posQ.offset( -1, 0 ) : posQ.offset( 0, -1 ); diff --git a/source/Lib/CommonLib/MotionInfo.h b/source/Lib/CommonLib/MotionInfo.h index 001b84ad427718adf2ae4404ff1c4f7c8c2cb897..d90715d127b887d1027ac638717921697392a53a 100644 --- a/source/Lib/CommonLib/MotionInfo.h +++ b/source/Lib/CommonLib/MotionInfo.h @@ -108,7 +108,6 @@ struct MotionInfo Mv mv [ NUM_REF_PIC_LIST_01 ]; int16_t refIdx [ NUM_REF_PIC_LIST_01 ]; - MotionInfo() : isInter( false ), interDir( 0 ), sliceIdx( 0 ), refIdx{ NOT_VALID, NOT_VALID } { } // ensure that MotionInfo(0) produces '\x000....' 
bit pattern - needed to work with AreaBuf - don't use this constructor for anything else MotionInfo( int i ) : isInter( i != 0 ), interDir( 0 ), sliceIdx( 0 ), refIdx{ 0, 0 } { CHECKD( i != 0, "The argument for this constructor has to be '0'" ); } @@ -143,4 +142,5 @@ struct MotionInfo } }; + #endif // __MOTIONINFO__ diff --git a/source/Lib/CommonLib/Mv.h b/source/Lib/CommonLib/Mv.h index e74c7fce2a17e3e5aad232edc6e17c9681fe1d3c..001d68ebe7c785a78f0df789d5d7ff68262498df 100644 --- a/source/Lib/CommonLib/Mv.h +++ b/source/Lib/CommonLib/Mv.h @@ -265,7 +265,6 @@ public: } #endif };// END CLASS DEFINITION MV - #if JVET_K0357_AMVR void roundMV( Mv& rcMv, unsigned imvShift ); #endif diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp index a90cf4aaeb9371c3d10895fc60f8c6d2a9530ad3..525b8cc5fd7f134105e491908a4942e9551a86f1 100644 --- a/source/Lib/CommonLib/Picture.cpp +++ b/source/Lib/CommonLib/Picture.cpp @@ -728,6 +728,10 @@ Picture::Picture() { m_prevQP[i] = -1; } +#if JVET_K0157 + m_spliceIdx = NULL; + m_ctuNums = 0; +#endif } void Picture::create(const ChromaFormat &_chromaFormat, const Size &size, const unsigned _maxCUSize, const unsigned _margin, const bool _decoder) @@ -788,6 +792,13 @@ void Picture::destroy() tileMap = nullptr; } #endif +#if JVET_K0157 + if (m_spliceIdx) + { + delete[] m_spliceIdx; + m_spliceIdx = NULL; + } +#endif } void Picture::createTempBuffers( const unsigned _maxCUSize ) @@ -903,6 +914,14 @@ void Picture::finalInit( const SPS& sps, const PPS& pps ) tileMap = new TileMap; tileMap->create( sps, pps ); #endif +#if JVET_K0157 + if (m_spliceIdx == NULL) + { + m_ctuNums = cs->pcv->sizeInCtus; + m_spliceIdx = new int[m_ctuNums]; + memset(m_spliceIdx, 0, m_ctuNums * sizeof(int)); + } +#endif } void Picture::allocateNewSlice() @@ -1113,3 +1132,25 @@ Pel* Picture::getOrigin( const PictureType &type, const ComponentID compID ) con return M_BUFS( jId, type ).getOrigin( compID ); } + +#if JVET_K0157 +void 
Picture::createSpliceIdx(int nums) +{ + m_ctuNums = nums; + m_spliceIdx = new int[m_ctuNums]; + memset(m_spliceIdx, 0, m_ctuNums * sizeof(int)); +} + +bool Picture::getSpliceFull() +{ + int count = 0; + for (int i = 0; i < m_ctuNums; i++) + { + if (m_spliceIdx[i] != 0) + count++; + } + if (count < m_ctuNums * 0.25) + return false; + return true; +} +#endif diff --git a/source/Lib/CommonLib/Picture.h b/source/Lib/CommonLib/Picture.h index f9b360b987dccf6c160fb7063b6e350e56d8d5f6..d742e85cd8a0c7277c44472ab49918d74a3006d8 100644 --- a/source/Lib/CommonLib/Picture.h +++ b/source/Lib/CommonLib/Picture.h @@ -225,6 +225,14 @@ struct Picture : public UnitArea int getPOC() const { return poc; } void setBorderExtension( bool bFlag) { m_bIsBorderExtended = bFlag;} Pel* getOrigin( const PictureType &type, const ComponentID compID ) const; + +#if JVET_K0157 + int getSpliceIdx(uint32_t idx) const { return m_spliceIdx[idx]; } + void setSpliceIdx(uint32_t idx, int poc) { m_spliceIdx[idx] = poc; } + void createSpliceIdx(int nums); + bool getSpliceFull(); +#endif + public: bool m_bIsBorderExtended; bool referenced; @@ -240,6 +248,11 @@ public: uint32_t layer; uint32_t depth; +#if JVET_K0157 + int* m_spliceIdx; + int m_ctuNums; +#endif + #if ENABLE_SPLIT_PARALLELISM #if ENABLE_WPP_PARALLELISM PelStorage m_bufs[( PARL_SPLIT_MAX_NUM_JOBS * PARL_WPP_MAX_NUM_THREADS )][NUM_PIC_TYPES]; diff --git a/source/Lib/CommonLib/Quant.cpp b/source/Lib/CommonLib/Quant.cpp index fddd9d2291ba5cd4d2dd7a9ecf9a17f43cb1c75c..61ebccbf0a90aa0bbc79003642a72c5ba31e8b5f 100644 --- a/source/Lib/CommonLib/Quant.cpp +++ b/source/Lib/CommonLib/Quant.cpp @@ -793,7 +793,8 @@ void Quant::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf const int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset - const int64_t iAdd = int64_t(tu.cs->slice->getSliceType() == 
I_SLICE ? 171 : 85) << int64_t(iQBits - 9); + const int64_t iAdd = int64_t(tu.cs->slice->getSliceType() == I_SLICE + ? 171 : 85) << int64_t(iQBits - 9); #if HEVC_USE_SIGN_HIDING const int qBits8 = iQBits - 8; #endif diff --git a/source/Lib/CommonLib/RdCost.h b/source/Lib/CommonLib/RdCost.h index 766816757c3bd86b2f9cb5a27d903303ccfaff81..c33ac7457a500d00821ce02c7feded2a7b3dd1ca 100644 --- a/source/Lib/CommonLib/RdCost.h +++ b/source/Lib/CommonLib/RdCost.h @@ -89,7 +89,10 @@ public: // - 0 = no subsampling, 1 = even rows, 2 = every 4th, etc. int subShift; - DistParam() : org(), cur(), step( 1 ), bitDepth( 0 ), useMR( false ), applyWeight( false ), isBiPred( false ), wpCur( nullptr ), compID( MAX_NUM_COMPONENT ), maximumDistortionForEarlyExit( std::numeric_limits<Distortion>::max() ), subShift( 0 ) { } + DistParam() : + org(), cur(), step( 1 ), bitDepth( 0 ), useMR( false ), applyWeight( false ), isBiPred( false ), wpCur( nullptr ), compID( MAX_NUM_COMPONENT ), maximumDistortionForEarlyExit( std::numeric_limits<Distortion>::max() ), subShift( 0 ) + + { } }; /// RD cost computation class @@ -116,7 +119,6 @@ private: int m_iCostScale; bool m_useQtbt; - public: RdCost(); virtual ~RdCost(); @@ -169,6 +171,7 @@ public: void setCostScale ( int iCostScale ) { m_iCostScale = iCostScale; } Distortion getCost ( uint32_t b ) { return Distortion( m_motionLambda * b ); } + #if ENABLE_SPLIT_PARALLELISM void copyState( const RdCost& other ); #endif diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp index dfb4ce1b5fb444f4a2cc837857543a7a5ff79aab..84c9beca97ea9ca805a2651e8f4d1b721e4a5e06 100644 --- a/source/Lib/CommonLib/Rom.cpp +++ b/source/Lib/CommonLib/Rom.cpp @@ -186,6 +186,7 @@ const int g_aiNonLMPosThrs[] = { 3, 1, 0 }; #endif + // initialize ROM variables void initROM() { diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp index 7e26fc803f31578221081ed0b1f997c3e4949196..be7267d1d8605577ba9aec499c404c631344f8d8 100644 --- 
a/source/Lib/CommonLib/Slice.cpp +++ b/source/Lib/CommonLib/Slice.cpp @@ -419,7 +419,6 @@ void Slice::setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr, bool b pcRefPic = xGetLongTermRefPic(rcListPic, m_pRPS->getPOC(i), m_pRPS->getCheckLTMSBPresent(i)); } } - // ref_pic_list_init Picture* rpsCurrList0[MAX_NUM_REF+1]; Picture* rpsCurrList1[MAX_NUM_REF+1]; @@ -432,7 +431,7 @@ void Slice::setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr, bool b // - Otherwise, when the current picture contains a P or B slice, the value of NumPocTotalCurr shall not be equal to 0. if (getRapPicFlag()) { - CHECK(numPicTotalCurr != 0, "Invalid state"); + CHECK(numPicTotalCurr != 0, "Invalid state"); } if (m_eSliceType == I_SLICE) @@ -502,7 +501,6 @@ void Slice::setRefPicList( PicList& rcListPic, bool checkNumPocTotalCurr, bool b m_bIsUsedAsLongTerm[REF_PIC_LIST_1][rIdx] = ( cIdx >= NumPicStCurr0 + NumPicStCurr1 ); } } - // For generalized B // note: maybe not existed case (always L0 is copied to L1 if L1 is empty) if( bCopyL0toL1ErrorCase && isInterB() && getNumRefIdx(REF_PIC_LIST_1) == 0) @@ -533,7 +531,7 @@ int Slice::getNumRpsCurrTempList() const numRpsCurrTempList++; } } - return numRpsCurrTempList; + return numRpsCurrTempList; } void Slice::initEqualRef() @@ -1312,7 +1310,11 @@ int Slice::checkThatAllRefPicsAreAvailable( PicList& rcListPic, const ReferenceP /** Function for constructing an explicit Reference Picture Set out of the available pictures in a referenced Reference Picture Set */ -void Slice::createExplicitReferencePictureSetFromReference( PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool isRAP, int pocRandomAccess, bool bUseRecoveryPoint, const bool bEfficientFieldIRAPEnabled) +void Slice::createExplicitReferencePictureSetFromReference(PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool isRAP, int pocRandomAccess, bool bUseRecoveryPoint, const bool bEfficientFieldIRAPEnabled +#if JVET_K0157 + , bool 
isEncodeLtRef, bool isCompositeRefEnable +#endif +) { Picture* rpcPic; int i, j; @@ -1352,7 +1354,11 @@ void Slice::createExplicitReferencePictureSetFromReference( PicList& rcListPic, } else { +#if JVET_K0157 + if (bEfficientFieldIRAPEnabled && rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC() + (isCompositeRefEnable ? 2 : 1)) +#else if(bEfficientFieldIRAPEnabled && rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC()+1) +#endif { irapIsInRPS = true; } @@ -1371,7 +1377,11 @@ void Slice::createExplicitReferencePictureSetFromReference( PicList& rcListPic, while ( iterPic != rcListPic.end()) { rpcPic = *(iterPic++); +#if JVET_K0157 + if (rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC() + (isCompositeRefEnable ? 2 : 1)) +#else if(rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC()+1) +#endif { pLocalRPS->setDeltaPOC(k, 1); pLocalRPS->setUsed(k, true); @@ -1381,6 +1391,53 @@ void Slice::createExplicitReferencePictureSetFromReference( PicList& rcListPic, } } } +#if JVET_K0157 + if (isCompositeRefEnable && isEncodeLtRef) + { + useNewRPS = true; + nrOfNegativePictures = 0; + nrOfPositivePictures = 0; + for (i = 0; i<pReferencePictureSet->getNumberOfPictures(); i++) + { + j = 0; + k = 0; + + // loop through all pictures in the reference picture buffer + PicList::iterator iterPic = rcListPic.begin(); + while (iterPic != rcListPic.end()) + { + j++; + rpcPic = *(iterPic++); + + if (rpcPic->getPOC() == this->getPOC() + 1 + pReferencePictureSet->getDeltaPOC(i) && rpcPic->referenced) + { + // This picture exists as a reference picture + // and should be added to the explicit Reference Picture Set + pLocalRPS->setDeltaPOC(k, pReferencePictureSet->getDeltaPOC(i) + 1); + pLocalRPS->setUsed(k, pReferencePictureSet->getUsed(i) && (!isRAP)); + if (bEfficientFieldIRAPEnabled) + { + 
pLocalRPS->setUsed(k, pLocalRPS->getUsed(k) && !(bUseRecoveryPoint && this->getPOC() > pocRandomAccess && this->getPOC() + pReferencePictureSet->getDeltaPOC(i) + 1 < pocRandomAccess)); + } + + if (pLocalRPS->getDeltaPOC(k) < 0) + { + nrOfNegativePictures++; + } + else + { + if (bEfficientFieldIRAPEnabled && rpcPic->getPOC() == this->getAssociatedIRAPPOC() && this->getAssociatedIRAPPOC() == this->getPOC() + 2) + { + irapIsInRPS = true; + } + nrOfPositivePictures++; + } + k++; + } + } + } + } +#endif pLocalRPS->setNumberOfNegativePictures(nrOfNegativePictures); pLocalRPS->setNumberOfPositivePictures(nrOfPositivePictures); pLocalRPS->setNumberOfPictures(nrOfNegativePictures+nrOfPositivePictures); @@ -1641,6 +1698,9 @@ SPSNext::SPSNext( SPS& sps ) #else #endif , m_MTTMode ( 0 ) +#if JVET_K0157 + , m_compositeRefEnabled ( false ) +#endif // ADD_NEW_TOOL : (sps extension) add tool enabling flags here (with "false" as default values) { } diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h index 5164daada2b1b45d73837a0b4b009635c7863015..94a7fe402e8d135c4b8865b4c8fc7809202eb273 100644 --- a/source/Lib/CommonLib/Slice.h +++ b/source/Lib/CommonLib/Slice.h @@ -859,6 +859,9 @@ private: // multi type tree (QTBT + triple split) unsigned m_MTTMode; +#if JVET_K0157 + bool m_compositeRefEnabled; //composite longterm reference +#endif // ADD_NEW_TOOL : (sps extension) add tool enabling flags and associated parameters here public: @@ -915,7 +918,6 @@ public: void setUseInterEMT ( bool b ) { m_InterEMT = b; } bool getUseInterEMT () const { return m_InterEMT; } #endif - //===== additional parameters ===== // qtbt void setCTUSize ( unsigned ctuSize ) { m_CTUSize = ctuSize; } @@ -959,6 +961,10 @@ public: unsigned getMTTMode () const { return m_MTTMode; } void setMTTMode ( unsigned mode ) { m_MTTMode = mode; m_MTTEnabled = ( m_MTTMode != 0 ); } +#if JVET_K0157 + void setUseCompositeRef(bool b) { m_compositeRefEnabled = b; } + bool getUseCompositeRef() const { return 
m_compositeRefEnabled; } +#endif // ADD_NEW_TOOL : (sps extension) add access functions for tool enabling flags and associated parameters here }; @@ -1764,9 +1770,13 @@ public: bool isTemporalLayerSwitchingPoint( PicList& rcListPic ) const; bool isStepwiseTemporalLayerSwitchingPointCandidate( PicList& rcListPic ) const; int checkThatAllRefPicsAreAvailable( PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool printErrors, int pocRandomAccess = 0, bool bUseRecoveryPoint = false) const; - void createExplicitReferencePictureSetFromReference( PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool isRAP, int pocRandomAccess, bool bUseRecoveryPoint, const bool bEfficientFieldIRAPEnabled); + void createExplicitReferencePictureSetFromReference(PicList& rcListPic, const ReferencePictureSet *pReferencePictureSet, bool isRAP, int pocRandomAccess, bool bUseRecoveryPoint, const bool bEfficientFieldIRAPEnabled +#if JVET_K0157 + , bool isEncodeLtRef, bool isCompositeRefEnable +#endif + ); void setMaxNumMergeCand(uint32_t val ) { m_maxNumMergeCand = val; } - uint32_t getMaxNumMergeCand() const { return m_maxNumMergeCand; } + uint32_t getMaxNumMergeCand() const { return m_maxNumMergeCand; } void setNoOutputPriorPicsFlag( bool val ) { m_noOutputPriorPicsFlag = val; } bool getNoOutputPriorPicsFlag() const { return m_noOutputPriorPicsFlag; } diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index c3d3c15ef78e2f63578faec4b4f27543bac2f696..52235829949eedd056e0b520df558d109b519393 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -703,6 +703,13 @@ void TrQuant::xT( const TransformUnit &tu, const ComponentID &compID, const CPel const unsigned ucMode = getEmtMode ( tu, compID ); const unsigned ucTrIdx = getEmtTrIdx( tu, compID ); + if( ucTrIdx != DCT2 ) + { +#if RExt__DECODER_DEBUG_TOOL_STATISTICS + CodingStatistics::IncrementStatisticTool( CodingStatisticsClassType{ STATS__TOOL_EMT, 
uint32_t( iWidth ), uint32_t( iHeight ), compID } ); +#endif + } + #if INTRA67_3MPM #if HEVC_USE_4x4_DSTVII xTrMxN_EMT(channelBitDepth, resi.buf, resi.stride, dstCoeff.buf, iWidth, iHeight, useDST, maxLog2TrDynamicRange, ucMode, ucTrIdx diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 499837ae61f1eacb1a90213179ad92f3c819a48b..ed0b7a5e665abb0e9b58caa77c8856e41e57ec10 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -50,6 +50,9 @@ #include <assert.h> #include <cassert> + + + #define JVET_K1000_SIMPLIFIED_EMT 1 // EMT with only DCT-2, DCT-8 and DST-7 #define JVET_K0371_ALF 1 @@ -75,6 +78,9 @@ #define REUSE_CU_RESULTS 1 #endif +#define JVET_K0390_RATECTRL 1 +#define RATECTRL_FIX_FULLNBIT 1 //fix the QP-lambda relationship in rate control if JVET-K0154 for FULL_NBIT is enabled + #define JVET_K0352_MERGE_ENCOPT 1 // encoder optimization for merge #define JVET_K0556_MAX_TT_SIZE_64 1 // Maximum TT size is set to 64x64 for P/B-slice @@ -82,11 +88,12 @@ #define JVET_K0554 1 // when adopting, also remove the macro HM_QTBT_ONLY_QT_IMPLICIT (keep the case for value 0) #define JVET_K0346 1 // simplifications on ATMVP + #define JVET_K0063_PDPC_SIMP 1 // Simplified PDPC #define JVET_K0351_LESS_CONSTRAINT 1 // Only disallow binary split with same orientation in center partition of the ternary split and release the other constraints in K0351. 
-#define JVET_K0251_QP_EXT 1 // Extending the QP parameter value range for coarse quantization +#define JVET_K0251_QP_EXT 1 // Extending the QP parameter value range for coarse quantization #define JVET_K_AFFINE 1 #if JVET_K_AFFINE @@ -104,6 +111,7 @@ #define JVET_K0357_AMVR 1 // Adaptive motion vector resolution separated from JEM_TOOLS macro + #ifndef JVET_B0051_NON_MPM_MODE #define JVET_B0051_NON_MPM_MODE ( 1 && JEM_TOOLS ) @@ -157,11 +165,16 @@ #ifndef ENABLE_TRACING #define ENABLE_TRACING 0 // DISABLE by default (enable only when debugging, requires 15% run-time in decoding) -- see documentation in 'doc/DTrace for NextSoftware.pdf' - +#if ENABLE_TRACING +#define K0149_BLOCK_STATISTICS 1 // enables block statistics, which can be analysed with YUView (https://github.com/IENT/YUView) +#if K0149_BLOCK_STATISTICS +#define BLOCK_STATS_AS_CSV 0 // statistics will be written in a comma separated value format. this is not supported by YUView +#endif +#endif #endif // ! ENABLE_TRACING #define WCG_EXT 0 // part of JEM sharp Luma qp -#define WCG_WPSNR WCG_EXT +#define WCG_WPSNR WCG_EXT #if HEVC_TOOLS #define HEVC_USE_INTRA_SMOOTHING_T32 1 @@ -180,6 +193,7 @@ #define HEVC_USE_SIGN_HIDING 1 #endif +#define JVET_K0157 1 #define KEEP_PRED_AND_RESI_SIGNALS 0 @@ -298,7 +312,7 @@ #define SHARP_LUMA_DELTA_QP 1 ///< include non-normative LCU deltaQP and normative chromaQP change #define ER_CHROMA_QP_WCG_PPS 1 ///< Chroma QP model for WCG used in Anchor 3.2 -#define ENABLE_QPA 0 +#define ENABLE_QPA 1 ///< Non-normative perceptual QP adaptation according to JVET-H0047 and JVET-K0206. 
Deactivated by default, activated using encoder arguments --PerceptQPA=1 --SliceChromaQPOffsetPeriodicity=1 @@ -1422,8 +1436,8 @@ enum AlfFilterType struct AlfFilterShape { AlfFilterShape( int size ) - : filterLength( size ), - numCoeff( size * size / 4 + 1 ), + : filterLength( size ), + numCoeff( size * size / 4 + 1 ), filterSize( size * size / 2 + 1 ) { if( size == 5 ) @@ -1466,7 +1480,7 @@ struct AlfFilterShape 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 1, 1 + 2, 2, 2, 1, 1 }; golombIdx = { diff --git a/source/Lib/CommonLib/Unit.cpp b/source/Lib/CommonLib/Unit.cpp index 9b14f80f9b0655fa01a05983c6ecf80df72a2c5a..05eff4c40d068e0679a55b01e5c6d75815b5ed9b 100644 --- a/source/Lib/CommonLib/Unit.cpp +++ b/source/Lib/CommonLib/Unit.cpp @@ -274,7 +274,6 @@ CodingUnit& CodingUnit::operator=( const CodingUnit& other ) imv = other.imv; imvNumCand = other.imvNumCand; #endif - return *this; } diff --git a/source/Lib/CommonLib/Unit.h b/source/Lib/CommonLib/Unit.h index 18694015491a8c7565477df51e8dc1607e0738a4..3e453c722c3e1d1257b64d7f532dd4dfb80b3724 100644 --- a/source/Lib/CommonLib/Unit.h +++ b/source/Lib/CommonLib/Unit.h @@ -313,7 +313,6 @@ struct CodingUnit : public UnitArea #if JVET_K1000_SIMPLIFIED_EMT uint8_t emtFlag; #endif - // needed for fast imv mode decisions int8_t imvNumCand; diff --git a/source/Lib/CommonLib/UnitPartitioner.cpp b/source/Lib/CommonLib/UnitPartitioner.cpp index 89fea3e4b6d2a41c9bbf80a244d8c26393167cbf..f12c4889f911584bba1c431475bc529d4a759f2f 100644 --- a/source/Lib/CommonLib/UnitPartitioner.cpp +++ b/source/Lib/CommonLib/UnitPartitioner.cpp @@ -472,11 +472,11 @@ bool QTBTPartitioner::canSplit( const PartSplit split, const CodingStructure &cs break; case CU_TRIH_SPLIT: if( ( cs.sps->getSpsNext().getMTTMode() & 1 ) != 1 ) return false; - if( area.height <= 2 * minTtSize || area.height > maxTtSize ) return false; + if( area.height <= 2 * minTtSize || area.height > maxTtSize || area.width > maxTtSize) return false; break; case CU_TRIV_SPLIT: if( ( 
cs.sps->getSpsNext().getMTTMode() & 1 ) != 1 ) return false; - if( area.width <= 2 * minTtSize || area.width > maxTtSize ) return false; + if( area.width <= 2 * minTtSize || area.width > maxTtSize || area.height > maxTtSize) return false; break; default: break; diff --git a/source/Lib/CommonLib/UnitTools.cpp b/source/Lib/CommonLib/UnitTools.cpp index 9d131a11216e7456eae993006008c21eec02da1c..9086d3cf473387c333d2cd34af1b740ada730ae3 100644 --- a/source/Lib/CommonLib/UnitTools.cpp +++ b/source/Lib/CommonLib/UnitTools.cpp @@ -66,6 +66,30 @@ UnitArea CS::getArea( const CodingStructure &cs, const UnitArea &area, const Cha return isDualITree( cs ) ? area.singleChan( chType ) : area; } +#if DMVR_JVET_LOW_LATENCY_K0217 +void CS::setRefinedMotionField(CodingStructure &cs) +{ + for (CodingUnit *cu : cs.cus) + { + for (auto &pu : CU::traversePUs(*cu)) + { + if (pu.cs->sps->getSpsNext().getUseDMVR() + && pu.mergeFlag + && pu.mergeType == MRG_TYPE_DEFAULT_N + && !pu.frucMrgMode + && !pu.cu->LICFlag + && !pu.cu->affine + && PU::isBiPredFromDifferentDir(pu)) + { + pu.mv[REF_PIC_LIST_0] += pu.mvd[REF_PIC_LIST_0]; + pu.mv[REF_PIC_LIST_1] -= pu.mvd[REF_PIC_LIST_0]; + pu.mvd[REF_PIC_LIST_0].setZero(); + PU::spanMotionInfo(pu); + } + } + } +} +#endif // CU tools bool CU::isIntra(const CodingUnit &cu) @@ -311,8 +335,8 @@ int PU::getIntraMPMs( const PredictionUnit &pu, unsigned* mpm, const ChannelType CHECK(2 >= numMPMs, "Invalid number of most probable modes"); - const int offset = (int) NUM_LUMA_MODE - 5; - const int mod = offset + 3; + const int offset = 61; + const int mod = 64; if (leftIntraDir == aboveIntraDir) { @@ -619,7 +643,6 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co // compute the location of the current PU int cnt = 0; - const Position posLT = pu.Y().topLeft(); const Position posRT = pu.Y().topRight(); const Position posLB = pu.Y().bottomLeft(); @@ -639,7 +662,6 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& 
mrgCtx, co // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miLeft.interDir; - // get Mv from Left mrgCtx.mvFieldNeighbours[cnt << 1].setMvField(miLeft.mv[0], miLeft.refIdx[0]); @@ -678,7 +700,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miAbove.interDir; - // get Mv from Left + // get Mv from Above mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAbove.mv[0], miAbove.refIdx[0] ); if( slice.isInterB() ) @@ -720,7 +742,7 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co // get Inter Dir mrgCtx.interDirNeighbours[cnt] = miAboveRight.interDir; - // get Mv from Left + // get Mv from Above-right mrgCtx.mvFieldNeighbours[cnt << 1].setMvField( miAboveRight.mv[0], miAboveRight.refIdx[0] ); if( slice.isInterB() ) @@ -800,7 +822,8 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co bool bMrgIdxMatchATMVPCan = ( mrgCandIdx == cnt ); bool tmpLICFlag = false; - isAvailableSubPu = cs.sps->getSpsNext().getUseATMVP() && getInterMergeSubPuMvpCand( pu, mrgCtx, tmpLICFlag, cnt ); + isAvailableSubPu = cs.sps->getSpsNext().getUseATMVP() && getInterMergeSubPuMvpCand( pu, mrgCtx, tmpLICFlag, cnt + ); if( isAvailableSubPu ) { @@ -1078,7 +1101,6 @@ void PU::getInterMergeCandidates( const PredictionUnit &pu, MergeCtx& mrgCtx, co } - static int xGetDistScaleFactor(const int &iCurrPOC, const int &iCurrRefPOC, const int &iColPOC, const int &iColRefPOC) { int iDiffPocD = iColPOC - iColRefPOC; @@ -1124,7 +1146,6 @@ bool PU::getColocatedMVP(const PredictionUnit &pu, const RefPicList &eRefPicList { return false; } - int iColRefIdx = mi.refIdx[eColRefPicList]; if (iColRefIdx < 0) @@ -2165,7 +2186,6 @@ bool PU::isAffineMrgFlagCoded( const PredictionUnit &pu ) } return getFirstAvailableAffineNeighbour( pu ) != nullptr; } - void PU::getAffineMergeCand( const PredictionUnit &pu, MvField (*mvFieldNeighbours)[3], unsigned char &interDirNeighbours, 
int &numValidMergeCand ) { for ( int mvNum = 0; mvNum < 3; mvNum++ ) @@ -2416,7 +2436,8 @@ void clipColBlkMv(int& mvX, int& mvY, const PredictionUnit& pu) } #endif -bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, bool& LICFlag, const int count) +bool PU::getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx& mrgCtx, bool& LICFlag, const int count +) { const Slice &slice = *pu.cs->slice; #if JVET_K0346 diff --git a/source/Lib/CommonLib/UnitTools.h b/source/Lib/CommonLib/UnitTools.h index f7640e1135dfae435d6838b6904d25c1b722c95f..2571b290bc149bd7df1f56d38a50fd8b19babbcd 100644 --- a/source/Lib/CommonLib/UnitTools.h +++ b/source/Lib/CommonLib/UnitTools.h @@ -49,6 +49,9 @@ namespace CS uint64_t getEstBits ( const CodingStructure &cs ); UnitArea getArea ( const CodingStructure &cs, const UnitArea &area, const ChannelType chType ); bool isDualITree ( const CodingStructure &cs ); +#if DMVR_JVET_LOW_LATENCY_K0217 + void setRefinedMotionField ( CodingStructure &cs ); +#endif } @@ -81,6 +84,7 @@ namespace CU bool hasNonTsCodedBlock (const CodingUnit& cu); uint32_t getNumNonZeroCoeffNonTs (const CodingUnit& cu); + PUTraverser traversePUs ( CodingUnit& cu); TUTraverser traverseTUs ( CodingUnit& cu); cPUTraverser traversePUs (const CodingUnit& cu); @@ -126,6 +130,7 @@ namespace PU #if JVET_K0357_AMVR void applyImv ( PredictionUnit &pu, MergeCtx &mrgCtx, InterPrediction *interPred = NULL ); #endif + void getAffineMergeCand (const PredictionUnit &pu, MvField (*mvFieldNeighbours)[3], unsigned char &interDirNeighbours, int &numValidMergeCand ); #if JVET_K_AFFINE bool isAffineMrgFlagCoded (const PredictionUnit &pu ); void getAffineMergeCand (const PredictionUnit &pu, MvField (*mvFieldNeighbours)[3], unsigned char &interDirNeighbours, int &numValidMergeCand ); @@ -133,7 +138,8 @@ namespace PU void setAllAffineMv ( PredictionUnit &pu, Mv affLT, Mv affRT, Mv affLB, RefPicList eRefList ); #endif #if JVET_K0346 - bool getInterMergeSubPuMvpCand(const 
PredictionUnit &pu, MergeCtx &mrgCtx, bool& LICFlag, const int count); + bool getInterMergeSubPuMvpCand(const PredictionUnit &pu, MergeCtx &mrgCtx, bool& LICFlag, const int count + ); bool getInterMergeSubPuRecurCand(const PredictionUnit &pu, MergeCtx &mrgCtx, const int count); #endif bool isBiPredFromDifferentDir (const PredictionUnit &pu); diff --git a/source/Lib/CommonLib/dtrace.cpp b/source/Lib/CommonLib/dtrace.cpp index 3a6c3dae5896fc452067476e74b5f6d7664b154d..66bb00825599aa8624a8973b65a3939232c5fc56 100644 --- a/source/Lib/CommonLib/dtrace.cpp +++ b/source/Lib/CommonLib/dtrace.cpp @@ -320,3 +320,18 @@ void CDTrace::dtrace_repeat( int k, int i_times, const char *format, /*va_list a } return; } + +#if K0149_BLOCK_STATISTICS +void CDTrace::dtrace_header( const char *format, /*va_list args*/... ) +{ + if( m_trace_file ) + { + va_list args; + va_start ( args, format ); + vfprintf ( m_trace_file, format, args ); + fflush( m_trace_file ); + va_end ( args ); + } + return; +} +#endif diff --git a/source/Lib/CommonLib/dtrace.h b/source/Lib/CommonLib/dtrace.h index 87140ee6aaad7f3fd1e33fecb43195059344e443..9d622f3eed7876f759e8a7adf6081113c4bdb8a4 100644 --- a/source/Lib/CommonLib/dtrace.h +++ b/source/Lib/CommonLib/dtrace.h @@ -47,6 +47,11 @@ #include <vector> #include <cstdarg> +#if K0149_BLOCK_STATISTICS +class CodingStructure; +struct Position; +#endif + class CDTrace; typedef std::string CType; @@ -118,6 +123,21 @@ public: template<bool bCount> void dtrace ( int, const char *format, /*va_list args*/... ); void dtrace_repeat( int, int i_times, const char *format, /*va_list args*/... ); +#if K0149_BLOCK_STATISTICS + void dtrace_header ( const char *format, /*va_list args*/... 
); + // CTU + void dtrace_block_scalar( int k, const CodingStructure &cs, std::string stat_type, signed value ); + // CU + void dtrace_block_scalar( int k, const CodingUnit &cu, std::string stat_type, signed value, bool isChroma = false ); + void dtrace_block_vector( int k, const CodingUnit &cu, std::string stat_type, signed val_x, signed val_y ); + // PU + void dtrace_block_scalar( int k, const PredictionUnit &pu, std::string stat_type, signed value, bool isChroma = false ); + void dtrace_block_vector( int k, const PredictionUnit &pu, std::string stat_type, signed val_x, signed val_y ); + void dtrace_block_affinetf( int k, const PredictionUnit &pu, std::string stat_type, signed val_x0, signed val_y0, signed val_x1, signed val_y1, signed val_x2, signed val_y2 ); + // TU + void dtrace_block_scalar(int k, const TransformUnit &tu, std::string stat_type, signed value, bool isChroma = false ); + void dtrace_block_vector(int k, const TransformUnit &tu, std::string stat_type, signed val_x, signed val_y); +#endif bool update ( state_type stateval ); int init( vstring channel_names ); int getLastError() { return m_error_code; } @@ -126,6 +146,9 @@ public: std::string getErrMessage(); int64_t getChannelCounter( int channel ) { return chanRules[channel].getCounter(); } void decrementChannelCounter( int channel ) { chanRules[channel].decrementCounter(); } +#if K0149_BLOCK_STATISTICS + bool isChannelActive( int channel ) { return chanRules[channel].active(); } +#endif }; diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.cpp b/source/Lib/CommonLib/dtrace_blockstatistics.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9e4552d8a5513f9b8822400100f79d572022effb --- /dev/null +++ b/source/Lib/CommonLib/dtrace_blockstatistics.cpp @@ -0,0 +1,775 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. 
This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2018, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/** \file dtrace_blockstatistics.cpp + * \brief DTrace block statistics support for next software + */ + +#include "dtrace_blockstatistics.h" +#include "dtrace.h" +#include "dtrace_next.h" +#include "CommonLib/Unit.h" +#include "CommonLib/Picture.h" +#include "CommonLib/UnitTools.h" +//#include "CommonLib/CodingStructure.h" + +#if K0149_BLOCK_STATISTICS +std::string GetBlockStatisticName(BlockStatistic statistic) +{ + auto statisticIterator = blockstatistic2description.find(statistic); + // enforces that all declared statistic enum items are also part of the map + assert(statisticIterator != blockstatistic2description.end() && "A block statistics declared in the enum is missing in the map for statistic description."); + + return std::get<0>(statisticIterator->second); +} + +std::string GetBlockStatisticTypeString(BlockStatistic statistic) +{ + auto statisticIterator = blockstatistic2description.find(statistic); + // enforces that all declared statistic enum items are also part of the map + assert(statisticIterator != blockstatistic2description.end() && "A block statistics declared in the enum is missing in the map for statistic description."); + + BlockStatisticType statisticType = std::get<1>(statisticIterator->second); + switch (statisticType) { + case BlockStatisticType::Flag: + return std::string("Flag"); + break; + case BlockStatisticType::Vector: + return std::string("Vector"); + break; + case BlockStatisticType::Integer: + return std::string("Integer"); + break; + case BlockStatisticType::AffineTFVectors: + return std::string("AffineTFVectors"); + break; + default: + assert(0); + break; + } + return std::string(); +} + +std::string GetBlockStatisticTypeSpecificInfo(BlockStatistic statistic) +{ + auto statisticIterator = blockstatistic2description.find(statistic); + // enforces that all declared statistic enum items are also part of the map + assert(statisticIterator != blockstatistic2description.end() && "A block statistics declared in the enum is 
missing in the map for statistic description."); + + return std::get<2>(statisticIterator->second); +} + +void CDTrace::dtrace_block_scalar( int k, const CodingStructure &cs, std::string stat_type, signed value ) +{ +#if BLOCK_STATS_AS_CSV + dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, cs.area.lx(), cs.area.ly(), cs.area.lwidth(), cs.area.lheight(), stat_type.c_str(), value ); +#else + dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, cs.area.lx(), cs.area.ly(), cs.area.lwidth(), cs.area.lheight(), stat_type.c_str(), value ); +#endif +} + +void CDTrace::dtrace_block_scalar( int k, const CodingUnit &cu, std::string stat_type, signed value, bool isChroma /*= false*/ ) +{ + const CodingStructure& cs = *cu.cs; +#if BLOCK_STATS_AS_CSV + if(isChroma) + { + dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, cu.Cb().x*2, cu.Cb().y*2, cu.Cb().width*2, cu.Cb().height*2, stat_type.c_str(), value ); + } + else + { + dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, cu.lx(), cu.ly(), cu.lwidth(), cu.lheight(), stat_type.c_str(), value ); + } +#else + if(isChroma) + { + dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, cu.Cb().x*2, cu.Cb().y*2, cu.Cb().width*2, cu.Cb().height*2, stat_type.c_str(), value ); + } + else + { + dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, cu.lx(), cu.ly(), cu.lwidth(), cu.lheight(), stat_type.c_str(), value ); + } +#endif +} + +void CDTrace::dtrace_block_vector( int k, const CodingUnit &cu, std::string stat_type, signed val_x, signed val_y ) +{ + const CodingStructure& cs = *cu.cs; +#if BLOCK_STATS_AS_CSV + dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%4d;%4d\n", cs.picture->poc, cu.lx(), cu.ly(), cu.lwidth(), cu.lheight(), stat_type.c_str(), val_x, val_y ); +#else + dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s={%4d,%4d}\n", cs.picture->poc, 
cu.lx(), cu.ly(), cu.lwidth(), cu.lheight(), stat_type.c_str(), val_x, val_y ); +#endif +} + +void CDTrace::dtrace_block_scalar( int k, const PredictionUnit &pu, std::string stat_type, signed value, bool isChroma /*= false*/ ) /* PU overload: writes one scalar BlockStat record (POC, position, size, stat_type, value) on channel k */ +{ + const CodingStructure& cs = *pu.cs; +#if BLOCK_STATS_AS_CSV /* semicolon-separated record */ + if(isChroma) /* chroma geometry is pu.Cb() scaled by 2; NOTE(review): presumably assumes 4:2:0 subsampling, confirm */ + { + dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, pu.Cb().x*2, pu.Cb().y*2, pu.Cb().width*2, pu.Cb().height*2, stat_type.c_str(), value ); + } + else + { + dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), stat_type.c_str(), value ); + } +#else /* human-readable record */ + if(isChroma) + { + dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, pu.Cb().x*2, pu.Cb().y*2, pu.Cb().width*2, pu.Cb().height*2, stat_type.c_str(), value ); + } + else + { + dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), stat_type.c_str(), value ); + } +#endif +} + +void CDTrace::dtrace_block_vector( int k, const PredictionUnit &pu, std::string stat_type, signed val_x, signed val_y ) /* PU overload: writes one two-component vector BlockStat record on channel k */ +{ + const CodingStructure& cs = *pu.cs; +#if BLOCK_STATS_AS_CSV + dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%4d;%4d\n", cs.picture->poc, pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), stat_type.c_str(), val_x, val_y ); +#else + dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s={%4d,%4d}\n", cs.picture->poc, pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), stat_type.c_str(), val_x, val_y ); +#endif +} + +void CDTrace::dtrace_block_scalar(int k, const TransformUnit &tu, std::string stat_type, signed value, bool isChroma /*= false*/ ) /* TU overload of the scalar record writer; same record layout as the PU overload */ +{ + const CodingStructure& cs = *tu.cs; +#if BLOCK_STATS_AS_CSV + if(isChroma) + { + dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, tu.Cb().x*2, tu.Cb().y*2, tu.Cb().width*2, tu.Cb().height*2, stat_type.c_str(), value ); + } + else + { + dtrace<false>( k,
"BlockStat;%d;%4d;%4d;%2d;%2d;%s;%d\n", cs.picture->poc, tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), stat_type.c_str(), value ); + } +#else + if(isChroma) + { + dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, tu.Cb().x*2, tu.Cb().y*2, tu.Cb().width*2, tu.Cb().height*2, stat_type.c_str(), value ); + } + else + { + dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s=%d\n", cs.picture->poc, tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), stat_type.c_str(), value ); + } +#endif +} + +void CDTrace::dtrace_block_vector(int k, const TransformUnit &tu, std::string stat_type, signed val_x, signed val_y) /* TU overload: writes one two-component vector BlockStat record on channel k; geometry is read from tu in both output formats */ +{ + const CodingStructure& cs = *tu.cs; +#if BLOCK_STATS_AS_CSV + dtrace<false>(k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%4d;%4d\n", cs.picture->poc, tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), stat_type.c_str(), val_x, val_y); +#else + dtrace<false>(k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s={%4d,%4d}\n", cs.picture->poc, tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), stat_type.c_str(), val_x, val_y); +#endif +} + +void CDTrace::dtrace_block_affinetf( int k, const PredictionUnit &pu, std::string stat_type, signed val_x0, signed val_y0, signed val_x1, signed val_y1, signed val_x2, signed val_y2 ) /* writes one BlockStat record carrying three (x,y) pairs for pu on channel k */ +{ + const CodingStructure& cs = *pu.cs; +#if BLOCK_STATS_AS_CSV + dtrace<false>( k, "BlockStat;%d;%4d;%4d;%2d;%2d;%s;%4d;%4d;%4d;%4d;%4d;%4d\n", + cs.picture->poc, pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), stat_type.c_str(), + val_x0, val_y0, val_x1, val_y1 , val_x2, val_y2 ); +#else + dtrace<false>( k, "BlockStat: POC %d @(%4d,%4d) [%2dx%2d] %s={%4d,%4d,%4d,%4d,%4d,%4d}\n", + cs.picture->poc, pu.lx(), pu.ly(), pu.lwidth(), pu.lheight(), stat_type.c_str(), + val_x0, val_y0, val_x1, val_y1 , val_x2, val_y2 ); +#endif +} + + + +void writeBlockStatisticsHeader(const SPS *sps) /* writes the block-statistics header at most once per process; guarded by a function-local static flag */ +{ + static bool has_header_been_written = false; + if (has_header_been_written) + { + return; + } + + // only write header when block statistics are used + bool
write_blockstatistics = g_trace_ctx->isChannelActive( D_BLOCK_STATISTICS_ALL) || g_trace_ctx->isChannelActive( D_BLOCK_STATISTICS_CODED); + if(!write_blockstatistics) + { + return; + } + + DTRACE_HEADER( g_trace_ctx, "# VTMBMS Block Statistics\n"); + // sequence info + DTRACE_HEADER( g_trace_ctx, "# Sequence size: [%dx %d]\n", sps->getPicWidthInLumaSamples(), sps->getPicHeightInLumaSamples()); + // list statistics: one header line per BlockStatistic enumerator (name; type; type-specific info) + for( auto i = static_cast<int>(BlockStatistic::PredMode); i < static_cast<int>(BlockStatistic::NumBlockStatistics); i++) + { + BlockStatistic statistic = BlockStatistic(i); + std::string statitic_name = GetBlockStatisticName(statistic); + std::string statitic_type = GetBlockStatisticTypeString(statistic); + std::string statitic_type_specific_info = GetBlockStatisticTypeSpecificInfo(statistic); + DTRACE_HEADER( g_trace_ctx, "# Block Statistic Type: %s; %s; %s\n", statitic_name.c_str(), statitic_type.c_str(), statitic_type_specific_info.c_str()); + } + + has_header_been_written = true; /* later calls return before writing anything */ +} + +void getAndStoreBlockStatistics(const CodingStructure& cs, const UnitArea& ctuArea) /* dispatches to writeAllCodedData or writeAllData depending on which block-statistics channel is active */ +{ + // two different behaviors, depending on which information is needed + bool writeAll = g_trace_ctx->isChannelActive( D_BLOCK_STATISTICS_ALL); + bool writeCoded = g_trace_ctx->isChannelActive( D_BLOCK_STATISTICS_CODED); + + CHECK(writeAll && writeCoded, "Either used D_BLOCK_STATISTICS_ALL_DATA or D_BLOCK_STATISTICS_CODED_DATA. Not both at once!") + + if (writeCoded) + writeAllCodedData(cs, ctuArea); // this will write out important cu-based data, only if it is actually decoded and used + else if (writeAll) + writeAllData(cs, ctuArea); // this will write out all inter- or intra-prediction related data +} + +void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea) /* traces the D_BLOCK_STATISTICS_ALL statistics for the CUs traversed in ctuArea */ +{ + const int maxNumChannelType = cs.pcv->chrFormat != CHROMA_400 && CS::isDualITree( cs ) ?
2 : 1; + + for( int ch = 0; ch < maxNumChannelType; ch++ ) + { + const ChannelType chType = ChannelType( ch ); + + for( const CodingUnit &cu : cs.traverseCUs( CS::getArea( cs, ctuArea, chType ), chType ) ) + { + if( chType == CHANNEL_TYPE_LUMA ) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::PredMode), cu.predMode); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::PartSize), cu.partSize); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::Depth), cu.depth); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::QT_Depth), cu.qtDepth); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BT_Depth), cu.btDepth); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::MT_Depth), cu.mtDepth); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::ChromaQPAdj), cu.chromaQpAdj); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::QP), cu.qp); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SplitSeries), (int)cu.splitSeries); + + if (cs.pps->getTransquantBypassEnabledFlag()) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag), cu.transQuantBypass); + } + + // skip flag + if (!cs.slice->isIntra()) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SkipFlag), cu.skip); + } + + #if JVET_K1000_SIMPLIFIED_EMT && HM_EMT_NSST_AS_IN_JEM + if (!(!((cs.sps->getSpsNext().getUseIntraEMT() && CU::isIntra(cu)) || (cs.sps->getSpsNext().getUseInterEMT() && CU::isInter(cu))) || isChroma(cu.chType))) + { + 
DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::EMTFlag), cu.emtFlag); + } + #endif + } + else if( chType == CHANNEL_TYPE_CHROMA ) + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::PartSize_Chroma), cu.partSize); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::Depth_Chroma), cu.depth); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::QT_Depth_Chroma), cu.qtDepth); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::BT_Depth_Chroma), cu.btDepth); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::MT_Depth_Chroma), cu.mtDepth); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::ChromaQPAdj_Chroma), cu.chromaQpAdj); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::QP_Chroma), cu.qp); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::SplitSeries_Chroma), (int)cu.splitSeries); + + if (cs.pps->getTransquantBypassEnabledFlag()) + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag_Chroma), cu.transQuantBypass); + } + + #if JVET_K1000_SIMPLIFIED_EMT && HM_EMT_NSST_AS_IN_JEM + if (!(!((cs.sps->getSpsNext().getUseIntraEMT() && CU::isIntra(cu)) || (cs.sps->getSpsNext().getUseInterEMT() && CU::isInter(cu))) || isChroma(cu.chType))) + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::EMTFlag_Chroma), cu.emtFlag); + } + #endif + } + + + switch( cu.predMode ) + { + case MODE_INTER: + { + for( const PredictionUnit &pu : 
CU::traversePUs( cu ) ) + { + if (!pu.cu->skip) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MergeFlag), pu.mergeFlag); + } + if( pu.mergeFlag ) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MergeIdx), pu.mergeIdx); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MergeType), pu.mergeType); + } +#if JVET_K_AFFINE + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::AffineFlag), pu.cu->affine); +#if JVET_K0337_AFFINE_6PARA + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::AffineType), pu.cu->affineType); +#endif +#endif + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::InterDir), pu.interDir); + + if (pu.interDir != 2 /* PRED_L1 */) /* list 0 is traced unless the PU uses list 1 only */ + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MVPIdxL0), pu.mvpIdx[REF_PIC_LIST_0]); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::RefIdxL0), pu.refIdx[REF_PIC_LIST_0]); + } + if (pu.interDir != 1 /* PRED_L0 */) /* list 1 is traced unless the PU uses list 0 only */ + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MVPIdxL1), pu.mvpIdx[REF_PIC_LIST_1]); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::RefIdxL1), pu.refIdx[REF_PIC_LIST_1]); + } +#if JVET_K_AFFINE + if (!pu.cu->affine) /* non-affine PU: one MV and one MVD per used reference list */ + { +#endif + if (pu.interDir != 2 /* PRED_L1 */) + { + Mv mv = pu.mv[REF_PIC_LIST_0]; + Mv mvd = pu.mvd[REF_PIC_LIST_0]; +#if JVET_K0346 || JVET_K_AFFINE + mv.setLowPrec(); + mvd.setLowPrec(); +#endif + DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MVDL0), mvd.hor, mvd.ver); + DTRACE_BLOCK_VECTOR(g_trace_ctx,
D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MVL0), mv.hor, mv.ver); + } + if (pu.interDir != 1 /* PRED_L0 */) + { + Mv mv = pu.mv[REF_PIC_LIST_1]; + Mv mvd = pu.mvd[REF_PIC_LIST_1]; +#if JVET_K0346 || JVET_K_AFFINE + mv.setLowPrec(); + mvd.setLowPrec(); +#endif + DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MVDL1), mvd.hor, mvd.ver); + DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::MVL1), mv.hor, mv.ver); + } +#if JVET_K_AFFINE + } + else + { + if (pu.interDir != 2 /* PRED_L1 */) + { + Mv mv[3]; /* MVs read at the top-left, top-right and bottom-left positions of the PU motion buffer */ + const CMotionBuf &mb = pu.getMotionBuf(); + mv[0] = mb.at(0, 0).mv[REF_PIC_LIST_0]; + mv[1] = mb.at(mb.width - 1, 0).mv[REF_PIC_LIST_0]; + mv[2] = mb.at(0, mb.height - 1).mv[REF_PIC_LIST_0]; +#if JVET_K0346 || JVET_K_AFFINE + // motion vectors should use low precision or they will appear too large + mv[0].setLowPrec(); + mv[1].setLowPrec(); + mv[2].setLowPrec(); +#endif + DTRACE_BLOCK_AFFINETF(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::AffineMVL0), mv[0].hor, mv[0].ver, mv[1].hor, mv[1].ver, mv[2].hor, mv[2].ver); + } + if (pu.interDir != 1 /* PRED_L0 */) + { + Mv mv[3]; + const CMotionBuf &mb = pu.getMotionBuf(); + mv[0] = mb.at(0, 0).mv[REF_PIC_LIST_1]; + mv[1] = mb.at(mb.width - 1, 0).mv[REF_PIC_LIST_1]; + mv[2] = mb.at(0, mb.height - 1).mv[REF_PIC_LIST_1]; +#if JVET_K0346 || JVET_K_AFFINE + // motion vectors should use low precision or they will appear too large + mv[0].setLowPrec(); + mv[1].setLowPrec(); + mv[2].setLowPrec(); +#endif + DTRACE_BLOCK_AFFINETF(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::AffineMVL1), mv[0].hor, mv[0].ver, mv[1].hor, mv[1].ver, mv[2].hor, mv[2].ver); + } + } +#endif + } +#if JVET_K0357_AMVR + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IMVMode), cu.imv); +#endif +
DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::RootCbf), cu.rootCbf); + } + break; + case MODE_INTRA: + { + + if(chType == CHANNEL_TYPE_LUMA) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IPCM), cu.ipcm); + } + else if(chType == CHANNEL_TYPE_CHROMA) + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, cu, GetBlockStatisticName(BlockStatistic::IPCM_Chroma), cu.ipcm); + } + + const uint32_t numChType = ::getNumberValidChannels( cu.chromaFormat ); + + for( uint32_t chType = CHANNEL_TYPE_LUMA; chType < numChType; chType++ ) + { + if( cu.blocks[chType].valid() ) + { + for( const PredictionUnit &pu : CU::traversePUs( cu ) ) + { + if( isLuma( ChannelType( chType ) ) ) + { + const uint32_t uiChFinalMode = PU::getFinalIntraMode( pu, ChannelType( chType ) ); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::Luma_IntraMode), uiChFinalMode); + } + else + { + const uint32_t uiChFinalMode = PU::getFinalIntraMode( pu, ChannelType( chType ) ); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, pu, GetBlockStatisticName(BlockStatistic::Chroma_IntraMode), uiChFinalMode); + #if ENABLE_CHROMA_422 + assert(0); + #endif + } + } + } + } + } + break; + default: + THROW( "Invalid prediction mode" ); + break; + } + + for (const TransformUnit &tu : CU::traverseTUs(cu)) + { + if (tu.Y().valid()) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::Cbf_Y), tu.cbf[COMPONENT_Y]); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::TransformSkipFlag_Y), tu.transformSkip[COMPONENT_Y]); + } + if (!(cu.chromaFormat == CHROMA_400 || (CS::isDualITree(*cu.cs) && cu.chType == CHANNEL_TYPE_LUMA))) + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, 
GetBlockStatisticName(BlockStatistic::Cbf_Cb), tu.cbf[COMPONENT_Cb]); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cr), tu.cbf[COMPONENT_Cr]); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::TransformSkipFlag_Cb), tu.transformSkip[COMPONENT_Cb]); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_ALL, tu, GetBlockStatisticName(BlockStatistic::TransformSkipFlag_Cr), tu.transformSkip[COMPONENT_Cr]); + } + } + } + } +} + +void writeAllCodedData(const CodingStructure & cs, const UnitArea & ctuArea) +{ + const int maxNumChannelType = cs.pcv->chrFormat != CHROMA_400 && CS::isDualITree(cs) ? 2 : 1; + + for (int ch = 0; ch < maxNumChannelType; ch++) + { + const ChannelType chType = ChannelType(ch); + const SPS& sps = *cs.sps; + + for (const CodingUnit &cu : cs.traverseCUs(CS::getArea(cs, ctuArea, chType), chType)) + { + if( chType == CHANNEL_TYPE_LUMA ) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::PartSize), cu.partSize); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::Depth), cu.depth); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::QT_Depth), cu.qtDepth); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::BT_Depth), cu.btDepth); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::MT_Depth), cu.mtDepth); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::ChromaQPAdj), cu.chromaQpAdj); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::QP), cu.qp); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::SplitSeries), 
(int)cu.splitSeries); + // transquant bypass flag + if (cs.pps->getTransquantBypassEnabledFlag()) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag), cu.transQuantBypass); + } + // skip flag + if (!cs.slice->isIntra()) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::SkipFlag), cu.skip); + } + + // prediction mode and partitioning data + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::PredMode), cu.predMode); + + if (CU::isIntra(cu) && cu.partSize == SIZE_2Nx2N) + { + if (!(!sps.getUsePCM() || cu.lumaSize().width > (1 << sps.getPCMLog2MaxSize()) || cu.lumaSize().width < (1 << sps.getPCMLog2MinSize()))) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::IPCM), cu.ipcm); + } + } + } + else if (chType == CHANNEL_TYPE_CHROMA ) + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::PartSize_Chroma), cu.partSize); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::Depth_Chroma), cu.depth); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::QT_Depth_Chroma), cu.qtDepth); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::BT_Depth_Chroma), cu.btDepth); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::MT_Depth_Chroma), cu.mtDepth); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::ChromaQPAdj_Chroma), cu.chromaQpAdj); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::QP_Chroma), cu.qp); + 
DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::SplitSeries_Chroma), (int)cu.splitSeries); + // transquant bypass flag + if (cs.pps->getTransquantBypassEnabledFlag()) + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::TransQuantBypassFlag_Chroma), cu.transQuantBypass); + } + + if (CU::isIntra(cu) && cu.partSize == SIZE_2Nx2N) + { + if (!(!sps.getUsePCM() || cu.lumaSize().width > (1 << sps.getPCMLog2MaxSize()) || cu.lumaSize().width < (1 << sps.getPCMLog2MinSize()))) + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::IPCM_Chroma), cu.ipcm); + } + } + } + + for (auto &pu : CU::traversePUs(cu)) + { + switch (pu.cu->predMode) + { + case MODE_INTRA: + { + if (pu.Y().valid()) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::Luma_IntraMode), PU::getFinalIntraMode(pu, ChannelType(chType))); + } + if (!(pu.chromaFormat == CHROMA_400 || (CS::isDualITree(*pu.cs) && pu.chType == CHANNEL_TYPE_LUMA))) + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::Chroma_IntraMode), PU::getFinalIntraMode(pu, CHANNEL_TYPE_CHROMA)); + } + break; + } + case MODE_INTER: + { + if (!pu.cu->skip) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MergeFlag), pu.mergeFlag); + } + if (pu.mergeFlag) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MergeType), pu.mergeType); + #if JVET_K_AFFINE + if (!(cu.cs->slice->isIntra() || !cu.cs->sps->getSpsNext().getUseAffine() || cu.partSize != SIZE_2Nx2N) + && !(!cu.firstPU->mergeFlag && !(cu.lumaSize().width > 8 && cu.lumaSize().height > 8)) + && !(cu.firstPU->mergeFlag && !PU::isAffineMrgFlagCoded(*cu.firstPU))) + { + 
DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineFlag), pu.cu->affine); + if (cu.affine && !cu.firstPU->mergeFlag && cu.cs->sps->getSpsNext().getUseAffineType()) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineType), pu.cu->affineType); + } + } + #endif + #if JVET_K_AFFINE + if (!(pu.cu->affine)) /* MergeIdx is traced only for non-affine merge PUs */ + #endif + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MergeIdx), pu.mergeIdx); + } + } + else + { + if (!pu.cs->slice->isInterP()) /* InterDir is traced only when the slice is not a P slice */ + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::InterDir), pu.interDir); + } + #if JVET_K_AFFINE + if (!(cu.cs->slice->isIntra() || !cu.cs->sps->getSpsNext().getUseAffine() || cu.partSize != SIZE_2Nx2N) + && !(!cu.firstPU->mergeFlag && !(cu.lumaSize().width > 8 && cu.lumaSize().height > 8)) + && !(cu.firstPU->mergeFlag && !PU::isAffineMrgFlagCoded(*cu.firstPU))) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineFlag), pu.cu->affine); + if (cu.affine && !cu.firstPU->mergeFlag && cu.cs->sps->getSpsNext().getUseAffineType()) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineType), pu.cu->affineType); + } + } + #endif + } + if (pu.interDir != 2 /* PRED_L1 */) /* list 0 is traced unless the PU uses list 1 only */ + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MVPIdxL0), pu.mvpIdx[REF_PIC_LIST_0]); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::RefIdxL0), pu.refIdx[REF_PIC_LIST_0]); + } + if (pu.interDir != 1 /* PRED_L0 */) /* list 1 is traced unless the PU uses list 0 only */ + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MVPIdxL1), pu.mvpIdx[REF_PIC_LIST_1]); + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED,
pu, GetBlockStatisticName(BlockStatistic::RefIdxL1), pu.refIdx[REF_PIC_LIST_1]); + } + + #if JVET_K_AFFINE + if (!pu.cu->affine) /* non-affine PU: one MV and one MVD per used reference list */ + { + #endif + if (pu.interDir != 2 /* PRED_L1 */) + { + Mv mv = pu.mv[REF_PIC_LIST_0]; + Mv mvd = pu.mvd[REF_PIC_LIST_0]; + #if JVET_K0346 || JVET_K_AFFINE + mv.setLowPrec(); + mvd.setLowPrec(); + #endif + DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MVDL0), mvd.hor, mvd.ver); + DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MVL0), mv.hor, mv.ver); + } + if (pu.interDir != 1 /* PRED_L0 */) + { + Mv mv = pu.mv[REF_PIC_LIST_1]; + Mv mvd = pu.mvd[REF_PIC_LIST_1]; + #if JVET_K0346 || JVET_K_AFFINE + mv.setLowPrec(); + mvd.setLowPrec(); + #endif + DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MVDL1), mvd.hor, mvd.ver); /* list-1 MVD goes out under the list-1 statistic name */ + DTRACE_BLOCK_VECTOR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::MVL1), mv.hor, mv.ver); + } + #if JVET_K_AFFINE + } + else + { + if (pu.interDir != 2 /* PRED_L1 */) + { + Mv mv[3]; /* MVs read at the top-left, top-right and bottom-left positions of the PU motion buffer */ + const CMotionBuf &mb = pu.getMotionBuf(); + mv[0] = mb.at(0, 0).mv[REF_PIC_LIST_0]; + mv[1] = mb.at(mb.width - 1, 0).mv[REF_PIC_LIST_0]; + mv[2] = mb.at(0, mb.height - 1).mv[REF_PIC_LIST_0]; + #if JVET_K0346 || JVET_K_AFFINE + // motion vectors should use low precision or they will appear too large + mv[0].setLowPrec(); + mv[1].setLowPrec(); + mv[2].setLowPrec(); + #endif + DTRACE_BLOCK_AFFINETF(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineMVL0), mv[0].hor, mv[0].ver, mv[1].hor, mv[1].ver, mv[2].hor, mv[2].ver); + } + if (pu.interDir != 1 /* PRED_L0 */) + { + Mv mv[3]; + const CMotionBuf &mb = pu.getMotionBuf(); + mv[0] = mb.at(0, 0).mv[REF_PIC_LIST_1]; + mv[1] = mb.at(mb.width - 1, 0).mv[REF_PIC_LIST_1]; + mv[2] = mb.at(0, mb.height - 1).mv[REF_PIC_LIST_1]; + #if JVET_K0346 || JVET_K_AFFINE + // motion
vectors should use low precision or they will appear to large + mv[0].setLowPrec(); + mv[1].setLowPrec(); + mv[2].setLowPrec(); + #endif + DTRACE_BLOCK_AFFINETF(g_trace_ctx, D_BLOCK_STATISTICS_CODED, pu, GetBlockStatisticName(BlockStatistic::AffineMVL1), mv[0].hor, mv[0].ver, mv[1].hor, mv[1].ver, mv[2].hor, mv[2].ver); + } + } + #endif + #if JVET_K0357_AMVR + if (cu.cs->sps->getSpsNext().getUseIMV() && CU::hasSubCUNonZeroMVd(cu)) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::IMVMode), cu.imv); + } + #endif + break; + } + default: + { + CHECK(1, "Invalid prediction mode"); + break; + } + } + } // end pu + if (CU::isInter(cu)) + { + const PredictionUnit &pu = *cu.firstPU; + if (!((cu.cs->pcv->noRQT || cu.partSize == SIZE_2Nx2N) && pu.mergeFlag)) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::RootCbf), cu.rootCbf); + } + } + if (cu.rootCbf || CU::isIntra(cu)) + { + for (const TransformUnit &tu : CU::traverseTUs(cu)) + { + if (tu.Y().valid()) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::Cbf_Y), tu.cbf[COMPONENT_Y]); +#if HM_EMT_NSST_AS_IN_JEM && JVET_K1000_SIMPLIFIED_EMT + if (!(!tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag(*tu.cs, tu.blocks[COMPONENT_Y]) || (isLuma(COMPONENT_Y) && tu.cu->emtFlag))) +#else + if (!(!tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag(*tu.cs, tu.blocks[COMPONENT_Y]))) +#endif + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::TransformSkipFlag_Y), tu.transformSkip[COMPONENT_Y]); + } + } + if (!(cu.chromaFormat == CHROMA_400 || (CS::isDualITree(*cu.cs) && cu.chType == CHANNEL_TYPE_LUMA))) + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cb), 
tu.cbf[COMPONENT_Cb]); + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::Cbf_Cr), tu.cbf[COMPONENT_Cr]); +#if HM_EMT_NSST_AS_IN_JEM && JVET_K1000_SIMPLIFIED_EMT + if (!(!tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag(*tu.cs, tu.blocks[COMPONENT_Cb]) || (isLuma(COMPONENT_Cb) && tu.cu->emtFlag))) +#else + if (!(!tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag(*tu.cs, tu.blocks[COMPONENT_Cb]))) +#endif + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::TransformSkipFlag_Cb), tu.transformSkip[COMPONENT_Cb]); + } +#if HM_EMT_NSST_AS_IN_JEM && JVET_K1000_SIMPLIFIED_EMT + if (!(!tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag(*tu.cs, tu.blocks[COMPONENT_Cr]) || (isLuma(COMPONENT_Cr) && tu.cu->emtFlag))) +#else + if (!(!tu.cu->cs->pps->getUseTransformSkip() || tu.cu->transQuantBypass || !TU::hasTransformSkipFlag(*tu.cs, tu.blocks[COMPONENT_Cr]))) +#endif + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, tu, GetBlockStatisticName(BlockStatistic::TransformSkipFlag_Cr), tu.transformSkip[COMPONENT_Cr]); + } + } + } + } +#if JVET_K1000_SIMPLIFIED_EMT && HM_EMT_NSST_AS_IN_JEM + if (!(!((cs.sps->getSpsNext().getUseIntraEMT() && CU::isIntra(cu)) || (cs.sps->getSpsNext().getUseInterEMT() && CU::isInter(cu))) || isChroma(cu.chType))) + { + if( isLuma( ChannelType( chType ) ) ) + { + DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::EMTFlag), cu.emtFlag); + } + else + { + DTRACE_BLOCK_SCALAR_CHROMA(g_trace_ctx, D_BLOCK_STATISTICS_CODED, cu, GetBlockStatisticName(BlockStatistic::EMTFlag_Chroma), cu.emtFlag); + } + } +#endif + } + } +} +#endif diff --git a/source/Lib/CommonLib/dtrace_blockstatistics.h b/source/Lib/CommonLib/dtrace_blockstatistics.h new file 
mode 100644 index 0000000000000000000000000000000000000000..a81b03a27f258971585def1a1fa174e0e5197393 --- /dev/null +++ b/source/Lib/CommonLib/dtrace_blockstatistics.h @@ -0,0 +1,208 @@ +/* The copyright in this software is being made available under the BSD + * License, included below. This software may be subject to other third party + * and contributor rights, including patent rights, and no such rights are + * granted under this license. + * + * Copyright (c) 2010-2018, ITU/ISO/IEC + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file dtrace_blockstatistics.h + * \brief DTrace block statistics support for next software + */ + +#ifndef _DTRACE_BLOCKSTATISTICS_H_ +#define _DTRACE_BLOCKSTATISTICS_H_ + +#include <map> +#include "CommonLib/CommonDef.h" +#include "CommonLib/Unit.h" + +#if K0149_BLOCK_STATISTICS +#define DTRACE_HEADER(ctx,...) ctx->dtrace_header( __VA_ARGS__ ) +#define DTRACE_BLOCK_SCALAR(ctx,channel,cs_cu_pu,stat_type,val) ctx->dtrace_block_scalar( channel, cs_cu_pu, stat_type, val ) +#define DTRACE_BLOCK_SCALAR_CHROMA(ctx,channel,cs_cu_pu,stat_type,val) ctx->dtrace_block_scalar( channel, cs_cu_pu, stat_type, val, true) +#define DTRACE_BLOCK_VECTOR(ctx,channel,cu_pu,stat_type,v_x,v_y) ctx->dtrace_block_vector( channel, cu_pu, stat_type, v_x, v_y ) +#define DTRACE_BLOCK_AFFINETF(ctx,channel,pu,stat_type,v_x0,v_y0,v_x1,v_y1,v_x2,v_y2) ctx->dtrace_block_affinetf( channel, pu, stat_type, v_x0, v_y0, v_x1, v_y1, v_x2, v_y2 ) + +enum class BlockStatistic { + // general + PredMode, + PartSize, + Depth, + QT_Depth, + BT_Depth, + MT_Depth, + ChromaQPAdj, + QP, + SplitSeries, + TransQuantBypassFlag, +#if JVET_K1000_SIMPLIFIED_EMT + EMTFlag, +#endif + TransformSkipFlag_Y, + TransformSkipFlag_Cb, + TransformSkipFlag_Cr, + + // intra + IPCM, + Luma_IntraMode, + Chroma_IntraMode, + // inter + SkipFlag, + RootCbf, + Cbf_Y, + Cbf_Cb, + Cbf_Cr, +#if JVET_K0357_AMVR + IMVMode, +#endif + InterDir, + MergeFlag, + MergeIdx, + MergeType, + MVPIdxL0, +
MVPIdxL1, + MVL0, + MVL1, + MVDL0, + MVDL1, + RefIdxL0, + RefIdxL1, +#if JVET_K_AFFINE + AffineFlag, + AffineMVL0, + AffineMVL1, +#if JVET_K0337_AFFINE_6PARA + AffineType, +#endif +#endif + +// for dual tree + // general + PartSize_Chroma, + Depth_Chroma, + QT_Depth_Chroma, + BT_Depth_Chroma, + MT_Depth_Chroma, + ChromaQPAdj_Chroma, + QP_Chroma, + SplitSeries_Chroma, + TransQuantBypassFlag_Chroma, + + // intra + IPCM_Chroma, + + NumBlockStatistics, +}; + +enum class BlockStatisticType { + Flag, + Vector, + Integer, + AffineTFVectors, +}; + +static const std::map<BlockStatistic, std::tuple<std::string, BlockStatisticType, std::string>> blockstatistic2description = +{ + // Statistics enum Statistics name string Statistic Type Type specific information: + // Value range, vector scale + { BlockStatistic::PredMode, std::tuple<std::string, BlockStatisticType, std::string>{"PredMode", BlockStatisticType::Flag, ""}}, + { BlockStatistic::MergeFlag, std::tuple<std::string, BlockStatisticType, std::string>{"MergeFlag", BlockStatisticType::Flag, ""}}, + { BlockStatistic::MVL0, std::tuple<std::string, BlockStatisticType, std::string>{"MVL0", BlockStatisticType::Vector, "Scale: 4"}}, + { BlockStatistic::MVL1, std::tuple<std::string, BlockStatisticType, std::string>{"MVL1", BlockStatisticType::Vector, "Scale: 4"}}, + { BlockStatistic::IPCM, std::tuple<std::string, BlockStatisticType, std::string>{"IPCM", BlockStatisticType::Flag, ""}}, + { BlockStatistic::Luma_IntraMode, std::tuple<std::string, BlockStatisticType, std::string>{"Luma_IntraMode", BlockStatisticType::Integer, "[0, " + std::to_string(NUM_INTRA_MODE) + "]"}}, + { BlockStatistic::Chroma_IntraMode, std::tuple<std::string, BlockStatisticType, std::string>{"Chroma_IntraMode", BlockStatisticType::Integer, "[0, " + std::to_string(NUM_INTRA_MODE) + "]"}}, + { BlockStatistic::SkipFlag, std::tuple<std::string, BlockStatisticType, std::string>{"SkipFlag", BlockStatisticType::Flag, ""}}, + { BlockStatistic::TransformSkipFlag_Y, 
std::tuple<std::string, BlockStatisticType, std::string>{"TransformSkipFlag_Y", BlockStatisticType::Flag, ""}}, + { BlockStatistic::TransformSkipFlag_Cb, std::tuple<std::string, BlockStatisticType, std::string>{"TransformSkipFlag_Cb", BlockStatisticType::Flag, ""}}, + { BlockStatistic::TransformSkipFlag_Cr, std::tuple<std::string, BlockStatisticType, std::string>{"TransformSkipFlag_Cr", BlockStatisticType::Flag, ""}}, + { BlockStatistic::PartSize, std::tuple<std::string, BlockStatisticType, std::string>{"PartSize", BlockStatisticType::Integer, "[0, " + std::to_string(NUMBER_OF_PART_SIZES) + "]"}}, + { BlockStatistic::Depth, std::tuple<std::string, BlockStatisticType, std::string>{"Depth", BlockStatisticType::Integer, "[0, 7]"}}, + { BlockStatistic::QT_Depth, std::tuple<std::string, BlockStatisticType, std::string>{"QT_Depth", BlockStatisticType::Integer, "[0, 7]"}}, + { BlockStatistic::BT_Depth, std::tuple<std::string, BlockStatisticType, std::string>{"BT_Depth", BlockStatisticType::Integer, "[0, 7]"}}, + { BlockStatistic::MT_Depth, std::tuple<std::string, BlockStatisticType, std::string>{"MT_Depth", BlockStatisticType::Integer, "[0, 7]"}}, + { BlockStatistic::ChromaQPAdj, std::tuple<std::string, BlockStatisticType, std::string>{"ChromaQPAdj", BlockStatisticType::Integer, "[-10, 10]"}}, + { BlockStatistic::QP, std::tuple<std::string, BlockStatisticType, std::string>{"QP", BlockStatisticType::Integer, "[0, 51]"}}, + { BlockStatistic::SplitSeries, std::tuple<std::string, BlockStatisticType, std::string>{"SplitSeries", BlockStatisticType::Integer, "[0, " + std::to_string(std::numeric_limits<SplitSeries>::max()) + "]"}}, + { BlockStatistic::RootCbf, std::tuple<std::string, BlockStatisticType, std::string>{"RootCbf", BlockStatisticType::Flag, ""}}, + { BlockStatistic::Cbf_Y, std::tuple<std::string, BlockStatisticType, std::string>{"Cbf_Y", BlockStatisticType::Flag, ""}}, + { BlockStatistic::Cbf_Cb, std::tuple<std::string, BlockStatisticType, std::string>{"Cbf_Cb", 
BlockStatisticType::Flag, ""}}, + { BlockStatistic::Cbf_Cr, std::tuple<std::string, BlockStatisticType, std::string>{"Cbf_Cr", BlockStatisticType::Flag, ""}}, + { BlockStatistic::TransQuantBypassFlag, std::tuple<std::string, BlockStatisticType, std::string>{"TransQuantBypassFlag", BlockStatisticType::Flag, ""}}, + { BlockStatistic::MergeIdx, std::tuple<std::string, BlockStatisticType, std::string>{"MergeIdx", BlockStatisticType::Integer, "[0, 7]"}}, + { BlockStatistic::InterDir, std::tuple<std::string, BlockStatisticType, std::string>{"InterDir", BlockStatisticType::Integer, "[1, 3]"}}, + { BlockStatistic::MergeType, std::tuple<std::string, BlockStatisticType, std::string>{"MergeType", BlockStatisticType::Integer, "[0, 2]"}}, + { BlockStatistic::MVPIdxL0, std::tuple<std::string, BlockStatisticType, std::string>{"MVPIdxL0", BlockStatisticType::Integer, "[0, 1]"}}, + { BlockStatistic::MVDL0, std::tuple<std::string, BlockStatisticType, std::string>{"MVDL0", BlockStatisticType::Vector, "Scale: 4"}}, + { BlockStatistic::RefIdxL0, std::tuple<std::string, BlockStatisticType, std::string>{"RefIdxL0", BlockStatisticType::Integer, "[0, 4]"}}, + { BlockStatistic::MVPIdxL1, std::tuple<std::string, BlockStatisticType, std::string>{"MVPIdxL1", BlockStatisticType::Integer, "[0, 1]"}}, + { BlockStatistic::MVDL1, std::tuple<std::string, BlockStatisticType, std::string>{"MVDL1", BlockStatisticType::Vector, "Scale: 4"}}, + { BlockStatistic::RefIdxL1, std::tuple<std::string, BlockStatisticType, std::string>{"RefIdxL1", BlockStatisticType::Integer, "[0, 4]"}}, +#if JVET_K0357_AMVR + { BlockStatistic::IMVMode, std::tuple<std::string, BlockStatisticType, std::string>{"IMVMode", BlockStatisticType::Integer, "[0, 2]"}}, +#endif +#if JVET_K_AFFINE + { BlockStatistic::AffineFlag, std::tuple<std::string, BlockStatisticType, std::string>{"AffineFlag", BlockStatisticType::Flag, ""}}, + { BlockStatistic::AffineMVL0, std::tuple<std::string, BlockStatisticType, std::string>{"AffineMVL0", 
BlockStatisticType::AffineTFVectors, "Scale: 4"}}, + { BlockStatistic::AffineMVL1, std::tuple<std::string, BlockStatisticType, std::string>{"AffineMVL1", BlockStatisticType::AffineTFVectors, "Scale: 4"}}, +#if JVET_K0337_AFFINE_6PARA + { BlockStatistic::AffineType, std::tuple<std::string, BlockStatisticType, std::string>{"AffineType", BlockStatisticType::Flag, ""} }, +#endif +#endif +#if JVET_K1000_SIMPLIFIED_EMT + { BlockStatistic::EMTFlag, std::tuple<std::string, BlockStatisticType, std::string>{"EMTFlag", BlockStatisticType::Flag, ""}}, +#endif + + + // for dual tree + { BlockStatistic::PartSize_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"PartSize_Chroma", BlockStatisticType::Integer, "[0, " + std::to_string(NUMBER_OF_PART_SIZES) + "]"}}, + { BlockStatistic::Depth_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"Depth_Chroma", BlockStatisticType::Integer, "[0, 10]"}}, // todo: actual limits? + { BlockStatistic::QT_Depth_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"QT_Depth_Chroma", BlockStatisticType::Integer, "[0, 10]"}}, // todo: actual limits? + { BlockStatistic::BT_Depth_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"BT_Depth_Chroma", BlockStatisticType::Integer, "[0, 10]"}}, // todo: actual limits? + { BlockStatistic::MT_Depth_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"MT_Depth_Chroma", BlockStatisticType::Integer, "[0, 10]"}}, // todo: actual limits? + { BlockStatistic::ChromaQPAdj_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"ChromaQPAdj_Chroma", BlockStatisticType::Integer, "[-10, 10]"}}, // todo: actual limits? 
+ { BlockStatistic::QP_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"QP_Chroma", BlockStatisticType::Integer, "[0, 51]"}}, + { BlockStatistic::SplitSeries_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"SplitSeries_Chroma", BlockStatisticType::Integer, "[0, " + std::to_string(std::numeric_limits<SplitSeries>::max()) + "]"}}, + { BlockStatistic::TransQuantBypassFlag_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"TransQuantBypassFlag_Chroma", BlockStatisticType::Flag, ""}}, + { BlockStatistic::IPCM_Chroma, std::tuple<std::string, BlockStatisticType, std::string>{"IPCM_Chroma", BlockStatisticType::Flag, ""}}, + +}; + + +std::string GetBlockStatisticName(BlockStatistic statistic); +std::string GetBlockStatisticTypeString(BlockStatistic statistic); +std::string GetBlockStatisticTypeSpecificInfo(BlockStatistic statistic); + +void writeBlockStatisticsHeader(const SPS *sps); +void getAndStoreBlockStatistics(const CodingStructure& cs, const UnitArea& ctuArea); +void writeAllData(const CodingStructure& cs, const UnitArea& ctuArea); +void writeAllCodedData(const CodingStructure& cs, const UnitArea& ctuArea); +#endif + +#endif // _DTRACE_BLOCKSTATISTICS_H_ diff --git a/source/Lib/CommonLib/dtrace_next.h b/source/Lib/CommonLib/dtrace_next.h index 89f214cc9c5d7dbeda5b15b3168f266f4e4a623a..88d6249f118683fcd18bbbee5935f68b79e943b0 100644 --- a/source/Lib/CommonLib/dtrace_next.h +++ b/source/Lib/CommonLib/dtrace_next.h @@ -145,8 +145,12 @@ enum DTRACE_CHANNEL D_RDOQ_COST, D_TMP, D_CRC +#if K0149_BLOCK_STATISTICS + , + D_BLOCK_STATISTICS_ALL, + D_BLOCK_STATISTICS_CODED, +#endif }; - #define _CNL_DEF(_s) {_s,(std::string(#_s))} inline void tracing_uninit( CDTrace *pDtrace ) @@ -245,6 +249,11 @@ inline CDTrace* tracing_init( std::string& sTracingFile, std::string& sTracingRu _CNL_DEF( D_RDOQ_COST ), _CNL_DEF( D_TMP ), _CNL_DEF( D_CRC ) + #if K0149_BLOCK_STATISTICS + , + _CNL_DEF( D_BLOCK_STATISTICS_ALL ), + _CNL_DEF( 
D_BLOCK_STATISTICS_CODED ), + #endif }; dtrace_channels_t channels( next_channels, &next_channels[sizeof( next_channels ) / sizeof( next_channels[0] )] ); diff --git a/source/Lib/CommonLib/x86/InitX86.cpp b/source/Lib/CommonLib/x86/InitX86.cpp index 47d109ea0e678a28b5e998c2b58363fea9dbdbb6..d94ede594c582b7bd6338a17f9203ff60b483451 100644 --- a/source/Lib/CommonLib/x86/InitX86.cpp +++ b/source/Lib/CommonLib/x86/InitX86.cpp @@ -169,5 +169,24 @@ void AdaptiveLoopFilter::initAdaptiveLoopFilterX86() } #endif +#if ENABLE_SIMD_OPT_CPR +void IbcHashMap::initIbcHashMapX86() +{ + auto vext = read_x86_extension_flags(); + switch (vext) + { + case AVX512: + case AVX2: + case AVX: + case SSE42: + _initIbcHashMapX86<SSE42>(); + break; + case SSE41: + default: + break; + } +} +#endif + #endif diff --git a/source/Lib/DecoderAnalyserLib/CMakeLists.txt b/source/Lib/DecoderAnalyserLib/CMakeLists.txt index b26321590b94b8c2eaac751d7ed8dcf980bdeedc..9b0017617cd6af649ccf69129f8205355165e636 100644 --- a/source/Lib/DecoderAnalyserLib/CMakeLists.txt +++ b/source/Lib/DecoderAnalyserLib/CMakeLists.txt @@ -18,7 +18,7 @@ target_compile_definitions( ${LIB_NAME} PUBLIC RExt__DECODER_DEBUG_BIT_STATISTIC target_compile_definitions( ${LIB_NAME} PUBLIC RExt__DECODER_DEBUG_TOOL_STATISTICS=1 ) if( ENABLE_VTM ) - target_compile_definitions( ${LIB_NAME} PUBLIC JEM_TOOLS=0 ) + target_compile_definitions( ${LIB_NAME} PUBLIC BMS_TOOLS=0 ) endif() if( EXTENSION_360_VIDEO ) diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp index 16908fa548e848a3c068886d6e637c4a0fbf8c48..91eba95ce14e8ccdce8de247ef665c4f2c5c5158 100644 --- a/source/Lib/DecoderLib/CABACReader.cpp +++ b/source/Lib/DecoderLib/CABACReader.cpp @@ -180,7 +180,7 @@ bool CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i ctx += leftCTUAddr > -1 ? ( ctbAlfFlag[leftCTUAddr] ? 1 : 0 ) : 0; ctx += aboveCTUAddr > -1 ? ( ctbAlfFlag[aboveCTUAddr] ? 
1 : 0 ) : 0; - if( alfSliceParam.chromaCtbPresentFlag && compIdx ) + if( compIdx && alfSliceParam.chromaCtbPresentFlag ) { ctbAlfFlag[ctuRsAddr] = 1; } @@ -703,7 +703,6 @@ bool CABACReader::split_cu_flag( CodingStructure& cs, Partitioner &partitioner ) bool CABACReader::coding_unit( CodingUnit &cu, Partitioner &partitioner, CUCtx& cuCtx ) { CodingStructure& cs = *cu.cs; - // transquant bypass flag if( cs.pps->getTransquantBypassEnabledFlag() ) { @@ -802,7 +801,7 @@ void CABACReader::imv_mode( CodingUnit& cu, MergeCtx& mrgCtx ) unsigned value = 0; unsigned ctxId = DeriveCtx::CtxIMVFlag( cu ); - value = m_BinDecoder.decodeBin( Ctx::ImvFlag( ctxId ) ); + value = m_BinDecoder.decodeBin( Ctx::ImvFlag( ctxId ) ); DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", value, ctxId ); if( spsNext.getImvMode() == IMV_4PEL && value ) @@ -852,7 +851,6 @@ void CABACReader::cu_pred_data( CodingUnit &cu ) intra_chroma_pred_modes( cu ); return; } - MergeCtx mrgCtx; for( auto &pu : CU::traversePUs( cu ) ) @@ -863,6 +861,7 @@ void CABACReader::cu_pred_data( CodingUnit &cu ) #if JVET_K0357_AMVR imv_mode ( cu, mrgCtx ); #endif + } diff --git a/source/Lib/DecoderLib/CMakeLists.txt b/source/Lib/DecoderLib/CMakeLists.txt index 0ec46167283c09b10e12bbfd98ee9adabec5598a..62413e9004538890a6c4506006be8d78ea6c6959 100644 --- a/source/Lib/DecoderLib/CMakeLists.txt +++ b/source/Lib/DecoderLib/CMakeLists.txt @@ -17,7 +17,7 @@ add_library( ${LIB_NAME} STATIC ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} ) target_compile_definitions( ${LIB_NAME} PUBLIC ) if( ENABLE_VTM ) - target_compile_definitions( ${LIB_NAME} PUBLIC JEM_TOOLS=0 ) + target_compile_definitions( ${LIB_NAME} PUBLIC BMS_TOOLS=0 ) endif() if( EXTENSION_360_VIDEO ) diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp index 370f41f93afabe3f8a81491ee1afa5cb68241f47..4c06ed241f2e349832f4efb4ac77e8089b43ad45 100644 --- a/source/Lib/DecoderLib/DecCu.cpp +++ b/source/Lib/DecoderLib/DecCu.cpp @@ -48,6 +48,10 @@ 
#if RExt__DECODER_DEBUG_TOOL_STATISTICS #include "CommonLib/CodingStatistics.h" #endif +#if K0149_BLOCK_STATISTICS +#include "CommonLib/ChromaFormat.h" +#include "CommonLib/dtrace_blockstatistics.h" +#endif //! \ingroup DecoderLib //! \{ @@ -107,6 +111,9 @@ void DecCu::decompressCtu( CodingStructure& cs, const UnitArea& ctuArea ) DTRACE_BLOCK_REC( cs.picture->getRecoBuf( currCU ), currCU, currCU.predMode ); } } +#if K0149_BLOCK_STATISTICS + getAndStoreBlockStatistics(cs, ctuArea); +#endif } // ==================================================================================================================== @@ -400,8 +407,13 @@ void DecCu::xDeriveCUMV( CodingUnit &cu ) MergeCtx mrgCtx; #if RExt__DECODER_DEBUG_TOOL_STATISTICS +#if JVET_K_AFFINE + if( pu.cu->affine ) + { + CodingStatistics::IncrementStatisticTool( CodingStatisticsClassType{ STATS__TOOL_AFF, pu.Y().width, pu.Y().height } ); + } +#endif #endif - if( pu.mergeFlag ) { { diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp index 511967e0710bec006bc64f227b1e392b230c1f0b..590a0525cd48af9fdef09e17267eaf305c5b5e91 100644 --- a/source/Lib/DecoderLib/DecLib.cpp +++ b/source/Lib/DecoderLib/DecLib.cpp @@ -48,6 +48,9 @@ #include <fcntl.h> #include "AnnexBread.h" #include "NALread.h" +#if K0149_BLOCK_STATISTICS +#include "CommonLib/dtrace_blockstatistics.h" +#endif #if RExt__DECODER_DEBUG_TOOL_STATISTICS #include "CommonLib/CodingStatistics.h" @@ -512,6 +515,10 @@ void DecLib::executeLoopFilters() // deblocking filter m_cLoopFilter.loopFilterPic( cs ); +#if DMVR_JVET_LOW_LATENCY_K0217 + CS::setRefinedMotionField(cs); +#endif + if( cs.sps->getUseSAO() ) { m_cSAO.SAOProcess( cs, cs.picture->getSAO() ); @@ -898,6 +905,15 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl } m_apcSlicePilot->setIndependentSliceIdx(uiIndependentSliceIdx); +#if K0149_BLOCK_STATISTICS + PPS *pps = m_parameterSetManager.getPPS(m_apcSlicePilot->getPPSId()); + CHECK(pps == 0, "No 
PPS present"); + SPS *sps = m_parameterSetManager.getSPS(pps->getSPSId()); + CHECK(sps == 0, "No SPS present"); + + writeBlockStatisticsHeader(sps); +#endif + DTRACE_UPDATE( g_trace_ctx, std::make_pair( "poc", m_apcSlicePilot->getPOC() ) ); #if HEVC_DEPENDENT_SLICES @@ -1085,7 +1101,7 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl pcSlice->checkCRA(pcSlice->getRPS(), m_pocCRA, m_associatedIRAPType, m_cListPic ); // Set reference list pcSlice->setRefPicList( m_cListPic, true, true ); - + if (!pcSlice->isIntra()) { bool bLowDelay = true; diff --git a/source/Lib/DecoderLib/DecSlice.cpp b/source/Lib/DecoderLib/DecSlice.cpp index 88ae6e7632318e469898361eee3c930f0bcf40ac..01c9f40106e0516cc6d78e6d95956a56125e0bb3 100644 --- a/source/Lib/DecoderLib/DecSlice.cpp +++ b/source/Lib/DecoderLib/DecSlice.cpp @@ -225,6 +225,7 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream ) #endif + isLastCtuOfSliceSegment = cabacReader.coding_tree_unit( cs, ctuArea, pic->m_prevQP, ctuRsAddr ); m_pcCuDecoder->decompressCtu( cs, ctuArea ); diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp index b681d5b1b2b9a968ac33c358430caf99c6719b0e..fe994754f6a0ac68f074fcbb10995ff7e83e6690 100644 --- a/source/Lib/DecoderLib/VLCReader.cpp +++ b/source/Lib/DecoderLib/VLCReader.cpp @@ -827,7 +827,6 @@ void HLSyntaxReader::parseSPSNext( SPSNext& spsNext, const bool usePCM ) } #endif #endif - for( int k = 0; k < SPSNext::NumReservedFlags; k++ ) { READ_FLAG( symbol, "reserved_flag" ); if( symbol != 0 ) EXIT("Incompatible version: SPSNext reserved flag not equal to zero (bitstream was probably created with newer software version)" ); @@ -1819,7 +1818,7 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, ParameterSetManager *para pcSlice->setSubPuMvpSliceSubblkSizeEnable(uiCode); if (pcSlice->getSubPuMvpSliceSubblkSizeEnable()) { - READ_CODE(3, uiCode, "slice_atmvp_subblk_size_log2"); + READ_CODE(3, uiCode, 
"log2_slice_sub_pu_tmvp_size_minus2"); pcSlice->setSubPuMvpSubblkLog2Size(uiCode + MIN_CU_LOG2); } else diff --git a/source/Lib/EncoderLib/Analyze.h b/source/Lib/EncoderLib/Analyze.h index 977db82526cfda2cc3ca80a2e59d0d02b1672324..2e5e4736054211ade625a795d717096a26d9026b 100644 --- a/source/Lib/EncoderLib/Analyze.h +++ b/source/Lib/EncoderLib/Analyze.h @@ -45,6 +45,7 @@ #include <stdio.h> #include <memory.h> #include <assert.h> +#include <cinttypes> #include "CommonLib/CommonDef.h" #include "CommonLib/ChromaFormat.h" #include "math.h" @@ -84,9 +85,17 @@ public: virtual ~Analyze() {} Analyze() { clear(); } - void addResult( double psnr[MAX_NUM_COMPONENT], double bits, const double MSEyuvframe[MAX_NUM_COMPONENT]) + void addResult( double psnr[MAX_NUM_COMPONENT], double bits, const double MSEyuvframe[MAX_NUM_COMPONENT] +#if JVET_K0157 + , bool isEncodeLtRef +#endif + ) { m_dAddBits += bits; +#if JVET_K0157 + if (isEncodeLtRef) + return; +#endif for(uint32_t i=0; i<MAX_NUM_COMPONENT; i++) { m_dPSNRSum[i] += psnr[i]; @@ -173,11 +182,10 @@ public: PSNRyuv = (MSEyuv == 0) ? 
999.99 : 10.0 * log10((maxval * maxval) / MSEyuv); } - #if ENABLE_QPA || WCG_WPSNR - void printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const BitDepths &bitDepths, const bool useWPSNR = false ) + void printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths, const bool useWPSNR = false ) #else - void printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const BitDepths &bitDepths ) + void printOut ( char cDelim, const ChromaFormat chFmt, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths ) #endif { #if !WCG_WPSNR @@ -226,6 +234,11 @@ public: #endif msg( e_msg_level, " \tTotal Frames | " "Bitrate " "Y-PSNR" ); + if (printHexPsnr) + { + msg(e_msg_level, "xY-PSNR "); + } + if (printSequenceMSE) { msg( e_msg_level, " Y-MSE\n" ); @@ -244,6 +257,19 @@ public: #endif getPsnr(COMPONENT_Y) / (double)getNumPic() ); + if (printHexPsnr) + { + double dPsnr; + uint64_t xPsnr; + dPsnr = getPsnr(COMPONENT_Y) / (double)getNumPic(); + + copy(reinterpret_cast<uint8_t *>(&dPsnr), + reinterpret_cast<uint8_t *>(&dPsnr) + sizeof(dPsnr), + reinterpret_cast<uint8_t *>(&xPsnr)); + + msg(e_msg_level, " %16" PRIx64 " ", xPsnr); + } + if (printSequenceMSE) { msg( e_msg_level, " %8.4lf\n", m_MSEyuvframe[COMPONENT_Y] / (double)getNumPic() ); @@ -267,6 +293,11 @@ public: #endif msg( e_msg_level, "\tTotal Frames | " "Bitrate " "Y-PSNR" ); + if (printHexPsnr) + { + msg(e_msg_level, "xY-PSNR "); + } + if (printSequenceMSE) { msg( e_msg_level, " Y-MSE\n" ); @@ -285,6 +316,19 @@ public: #endif getPsnr(COMPONENT_Y) / (double)getNumPic() ); + if (printHexPsnr) + { + double dPsnr; + uint64_t xPsnr; + dPsnr = getPsnr(COMPONENT_Y) / (double)getNumPic(); + + copy(reinterpret_cast<uint8_t *>(&dPsnr), + reinterpret_cast<uint8_t 
*>(&dPsnr) + sizeof(dPsnr), + reinterpret_cast<uint8_t *>(&xPsnr)); + + msg(e_msg_level, " %16" PRIx64 " ", xPsnr); + } + if (printSequenceMSE) { msg( e_msg_level, " %8.4lf\n", m_MSEyuvframe[COMPONENT_Y] / (double)getNumPic() ); @@ -313,6 +357,11 @@ public: #endif msg( e_msg_level, " \tTotal Frames | " "Bitrate " "Y-PSNR " "U-PSNR " "V-PSNR " "YUV-PSNR " ); + if (printHexPsnr) + { + msg(e_msg_level, "xY-PSNR " "xU-PSNR " "xV-PSNR "); + } + if (printSequenceMSE) { msg( e_msg_level, " Y-MSE " "U-MSE " "V-MSE " "YUV-MSE \n" ); @@ -340,6 +389,21 @@ public: getPsnr(COMPONENT_Cr) / (double)getNumPic(), PSNRyuv ); + if (printHexPsnr) + { + double dPsnr[MAX_NUM_COMPONENT]; + uint64_t xPsnr[MAX_NUM_COMPONENT]; + for (int i = 0; i < MAX_NUM_COMPONENT; i++) + { + dPsnr[i] = getPsnr((ComponentID)i) / (double)getNumPic(); + + copy(reinterpret_cast<uint8_t *>(&dPsnr[i]), + reinterpret_cast<uint8_t *>(&dPsnr[i]) + sizeof(dPsnr[i]), + reinterpret_cast<uint8_t *>(&xPsnr[i])); + } + msg(e_msg_level, " %16" PRIx64 " %16" PRIx64 " %16" PRIx64, xPsnr[COMPONENT_Y], xPsnr[COMPONENT_Cb], xPsnr[COMPONENT_Cr]); + } + if (printSequenceMSE) { msg( e_msg_level, " %8.4lf " "%8.4lf " "%8.4lf " "%8.4lf\n", @@ -373,6 +437,11 @@ public: m_ext360.printHeader(e_msg_level); #endif + if (printHexPsnr) + { + msg(e_msg_level, "xY-PSNR " "xU-PSNR " "xV-PSNR "); + } + if (printSequenceMSE) { msg( e_msg_level, " Y-MSE " "U-MSE " "V-MSE " "YUV-MSE \n" ); @@ -400,6 +469,21 @@ public: getPsnr(COMPONENT_Cr) / (double)getNumPic(), PSNRyuv ); + if (printHexPsnr) + { + double dPsnr[MAX_NUM_COMPONENT]; + uint64_t xPsnr[MAX_NUM_COMPONENT]; + for (int i = 0; i < MAX_NUM_COMPONENT; i++) + { + dPsnr[i] = getPsnr((ComponentID)i) / (double)getNumPic(); + + copy(reinterpret_cast<uint8_t *>(&dPsnr[i]), + reinterpret_cast<uint8_t *>(&dPsnr[i]) + sizeof(dPsnr[i]), + reinterpret_cast<uint8_t *>(&xPsnr[i])); + } + msg(e_msg_level, " %16" PRIx64 " %16" PRIx64 " %16" PRIx64 , xPsnr[COMPONENT_Y], xPsnr[COMPONENT_Cb], 
xPsnr[COMPONENT_Cr]); + } + #if EXTENSION_360_VIDEO m_ext360.printPSNRs(getNumPic(), e_msg_level); #endif @@ -427,7 +511,7 @@ public: } - void printSummary(const ChromaFormat chFmt, const bool printSequenceMSE, const BitDepths &bitDepths, const std::string &sFilename) + void printSummary(const ChromaFormat chFmt, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths, const std::string &sFilename) { FILE* pFile = fopen (sFilename.c_str(), "at"); diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp index d98bd514611797f6bbf6811b58f27a64d245655f..10059954eda284343993f235a96be6cbe46b8b3a 100644 --- a/source/Lib/EncoderLib/CABACWriter.cpp +++ b/source/Lib/EncoderLib/CABACWriter.cpp @@ -642,7 +642,6 @@ void CABACWriter::split_cu_mode_mt(const PartSplit split, const CodingStructure& void CABACWriter::coding_unit( const CodingUnit& cu, Partitioner& partitioner, CUCtx& cuCtx ) { CodingStructure& cs = *cu.cs; - // transquant bypass flag if( cs.pps->getTransquantBypassEnabledFlag() ) { @@ -753,6 +752,7 @@ void CABACWriter::cu_pred_data( const CodingUnit& cu ) #if JVET_K0357_AMVR imv_mode ( cu ); #endif + } @@ -1302,7 +1302,7 @@ void CABACWriter::imv_mode( const CodingUnit& cu ) } unsigned ctxId = DeriveCtx::CtxIMVFlag( cu ); - m_BinEncoder.encodeBin( ( cu.imv > 0 ), Ctx::ImvFlag( ctxId ) ); + m_BinEncoder.encodeBin( ( cu.imv > 0 ), Ctx::ImvFlag( ctxId ) ); DTRACE( g_trace_ctx, D_SYNTAX, "imv_mode() value=%d ctx=%d\n", (cu.imv > 0), ctxId ); if( spsNext.getImvMode() == IMV_4PEL && cu.imv > 0 ) diff --git a/source/Lib/EncoderLib/CMakeLists.txt b/source/Lib/EncoderLib/CMakeLists.txt index 9c30ad89e76226aa20bbc92b7f6c7ff73058855e..9e75e9fb100aea05e051b09172210d93e3b66676 100644 --- a/source/Lib/EncoderLib/CMakeLists.txt +++ b/source/Lib/EncoderLib/CMakeLists.txt @@ -17,7 +17,7 @@ add_library( ${LIB_NAME} STATIC ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} ) target_compile_definitions( ${LIB_NAME} PUBLIC ) if( 
ENABLE_VTM ) - target_compile_definitions( ${LIB_NAME} PUBLIC JEM_TOOLS=0 ) + target_compile_definitions( ${LIB_NAME} PUBLIC BMS_TOOLS=0 ) endif() if( EXTENSION_360_VIDEO ) diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h index 015b7cc95794a95eb66267544956d842c630b8b8..6996a87228967306b9a46d9dc3d4142bcba838a5 100644 --- a/source/Lib/EncoderLib/EncCfg.h +++ b/source/Lib/EncoderLib/EncCfg.h @@ -212,6 +212,9 @@ protected: #if ENABLE_WPP_PARALLELISM bool m_AltDQPCoding; +#endif +#if JVET_K0157 + bool m_compositeRefEnabled; //composite reference #endif // ADD_NEW_TOOL : (encoder lib) add tool enabling flags and associated parameters here @@ -567,8 +570,8 @@ public: bool getPrintMSEBasedSequencePSNR () const { return m_printMSEBasedSequencePSNR; } void setPrintMSEBasedSequencePSNR (bool value) { m_printMSEBasedSequencePSNR = value; } - bool getPrintHexPsnr() const { return m_printHexPsnr; } - void setPrintHexPsnr(bool value) { m_printHexPsnr = value; } + bool getPrintHexPsnr () const { return m_printHexPsnr; } + void setPrintHexPsnr (bool value) { m_printHexPsnr = value; } bool getPrintFrameMSE () const { return m_printFrameMSE; } void setPrintFrameMSE (bool value) { m_printFrameMSE = value; } @@ -668,6 +671,11 @@ public: +#if JVET_K0157 + void setUseCompositeRef (bool b) { m_compositeRefEnabled = b; } + bool getUseCompositeRef () const { return m_compositeRefEnabled; } +#endif + // ADD_NEW_TOOL : (encoder lib) add access functions here void setMaxCUWidth ( uint32_t u ) { m_maxCUWidth = u; } diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp index dadf0bf0252c3eef545f147d9cff7032afbf4d77..4dd1259285ddefe1ad264ddd7cb4d51eedd7031f 100644 --- a/source/Lib/EncoderLib/EncCu.cpp +++ b/source/Lib/EncoderLib/EncCu.cpp @@ -302,7 +302,6 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign // init the partitioning manager Partitioner *partitioner = PartitionerFactory::get( *cs.slice ); 
partitioner->initCtu( area, CH_L, *cs.slice ); - // init current context pointer m_CurrCtx = m_CtxBuffer.data(); @@ -322,7 +321,9 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1 && KEEP_PRED_AND_RESI_SIGNALS; cs.useSubStructure( *bestCS, partitioner->chType, CS::getArea( *bestCS, area, partitioner->chType ), copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals ); - if( !cs.pcv->ISingleTree && cs.slice->isIntra() && cs.pcv->chrFormat != CHROMA_400 ) + if( !cs.pcv->ISingleTree && + cs.slice->isIntra() + && cs.pcv->chrFormat != CHROMA_400 ) { m_CABACEstimator->getCtx() = m_CurrCtx->start; @@ -340,6 +341,12 @@ void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsign cs.useSubStructure( *bestCS, partitioner->chType, CS::getArea( *bestCS, area, partitioner->chType ), copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals ); } +#if JVET_K0390_RATECTRL + if (m_pcEncCfg->getUseRateCtrl()) + { + (m_pcRateCtrl->getRCPic()->getLCU(ctuRsAddr)).m_actualMSE = (double)bestCS->dist / (double)m_pcRateCtrl->getRCPic()->getLCU(ctuRsAddr).m_numberOfPixel; + } +#endif // reset context states and uninit context pointer m_CABACEstimator->getCtx() = m_CurrCtx->start; m_CurrCtx = 0; @@ -555,7 +562,6 @@ void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Par } #endif - m_modeCtrl->initCULevel( partitioner, *tempCS ); m_CurrCtx->start = m_CABACEstimator->getCtx(); @@ -704,9 +710,9 @@ void EncCu::updateLambda( Slice* slice, double dQP ) #endif #endif double qp_temp = (double) dQP + bitdepth_luma_qp_scale - SHIFT_QP; - + double dQPFactor = m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_QPFactor; - + if( slice->getSliceType() == I_SLICE ) { if( m_pcEncCfg->getIntraQpFactor() >= 0.0 /*&& m_pcEncCfg->getGOPEntry( m_pcSliceEncoder->getGopId() ).m_sliceType != I_SLICE*/ ) @@ -757,7 +763,7 @@ void EncCu::updateLambda( Slice* slice, double 
dQP ) dLambda *= lambdaModifier; int qpBDoffset = slice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA); - int iQP = max( -qpBDoffset, min( MAX_QP, (int) floor( dQP + 0.5 ) ) ); + int iQP = Clip3(-qpBDoffset, MAX_QP, (int)floor(dQP + 0.5)); m_pcSliceEncoder->setUpLambda(slice, dLambda, iQP); #else @@ -1249,7 +1255,8 @@ void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestC m_CABACEstimator->cu_transquant_bypass_flag( cu ); } - if( !cu.cs->slice->isIntra() ) + if( !cu.cs->slice->isIntra() + ) { m_CABACEstimator->cu_skip_flag ( cu ); } @@ -1326,7 +1333,7 @@ void EncCu::xCheckIntraPCM(CodingStructure *&tempCS, CodingStructure *&bestCS, P cu.ipcm = true; tempCS->addPU(tempCS->area, partitioner.chType); - + tempCS->addTU( tempCS->area, partitioner.chType ); m_pcIntraSearch->IPCMSearch(*tempCS, partitioner); @@ -1340,7 +1347,8 @@ void EncCu::xCheckIntraPCM(CodingStructure *&tempCS, CodingStructure *&bestCS, P m_CABACEstimator->cu_transquant_bypass_flag( cu ); } - if( !cu.cs->slice->isIntra() ) + if( !cu.cs->slice->isIntra() + ) { m_CABACEstimator->cu_skip_flag ( cu ); } @@ -1505,6 +1513,10 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& static_vector<unsigned, MRG_MAX_NUM_CANDS> RdModeList; bool mrgTempBufSet = false; +#if DMVR_JVET_LOW_LATENCY_K0217 + Mv refinedMvdL0[MRG_MAX_NUM_CANDS]; +#endif + for( unsigned i = 0; i < MRG_MAX_NUM_CANDS; i++ ) { RdModeList.push_back( i ); @@ -1553,7 +1565,6 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& m_pcRdCost->setDistParam (distParam, tempCS->getOrgBuf().Y(), m_acMergeBuffer[0].Y(), sps.getBitDepth (CHANNEL_TYPE_LUMA), COMPONENT_Y, bUseHadamard); const UnitArea localUnitArea( tempCS->area.chromaFormat, Area( 0, 0, tempCS->area.Y().width, tempCS->area.Y().height) ); - for( uint32_t uiMergeCand = 0; uiMergeCand < mergeCtx.numValidMergeCand; uiMergeCand++ ) { acMergeBuffer[uiMergeCand] = m_acMergeBuffer[uiMergeCand].getBuf( localUnitArea 
); @@ -1570,6 +1581,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& { mergeCtx.mvFieldNeighbours[2*uiMergeCand].mv = pu.mv[0]; mergeCtx.mvFieldNeighbours[2*uiMergeCand+1].mv = pu.mv[1]; +#if DMVR_JVET_LOW_LATENCY_K0217 + refinedMvdL0[uiMergeCand] = pu.mvd[0]; +#endif } #if DISTORTION_TYPE_BUGFIX @@ -1585,10 +1599,8 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& double cost = (double)uiSad + (double)uiBitsCand * sqrtLambdaForFirstPass; updateCandList( uiMergeCand, cost, RdModeList, candCostList, uiNumMrgSATDCand ); - CHECK( std::min( uiMergeCand + 1, uiNumMrgSATDCand ) != RdModeList.size(), "" ); } - // Try to limit number of candidates using SATD-costs for( uint32_t i = 1; i < uiNumMrgSATDCand; i++ ) { @@ -1617,7 +1629,6 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& for( uint32_t uiMrgHADIdx = 0; uiMrgHADIdx < uiNumMrgSATDCand; uiMrgHADIdx++ ) { uint32_t uiMergeCand = RdModeList[uiMrgHADIdx]; - if( ( (uiNoResidualPass != 0) && candHasNoResidual[uiMergeCand] ) || ( (uiNoResidualPass == 0) && bestIsSkip ) ) { @@ -1647,6 +1658,9 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& if( mrgTempBufSet ) { +#if DMVR_JVET_LOW_LATENCY_K0217 + pu.mvd[0] = refinedMvdL0[uiMergeCand]; +#endif tempCS->getPredBuf().copyFrom( acMergeBuffer[ uiMergeCand ]); } else @@ -1655,11 +1669,15 @@ void EncCu::xCheckRDCostMerge2Nx2N( CodingStructure *&tempCS, CodingStructure *& } + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass #if JVET_K0357_AMVR - xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, NULL, true, ( ( uiNoResidualPass == 0 ) ? &candHasNoResidual[uiMergeCand] : NULL ) ); -#else - xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, uiNoResidualPass, ( ( uiNoResidualPass == 0 ) ? 
&candHasNoResidual[uiMergeCand] : NULL ) ); + , NULL #endif +#if JVET_K1000_SIMPLIFIED_EMT + , 1 +#endif + , uiNoResidualPass == 0 ? &candHasNoResidual[uiMergeCand] : NULL ); + if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip ) { bestIsSkip = bestCS->getCU( partitioner.chType )->rootCbf == 0; @@ -1744,7 +1762,6 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct cu.firstPU->mergeFlag = true; cu.firstPU->mergeIdx = 0; - PU::getAffineMergeCand( *cu.firstPU, affineMvField, interDirNeighbours, numValidMergeCand ); if( numValidMergeCand == -1 ) { @@ -1758,30 +1775,38 @@ void EncCu::xCheckRDCostAffineMerge2Nx2N( CodingStructure *&tempCS, CodingStruct PU::spanMotionInfo( *cu.firstPU ); m_pcInterSearch->motionCompensation( cu ); + + xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0 #if JVET_K0357_AMVR - xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, NULL, true, &hasNoResidual); -#else - xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, &hasNoResidual); + , NULL #endif +#if JVET_K1000_SIMPLIFIED_EMT + , 1 +#endif + , &hasNoResidual); if( ! 
(encTestMode.lossless || hasNoResidual) ) { tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); tempCS->copyStructure( *bestCS, partitioner.chType ); tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() ); + + xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 1 #if JVET_K0357_AMVR - xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 1, NULL, true, &hasNoResidual); -#else - xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 1, &hasNoResidual); + , NULL +#endif +#if JVET_K1000_SIMPLIFIED_EMT + , 1 #endif + , &hasNoResidual); } } #endif - void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode ) { tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType ); partitioner.setCUData( cu ); @@ -1798,16 +1823,24 @@ void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestC cu.qp = encTestMode.qp; CU::addPUs( cu ); + m_pcInterSearch->predInterSearch( cu, partitioner ); #if JVET_K0357_AMVR const unsigned wIdx = gp_sizeIdxInfo->idxFrom( tempCS->area.lwidth () ); #endif + + + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, 0 #if JVET_K0357_AMVR - xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, (m_pImvTempCS ? m_pImvTempCS[wIdx][encTestMode.partSize] : NULL)); -#else - xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, NULL); + , m_pImvTempCS ? m_pImvTempCS[wIdx][encTestMode.partSize] : NULL +#endif +#if JVET_K1000_SIMPLIFIED_EMT + , 1 #endif + , 0 + ); + } @@ -1843,6 +1876,7 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be } } + CodingUnit &cu = ( pcCUInfo2Reuse != nullptr ) ? 
*tempCS->getCU( partitioner.chType ) : tempCS->addCU( tempCS->area, partitioner.chType ); if( pcCUInfo2Reuse == nullptr ) @@ -1877,11 +1911,13 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be cu.emtFlag = false; #endif + if( pcCUInfo2Reuse != nullptr ) { // reuse the motion info from pcCUInfo2Reuse CU::resetMVDandMV2Int( cu, m_pcInterSearch ); + if( !CU::hasSubCUNonZeroMVd( cu ) ) { m_modeCtrl->useModeResult( encTestModeBase, tempCS, partitioner ); @@ -1894,26 +1930,41 @@ bool EncCu::xCheckRDCostInterIMV( CodingStructure *&tempCS, CodingStructure *&be } else { + m_pcInterSearch->predInterSearch( cu, partitioner ); + } + if( !CU::hasSubCUNonZeroMVd( cu ) ) { m_modeCtrl->useModeResult( encTestModeBase, tempCS, partitioner ); return false; } - xEncodeInterResidual(tempCS, bestCS, partitioner, encTestModeBase, 0, NULL); + xEncodeInterResidual( tempCS, bestCS, partitioner, encTestModeBase, 0 +#if JVET_K0357_AMVR + , NULL +#endif +#if JVET_K1000_SIMPLIFIED_EMT + , true +#endif + , 0 + ); return true; } #endif +void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass #if JVET_K0357_AMVR -void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass, CodingStructure* imvCS, int emtMode, bool* bestHasNonResi ) -#else -void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass, bool* bestHasNonResi ) + , CodingStructure* imvCS +#endif +#if JVET_K1000_SIMPLIFIED_EMT + , int emtMode #endif + , bool* bestHasNonResi + ) { if( residualPass == 1 && encTestMode.lossless ) { @@ -1922,19 +1973,72 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be CodingUnit* cu = tempCS->getCU( partitioner.chType ); double 
bestCostInternal = MAX_DOUBLE; +#if JVET_K1000_SIMPLIFIED_EMT + double bestCost = bestCS->cost; + const SPS& sps = *tempCS->sps; + const int maxSizeEMT = tempCS->pcv->noRQT ? EMT_INTER_MAX_CU_WITH_QTBT : EMT_INTER_MAX_CU; + bool swapped = false; // avoid unwanted data copy + bool reloadCU = false; + const bool considerEmtSecondPass = emtMode && sps.getSpsNext().getUseInterEMT() && partitioner.currArea().lwidth() <= maxSizeEMT && partitioner.currArea().lheight() <= maxSizeEMT; + + int minEMTMode = 0; + int maxEMTMode = (considerEmtSecondPass?1:0); + if( emtMode == 2 ) + { + minEMTMode = maxEMTMode = (cu->emtFlag?1:0); + } + for( int curEmtMode = minEMTMode; curEmtMode <= maxEMTMode; curEmtMode++ ) +#endif { +#if JVET_K1000_SIMPLIFIED_EMT + if( reloadCU ) + { + if( bestCost == bestCS->cost ) //The first EMT pass didn't become the bestCS, so we clear the TUs generated + { + tempCS->clearTUs(); + } + else if( false == swapped ) + { + tempCS->initStructData( encTestMode.qp, encTestMode.lossless ); + tempCS->copyStructure( *bestCS, partitioner.chType ); + tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() ); + bestCost = bestCS->cost; + cu = tempCS->getCU( partitioner.chType ); + swapped = true; + } + else + { + tempCS->clearTUs(); + bestCost = bestCS->cost; + cu = tempCS->getCU( partitioner.chType ); + } + + //we need to restart the distortion for the new tempCS, the bit count and the cost + tempCS->dist = 0; + tempCS->fracBits = 0; + tempCS->cost = MAX_DOUBLE; + } + + reloadCU = true; // enable cu reloading +#endif cu->skip = false; +#if JVET_K1000_SIMPLIFIED_EMT + cu->emtFlag = curEmtMode; +#endif const bool skipResidual = residualPass == 1; m_pcInterSearch->encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual ); - xEncodeDontSplit( *tempCS, partitioner ); xCheckDQP( *tempCS, partitioner ); + +#if JVET_K1000_SIMPLIFIED_EMT + double emtFirstPassCost = tempCS->cost; +#endif #if JVET_K0357_AMVR if( imvCS && (tempCS->cost < imvCS->cost) ) { @@ -1944,7 +2048,6 @@ 
void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be } imvCS->copyStructure( *tempCS, partitioner.chType ); } - #endif if( NULL != bestHasNonResi && (bestCostInternal > tempCS->cost) ) { @@ -1959,8 +2062,22 @@ void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&be #endif xCheckBestMode( tempCS, bestCS, partitioner, encTestMode ); +#if JVET_K1000_SIMPLIFIED_EMT + //now we check whether the second pass should be skipped or not + if( !curEmtMode && maxEMTMode ) + { + const double thresholdToSkipEmtSecondPass = 1.1; // Skip checking EMT transforms + const bool bCond1 = !cu->firstTU->cbf[COMPONENT_Y]; - }//end emt loop + const bool bCond3 = emtFirstPassCost > ( bestCost * thresholdToSkipEmtSecondPass ); + + if( m_pcEncCfg->getFastInterEMT() && (bCond1 || bCond3 ) ) + { + maxEMTMode = 0; // do not test EMT + } + } +#endif + } //end emt loop } diff --git a/source/Lib/EncoderLib/EncCu.h b/source/Lib/EncoderLib/EncCu.h index 560973c32561f8b35ad41df9b8726f119029e78a..101a62b582b961f3a1fc883fcfbdf683ef8117a7 100644 --- a/source/Lib/EncoderLib/EncCu.h +++ b/source/Lib/EncoderLib/EncCu.h @@ -121,7 +121,6 @@ private: unsigned int m_prevPOC; bool m_clearSubMergeStatic; #endif - #if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM EncLib* m_pcEncLib; #endif @@ -201,13 +200,16 @@ protected: void xCheckRDCostMerge2Nx2N ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &pm, const EncTestMode& encTestMode ); + void xEncodeInterResidual ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass = 0 #if JVET_K0357_AMVR - void xEncodeInterResidual ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass = 0, CodingStructure* imvCS = NULL, int emtMode = 1, bool* bestHasNonResi = NULL ); -#else - void xEncodeInterResidual ( CodingStructure *&tempCS, CodingStructure *&bestCS, 
Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass, bool* bestHasNonResi ); + , CodingStructure* imvCS = NULL +#endif +#if JVET_K1000_SIMPLIFIED_EMT + , int emtMode = 1 #endif + , bool* bestHasNonResi = NULL + ); #if REUSE_CU_RESULTS - void xReuseCachedResult ( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &Partitioner ); #endif }; diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp index bebe67399e6c85b36ec66fede8929284a1212f73..8c133e5668b778383b3b41ededb430b7960ca2fb 100644 --- a/source/Lib/EncoderLib/EncGOP.cpp +++ b/source/Lib/EncoderLib/EncGOP.cpp @@ -110,6 +110,15 @@ EncGOP::EncGOP() #endif m_bInitAMaxBT = true; +#if JVET_K0157 + m_bgPOC = -1; + m_picBg = NULL; + m_picOrig = NULL; + m_isEncodedLTRef = false; + m_isUseLTRef = false; + m_isPrepareLTRef = true; + m_lastLTRefPoc = 0; +#endif } EncGOP::~EncGOP() @@ -139,6 +148,20 @@ void EncGOP::destroy() m_pcDeblockingTempPicYuv = NULL; } #endif +#if JVET_K0157 + if (m_picBg) + { + m_picBg->destroy(); + delete m_picBg; + m_picBg = NULL; + } + if (m_picOrig) + { + m_picOrig->destroy(); + delete m_picOrig; + m_picOrig = NULL; + } +#endif } void EncGOP::init ( EncLib* pcEncLib ) @@ -159,7 +182,7 @@ void EncGOP::init ( EncLib* pcEncLib ) m_totalCoded = 0; m_AUWriterIf = pcEncLib->getAUWriterIf(); - + #if WCG_EXT pcEncLib->getRdCost()->initLumaLevelToWeightTable(); #endif @@ -1286,7 +1309,11 @@ void trySkipOrDecodePicture( bool& decPic, bool& encPic, const EncCfg& cfg, Pict // ==================================================================================================================== void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, std::list<PelUnitBuf*>& rcListPicYuvRecOut, - bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE ) + bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE +#if JVET_K0157 + , bool 
isEncodeLtRef +#endif +) { // TODO: Split this function up. @@ -1296,7 +1323,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, pcBitstreamRedirect = new OutputBitstream; AccessUnit::iterator itLocationToPushSliceHeaderNALU; // used to store location where NALU containing slice header is to be inserted - xInitGOP( iPOCLast, iNumPicRcvd, isField ); + xInitGOP(iPOCLast, iNumPicRcvd, isField +#if JVET_K0157 + , isEncodeLtRef +#endif + ); m_iNumPicCoded = 0; SEIMessages leadingSeiMessages; @@ -1335,11 +1366,18 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, /////////////////////////////////////////////////////////////////////////////////////////////////// Initial to start encoding int iTimeOffset; int pocCurr; +#if JVET_K0157 + int multipleFactor = m_pcCfg->getUseCompositeRef() ? 2 : 1; +#endif if(iPOCLast == 0) //case first frame or first top field { pocCurr=0; +#if JVET_K0157 + iTimeOffset = multipleFactor; +#else iTimeOffset = 1; +#endif } else if(iPOCLast == 1 && isField) //case first bottom field, just like the first frame, the poc computation is not right anymore, we set the right value { @@ -1348,11 +1386,24 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } else { +#if JVET_K0157 + pocCurr = iPOCLast - iNumPicRcvd * multipleFactor + m_pcCfg->getGOPEntry(iGOPid).m_POC - ((isField && m_iGopSize>1) ? 1 : 0); +#else pocCurr = iPOCLast - iNumPicRcvd + m_pcCfg->getGOPEntry(iGOPid).m_POC - ((isField && m_iGopSize>1) ? 
1:0); +#endif iTimeOffset = m_pcCfg->getGOPEntry(iGOPid).m_POC; } +#if JVET_K0157 + if (m_pcCfg->getUseCompositeRef() && isEncodeLtRef) + { + pocCurr++; + iTimeOffset--; + } + if (pocCurr / multipleFactor >= m_pcCfg->getFramesToBeEncoded()) +#else if(pocCurr>=m_pcCfg->getFramesToBeEncoded()) +#endif { if (m_pcCfg->getEfficientFieldIRAPEnabled()) { @@ -1399,7 +1450,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, pcPic->allocateNewSlice(); m_pcSliceEncoder->setSliceSegmentIdx(0); - m_pcSliceEncoder->initEncSlice ( pcPic, iPOCLast, pocCurr, iGOPid, pcSlice, isField ); + m_pcSliceEncoder->initEncSlice(pcPic, iPOCLast, pocCurr, iGOPid, pcSlice, isField +#if JVET_K0157 + , isEncodeLtRef +#endif + ); DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "poc", pocCurr ) ) ); DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "final", 0 ) ) ); @@ -1425,7 +1480,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, { pcSlice->setSliceType(I_SLICE); } - // Set the nal unit type pcSlice->setNalUnitType(getNalUnitType(pocCurr, m_iLastIDR, isField)); if(pcSlice->getTemporalLayerNonReferenceFlag()) @@ -1463,7 +1517,34 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } pcSlice->decodingRefreshMarking(m_pocCRA, m_bRefreshPending, rcListPic, m_pcCfg->getEfficientFieldIRAPEnabled()); +#if JVET_K0157 + if (m_pcCfg->getUseCompositeRef() && isEncodeLtRef) + { + setUseLTRef(true); + setPrepareLTRef(false); + setNewestBgPOC(pocCurr); + setLastLTRefPoc(pocCurr); + } + else if (pcPic->cs->sps->getSpsNext().getUseCompositeRef() && getLastLTRefPoc() >= 0 && getEncodedLTRef()==false && !getPicBg()->getSpliceFull() && (pocCurr - getLastLTRefPoc()) > (m_pcCfg->getFrameRate() * 2)) + { + setUseLTRef(false); + setPrepareLTRef(false); + setEncodedLTRef(true); + setNewestBgPOC(-1); + setLastLTRefPoc(-1); + } + + if (pcPic->cs->sps->getSpsNext().getUseCompositeRef() && m_picBg->getSpliceFull() && getUseLTRef()) + { + 
m_pcEncLib->selectReferencePictureSet(pcSlice, pocCurr, iGOPid, m_bgPOC); + } + else + { + m_pcEncLib->selectReferencePictureSet(pcSlice, pocCurr, iGOPid, -1); + } +#else m_pcEncLib->selectReferencePictureSet(pcSlice, pocCurr, iGOPid); +#endif if (!m_pcCfg->getEfficientFieldIRAPEnabled()) { if ( pcSlice->getNalUnitType() == NAL_UNIT_CODED_SLICE_BLA_W_LP @@ -1484,7 +1565,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, || (m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pcSlice->getAssociatedIRAPType() >= NAL_UNIT_CODED_SLICE_BLA_W_LP && pcSlice->getAssociatedIRAPType() <= NAL_UNIT_CODED_SLICE_CRA && pcSlice->getAssociatedIRAPPOC() == pcSlice->getPOC()+1) ) { - pcSlice->createExplicitReferencePictureSetFromReference(rcListPic, pcSlice->getRPS(), pcSlice->isIRAP(), m_iLastRecoveryPicPOC, m_pcCfg->getDecodingRefreshType() == 3, m_pcCfg->getEfficientFieldIRAPEnabled()); + pcSlice->createExplicitReferencePictureSetFromReference(rcListPic, pcSlice->getRPS(), pcSlice->isIRAP(), m_iLastRecoveryPicPOC, m_pcCfg->getDecodingRefreshType() == 3, m_pcCfg->getEfficientFieldIRAPEnabled() +#if JVET_K0157 + , isEncodeLtRef, m_pcCfg->getUseCompositeRef() +#endif + ); } pcSlice->applyReferencePictureSet(rcListPic, pcSlice->getRPS()); @@ -1552,19 +1637,42 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } } } +#if JVET_K0157 + if (pcSlice->getRPSidx() == -1) + arrangeLongtermPicturesInRPS(pcSlice, rcListPic); +#else arrangeLongtermPicturesInRPS(pcSlice, rcListPic); +#endif RefPicListModification* refPicListModification = pcSlice->getRefPicListModification(); refPicListModification->setRefPicListModificationFlagL0(0); refPicListModification->setRefPicListModificationFlagL1(0); + +#if JVET_K0157 + if (m_pcCfg->getUseCompositeRef() && getUseLTRef() && (pocCurr > getLastLTRefPoc())) + { + pcSlice->setNumRefIdx(REF_PIC_LIST_0, min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive + 1, 
pcSlice->getRPS()->getNumberOfPictures())); + pcSlice->setNumRefIdx(REF_PIC_LIST_1, min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive + 1, pcSlice->getRPS()->getNumberOfPictures())); + } + else + { + pcSlice->setNumRefIdx(REF_PIC_LIST_0, std::min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive, pcSlice->getRPS()->getNumberOfPictures())); + pcSlice->setNumRefIdx(REF_PIC_LIST_1, std::min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive, pcSlice->getRPS()->getNumberOfPictures())); + } + if (pcPic->cs->sps->getSpsNext().getUseCompositeRef() && getPrepareLTRef()) { + arrangeCompositeReference(pcSlice, rcListPic, pocCurr); + } +#else pcSlice->setNumRefIdx(REF_PIC_LIST_0,min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive,pcSlice->getRPS()->getNumberOfPictures())); pcSlice->setNumRefIdx(REF_PIC_LIST_1,min(m_pcCfg->getGOPEntry(iGOPid).m_numRefPicsActive,pcSlice->getRPS()->getNumberOfPictures())); +#endif // Set reference list pcSlice->setRefPicList ( rcListPic ); if( m_pcCfg->getUseAMaxBT() ) { - if( !pcSlice->isIntra() ) + if( !pcSlice->isIntra() + ) { int refLayer = pcSlice->getDepth(); if( refLayer > 9 ) refLayer = 9; // Max layer is 10 @@ -1614,7 +1722,6 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, { pcSlice->setSliceType ( P_SLICE ); } - xUpdateRasInit( pcSlice ); // Do decoding refresh marking if any @@ -1767,12 +1874,12 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, #if V0078_ADAPTIVE_LOWER_BOUND if (estimatedCpbFullness - estimatedBits < m_pcRateCtrl->getRCPic()->getLowerBound()) { - estimatedBits = max(200, estimatedCpbFullness - m_pcRateCtrl->getRCPic()->getLowerBound()); + estimatedBits = std::max(200, estimatedCpbFullness - m_pcRateCtrl->getRCPic()->getLowerBound()); } #else if (estimatedCpbFullness - estimatedBits < (int)(m_pcRateCtrl->getCpbSize()*0.1f)) { - estimatedBits = max(200, estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.1f)); + estimatedBits = std::max(200, 
estimatedCpbFullness - (int)(m_pcRateCtrl->getCpbSize()*0.1f)); } #endif @@ -1905,7 +2012,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, trySkipOrDecodePicture( decPic, encPic, *m_pcCfg, pcPic ); pcPic->cs->slice = pcSlice; // please keep this - if (pcSlice->getPPS()->getSliceChromaQpFlag() && CS::isDualITree(*pcSlice->getPic()->cs)) +#if ENABLE_QPA + if (pcSlice->getPPS()->getSliceChromaQpFlag() && CS::isDualITree (*pcSlice->getPic()->cs) && !m_pcCfg->getUsePerceptQPA() && (m_pcCfg->getSliceChromaOffsetQpPeriodicity() == 0)) +#else + if (pcSlice->getPPS()->getSliceChromaQpFlag() && CS::isDualITree (*pcSlice->getPic()->cs)) +#endif { // overwrite chroma qp offset for dual tree pcSlice->setSliceChromaQpDelta(COMPONENT_Cb, m_pcCfg->getChromaCbQpOffsetDualTree()); @@ -2010,6 +2121,10 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, m_pcLoopFilter->loopFilterPic( cs ); +#if DMVR_JVET_LOW_LATENCY_K0217 + CS::setRefinedMotionField(cs); +#endif + DTRACE_UPDATE( g_trace_ctx, ( std::make_pair( "final", 1 ) ) ); if( pcSlice->getSPS()->getUseSAO() ) @@ -2043,7 +2158,12 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } } #endif - +#if JVET_K0157 + if (pcPic->cs->sps->getSpsNext().getUseCompositeRef() && getPrepareLTRef()) + { + updateCompositeReference(pcSlice, rcListPic, pocCurr); + } +#endif } else // skip enc picture { @@ -2059,7 +2179,8 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, { for( const CodingUnit *cu : pcPic->cs->cus ) { - if( !pcSlice->isIntra() ) + if( !pcSlice->isIntra() + ) { m_uiBlkSize[pcSlice->getDepth()] += cu->Y().area(); m_uiNumBlk [pcSlice->getDepth()]++; @@ -2236,7 +2357,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, m_pcCfg->setEncodedFlag(iGOPid, true); double PSNR_Y; - xCalculateAddPSNRs( isField, isTff, iGOPid, pcPic, accessUnit, rcListPic, encTime, snr_conversion, printFrameMSE, 
&PSNR_Y ); + xCalculateAddPSNRs(isField, isTff, iGOPid, pcPic, accessUnit, rcListPic, encTime, snr_conversion, printFrameMSE, &PSNR_Y +#if JVET_K0157 + , isEncodeLtRef +#endif + ); // Only produce the Green Metadata SEI message with the last picture. if( m_pcCfg->getSEIGreenMetadataInfoSEIEnable() && pcSlice->getPOC() == ( m_pcCfg->getFramesToBeEncoded() - 1 ) ) @@ -2300,7 +2425,10 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, pcPic->reconstructed = true; m_bFirst = false; m_iNumPicCoded++; - m_totalCoded ++; +#if JVET_K0157 + if (!(pcPic->cs->sps->getSpsNext().getUseCompositeRef() && isEncodeLtRef)) +#endif + m_totalCoded ++; /* logging: insert a newline at end of picture period */ if (m_pcCfg->getEfficientFieldIRAPEnabled()) @@ -2319,7 +2447,7 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, } -void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const BitDepths &bitDepths) +void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths) { #if ENABLE_QPA const bool useWPSNR = m_pcEncLib->getUseWPSNR(); @@ -2345,25 +2473,25 @@ void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool m_gcAnalyzeWPSNR.setFrmRate(m_pcCfg->getFrameRate()*rateMultiplier / (double)m_pcCfg->getTemporalSubsampleRatio()); } #endif - + const ChromaFormat chFmt = m_pcCfg->getChromaFormatIdc(); //-- all msg( INFO, "\n" ); msg( DETAILS,"\nSUMMARY --------------------------------------------------------\n" ); #if ENABLE_QPA - m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths, useWPSNR); + m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths, useWPSNR); #else - m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths); + 
m_gcAnalyzeAll.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths); #endif msg( DETAILS,"\n\nI Slices--------------------------------------------------------\n" ); - m_gcAnalyzeI.printOut('i', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths); + m_gcAnalyzeI.printOut('i', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths); msg( DETAILS,"\n\nP Slices--------------------------------------------------------\n" ); - m_gcAnalyzeP.printOut('p', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths); + m_gcAnalyzeP.printOut('p', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths); msg( DETAILS,"\n\nB Slices--------------------------------------------------------\n" ); - m_gcAnalyzeB.printOut('b', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths); + m_gcAnalyzeB.printOut('b', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths); #if WCG_WPSNR if (useLumaWPSNR) @@ -2374,14 +2502,14 @@ void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool #endif if (!m_pcCfg->getSummaryOutFilename().empty()) { - m_gcAnalyzeAll.printSummary(chFmt, printSequenceMSE, bitDepths, m_pcCfg->getSummaryOutFilename()); + m_gcAnalyzeAll.printSummary(chFmt, printSequenceMSE, printHexPsnr, bitDepths, m_pcCfg->getSummaryOutFilename()); } if (!m_pcCfg->getSummaryPicFilenameBase().empty()) { - m_gcAnalyzeI.printSummary(chFmt, printSequenceMSE, bitDepths, m_pcCfg->getSummaryPicFilenameBase()+"I.txt"); - m_gcAnalyzeP.printSummary(chFmt, printSequenceMSE, bitDepths, m_pcCfg->getSummaryPicFilenameBase()+"P.txt"); - m_gcAnalyzeB.printSummary(chFmt, printSequenceMSE, bitDepths, m_pcCfg->getSummaryPicFilenameBase()+"B.txt"); + m_gcAnalyzeI.printSummary(chFmt, printSequenceMSE, printHexPsnr, bitDepths, m_pcCfg->getSummaryPicFilenameBase()+"I.txt"); + m_gcAnalyzeP.printSummary(chFmt, printSequenceMSE, printHexPsnr, bitDepths, m_pcCfg->getSummaryPicFilenameBase()+"P.txt"); + m_gcAnalyzeB.printSummary(chFmt, 
printSequenceMSE, printHexPsnr, bitDepths, m_pcCfg->getSummaryPicFilenameBase()+"B.txt"); } #if WCG_WPSNR @@ -2399,13 +2527,13 @@ void EncGOP::printOutSummary(uint32_t uiNumAllPicCoded, bool isField, const bool msg( DETAILS,"\n\nSUMMARY INTERLACED ---------------------------------------------\n" ); #if ENABLE_QPA - m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths, useWPSNR); + m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths, useWPSNR); #else - m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, bitDepths); + m_gcAnalyzeAll_in.printOut('a', chFmt, printMSEBasedSNR, printSequenceMSE, printHexPsnr, bitDepths); #endif if (!m_pcCfg->getSummaryOutFilename().empty()) { - m_gcAnalyzeAll_in.printSummary(chFmt, printSequenceMSE, bitDepths, m_pcCfg->getSummaryOutFilename()); + m_gcAnalyzeAll_in.printSummary(chFmt, printSequenceMSE, printHexPsnr, bitDepths, m_pcCfg->getSummaryOutFilename()); #if WCG_WPSNR if (useLumaWPSNR) { @@ -2448,13 +2576,19 @@ uint64_t EncGOP::preLoopFilterPicAndCalcDist( Picture* pcPic ) // ==================================================================================================================== // Protected member functions // ==================================================================================================================== - - -void EncGOP::xInitGOP( int iPOCLast, int iNumPicRcvd, bool isField ) +void EncGOP::xInitGOP( int iPOCLast, int iNumPicRcvd, bool isField +#if JVET_K0157 + , bool isEncodeLtRef +#endif +) { CHECK(!( iNumPicRcvd > 0 ), "Unspecified error"); // Exception for the first frames +#if JVET_K0157 + if ((isField && (iPOCLast == 0 || iPOCLast == 1)) || (!isField && (iPOCLast == 0)) || isEncodeLtRef) +#else if ( ( isField && (iPOCLast == 0 || iPOCLast == 1) ) || (!isField && (iPOCLast == 0)) ) +#endif { m_iGopSize = 1; } @@ -2485,7 +2619,12 @@ void EncGOP::xGetBuffer( PicList& rcListPic, iTimeOffset--; } 
+#if JVET_K0157 + int multipleFactor = m_pcCfg->getUseCompositeRef() ? 2 : 1; + for (i = 0; i < (iNumPicRcvd * multipleFactor - iTimeOffset + 1); i += multipleFactor) +#else for ( i = 0; i < (iNumPicRcvd - iTimeOffset + 1); i++ ) +#endif { iterPicYuvRec--; } @@ -2512,15 +2651,18 @@ void EncGOP::xGetBuffer( PicList& rcListPic, #if ENABLE_QPA #ifndef BETA - #define BETA (2.0 / 3.0) // value between 0 and 1; use 0.0 for traditional PSNR + #define BETA 0.5 // value between 0.0 and 1; use 0.0 to obtain traditional PSNR #endif #define GLOBAL_AVERAGING 1 // "global" averaging of a_k across a set instead of one picture #if FRAME_WEIGHTING static const uint32_t DQP[16] = { 4, 12, 11, 12, 9, 12, 11, 12, 6, 12, 11, 12, 9, 12, 11, 12 }; #endif -static inline double calcWeightedSquaredError(const CPelBuf& org, const CPelBuf& rec, double &sumAct, - const uint32_t imageWidth, const uint32_t imageHeight, const uint32_t offsetX, const uint32_t offsetY, int blockWidth, int blockHeight) +static inline double calcWeightedSquaredError(const CPelBuf& org, const CPelBuf& rec, + double &sumAct, const uint32_t bitDepth, + const uint32_t imageWidth, const uint32_t imageHeight, + const uint32_t offsetX, const uint32_t offsetY, + int blockWidth, int blockHeight) { const int O = org.stride; const int R = rec.stride; @@ -2534,8 +2676,8 @@ static inline double calcWeightedSquaredError(const CPelBuf& org, const CPelB const int hAct = offsetY + (uint32_t)blockHeight < imageHeight ? blockHeight : blockHeight - 1; const int wAct = offsetX + (uint32_t)blockWidth < imageWidth ? blockWidth : blockWidth - 1; - uint64_t ssErr = 0; // sum of squared diffs - uint64_t saAct = 0; // sum of abs. activity + uint64_t ssErr = 0; // sum of squared diffs + uint64_t saAct = 0; // sum of abs. 
activity double msAct; int x, y; @@ -2544,7 +2686,7 @@ static inline double calcWeightedSquaredError(const CPelBuf& org, const CPelB { for (x = 0; x < blockWidth; x++) { - register int64_t iDiff = (int64_t)o[y*O + x] - (int64_t)r[y*R + x]; + const int64_t iDiff = (int64_t)o[y*O + x] - (int64_t)r[y*R + x]; ssErr += uint64_t(iDiff * iDiff); } } @@ -2554,13 +2696,18 @@ static inline double calcWeightedSquaredError(const CPelBuf& org, const CPelB { for (x = xAct; x < wAct; x++) { - saAct += uint64_t(abs(4 * (int64_t)o[y*O + x] - (int64_t)o[y*O + x-1] - (int64_t)o[y*O + x+1] - (int64_t)o[(y-1)*O + x] - (int64_t)o[(y+1)*O + x])); + const int f = 12 * (int)o[y*O + x] - 2 * ((int)o[y*O + x-1] + (int)o[y*O + x+1] + (int)o[(y-1)*O + x] + (int)o[(y+1)*O + x]) + - (int)o[(y-1)*O + x-1] - (int)o[(y-1)*O + x+1] - (int)o[(y+1)*O + x-1] - (int)o[(y+1)*O + x+1]; + saAct += abs(f); } } // calculate weight (mean squared activity) msAct = (double)saAct / (double(wAct - xAct) * double(hAct - yAct)); - if (msAct < 8.0) msAct = 8.0; + + // lower limit, accounts for high-pass gain + if (msAct < double(1 << (bitDepth - 4))) msAct = double(1 << (bitDepth - 4)); + msAct *= msAct; // because ssErr is squared sumAct += msAct; // includes high-pass gain @@ -2572,9 +2719,9 @@ static inline double calcWeightedSquaredError(const CPelBuf& org, const CPelB uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift #if ENABLE_QPA - , const uint32_t chromaShift /*= 0*/ + , const uint32_t chromaShift /*= 0*/ #endif - ) + ) { uint64_t uiTotalDiff; const Pel* pSrc0 = pic0.bufAt(0, 0); @@ -2591,7 +2738,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, { const uint32_t W = pic0.width; // image width const uint32_t H = pic0.height; // image height - const double R = double(W * H) / (1920.0 * 1080.0); + const double R = double(W * H) / (1920.0 * 1080.0); const uint32_t B = Clip3<uint32_t>(0, 128 >> chromaShift, 4 * uint32_t(16.0 * 
sqrt(R) + 0.5)); // WPSNR block size in integer multiple of 4 (for SIMD, = 64 at full-HD) uint32_t x, y; @@ -2603,7 +2750,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, { for (x = 0; x < W; x++) { - register int64_t iDiff = (int64_t)pSrc0[x] - (int64_t)pSrc1[x]; + const int64_t iDiff = (int64_t)pSrc0[x] - (int64_t)pSrc1[x]; uiTotalDiff += uint64_t(iDiff * iDiff); } pSrc0 += pic0.stride; @@ -2620,7 +2767,11 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, { for (x = 0; x < W; x += B) { - wmse += calcWeightedSquaredError(pic1, pic0, sumAct, W, H, x, y, B, B); + wmse += calcWeightedSquaredError(pic1, pic0, + sumAct, BD, + W, H, + x, y, + B, B); #if !GLOBAL_AVERAGING numAct += 1.0; #endif @@ -2629,11 +2780,17 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, // integer weighted distortion #if GLOBAL_AVERAGING - sumAct = 1.5 * double(1 << BD); - if ((W << chromaShift) > 2048 && (H << chromaShift) > 1280) // UHD luma + sumAct = 32.0 * double(1 << BD); + + if ((W << chromaShift) > 2048 && (H << chromaShift) > 1280) // for UHD/4K + { + sumAct *= 0.5; + } + else if ((W << chromaShift) <= 1024 || (H << chromaShift) <= 640) // 480p { - sumAct /= 1.5; + sumAct *= 2.0; } + return (wmse <= 0.0) ? 0 : uint64_t(wmse * pow(sumAct, BETA) + 0.5); #else return (wmse <= 0.0 || numAct <= 0.0) ? 
0 : uint64_t(wmse * pow(sumAct / numAct, BETA) + 0.5); @@ -2670,7 +2827,7 @@ uint64_t EncGOP::xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, return uiTotalDiff; } #if WCG_WPSNR -double EncGOP::xFindDistortionPlaneWPSNR(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift, const CPelBuf& picLuma0, +double EncGOP::xFindDistortionPlaneWPSNR(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift, const CPelBuf& picLuma0, ComponentID compID, const ChromaFormat chfmt ) { const bool useLumaWPSNR = m_pcEncLib->getLumaLevelToDeltaQPMapping().isEnabled(); @@ -2723,9 +2880,17 @@ double EncGOP::xFindDistortionPlaneWPSNR(const CPelBuf& pic0, const CPelBuf& pic } #endif -void EncGOP::xCalculateAddPSNRs( const bool isField, const bool isFieldTopFieldFirst, const int iGOPid, Picture* pcPic, const AccessUnit&accessUnit, PicList &rcListPic, const int64_t dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y ) +void EncGOP::xCalculateAddPSNRs( const bool isField, const bool isFieldTopFieldFirst, const int iGOPid, Picture* pcPic, const AccessUnit&accessUnit, PicList &rcListPic, const int64_t dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y +#if JVET_K0157 + , bool isEncodeLtRef +#endif +) { - xCalculateAddPSNR( pcPic, pcPic->getRecoBuf(), accessUnit, (double) dEncTime, snr_conversion, printFrameMSE, PSNR_Y ); + xCalculateAddPSNR(pcPic, pcPic->getRecoBuf(), accessUnit, (double)dEncTime, snr_conversion, printFrameMSE, PSNR_Y +#if JVET_K0157 + , isEncodeLtRef +#endif + ); //In case of field coding, compute the interlaced PSNR for both fields if(isField) @@ -2778,19 +2943,31 @@ void EncGOP::xCalculateAddPSNRs( const bool isField, const bool isFieldTopFieldF } Picture* correspondingFieldPic = *(iterPic); - if( (pcPic->topField && isFieldTopFieldFirst) || (!pcPic->topField && !isFieldTopFieldFirst)) + if ((pcPic->topField && isFieldTopFieldFirst) || 
(!pcPic->topField && !isFieldTopFieldFirst)) { - xCalculateInterlacedAddPSNR(pcPic, correspondingFieldPic, pcPic->getRecoBuf(), correspondingFieldPic->getRecoBuf(), snr_conversion, printFrameMSE, PSNR_Y ); + xCalculateInterlacedAddPSNR(pcPic, correspondingFieldPic, pcPic->getRecoBuf(), correspondingFieldPic->getRecoBuf(), snr_conversion, printFrameMSE, PSNR_Y +#if JVET_K0157 + , isEncodeLtRef +#endif + ); } else { - xCalculateInterlacedAddPSNR(correspondingFieldPic, pcPic, correspondingFieldPic->getRecoBuf(), pcPic->getRecoBuf(), snr_conversion, printFrameMSE, PSNR_Y ); + xCalculateInterlacedAddPSNR(correspondingFieldPic, pcPic, correspondingFieldPic->getRecoBuf(), pcPic->getRecoBuf(), snr_conversion, printFrameMSE, PSNR_Y +#if JVET_K0157 + , isEncodeLtRef +#endif + ); } } } } -void EncGOP::xCalculateAddPSNR( Picture* pcPic, PelUnitBuf cPicD, const AccessUnit& accessUnit, double dEncTime, const InputColourSpaceConversion conversion, const bool printFrameMSE, double* PSNR_Y ) +void EncGOP::xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUnit& accessUnit, double dEncTime, const InputColourSpaceConversion conversion, const bool printFrameMSE, double* PSNR_Y +#if JVET_K0157 + , bool isEncodeLtRef +#endif +) { const SPS& sps = *pcPic->cs->sps; const CPelUnitBuf& pic = cPicD; @@ -2860,7 +3037,7 @@ void EncGOP::xCalculateAddPSNR( Picture* pcPic, PelUnitBuf cPicD, const AccessUn #else const uint64_t uiSSDtemp = xFindDistortionPlane(recPB, orgPB, 0); #if WCG_WPSNR - const double uiSSDtempWeighted = xFindDistortionPlaneWPSNR(recPB, orgPB, 0, org.get(COMPONENT_Y), compID, format); + const double uiSSDtempWeighted = xFindDistortionPlaneWPSNR(recPB, orgPB, 0, org.get(COMPONENT_Y), compID, format); #endif const uint32_t maxval = 255 << (bitDepth - 8); #endif @@ -2919,13 +3096,21 @@ void EncGOP::xCalculateAddPSNR( Picture* pcPic, PelUnitBuf cPicD, const AccessUn m_vRVM_RP.push_back( uibits ); //===== add PSNR ===== - m_gcAnalyzeAll.addResult (dPSNR, 
(double)uibits, MSEyuvframe); + m_gcAnalyzeAll.addResult(dPSNR, (double)uibits, MSEyuvframe +#if JVET_K0157 + , isEncodeLtRef +#endif + ); #if EXTENSION_360_VIDEO m_ext360.addResult(m_gcAnalyzeAll); #endif if (pcSlice->isIntra()) { - m_gcAnalyzeI.addResult (dPSNR, (double)uibits, MSEyuvframe); + m_gcAnalyzeI.addResult(dPSNR, (double)uibits, MSEyuvframe +#if JVET_K0157 + , isEncodeLtRef +#endif + ); *PSNR_Y = dPSNR[COMPONENT_Y]; #if EXTENSION_360_VIDEO m_ext360.addResult(m_gcAnalyzeI); @@ -2933,7 +3118,11 @@ void EncGOP::xCalculateAddPSNR( Picture* pcPic, PelUnitBuf cPicD, const AccessUn } if (pcSlice->isInterP()) { - m_gcAnalyzeP.addResult (dPSNR, (double)uibits, MSEyuvframe); + m_gcAnalyzeP.addResult(dPSNR, (double)uibits, MSEyuvframe +#if JVET_K0157 + , isEncodeLtRef +#endif + ); *PSNR_Y = dPSNR[COMPONENT_Y]; #if EXTENSION_360_VIDEO m_ext360.addResult(m_gcAnalyzeP); @@ -2941,7 +3130,11 @@ void EncGOP::xCalculateAddPSNR( Picture* pcPic, PelUnitBuf cPicD, const AccessUn } if (pcSlice->isInterB()) { - m_gcAnalyzeB.addResult (dPSNR, (double)uibits, MSEyuvframe); + m_gcAnalyzeB.addResult(dPSNR, (double)uibits, MSEyuvframe +#if JVET_K0157 + , isEncodeLtRef +#endif + ); *PSNR_Y = dPSNR[COMPONENT_Y]; #if EXTENSION_360_VIDEO m_ext360.addResult(m_gcAnalyzeB); @@ -3024,7 +3217,11 @@ void EncGOP::xCalculateAddPSNR( Picture* pcPic, PelUnitBuf cPicD, const AccessUn void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* pcPicOrgSecondField, PelUnitBuf cPicRecFirstField, PelUnitBuf cPicRecSecondField, - const InputColourSpaceConversion conversion, const bool printFrameMSE, double* PSNR_Y ) + const InputColourSpaceConversion conversion, const bool printFrameMSE, double* PSNR_Y +#if JVET_K0157 + , bool isEncodeLtRef +#endif +) { const SPS &sps = *pcPicOrgFirstField->cs->sps; const ChromaFormat format = sps.getChromaFormatIdc(); @@ -3100,7 +3297,11 @@ void EncGOP::xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* uint32_t uibits = 0; // the 
number of bits for the pair is not calculated here - instead the overall total is used elsewhere. //===== add PSNR ===== - m_gcAnalyzeAll_in.addResult (dPSNR, (double)uibits, MSEyuvframe); + m_gcAnalyzeAll_in.addResult (dPSNR, (double)uibits, MSEyuvframe +#if JVET_K0157 + , isEncodeLtRef +#endif + ); *PSNR_Y = dPSNR[COMPONENT_Y]; @@ -3130,13 +3331,21 @@ NalUnitType EncGOP::getNalUnitType(int pocCurr, int lastIDR, bool isField) return NAL_UNIT_CODED_SLICE_IDR_W_RADL; } - if(m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pocCurr == 1) +#if JVET_K0157 + if (m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pocCurr == (m_pcCfg->getUseCompositeRef() ? 2: 1)) +#else + if (m_pcCfg->getEfficientFieldIRAPEnabled() && isField && pocCurr == 1) +#endif { // to avoid the picture becoming an IRAP return NAL_UNIT_CODED_SLICE_TRAIL_R; } - if(m_pcCfg->getDecodingRefreshType() != 3 && (pocCurr - isField) % m_pcCfg->getIntraPeriod() == 0) +#if JVET_K0157 + if (m_pcCfg->getDecodingRefreshType() != 3 && (pocCurr - isField) % (m_pcCfg->getIntraPeriod() * (m_pcCfg->getUseCompositeRef() ? 
2 : 1)) == 0) +#else + if (m_pcCfg->getDecodingRefreshType() != 3 && (pocCurr - isField) % m_pcCfg->getIntraPeriod() == 0) +#endif { if (m_pcCfg->getDecodingRefreshType() == 1) { @@ -3355,6 +3564,223 @@ void EncGOP::arrangeLongtermPicturesInRPS(Slice *pcSlice, PicList& rcListPic) } } +#if JVET_K0157 +void EncGOP::arrangeCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr) +{ + Picture* curPic = NULL; + PicList::iterator iterPic = rcListPic.begin(); + const PreCalcValues *pcv = pcSlice->getPPS()->pcv; + m_bgPOC = pocCurr + 1; + if (m_picBg->getSpliceFull()) + { + return; + } + while (iterPic != rcListPic.end()) + { + curPic = *(iterPic++); + if (curPic->getPOC() == pocCurr) + { + break; + } + } + if (pcSlice->getSliceType() == I_SLICE) + { + return; + } + + int width = pcv->lumaWidth; + int height = pcv->lumaHeight; + int stride = curPic->getOrigBuf().get(COMPONENT_Y).stride; + int cStride = curPic->getOrigBuf().get(COMPONENT_Cb).stride; + Pel* curLumaAddr = curPic->getOrigBuf().get(COMPONENT_Y).buf; + Pel* curCbAddr = curPic->getOrigBuf().get(COMPONENT_Cb).buf; + Pel* curCrAddr = curPic->getOrigBuf().get(COMPONENT_Cr).buf; + Pel* bgOrgLumaAddr = m_picOrig->getOrigBuf().get(COMPONENT_Y).buf; + Pel* bgOrgCbAddr = m_picOrig->getOrigBuf().get(COMPONENT_Cb).buf; + Pel* bgOrgCrAddr = m_picOrig->getOrigBuf().get(COMPONENT_Cr).buf; + int cuMaxWidth = pcv->maxCUWidth; + int cuMaxHeight = pcv->maxCUHeight; + int maxReplace = (pcv->sizeInCtus) / 2; + maxReplace = maxReplace < 1 ? 
1 : maxReplace; + typedef struct tagCostStr + { + double cost; + int ctuIdx; + }CostStr; + CostStr* minCtuCost = new CostStr[maxReplace]; + for (int i = 0; i < maxReplace; i++) + { + minCtuCost[i].cost = 1e10; + minCtuCost[i].ctuIdx = -1; + } + int bitIncrementY = pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 8; + int bitIncrementUV = pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_CHROMA) - 8; + for (int y = 0; y < height; y += cuMaxHeight) + { + for (int x = 0; x < width; x += cuMaxWidth) + { + double lcuDist = 0.0; + double lcuDistCb = 0.0; + double lcuDistCr = 0.0; + int realPixelCnt = 0; + double lcuCost = 1e10; + int largeDist = 0; + + for (int tmpy = 0; tmpy < cuMaxHeight; tmpy++) + { + if (y + tmpy >= height) + { + break; + } + for (int tmpx = 0; tmpx < cuMaxWidth; tmpx++) + { + if (x + tmpx >= width) + { + break; + } + + realPixelCnt++; + lcuDist += abs(curLumaAddr[(y + tmpy)*stride + x + tmpx] - bgOrgLumaAddr[(y + tmpy)*stride + x + tmpx]); + if (abs(curLumaAddr[(y + tmpy)*stride + x + tmpx] - bgOrgLumaAddr[(y + tmpy)*stride + x + tmpx]) >(20 << bitIncrementY)) + { + largeDist++; + } + + if (tmpy % 2 == 0 && tmpx % 2 == 0) + { + lcuDistCb += abs(curCbAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2] - bgOrgCbAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2]); + lcuDistCr += abs(curCrAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2] - bgOrgCrAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2]); + } + } + } + + //Test the vertical or horizontal edge for background patches candidates + int yInLCU = y / cuMaxHeight; + int xInLCU = x / cuMaxWidth; + int iLCUIdx = yInLCU * pcv->widthInCtus + xInLCU; + if ((largeDist / (double)realPixelCnt < 0.01 &&lcuDist / realPixelCnt < (3.5 * (1 << bitIncrementY)) && lcuDistCb / realPixelCnt < (0.5 * (1 << bitIncrementUV)) && lcuDistCr / realPixelCnt < (0.5 * (1 << bitIncrementUV)) && m_picBg->getSpliceIdx(iLCUIdx) == 0)) + { + lcuCost = lcuDist / realPixelCnt + lcuDistCb / realPixelCnt + lcuDistCr / realPixelCnt; + //obtain the 
maxReplace smallest cost + //1) find the largest cost in the maxReplace candidates + for (int i = 0; i < maxReplace - 1; i++) + { + if (minCtuCost[i].cost > minCtuCost[i + 1].cost) + { + swap(minCtuCost[i].cost, minCtuCost[i + 1].cost); + swap(minCtuCost[i].ctuIdx, minCtuCost[i + 1].ctuIdx); + } + } + // 2) compare the current cost with the largest cost + if (lcuCost < minCtuCost[maxReplace - 1].cost) + { + minCtuCost[maxReplace - 1].cost = lcuCost; + minCtuCost[maxReplace - 1].ctuIdx = iLCUIdx; + } + } + } + } + + // modify QP for background CTU + { + for (int i = 0; i < maxReplace; i++) + { + if (minCtuCost[i].ctuIdx != -1) + { + m_picBg->setSpliceIdx(minCtuCost[i].ctuIdx, pocCurr); + } + } + } + delete[]minCtuCost; +} + +void EncGOP::updateCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr) +{ + Picture* curPic = NULL; + const PreCalcValues *pcv = pcSlice->getPPS()->pcv; + PicList::iterator iterPic = rcListPic.begin(); + iterPic = rcListPic.begin(); + while (iterPic != rcListPic.end()) + { + curPic = *(iterPic++); + if (curPic->getPOC() == pocCurr) + { + break; + } + } + assert(curPic->getPOC() == pocCurr); + + int width = pcv->lumaWidth; + int height = pcv->lumaHeight; + int stride = curPic->getRecoBuf().get(COMPONENT_Y).stride; + int cStride = curPic->getRecoBuf().get(COMPONENT_Cb).stride; + + Pel* bgLumaAddr = m_picBg->getRecoBuf().get(COMPONENT_Y).buf; + Pel* bgCbAddr = m_picBg->getRecoBuf().get(COMPONENT_Cb).buf; + Pel* bgCrAddr = m_picBg->getRecoBuf().get(COMPONENT_Cr).buf; + Pel* curLumaAddr = curPic->getRecoBuf().get(COMPONENT_Y).buf; + Pel* curCbAddr = curPic->getRecoBuf().get(COMPONENT_Cb).buf; + Pel* curCrAddr = curPic->getRecoBuf().get(COMPONENT_Cr).buf; + + int maxCuWidth = pcv->maxCUWidth; + int maxCuHeight = pcv->maxCUHeight; + + // Update background reference + if (pcSlice->getSliceType() == I_SLICE)//(pocCurr == 0) + { + curPic->extendPicBorder(); + curPic->setBorderExtension(true); + + 
m_picBg->getRecoBuf().copyFrom(curPic->getRecoBuf()); + m_picOrig->getOrigBuf().copyFrom(curPic->getOrigBuf()); + } + else + { + //cout << "update B" << pocCurr << endl; + for (int y = 0; y < height; y += maxCuHeight) + { + for (int x = 0; x < width; x += maxCuWidth) + { + if (m_picBg->getSpliceIdx((y / maxCuHeight)*pcv->widthInCtus + x / maxCuWidth) == pocCurr) + { + for (int tmpy = 0; tmpy < maxCuHeight; tmpy++) + { + if (y + tmpy >= height) + { + break; + } + for (int tmpx = 0; tmpx < maxCuWidth; tmpx++) + { + if (x + tmpx >= width) + { + break; + } + bgLumaAddr[(y + tmpy)*stride + x + tmpx] = curLumaAddr[(y + tmpy)*stride + x + tmpx]; + if (tmpy % 2 == 0 && tmpx % 2 == 0) + { + bgCbAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2] = curCbAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2]; + bgCrAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2] = curCrAddr[(y + tmpy) / 2 * cStride + (x + tmpx) / 2]; + } + } + } + } + } + } + m_picBg->setBorderExtension(false); + m_picBg->extendPicBorder(); + m_picBg->setBorderExtension(true); + + curPic->extendPicBorder(); + curPic->setBorderExtension(true); + m_picOrig->getOrigBuf().copyFrom(curPic->getOrigBuf()); + + m_picBg->setBorderExtension(false); + m_picBg->extendPicBorder(); + m_picBg->setBorderExtension(true); + } +} +#endif + void EncGOP::applyDeblockingFilterMetric( Picture* pcPic, uint32_t uiNumSlices ) { PelBuf cPelBuf = pcPic->getRecoBuf().get( COMPONENT_Y ); diff --git a/source/Lib/EncoderLib/EncGOP.h b/source/Lib/EncoderLib/EncGOP.h index e7aa27d50d8fcaf15fb0e4de1b90d76ad165c62b..84c60e73bbc0a541bb385ddaa2ffd822fe42b803 100644 --- a/source/Lib/EncoderLib/EncGOP.h +++ b/source/Lib/EncoderLib/EncGOP.h @@ -131,6 +131,15 @@ private: SEIWriter m_seiWriter; +#if JVET_K0157 + Picture * m_picBg; + Picture * m_picOrig; + int m_bgPOC; + bool m_isEncodedLTRef; + bool m_isPrepareLTRef; + bool m_isUseLTRef; + int m_lastLTRefPoc; +#endif //--Adaptive Loop filter EncSampleAdaptiveOffset* m_pcSAO; #if JVET_K0371_ALF @@ -173,21 +182,45 
@@ public: void init ( EncLib* pcEncLib ); void compressGOP ( int iPOCLast, int iNumPicRcvd, PicList& rcListPic, std::list<PelUnitBuf*>& rcListPicYuvRec, - bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE ); + bool isField, bool isTff, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE +#if JVET_K0157 + , bool isEncodeLtRef +#endif + ); void xAttachSliceDataToNalUnit (OutputNALUnit& rNalu, OutputBitstream* pcBitstreamRedirect); int getGOPSize() { return m_iGopSize; } PicList* getListPic() { return m_pcListPic; } +#if JVET_K0157 + void setPicBg(Picture* tmpPicBg) { m_picBg = tmpPicBg; } + Picture* getPicBg() const { return m_picBg; } + void setPicOrig(Picture* tmpPicBg) { m_picOrig = tmpPicBg; } + Picture* getPicOrig() { return m_picOrig; } + void setNewestBgPOC(int poc) { m_bgPOC = poc; } + int getNewestBgPOC() const { return m_bgPOC; } + void setEncodedLTRef(bool isEncodedLTRef) { m_isEncodedLTRef = isEncodedLTRef; } + bool getEncodedLTRef() { return m_isEncodedLTRef; } + void setUseLTRef(bool isUseLTRef) { m_isUseLTRef = isUseLTRef; } + bool getUseLTRef() { return m_isUseLTRef; } + void setPrepareLTRef(bool isPrepareLTRef) { m_isPrepareLTRef = isPrepareLTRef; } + bool getPrepareLTRef() { return m_isPrepareLTRef; } + void setLastLTRefPoc(int iLastLTRefPoc) { m_lastLTRefPoc = iLastLTRefPoc; } + int getLastLTRefPoc() const { return m_lastLTRefPoc; } - void printOutSummary ( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const BitDepths &bitDepths ); +#endif + void printOutSummary ( uint32_t uiNumAllPicCoded, bool isField, const bool printMSEBasedSNR, const bool printSequenceMSE, const bool printHexPsnr, const BitDepths &bitDepths ); #if W0038_DB_OPT uint64_t preLoopFilterPicAndCalcDist( Picture* pcPic ); #endif EncSlice* getSliceEncoder() { return m_pcSliceEncoder; } NalUnitType getNalUnitType( int pocCurr, int lastIdr, bool isField ); void 
arrangeLongtermPicturesInRPS(Slice *, PicList& ); +#if JVET_K0157 + void arrangeCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr); + void updateCompositeReference(Slice* pcSlice, PicList& rcListPic, int pocCurr); +#endif #if EXTENSION_360_VIDEO Analyze& getAnalyzeAllData() { return m_gcAnalyzeAll; } @@ -201,15 +234,31 @@ protected: protected: - void xInitGOP ( int iPOCLast, int iNumPicRcvd, bool isField ); + void xInitGOP ( int iPOCLast, int iNumPicRcvd, bool isField +#if JVET_K0157 + , bool isEncodeLtRef +#endif + ); void xGetBuffer ( PicList& rcListPic, std::list<PelUnitBuf*>& rcListPicYuvRecOut, int iNumPicRcvd, int iTimeOffset, Picture*& rpcPic, int pocCurr, bool isField ); - void xCalculateAddPSNRs ( const bool isField, const bool isFieldTopFieldFirst, const int iGOPid, Picture* pcPic, const AccessUnit&accessUnit, PicList &rcListPic, int64_t dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y ); - void xCalculateAddPSNR ( Picture* pcPic, PelUnitBuf cPicD, const AccessUnit&, double dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y ); + void xCalculateAddPSNRs(const bool isField, const bool isFieldTopFieldFirst, const int iGOPid, Picture* pcPic, const AccessUnit&accessUnit, PicList &rcListPic, int64_t dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y +#if JVET_K0157 + , bool isEncodeLtRef +#endif + ); + void xCalculateAddPSNR(Picture* pcPic, PelUnitBuf cPicD, const AccessUnit&, double dEncTime, const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y +#if JVET_K0157 + , bool isEncodeLtRef +#endif + ); void xCalculateInterlacedAddPSNR( Picture* pcPicOrgFirstField, Picture* pcPicOrgSecondField, PelUnitBuf cPicRecFirstField, PelUnitBuf cPicRecSecondField, - const InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y ); + const 
InputColourSpaceConversion snr_conversion, const bool printFrameMSE, double* PSNR_Y +#if JVET_K0157 + , bool isEncodeLtRef +#endif + ); uint64_t xFindDistortionPlane(const CPelBuf& pic0, const CPelBuf& pic1, const uint32_t rshift #if ENABLE_QPA diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index 9612ec3650d013e0083c3b68f591cdfba97b2047..8429fde15c8b81878a62b94ae85474446d06e14b 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -87,7 +87,9 @@ void EncLib::create () - +#if JVET_K0157 + m_iPOCLast = m_compositeRefEnabled ? -2 : -1; +#endif // create processing unit classes m_cGOPEncoder. create( ); m_cSliceEncoder. create( getSourceWidth(), getSourceHeight(), m_chromaFormatIDC, m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth ); @@ -140,8 +142,13 @@ void EncLib::create () if ( m_RCEnableRateControl ) { +#if RATECTRL_FIX_FULLNBIT + m_cRateCtrl.init(m_framesToBeEncoded, m_RCTargetBitrate, (int)((double)m_iFrameRate / m_temporalSubsampleRatio + 0.5), m_iGOPSize, m_iSourceWidth, m_iSourceHeight, + m_maxCUWidth, m_maxCUHeight, getBitDepth(CHANNEL_TYPE_LUMA), m_RCKeepHierarchicalBit, m_RCUseLCUSeparateModel, m_GOPList); +#else m_cRateCtrl.init( m_framesToBeEncoded, m_RCTargetBitrate, (int)( (double)m_iFrameRate/m_temporalSubsampleRatio + 0.5), m_iGOPSize, m_iSourceWidth, m_iSourceHeight, m_maxCUWidth, m_maxCUHeight,m_RCKeepHierarchicalBit, m_RCUseLCUSeparateModel, m_GOPList ); +#endif } } @@ -219,6 +226,12 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) omp_set_nested( true ); #endif +#if JVET_K0157 + if (sps0.getSpsNext().getUseCompositeRef()) + { + sps0.setLongTermRefsPresent(true); + } +#endif #if U0132_TARGET_BITS_SATURATION if (m_RCCpbSaturationEnabled) @@ -248,6 +261,14 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) xInitPPS(pps1, sps0); } #endif +#if JVET_K0157 + if (sps0.getSpsNext().getUseCompositeRef()) + { + PPS &pps2 = *(m_ppsMap.allocatePS(2)); + 
xInitPPS(pps2, sps0); + xInitPPSforLT(pps2); + } +#endif // initialize processing unit classes m_cGOPEncoder. init( this ); @@ -356,6 +377,22 @@ void EncLib::init( bool isFieldCoding, AUWriterIf* auWriterIf ) #if ENABLE_WPP_PARALLELISM m_entropyCodingSyncContextStateVec.resize( pps0.pcv->heightInCtus ); #endif +#if JVET_K0157 + if (sps0.getSpsNext().getUseCompositeRef()) + { + Picture *picBg = new Picture; + picBg->create(sps0.getChromaFormatIdc(), Size(sps0.getPicWidthInLumaSamples(), sps0.getPicHeightInLumaSamples()), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false); + picBg->getRecoBuf().fill(0); + picBg->finalInit(sps0, pps0); + picBg->allocateNewSlice(); + picBg->createSpliceIdx(pps0.pcv->sizeInCtus); + m_cGOPEncoder.setPicBg(picBg); + Picture *picOrig = new Picture; + picOrig->create(sps0.getChromaFormatIdc(), Size(sps0.getPicWidthInLumaSamples(), sps0.getPicHeightInLumaSamples()), sps0.getMaxCUWidth(), sps0.getMaxCUWidth() + 16, false); + picOrig->getOrigBuf().fill(0); + m_cGOPEncoder.setPicOrig(picOrig); + } +#endif } #if HEVC_USE_SCALING_LISTS @@ -441,6 +478,15 @@ void EncLib::xInitScalingLists(SPS &sps, PPS &pps) } #endif +#if JVET_K0157 +void EncLib::xInitPPSforLT(PPS& pps) +{ + pps.setOutputFlagPresentFlag(true); + pps.setDeblockingFilterControlPresentFlag(true); + pps.setPPSDeblockingFilterDisabledFlag(true); +} +#endif + // ==================================================================================================================== // Public member functions // ==================================================================================================================== @@ -483,6 +529,38 @@ void EncLib::deletePicBuffer() void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYuvTrueOrg, const InputColourSpaceConversion snrCSC, std::list<PelUnitBuf*>& rcListPicYuvRecOut, int& iNumEncoded ) { +#if JVET_K0157 + if (m_compositeRefEnabled && m_cGOPEncoder.getPicBg()->getSpliceFull() && m_iPOCLast >= 10 && 
m_iNumPicRcvd == 0 && m_cGOPEncoder.getEncodedLTRef() == false) + { + Picture* picCurr = NULL; + xGetNewPicBuffer(rcListPicYuvRecOut, picCurr, 2); + const PPS *pps = m_ppsMap.getPS(2); + const SPS *sps = m_spsMap.getPS(pps->getSPSId()); + + picCurr->M_BUFS(0, PIC_ORIGINAL).copyFrom(m_cGOPEncoder.getPicBg()->getRecoBuf()); + picCurr->finalInit(*sps, *pps); + picCurr->poc = m_iPOCLast - 1; + m_iPOCLast -= 2; + if (getUseAdaptiveQP()) + { + AQpPreanalyzer::preanalyze(picCurr); + } + if (m_RCEnableRateControl) + { + m_cRateCtrl.initRCGOP(m_iNumPicRcvd); + } + m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, + false, false, snrCSC, m_printFrameMSE, true); + m_cGOPEncoder.setEncodedLTRef(true); + if (m_RCEnableRateControl) + { + m_cRateCtrl.destroyRCGOP(); + } + + iNumEncoded = 0; + m_iNumPicRcvd = 0; + } +#endif //PROF_ACCUM_AND_START_NEW_SET( getProfilerPic(), P_GOP_LEVEL ); if (pcPicYuvOrg != NULL) { @@ -493,7 +571,11 @@ void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYuvTru int ppsID=-1; // Use default PPS ID if (getWCGChromaQPControl().isEnabled()) { +#if JVET_K0157 + ppsID = getdQPs()[m_iPOCLast / (m_compositeRefEnabled ? 
2 : 1) + 1]; +#else ppsID=getdQPs()[ m_iPOCLast+1 ]; +#endif ppsID+=(getSwitchPOC() != -1 && (m_iPOCLast+1 >= getSwitchPOC())?1:0); } xGetNewPicBuffer( rcListPicYuvRecOut, @@ -533,8 +615,12 @@ void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* cPicYuvTru } // compress GOP - m_cGOPEncoder.compressGOP( m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, - false, false, snrCSC, m_printFrameMSE ); + m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, + false, false, snrCSC, m_printFrameMSE +#if JVET_K0157 + , false +#endif + ); if ( m_RCEnableRateControl ) { @@ -622,8 +708,11 @@ void EncLib::encode( bool flush, PelStorage* pcPicYuvOrg, PelStorage* pcPicYuvTr if ( m_iNumPicRcvd && ((flush&&fieldNum==1) || (m_iPOCLast/2)==0 || m_iNumPicRcvd==m_iGOPSize ) ) { // compress GOP - m_cGOPEncoder.compressGOP( m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, - true, isTff, snrCSC, m_printFrameMSE ); + m_cGOPEncoder.compressGOP(m_iPOCLast, m_iNumPicRcvd, m_cListPic, rcListPicYuvRecOut, true, isTff, snrCSC, m_printFrameMSE +#if JVET_K0157 + , false +#endif + ); iNumEncoded += m_iNumPicRcvd; m_uiNumAllPicCoded += m_iNumPicRcvd; @@ -711,8 +800,11 @@ void EncLib::xGetNewPicBuffer ( std::list<PelUnitBuf*>& rcListPicYuvRecOut, Pict rpcPic->reconstructed = false; rpcPic->referenced = true; - +#if JVET_K0157 + m_iPOCLast += (m_compositeRefEnabled ? 2 : 1); +#else m_iPOCLast++; +#endif m_iNumPicRcvd++; } @@ -821,6 +913,9 @@ void EncLib::xInitSPS(SPS &sps) sps.getSpsNext().setUseIntraEMT ( m_IntraEMT ); sps.getSpsNext().setUseInterEMT ( m_InterEMT ); #endif +#if JVET_K0157 + sps.getSpsNext().setUseCompositeRef ( m_compositeRefEnabled ); +#endif // ADD_NEW_TOOL : (encoder lib) set tool enabling flags and associated parameters here @@ -861,7 +956,7 @@ void EncLib::xInitSPS(SPS &sps) sps.setMaxTLayers( m_maxTempLayer ); sps.setTemporalIdNestingFlag( ( m_maxTempLayer == 1 ) ? 
true : false ); - for (int i = 0; i < min(sps.getMaxTLayers(),(uint32_t) MAX_TLAYER); i++ ) + for (int i = 0; i < std::min(sps.getMaxTLayers(), (uint32_t) MAX_TLAYER); i++ ) { sps.setMaxDecPicBuffering(m_maxDecPicBuffering[i], i); sps.setNumReorderPics(m_numReorderPics[i], i); @@ -1151,7 +1246,7 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) if (getUsePerceptQPA() && !bUseDQP) { CHECK( m_iMaxCuDQPDepth != 0, "max. delta-QP depth must be zero!" ); - bUseDQP = true; + bUseDQP = (getBaseQP() < 38) && (getSourceWidth() > 512 || getSourceHeight() > 320); } #endif @@ -1245,9 +1340,15 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) } } } + #if ENABLE_QPA + if ((getUsePerceptQPA() || getSliceChromaOffsetQpPeriodicity() > 0) && (getChromaFormatIdc() != CHROMA_400)) + { + bChromaDeltaQPEnabled = true; + } + #endif pps.setSliceChromaQpFlag(bChromaDeltaQPEnabled); #endif - if (!pps.getSliceChromaQpFlag() && sps.getSpsNext().getUseDualITree()) + if (!pps.getSliceChromaQpFlag() && sps.getSpsNext().getUseDualITree() && (getChromaFormatIdc() != CHROMA_400)) { pps.setSliceChromaQpFlag(m_chromaCbQpOffsetDualTree != 0 || m_chromaCrQpOffsetDualTree != 0); } @@ -1323,7 +1424,7 @@ void EncLib::xInitPPS(PPS &pps, const SPS &sps) } } CHECK(!(bestPos <= 15), "Unspecified error"); - pps.setNumRefIdxL0DefaultActive(bestPos); + pps.setNumRefIdxL0DefaultActive(bestPos); pps.setNumRefIdxL1DefaultActive(bestPos); pps.setTransquantBypassEnabledFlag(getTransquantBypassEnabledFlag()); pps.setUseTransformSkip( m_useTransformSkip ); @@ -1495,8 +1596,20 @@ void EncLib::xInitRPS(SPS &sps, bool isFieldCoding) // This is a function that // determines what Reference Picture Set to use // for a specific slice (with POC = POCCurr) -void EncLib::selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid ) +void EncLib::selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid +#if JVET_K0157 + , int ltPoc +#endif +) { +#if JVET_K0157 + bool isEncodeLtRef = (POCCurr == ltPoc); + if 
(m_compositeRefEnabled && isEncodeLtRef) + { + POCCurr++; + } + int rIdx = GOPid; +#endif slice->setRPSidx(GOPid); for(int extraNum=m_iGOPSize; extraNum<m_extraRPSs+m_iGOPSize; extraNum++) @@ -1511,6 +1624,9 @@ void EncLib::selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid ) if(POCIndex == m_GOPList[extraNum].m_POC) { slice->setRPSidx(extraNum); +#if JVET_K0157 + rIdx = extraNum; +#endif } } else @@ -1518,6 +1634,9 @@ void EncLib::selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid ) if(POCCurr==m_GOPList[extraNum].m_POC) { slice->setRPSidx(extraNum); +#if JVET_K0157 + rIdx = extraNum; +#endif } } } @@ -1525,9 +1644,77 @@ void EncLib::selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid ) if(POCCurr == 1 && slice->getPic()->fieldPic) { slice->setRPSidx(m_iGOPSize+m_extraRPSs); +#if JVET_K0157 + rIdx = m_iGOPSize + m_extraRPSs; +#endif } +#if JVET_K0157 + ReferencePictureSet *rps = const_cast<ReferencePictureSet *>(slice->getSPS()->getRPSList()->getReferencePictureSet(slice->getRPSidx())); + if (m_compositeRefEnabled && ltPoc != -1 && !isEncodeLtRef) + { + if (ltPoc != -1 && rps->getNumberOfLongtermPictures() != 1 && !isEncodeLtRef) + { + int idx = rps->getNumberOfPictures(); + int maxPicOrderCntLSB = 1 << slice->getSPS()->getBitsForPOC(); + int ltPocLsb = ltPoc % maxPicOrderCntLSB; + + rps->setNumberOfPictures(rps->getNumberOfPictures() + 1); + rps->setNumberOfLongtermPictures(1); + rps->setPOC(idx, ltPoc); + rps->setPocLSBLT(idx, ltPocLsb); + rps->setDeltaPOC(idx, -POCCurr + ltPoc); + rps->setUsed(idx, true); + } + } + else if (m_compositeRefEnabled && isEncodeLtRef) + { + ReferencePictureSet* localRPS = slice->getLocalRPS(); + (*localRPS) = ReferencePictureSet(); + int refPics = rps->getNumberOfPictures(); + localRPS->setNumberOfPictures(rps->getNumberOfPictures()); + for (int i = 0; i < refPics; i++) + { + localRPS->setDeltaPOC(i, rps->getDeltaPOC(i) + 1); + localRPS->setUsed(i, rps->getUsed(i)); + } + 
localRPS->setNumberOfNegativePictures(rps->getNumberOfNegativePictures()); + localRPS->setNumberOfPositivePictures(rps->getNumberOfPositivePictures()); + localRPS->setInterRPSPrediction(true); + int deltaRPS = 1; + int newIdc = 0; + for (int i = 0; i < refPics; i++) + { + int deltaPOC = ((i != refPics) ? rps->getDeltaPOC(i) : 0); // check if the reference abs POC is >= 0 + int refIdc = 0; + for (int j = 0; j < localRPS->getNumberOfPictures(); j++) // loop through the pictures in the new RPS + { + if ((deltaPOC + deltaRPS) == localRPS->getDeltaPOC(j)) + { + if (localRPS->getUsed(j)) + { + refIdc = 1; + } + else + { + refIdc = 2; + } + } + } + localRPS->setRefIdc(i, refIdc); + newIdc++; + } + localRPS->setNumRefIdc(newIdc + 1); + localRPS->setRefIdc(newIdc, 0); + localRPS->setDeltaRPS(deltaRPS); + localRPS->setDeltaRIdxMinus1(slice->getSPS()->getRPSList()->getNumberOfReferencePictureSets() - 1 - rIdx); + slice->setRPS(localRPS); + slice->setRPSidx(-1); + return; + } +#else const ReferencePictureSet *rps = (slice->getSPS()->getRPSList()->getReferencePictureSet(slice->getRPSidx())); +#endif slice->setRPS(rps); } @@ -1678,7 +1865,11 @@ int EncCfg::getQPForPicture(const uint32_t gopIndex, const Slice *pSlice) const const int* pdQPs = getdQPs(); if ( pdQPs ) { +#if JVET_K0157 + qp += pdQPs[pSlice->getPOC() / (m_compositeRefEnabled ? 
2 : 1)]; +#else qp += pdQPs[ pSlice->getPOC() ]; +#endif } #endif diff --git a/source/Lib/EncoderLib/EncLib.h b/source/Lib/EncoderLib/EncLib.h index 7fbdd7aec8d55f30710f93ea2eac8991fefd86de..74d78d10a3909ce88fdf86126d6744ef29404a82 100644 --- a/source/Lib/EncoderLib/EncLib.h +++ b/source/Lib/EncoderLib/EncLib.h @@ -149,6 +149,9 @@ protected: void xInitPPS (PPS &pps, const SPS &sps); ///< initialize PPS from encoder options #if HEVC_USE_SCALING_LISTS void xInitScalingLists (SPS &sps, PPS &pps); ///< initialize scaling lists +#endif +#if JVET_K0157 + void xInitPPSforLT(PPS& pps); #endif void xInitHrdParameters(SPS &sps); ///< initialize HRD parameters @@ -209,7 +212,12 @@ public: #endif RateCtrl* getRateCtrl () { return &m_cRateCtrl; } - void selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid ); + + void selectReferencePictureSet(Slice* slice, int POCCurr, int GOPid +#if JVET_K0157 + , int ltPoc +#endif + ); int getReferencePictureSetIdxForSOP(int POCCurr, int GOPid ); bool PPSNeedsWriting(int ppsId); @@ -240,7 +248,7 @@ public: int& iNumEncoded, bool isTff ); - void printSummary(bool isField) { m_cGOPEncoder.printOutSummary (m_uiNumAllPicCoded, isField, m_printMSEBasedSequencePSNR, m_printSequenceMSE, m_spsMap.getFirstPS()->getBitDepths()); } + void printSummary(bool isField) { m_cGOPEncoder.printOutSummary (m_uiNumAllPicCoded, isField, m_printMSEBasedSequencePSNR, m_printSequenceMSE, m_printHexPsnr, m_spsMap.getFirstPS()->getBitDepths()); } }; diff --git a/source/Lib/EncoderLib/EncModeCtrl.cpp b/source/Lib/EncoderLib/EncModeCtrl.cpp index 6823598fa38ad57b5b996b6f8d056ef24430b368..dd8cd9bd160d0811c5c7abd42b5a20ee9e1d2c15 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.cpp +++ b/source/Lib/EncoderLib/EncModeCtrl.cpp @@ -743,7 +743,8 @@ bool BestEncInfoCache::isValid( const CodingStructure& cs, const Partitioner& pa BestEncodingInfo& encInfo = *m_bestEncInfo[idx1][idx2][idx3][idx4]; - if( cs.picture->poc != encInfo.poc || CS::getArea( cs, cs.area, 
partitioner.chType ) != encInfo.cu || !isTheSameNbHood( encInfo.cu, partitioner ) ) + if( cs.picture->poc != encInfo.poc || CS::getArea( cs, cs.area, partitioner.chType ) != encInfo.cu || !isTheSameNbHood( encInfo.cu, partitioner ) + ) { return false; } @@ -942,7 +943,15 @@ void EncModeCtrlMTnoRQT::initCTUEncoding( const Slice &slice ) if( m_pcEncCfg->getUseE0023FastEnc() ) { - m_skipThreshold = ( ( slice.getMinPictureDistance() <= PICTURE_DISTANCE_TH ) ? FAST_SKIP_DEPTH : SKIP_DEPTH ); +#if JVET_K0157 + if (m_pcEncCfg->getUseCompositeRef()) + m_skipThreshold = ( ( slice.getMinPictureDistance() <= PICTURE_DISTANCE_TH * 2 ) ? FAST_SKIP_DEPTH : SKIP_DEPTH ); + else + m_skipThreshold = ((slice.getMinPictureDistance() <= PICTURE_DISTANCE_TH) ? FAST_SKIP_DEPTH : SKIP_DEPTH); + +#else + m_skipThreshold = ((slice.getMinPictureDistance() <= PICTURE_DISTANCE_TH) ? FAST_SKIP_DEPTH : SKIP_DEPTH); +#endif } else { @@ -1057,7 +1066,6 @@ void EncModeCtrlMTnoRQT::initCULevel( Partitioner &partitioner, const CodingStru #endif xGetMinMaxQP( minQP, maxQP, cs, partitioner, baseQP, *cs.sps, *cs.pps, true ); - // Add coding modes here // NOTE: Working back to front, as a stack, which is more efficient with the container // NOTE: First added modes will be processed at the end. 
@@ -1425,11 +1433,11 @@ bool EncModeCtrlMTnoRQT::tryMode( const EncTestMode& encTestmode, const CodingSt { return false; } - if( lastTestMode().type != ETM_INTRA && cuECtx.bestCS && cuECtx.bestCU && interHadActive( cuECtx ) ) { // Get SATD threshold from best Inter-CU - if( !cs.slice->isIntra() && m_pcEncCfg->getUsePbIntraFast() ) + if( !cs.slice->isIntra() && m_pcEncCfg->getUsePbIntraFast() + ) { CodingUnit* bestCU = cuECtx.bestCU; if( bestCU && CU::isInter( *bestCU ) ) diff --git a/source/Lib/EncoderLib/EncModeCtrl.h b/source/Lib/EncoderLib/EncModeCtrl.h index d8caa932771b8cb34f4c63b0f5780a15889ae3c8..12fd159c5f840d80c6ceae961b95d7b776c32409 100644 --- a/source/Lib/EncoderLib/EncModeCtrl.h +++ b/source/Lib/EncoderLib/EncModeCtrl.h @@ -385,6 +385,8 @@ struct CodedCUInfo bool validMv[NUM_REF_PIC_LIST_01][MAX_STORED_CU_INFO_REFS]; Mv saveMv [NUM_REF_PIC_LIST_01][MAX_STORED_CU_INFO_REFS]; + + #if ENABLE_SPLIT_PARALLELISM uint64_t @@ -431,6 +433,7 @@ public: bool getMv ( const UnitArea& area, const RefPicList refPicList, const int iRefIdx, Mv& rMv ) const; void setMv ( const UnitArea& area, const RefPicList refPicList, const int iRefIdx, const Mv& rMv ); + }; #if REUSE_CU_RESULTS diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp index 4c7541aa94352b937ea0772e01e0585067e5f29c..a11f93c01dad8f484cb893a85e4ec2a3ec201cf2 100644 --- a/source/Lib/EncoderLib/EncSlice.cpp +++ b/source/Lib/EncoderLib/EncSlice.cpp @@ -40,6 +40,9 @@ #include "EncLib.h" #include "CommonLib/UnitTools.h" #include "CommonLib/Picture.h" +#if K0149_BLOCK_STATISTICS +#include "CommonLib/dtrace_blockstatistics.h" +#endif #if ENABLE_WPP_PARALLELISM #include <mutex> @@ -57,6 +60,9 @@ extern recursive_mutex g_cache_mutex; EncSlice::EncSlice() : m_encCABACTableIdx(I_SLICE) +#if ENABLE_QPA + , m_adaptedLumaQP(-1) +#endif { } @@ -140,7 +146,120 @@ EncSlice::setUpLambda( Slice* slice, const double dLambda, int iQP) slice->setLambdas( dLambdas ); } +#if ENABLE_QPA + +static 
inline int apprI3Log2 (const double d) // rounded 3*log2(d) +{ + return d < 1.5e-13 ? -128 : int (floor (3.0 * log (d) / log (2.0) + 0.5)); +} + +static void filterAndCalculateAverageEnergies (const Pel* pSrc, const int iSrcStride, + double &hpEner, const int iHeight, const int iWidth, + const uint32_t uBitDepth /* luma bit-depth (4-16) */) +{ + uint64_t saAct = 0; + + // skip first row as there may be a black border frame + pSrc += iSrcStride; + // center rows + for (int y = 1; y < iHeight - 1; y++) + { + // skip column as there may be a black border frame + + for (int x = 1; x < iWidth - 1; x++) // and columns + { + const int f = 12 * (int)pSrc[x ] - 2 * ((int)pSrc[x-1] + (int)pSrc[x+1] + (int)pSrc[x -iSrcStride] + (int)pSrc[x +iSrcStride]) + - (int)pSrc[x-1-iSrcStride] - (int)pSrc[x+1-iSrcStride] - (int)pSrc[x-1+iSrcStride] - (int)pSrc[x+1+iSrcStride]; + saAct += abs (f); + } + // skip column as there may be a black border frame + pSrc += iSrcStride; + } + // skip last row as there may be a black border frame + + hpEner = double(saAct) / double((iWidth - 2) * (iHeight - 2)); + + // lower limit, compensate for highpass amplification + if (hpEner < double(1 << (uBitDepth - 4))) hpEner = double(1 << (uBitDepth - 4)); +} + +#ifndef GLOBAL_AVERAGING + #define GLOBAL_AVERAGING 1 // "global" averaging of a_k across a set instead of one picture +#endif + +#if GLOBAL_AVERAGING +static double getAveragePictureEnergy (const CPelBuf picOrig, const uint32_t uBitDepth) +{ + double hpEnerPic = 5.65625 * double(1 << (uBitDepth >> 1)); // square-root of a_pic value + + if (picOrig.width > 2048 && picOrig.height > 1280) // for UHD/4K + { + hpEnerPic *= (4.0 / 5.65625); + } + else if (picOrig.width <= 1024 || picOrig.height <= 640) // 480p + { + hpEnerPic *= (8.0 / 5.65625); + } + + return hpEnerPic; +} +#endif + +static int applyQPAdaptationChroma (Picture* const pcPic, Slice* const pcSlice, EncCfg* const pcEncCfg, const int sliceQP) +{ + double hpEner[MAX_NUM_COMPONENT] = {0.0, 
0.0, 0.0}; + int optSliceChromaQpOffset[2] = {0, 0}; + int savedLumaQP = -1; + + for (uint32_t comp = 0; comp < getNumberValidComponents (pcPic->chromaFormat); comp++) + { + const ComponentID compID = (ComponentID)comp; + const CPelBuf picOrig = pcPic->getOrigBuf (pcPic->block (compID)); + + filterAndCalculateAverageEnergies (picOrig.buf, picOrig.stride, hpEner[comp], picOrig.height, picOrig.width, + pcSlice->getSPS()->getBitDepth (toChannelType (compID)) - (isChroma (compID) ? 1 : 0)); + if (isChroma (compID)) + { + const int adaptChromaQPOffset = 2.0 * hpEner[comp] <= hpEner[0] ? 0 : apprI3Log2 (2.0 * hpEner[comp] / hpEner[0]); + #if GLOBAL_AVERAGING + int averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEner[0] / getAveragePictureEnergy (pcPic->getOrigBuf().Y(), pcSlice->getSPS()->getBitDepth (CH_L)))); + #else + int averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP); // mean slice QP + #endif + #if SHARP_LUMA_DELTA_QP + + // change mean picture QP index based on picture's average luma value (Sharp) + if (pcEncCfg->getLumaLevelToDeltaQPMapping().mode == LUMALVL_TO_DQP_NUM_MODES) + { + const CPelBuf picLuma = pcPic->getOrigBuf().Y(); + uint64_t uAvgLuma = 0; + + for (SizeType y = 0; y < picLuma.height; y++) + { + for (SizeType x = 0; x < picLuma.width; x++) + { + uAvgLuma += (uint64_t)picLuma.at (x, y); + } + } + uAvgLuma = (uAvgLuma + (picLuma.area() >> 1)) / picLuma.area(); + + averageAdaptedLumaQP = Clip3 (0, MAX_QP, averageAdaptedLumaQP + 1 - int((3 * uAvgLuma * uAvgLuma) >> uint64_t (2 * pcSlice->getSPS()->getBitDepth (CH_L) - 1))); + } + #endif + const int lumaChromaMappingDQP = averageAdaptedLumaQP - getScaledChromaQP (averageAdaptedLumaQP, pcEncCfg->getChromaFormatIdc()); + + optSliceChromaQpOffset[comp-1] = std::min (3 + lumaChromaMappingDQP, adaptChromaQPOffset + lumaChromaMappingDQP); + if (savedLumaQP < 0) savedLumaQP = averageAdaptedLumaQP; // save it for later + } + } + + pcEncCfg->setSliceChromaOffsetQpIntraOrPeriodic 
(pcEncCfg->getSliceChromaOffsetQpPeriodicity(), optSliceChromaQpOffset); + + return savedLumaQP; +} + +#endif // ENABLE_QPA /** - non-referenced frame marking @@ -156,8 +275,11 @@ EncSlice::setUpLambda( Slice* slice, const double dLambda, int iQP) \param rpcSlice slice header class \param isField true for field coding */ - -void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCurr, const int iGOPid, Slice*& rpcSlice, const bool isField ) +void EncSlice::initEncSlice(Picture* pcPic, const int pocLast, const int pocCurr, const int iGOPid, Slice*& rpcSlice, const bool isField +#if JVET_K0157 + , bool isEncodeLtRef +#endif +) { double dQP; double dLambda; @@ -166,7 +288,19 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur rpcSlice->setSliceBits(0); rpcSlice->setPic( pcPic ); rpcSlice->initSlice(); +#if JVET_K0157 + int multipleFactor = pcPic->cs->sps->getSpsNext().getUseCompositeRef() ? 2 : 1; + if (pcPic->cs->sps->getSpsNext().getUseCompositeRef() && isEncodeLtRef) + { + rpcSlice->setPicOutputFlag(false); + } + else + { + rpcSlice->setPicOutputFlag(true); + } +#else rpcSlice->setPicOutputFlag( true ); +#endif rpcSlice->setPOC( pocCurr ); #if JVET_K0072 rpcSlice->setDepQuantEnabledFlag( m_pcCfg->getDepQuantEnabledFlag() ); @@ -190,7 +324,11 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur } else { +#if JVET_K0157 + poc = poc % (m_pcCfg->getGOPSize() * multipleFactor); +#else poc = poc % m_pcCfg->getGOPSize(); +#endif } if ( poc == 0 ) @@ -199,11 +337,19 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur } else { +#if JVET_K0157 + int step = m_pcCfg->getGOPSize() * multipleFactor; +#else int step = m_pcCfg->getGOPSize(); +#endif depth = 0; for( int i=step>>1; i>=1; i>>=1 ) { +#if JVET_K0157 + for (int j = i; j<(m_pcCfg->getGOPSize() * multipleFactor); j += step) +#else for ( int j=i; j<m_pcCfg->getGOPSize(); j+=step ) +#endif { if ( j == poc ) { 
@@ -233,11 +379,19 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur { if(m_pcCfg->getDecodingRefreshType() == 3) { +#if JVET_K0157 + eSliceType = (pocLast == 0 || pocCurr % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType; +#else eSliceType = (pocLast == 0 || pocCurr % m_pcCfg->getIntraPeriod() == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType; +#endif } else { +#if JVET_K0157 + eSliceType = (pocLast == 0 || (pocCurr - (isField ? 1 : 0)) % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType; +#else eSliceType = (pocLast == 0 || (pocCurr - (isField ? 1 : 0)) % m_pcCfg->getIntraPeriod() == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType; +#endif } } @@ -399,7 +553,7 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur dLambda *= lambdaModifier; #endif - iQP = max( -rpcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), min( MAX_QP, (int) floor( dQP + 0.5 ) ) ); + iQP = Clip3( -rpcSlice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) ); #endif m_vdRdPicLambda[iDQpIdx] = dLambda; @@ -418,11 +572,20 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur #endif #if W0038_CQP_ADJ + #if ENABLE_QPA + m_adaptedLumaQP = -1; + + if ((m_pcCfg->getUsePerceptQPA() || m_pcCfg->getSliceChromaOffsetQpPeriodicity() > 0) && !m_pcCfg->getUseRateCtrl() && rpcSlice->getPPS()->getSliceChromaQpFlag() && + (rpcSlice->isIntra() || (m_pcCfg->getSliceChromaOffsetQpPeriodicity() > 0 && (rpcSlice->getPOC() % m_pcCfg->getSliceChromaOffsetQpPeriodicity()) == 0))) + { + m_adaptedLumaQP = applyQPAdaptationChroma (pcPic, rpcSlice, m_pcCfg, iQP); + } + #endif if(rpcSlice->getPPS()->getSliceChromaQpFlag()) { - const bool bUseIntraOrPeriodicOffset = rpcSlice->getSliceType()==I_SLICE || 
(m_pcCfg->getSliceChromaOffsetQpPeriodicity()!=0 && (rpcSlice->getPOC()%m_pcCfg->getSliceChromaOffsetQpPeriodicity())==0); - int cbQP = bUseIntraOrPeriodicOffset? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(false) : m_pcCfg->getGOPEntry(iGOPid).m_CbQPoffset; - int crQP = bUseIntraOrPeriodicOffset? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(true) : m_pcCfg->getGOPEntry(iGOPid).m_CrQPoffset; + const bool bUseIntraOrPeriodicOffset = rpcSlice->isIntra() || (m_pcCfg->getSliceChromaOffsetQpPeriodicity() > 0 && (rpcSlice->getPOC() % m_pcCfg->getSliceChromaOffsetQpPeriodicity()) == 0); + int cbQP = bUseIntraOrPeriodicOffset ? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(false) : m_pcCfg->getGOPEntry(iGOPid).m_CbQPoffset; + int crQP = bUseIntraOrPeriodicOffset ? m_pcCfg->getSliceChromaOffsetQpIntraOrPeriodic(true) : m_pcCfg->getGOPEntry(iGOPid).m_CrQPoffset; cbQP = Clip3( -12, 12, cbQP + rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb) ) - rpcSlice->getPPS()->getQpOffset(COMPONENT_Cb); crQP = Clip3( -12, 12, crQP + rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr) ) - rpcSlice->getPPS()->getQpOffset(COMPONENT_Cr); @@ -453,7 +616,7 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur #endif setUpLambda(rpcSlice, dLambda, iQP); - + #if WCG_EXT // cost = Distortion + Lambda*R, // when QP is adjusted by luma, distortion is changed, so we have to adjust lambda to match the distortion, then the cost function becomes @@ -471,11 +634,19 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur { if(m_pcCfg->getDecodingRefreshType() == 3) { +#if JVET_K0157 + eSliceType = (pocLast == 0 || (pocCurr) % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType; +#else eSliceType = (pocLast == 0 || (pocCurr) % m_pcCfg->getIntraPeriod() == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? 
I_SLICE : eSliceType; +#endif } else { +#if JVET_K0157 + eSliceType = (pocLast == 0 || (pocCurr - (isField ? 1 : 0)) % (m_pcCfg->getIntraPeriod() * multipleFactor) == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType; +#else eSliceType = (pocLast == 0 || (pocCurr - (isField ? 1 : 0)) % m_pcCfg->getIntraPeriod() == 0 || m_pcGOPEncoder->getGOPSize() == 0) ? I_SLICE : eSliceType; +#endif } } @@ -485,7 +656,7 @@ void EncSlice::initEncSlice( Picture* pcPic, const int pocLast, const int pocCur if (m_pcCfg->getUseRecalculateQPAccordingToLambda()) { dQP = xGetQPValueAccordingToLambda( dLambda ); - iQP = max( -rpcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), min( MAX_QP, (int) floor( dQP + 0.5 ) ) ); + iQP = Clip3( -rpcSlice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) ); } rpcSlice->setSliceQp ( iQP ); @@ -653,7 +824,7 @@ double EncSlice::calculateLambda( const Slice* slice, dLambda *= lambdaModifier; #endif - iQP = max( -slice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), min( MAX_QP, (int) floor( dQP + 0.5 ) ) ); + iQP = Clip3( -slice->getSPS()->getQpBDOffset( CHANNEL_TYPE_LUMA ), MAX_QP, (int) floor( dQP + 0.5 ) ); #if JVET_K0072 if( m_pcCfg->getDepQuantEnabledFlag() ) @@ -677,68 +848,9 @@ void EncSlice::resetQP( Picture* pic, int sliceQP, double lambda ) } #if ENABLE_QPA -static inline int apprI2Log2 (const double d) -{ - return d < 6.0e-20 ? 
-128 : int(floor(2.0 * log(d) / log(2.0) + 0.5)); -} - -#ifndef HLM_L1_NORM - #define HLM_L1_NORM -#endif - -static int filterAndCalculateAverageEnergies (const Pel* pSrc, const int iSrcStride, - double &hpEner, const int iHeight, const int iWidth, - const int iPOC = 0) -{ - int iHpValue; - uint32_t uHpERow, uHpEner = 0; - - // skip first row as there may be a black border frame - pSrc += iSrcStride; - // center rows - for (int y = 1; y < iHeight - 1; y++) - { - uHpERow = 0; - // skip column as there may be a black border frame - - for (int x = 1; x < iWidth - 1; x++) // and columns - { - iHpValue = 4 * (int)pSrc[x] - (int)pSrc[x-1] - (int)pSrc[x+1] - (int)pSrc[x-iSrcStride] - (int)pSrc[x+iSrcStride]; -#ifdef HLM_L1_NORM - uHpERow += abs (iHpValue); -#else - uHpERow += iHpValue * iHpValue; -#endif - } - // skip column as there may be a black border frame -#ifdef HLM_L1_NORM - uHpEner += uHpERow; -#else - uHpEner += (uHpERow + 64) >> 7; // avoids overflows -#endif - pSrc += iSrcStride; - } - // skip last row as there may be a black border frame - - hpEner = double(uHpEner) / double((iWidth - 2) * (iHeight - 2)); -#ifdef HLM_L1_NORM - hpEner *= hpEner; -#endif - // lower limit, compensate for highpass amplification - if (hpEner < 64.0) hpEner = 64.0; - - if (iPOC <= 0) return 0; - return 1; // OK -} - -#ifdef HLM_L1_NORM - #undef HLM_L1_NORM -#endif - -#if ENABLE_QPA -static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, const PreCalcValues& pcv, +static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, const PreCalcValues& pcv, const uint32_t startAddr, const uint32_t boundingAddr, const bool useSharpLumaDQP, - const int gopSize, const double hpEnerAvg, const double hpEnerMax) + const double hpEnerAvg, const double hpEnerMax, const bool useFrameWiseQPA, const int previouslyAdaptedLumaQP = -1) { const int iBitDepth = pcSlice->getSPS()->getBitDepth (CHANNEL_TYPE_LUMA); const int iQPIndex = pcSlice->getSliceQp(); // initial QP 
index for current slice, used in following loops @@ -746,20 +858,28 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, co const TileMap& tileMap = *pcPic->tileMap; #endif bool sliceQPModified = false; - double hpEnerPic = 1.0 / (1.5 * double(1 << iBitDepth)); // speedup: multiply instead of divide in loops below +#if GLOBAL_AVERAGING + const double hpEnerPic = 1.0 / getAveragePictureEnergy (pcPic->getOrigBuf().Y(), iBitDepth); // inverse, speed +#else + const double hpEnerPic = 1.0 / hpEnerAvg; // speedup: multiply instead of divide in loop below; 1.0 for tuning +#endif - if (pcv.lumaWidth > 2048 && pcv.lumaHeight > 1280) // for UHD/4K + if (useFrameWiseQPA || (iQPIndex >= MAX_QP)) { - hpEnerPic *= 1.5; - } + int iQPFixed; - if ((pcPic->getPOC() & 1) && (iQPIndex >= MAX_QP)) - { - int iQPFixed = Clip3 (0, MAX_QP, iQPIndex + ((apprI2Log2 (hpEnerAvg * hpEnerPic) + apprI2Log2 (hpEnerMax * hpEnerPic) + 1) >> 1)); // adapted slice QP = (mean(QP) + max(QP)) / 2 + if (useFrameWiseQPA) + { + iQPFixed = (previouslyAdaptedLumaQP < 0) ? 
Clip3 (0, MAX_QP, iQPIndex + apprI3Log2 (hpEnerAvg * hpEnerPic)) : previouslyAdaptedLumaQP; // average-activity slice QP + } + else + { + iQPFixed = Clip3 (0, MAX_QP, iQPIndex + ((apprI3Log2 (hpEnerAvg * hpEnerPic) + apprI3Log2 (hpEnerMax * hpEnerPic) + 1) >> 1)); // adapted slice QP = (mean(QP) + max(QP)) / 2 + } #if SHARP_LUMA_DELTA_QP // change new fixed QP based on average CTU luma value (Sharp) - if (useSharpLumaDQP) + if (useSharpLumaDQP && (iQPIndex < MAX_QP) && (previouslyAdaptedLumaQP < 0)) { uint64_t uAvgLuma = 0; @@ -779,9 +899,9 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, co } #endif - if (iQPFixed < iQPIndex) iQPFixed = iQPIndex; + if (iQPIndex >= MAX_QP) iQPFixed = MAX_QP; else - if (iQPFixed > iQPIndex) + if (iQPFixed != iQPIndex) { const double* oldLambdas = pcSlice->getLambdas(); const double corrFactor = pow (2.0, double(iQPFixed - iQPIndex) / 3.0); @@ -816,12 +936,12 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, co const uint32_t ctuRsAddr = ctuTsAddr; #endif - int iQPAdapt = Clip3 (0, MAX_QP, iQPIndex + apprI2Log2 (pcPic->m_uEnerHpCtu[ctuRsAddr] * hpEnerPic)); + int iQPAdapt = Clip3 (0, MAX_QP, iQPIndex + apprI3Log2 (pcPic->m_uEnerHpCtu[ctuRsAddr] * hpEnerPic)); #if SHARP_LUMA_DELTA_QP - if ((pcv.widthInCtus > 1) && (gopSize > 1)) // try to enforce CTU SNR greater than zero dB + if (pcv.widthInCtus > 1) // try to enforce CTU SNR greater than zero dB #else - if ((!pcSlice->isIntra()) && (gopSize > 1)) // try to enforce CTU SNR greater than zero dB + if (!pcSlice->isIntra()) // try to enforce CTU SNR greater than zero dB #endif { const Pel dcOffset = pcPic->m_iOffsetCtu[ctuRsAddr]; @@ -832,11 +952,11 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, co { const uint64_t uAvgLuma = (uint64_t)dcOffset; - iQPAdapt = max (0, iQPAdapt + 1 - int((3 * uAvgLuma * uAvgLuma) >> uint64_t(2 * iBitDepth - 1))); + iQPAdapt = std::max (0, iQPAdapt + 1 - int((3 * uAvgLuma 
* uAvgLuma) >> uint64_t(2 * iBitDepth - 1))); } #endif - const uint32_t uRefScale = g_invQuantScales[iQPAdapt % 6] << ((iQPAdapt / 6) + iBitDepth - (pcSlice->isIntra() ? 4 : 3)); + const uint32_t uRefScale = g_invQuantScales[iQPAdapt % 6] << ((iQPAdapt / 6) + iBitDepth - 4); const CompArea subArea = clipArea (CompArea (COMPONENT_Y, pcPic->chromaFormat, Area ((ctuRsAddr % pcv.widthInCtus) * pcv.maxCUWidth, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUHeight, pcv.maxCUWidth, pcv.maxCUHeight)), pcPic->Y()); const Pel* pSrc = pcPic->getOrigBuf (subArea).buf; const SizeType iSrcStride = pcPic->getOrigBuf (subArea).stride; @@ -866,10 +986,10 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, co // reduce QP index if CTU would be fully quantized to zero if (uAbsDCless < uRefScale) { - const int limit = min (0, ((iQPIndex + 4) >> 3) - 6); - const int redVal = max (limit, apprI2Log2 ((double)uAbsDCless / (double)uRefScale)); + const int limit = std::min (0, ((iQPIndex + 4) >> 3) - 6); + const int redVal = std::max (limit, apprI3Log2 ((double)uAbsDCless / (double)uRefScale)); - iQPAdapt = max (0, iQPAdapt + redVal); + iQPAdapt = std::max (0, iQPAdapt + redVal); } #if SHARP_LUMA_DELTA_QP @@ -879,7 +999,7 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, co pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iQPAdapt; // adapted QPs - if ((pcv.widthInCtus > 1) && (gopSize > 1)) // try to reduce local bitrate peaks via minimum smoothing + if (pcv.widthInCtus > 1) // try to reduce local bitrate peaks via minimum smoothing of the adapted QPs { iQPAdapt = ctuRsAddr % pcv.widthInCtus; // horizontal offset if (iQPAdapt == 0) @@ -888,11 +1008,11 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, co } else // iQPAdapt >= 1 { - iQPAdapt = (iQPAdapt > 1) ? min (pcPic->m_iOffsetCtu[ctuRsAddr - 2], pcPic->m_iOffsetCtu[ctuRsAddr]) : pcPic->m_iOffsetCtu[ctuRsAddr]; + iQPAdapt = (iQPAdapt > 1) ? 
std::min (pcPic->m_iOffsetCtu[ctuRsAddr - 2], pcPic->m_iOffsetCtu[ctuRsAddr]) : pcPic->m_iOffsetCtu[ctuRsAddr]; } if (ctuRsAddr > pcv.widthInCtus) { - iQPAdapt = min (iQPAdapt, (int)pcPic->m_iOffsetCtu[ctuRsAddr - 1 - pcv.widthInCtus]); // min(L, T) + iQPAdapt = std::min (iQPAdapt, (int)pcPic->m_iOffsetCtu[ctuRsAddr - 1 - pcv.widthInCtus]); } if ((ctuRsAddr > 0) && (pcPic->m_iOffsetCtu[ctuRsAddr - 1] < (Pel)iQPAdapt)) { @@ -900,7 +1020,7 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, co } if ((ctuTsAddr == boundingAddr - 1) && (ctuRsAddr > pcv.widthInCtus)) // last CTU in the given slice { - iQPAdapt = min (pcPic->m_iOffsetCtu[ctuRsAddr - 1], pcPic->m_iOffsetCtu[ctuRsAddr - pcv.widthInCtus]); + iQPAdapt = std::min (pcPic->m_iOffsetCtu[ctuRsAddr - 1], pcPic->m_iOffsetCtu[ctuRsAddr - pcv.widthInCtus]); if (pcPic->m_iOffsetCtu[ctuRsAddr] < (Pel)iQPAdapt) { pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iQPAdapt; @@ -914,8 +1034,6 @@ static bool applyQPAdaptation (Picture* const pcPic, Slice* const pcSlice, co } #endif // ENABLE_QPA -#endif // ENABLE_QPA || ENABLE_PRIVATE - // ==================================================================================================================== // Public member functions // ==================================================================================================================== @@ -1132,7 +1250,8 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c #if JVET_K0346 if (pcSlice->getSPS()->getSpsNext().getUseSubPuMvp()) { - if (!pcSlice->isIntra()) + if (!pcSlice->isIntra() + ) { if (pcSlice->getPOC() > m_pcCuEncoder->getPrevPOC() && m_pcCuEncoder->getClearSubMergeStatic()) { @@ -1176,7 +1295,15 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c else { m_pcCuEncoder->setPrevPOC(pcSlice->getPOC()); - m_pcCuEncoder->setClearSubMergeStatic(true); + if (m_pcCfg->getGOPSize() != m_pcCfg->getIntraPeriod()) + { + 
m_pcCuEncoder->setClearSubMergeStatic(true); + } + else + { + m_pcCuEncoder->clearSubMergeStatics(); + m_pcCuEncoder->setClearSubMergeStatic(false); + } } } #endif @@ -1269,22 +1396,18 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c } #if ENABLE_QPA - #if ENABLE_QPA double hpEnerMax = 1.0; double hpEnerPic = 0.0; - #endif int iSrcOffset; - #if ENABLE_QPA - if (m_pcCfg->getUsePerceptQPA() && pcSlice->getPPS()->getUseDQP() && !m_pcCfg->getUseRateCtrl()) - #endif + if (m_pcCfg->getUsePerceptQPA() && !m_pcCfg->getUseRateCtrl()) { for (uint32_t ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr++) { #if HEVC_TILES_WPP - const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap (ctuTsAddr); + const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap (ctuTsAddr); #else - const uint32_t ctuRsAddr = ctuTsAddr; + const uint32_t ctuRsAddr = ctuTsAddr; #endif const Position pos ((ctuRsAddr % widthInCtus) * pcv.maxCUWidth, (ctuRsAddr / widthInCtus) * pcv.maxCUHeight); const CompArea subArea = clipArea (CompArea (COMPONENT_Y, pcPic->chromaFormat, Area (pos.x, pos.y, pcv.maxCUWidth, pcv.maxCUHeight)), pcPic->Y()); @@ -1315,24 +1438,23 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c iSrcOffset = (iSrcOffset + (x >> 1)) / x; // slow division filterAndCalculateAverageEnergies (pcPic->getOrigBuf (fltArea).buf, iSrcStride, - hpEner, iFltHeight, iFltWidth, pcPic->getPOC()); + hpEner, iFltHeight, iFltWidth, + pcSlice->getSPS()->getBitDepth (CHANNEL_TYPE_LUMA)); - #if ENABLE_QPA if (hpEner > hpEnerMax) hpEnerMax = hpEner; hpEnerPic += hpEner; pcPic->m_uEnerHpCtu[ctuRsAddr] = hpEner; pcPic->m_iOffsetCtu[ctuRsAddr] = (Pel)iSrcOffset; - #endif } // end iteration over all CTUs in current slice } - #if ENABLE_QPA - if (m_pcCfg->getUsePerceptQPA() && pcSlice->getPPS()->getUseDQP() && !m_pcCfg->getUseRateCtrl() && (boundingCtuTsAddr > startCtuTsAddr)) + if (m_pcCfg->getUsePerceptQPA() && 
!m_pcCfg->getUseRateCtrl() && (boundingCtuTsAddr > startCtuTsAddr)) { const double hpEnerAvg = hpEnerPic / double(boundingCtuTsAddr - startCtuTsAddr); - if (applyQPAdaptation (pcPic, pcSlice, pcv, startCtuTsAddr, boundingCtuTsAddr, m_pcCfg->getLumaLevelToDeltaQPMapping().mode == LUMALVL_TO_DQP_NUM_MODES, m_pcCfg->getGOPSize(), hpEnerAvg, hpEnerMax)) + if (applyQPAdaptation (pcPic, pcSlice, pcv, startCtuTsAddr, boundingCtuTsAddr, m_pcCfg->getLumaLevelToDeltaQPMapping().mode == LUMALVL_TO_DQP_NUM_MODES, + hpEnerAvg, hpEnerMax, (m_pcCfg->getBaseQP() >= 38) || (m_pcCfg->getSourceWidth() <= 512 && m_pcCfg->getSourceHeight() <= 320), m_adaptedLumaQP)) { m_CABACEstimator->initCtxModels (*pcSlice); #if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM @@ -1356,9 +1478,7 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c } } } - #endif // ENABLE_QPA - -#endif // ENABLE_QPA || ENABLE_PRIVATE +#endif // ENABLE_QPA cs.pcv = pcSlice->getPPS()->pcv; cs.fracBits = 0; @@ -1387,6 +1507,11 @@ void EncSlice::compressSlice( Picture* pcPic, const bool bCompressEntireSlice, c } } else +#endif +#if K0149_BLOCK_STATISTICS + const SPS *sps = pcSlice->getSPS(); + CHECK(sps == 0, "No SPS present"); + writeBlockStatisticsHeader(sps); #endif encodeCtus( pcPic, bCompressEntireSlice, bFastDeltaQP, startCtuTsAddr, boundingCtuTsAddr, m_pcLib ); @@ -1568,12 +1693,17 @@ void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, cons #endif + #if ENABLE_WPP_PARALLELISM pEncLib->getCuEncoder( dataId )->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP ); #else m_pcCuEncoder->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP ); #endif +#if K0149_BLOCK_STATISTICS + getAndStoreBlockStatistics(cs, ctuArea); +#endif + pCABACWriter->resetBits(); pCABACWriter->coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr, true ); const int numberOfWrittenBits = int( pCABACWriter->getEstFracBits() >> SCALE_BITS ); @@ -1629,11 +1759,13 @@ void EncSlice::encodeCtus( 
Picture* pcPic, const bool bCompressEntireSlice, cons #if !ENABLE_WPP_PARALLELISM int actualBits = int(cs.fracBits >> SCALE_BITS); + actualBits -= (int)m_uiPicTotalBits; #endif if ( pCfg->getUseRateCtrl() ) { #if ENABLE_WPP_PARALLELISM int actualBits = int( cs.fracBits >> SCALE_BITS ); + actualBits -= (int)m_uiPicTotalBits; #endif int actualQP = g_RCInvalidQPValue; double actualLambda = pRdCost->getLambda(); @@ -1957,7 +2089,7 @@ void EncSlice::calculateBoundingCtuTsAddrForSlice(uint32_t &startCtuTSAddrSlice, { // Adjust for wavefronts (no tiles). // WPP: if a slice does not start at the beginning of a CTB row, it must end within the same CTB row - boundingCtuTSAddrSlice = min(boundingCtuTSAddrSlice, startCtuTSAddrSlice - (startCtuTSAddrSlice % pcPic->cs->pcv->widthInCtus) + (pcPic->cs->pcv->widthInCtus)); + boundingCtuTSAddrSlice = std::min(boundingCtuTSAddrSlice, startCtuTSAddrSlice - (startCtuTSAddrSlice % pcPic->cs->pcv->widthInCtus) + (pcPic->cs->pcv->widthInCtus)); } #endif } @@ -2011,7 +2143,7 @@ void EncSlice::xDetermineStartAndBoundingCtuTsAddr ( uint32_t& startCtuTsAddr, pcSlice->setSliceSegmentCurStartCtuTsAddr(startCtuTsAddrSliceSegment); // Make a joint decision based on reconstruction and dependent slice bounds - startCtuTsAddr = max(startCtuTsAddrSlice , startCtuTsAddrSliceSegment ); + startCtuTsAddr = std::max(startCtuTsAddrSlice, startCtuTsAddrSliceSegment); boundingCtuTsAddr = boundingCtuTsAddrSliceSegment; #else startCtuTsAddr = startCtuTsAddrSlice; diff --git a/source/Lib/EncoderLib/EncSlice.h b/source/Lib/EncoderLib/EncSlice.h index 2c6eabb26d25d5362aba4080de659a3827d7a679..836bc4d7caccc05776643776ac7e4dbc2792c25b 100644 --- a/source/Lib/EncoderLib/EncSlice.h +++ b/source/Lib/EncoderLib/EncSlice.h @@ -106,7 +106,7 @@ public: int getGopId() const { return m_gopID; } double calculateLambda( const Slice* slice, const int GOPid, const int depth, const double refQP, const double dQP, int &iQP ); void setUpLambda( Slice* slice, const double dLambda, 
int iQP ); - + private: #endif #if HEVC_TILES_WPP @@ -117,6 +117,10 @@ private: public: +#if ENABLE_QPA + int m_adaptedLumaQP; + +#endif EncSlice(); virtual ~EncSlice(); @@ -126,7 +130,12 @@ public: /// preparation of slice encoding (reference marking, QP and lambda) void initEncSlice ( Picture* pcPic, const int pocLast, const int pocCurr, - const int iGOPid, Slice*& rpcSlice, const bool isField ); + const int iGOPid, Slice*& rpcSlice, const bool isField +#if JVET_K0157 + , bool isEncodeLtRef +#endif + ); + void resetQP ( Picture* pic, int sliceQP, double lambda ); // compress and encode slice @@ -150,7 +159,6 @@ public: void setSliceSegmentIdx (uint32_t i) { m_uiSliceSegmentIdx = i; } SliceType getEncCABACTableIdx() const { return m_encCABACTableIdx; } - private: double xGetQPValueAccordingToLambda ( double lambda ); }; diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp index 1b790b62a1f9f528b4a2c4a1bb97d17a70d9a5a9..11e5085dd69ffa6556f82e62c0f9cd5a82522737 100644 --- a/source/Lib/EncoderLib/InterSearch.cpp +++ b/source/Lib/EncoderLib/InterSearch.cpp @@ -749,9 +749,11 @@ void InterSearch::xMergeEstimation( PredictionUnit& pu, PelUnitBuf& origBuf, int uiMergeIdx = uiMergeCand; } } + } + //! search of the best candidate for inter prediction void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) { @@ -796,6 +798,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) int bestBiPMvpL1 = 0; Distortion biPDistTemp = std::numeric_limits<Distortion>::max(); + MergeCtx mergeCtx; // Loop over Prediction Units @@ -874,7 +877,6 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ ) { RefPicList eRefPicList = ( iRefList ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0 ); - for ( int iRefIdxTemp = 0; iRefIdxTemp < cs.slice->getNumRefIdx(eRefPicList); iRefIdxTemp++ ) { uiBitsTemp = uiMbBits[iRefList]; @@ -1093,7 +1095,6 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) iRefStart = 0; iRefEnd = cs.slice->getNumRefIdx(eRefPicList)-1; - for ( int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++ ) { uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList]; @@ -1187,6 +1188,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) uiBits [1] = bitsValidList1; uiCost [1] = costValidList1; + #if JVET_K_AFFINE uiLastModeTemp = uiLastMode; #endif @@ -1232,6 +1234,7 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) uiMEBits = uiBits[1]; } + if ( cu.partSize != SIZE_2Nx2N ) { uint32_t uiMRGIndex = 0; @@ -1308,9 +1311,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) int refIdx4Para[2] = { -1, -1 }; #if JVET_K0220_ENC_CTRL - xPredAffineInterSearch( pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para ); + xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para); #else - xPredAffineInterSearch( pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, bFastSkipBi, acMvAffine4Para, refIdx4Para ); + xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, bFastSkipBi, acMvAffine4Para, refIdx4Para); #endif if ( cu.slice->getSPS()->getSpsNext().getUseAffineType() ) { @@ -1347,9 +1350,9 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) Distortion uiAffine6Cost = std::numeric_limits<Distortion>::max(); cu.affineType = AFFINEMODEL_6PARAM; #if JVET_K0220_ENC_CTRL - xPredAffineInterSearch( pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para ); + xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, 
uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para); #else - xPredAffineInterSearch( pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, bFastSkipBi, acMvAffine4Para, refIdx4Para ); + xPredAffineInterSearch(pu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, bFastSkipBi, acMvAffine4Para, refIdx4Para); #endif // reset to 4 parameter affine inter mode @@ -1414,6 +1417,8 @@ void InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner) } } #endif + + m_maxCompIDToPred = MAX_NUM_COMPONENT; { @@ -1662,6 +1667,7 @@ Distortion InterSearch::xGetAffineTemplateCost( PredictionUnit& pu, PelUnitBuf& void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, RefPicList eRefPicList, Mv& rcMvPred, int iRefIdxPred, Mv& rcMv, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, bool bBi) { + Mv cMvHalf, cMvQter; CHECK(eRefPicList >= MAX_NUM_REF_LIST_ADAPT_SR || iRefIdxPred>=int(MAX_IDX_ADAPT_SR), "Invalid reference picture list"); @@ -1678,8 +1684,8 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref // NOTE: Other buf contains predicted signal from another direction PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative(*pu.cu, pu )); origBufTmp.copyFrom(origBuf); - origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs() ); - + origBufTmp.removeHighFreq( otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs() + ); pBuf = &origBufTmp; fWeight = 0.5; @@ -1701,6 +1707,17 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref #if JVET_K0357_AMVR cStruct.imvShift = pu.cu->imv << 1; #endif +#if JVET_K0157 + cStruct.inCtuSearch = false; + cStruct.zeroMV = false; + { + if (pu.cs->sps->getSpsNext().getUseCompositeRef() && pu.cs->slice->getRefPic(eRefPicList, iRefIdxPred)->longTerm) + { + cStruct.inCtuSearch = true; + } + } +#endif + auto blkCache = 
dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl ); bool bQTBTMV = false; @@ -1730,7 +1747,11 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref { if( !bQTBTMV ) { - xSetSearchRange( pu, ( bBi ? rcMv : rcMvPred ), iSrchRng, cStruct.searchRange ); + xSetSearchRange(pu, (bBi ? rcMv : rcMvPred), iSrchRng, cStruct.searchRange +#if JVET_K0157 + , cStruct +#endif + ); } cStruct.subShiftMode = m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE1 || m_pcEncCfg->getFastInterSearchMode() == FASTINTERSEARCH_MODE3 ? 2 : 0; xPatternSearch( cStruct, rcMv, ruiCost); @@ -1799,7 +1820,11 @@ void InterSearch::xMotionEstimation(PredictionUnit& pu, PelUnitBuf& origBuf, Ref void InterSearch::xSetSearchRange ( const PredictionUnit& pu, const Mv& cMvPred, const int iSrchRng, - SearchRange& sr ) + SearchRange& sr +#if JVET_K0157 + , IntTZSearchStruct& cStruct +#endif +) { #if JVET_K0346 || JVET_K_AFFINE const int iMvShift = cMvPred.highPrec ? 4 : 2; @@ -1827,6 +1852,35 @@ void InterSearch::xSetSearchRange ( const PredictionUnit& pu, sr.top = mvTL.ver; sr.right = mvBR.hor; sr.bottom = mvBR.ver; + +#if JVET_K0157 + if (pu.cs->sps->getSpsNext().getUseCompositeRef() && cStruct.inCtuSearch) + { + Position posRB = pu.Y().bottomRight(); + Position posTL = pu.Y().topLeft(); + const PreCalcValues *pcv = pu.cs->pcv; + Position posRBinCTU(posRB.x & pcv->maxCUWidthMask, posRB.y & pcv->maxCUHeightMask); + Position posLTinCTU = Position(posTL.x & pcv->maxCUWidthMask, posTL.y & pcv->maxCUHeightMask).offset(-4, -4); + if (sr.left < -posLTinCTU.x) + sr.left = -posLTinCTU.x; + if (sr.top < -posLTinCTU.y) + sr.top = -posLTinCTU.y; + if (sr.right >((int)pcv->maxCUWidth - 4 - posRBinCTU.x)) + sr.right = (int)pcv->maxCUWidth - 4 - posRBinCTU.x; + if (sr.bottom >((int)pcv->maxCUHeight - 4 - posRBinCTU.y)) + sr.bottom = (int)pcv->maxCUHeight - 4 - posRBinCTU.y; + if (posLTinCTU.x == -4 || posLTinCTU.y == -4) + { + sr.left = sr.right = sr.bottom = sr.top = 0; + cStruct.zeroMV 
= 1; + } + if (posRBinCTU.x == pcv->maxCUWidthMask || posRBinCTU.y == pcv->maxCUHeightMask) + { + sr.left = sr.right = sr.bottom = sr.top = 0; + cStruct.zeroMV = 1; + } + } +#endif } @@ -1998,7 +2052,11 @@ void InterSearch::xTZSearch( const PredictionUnit& pu, // set search range Mv currBestMv(cStruct.iBestX, cStruct.iBestY ); currBestMv <<= 2; - xSetSearchRange( pu, currBestMv, m_iSearchRange>>(bFastSettings?1:0), sr ); + xSetSearchRange(pu, currBestMv, m_iSearchRange >> (bFastSettings ? 1 : 0), sr +#if JVET_K0157 + , cStruct +#endif + ); } // start search @@ -2258,7 +2316,11 @@ void InterSearch::xTZSearchSelective( const PredictionUnit& pu, // set search range Mv currBestMv(cStruct.iBestX, cStruct.iBestY ); currBestMv <<= 2; - xSetSearchRange( pu, currBestMv, m_iSearchRange, sr ); + xSetSearchRange( pu, currBestMv, m_iSearchRange, sr +#if JVET_K0157 + , cStruct +#endif + ); } // Initial search @@ -2457,7 +2519,11 @@ void InterSearch::xPatternSearchFracDIF( #if JVET_K0357_AMVR +#if JVET_K0157 + if (cStruct.imvShift || (pu.cs->sps->getSpsNext().getUseCompositeRef() && cStruct.zeroMV)) +#else if( cStruct.imvShift ) +#endif { m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY + iOffset, cStruct.iRefStride, m_lumaClpRng.bd, COMPONENT_Y, 0, 1, m_pcEncCfg->getUseHADME() && !bIsLosslessCoded ); ruiCost = m_cDistParam.distFunc( m_cDistParam ); @@ -2581,11 +2647,11 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, pu.cu->affine = true; pu.mergeFlag = false; + // Uni-directional prediction for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ ) { RefPicList eRefPicList = ( iRefList ? 
REF_PIC_LIST_1 : REF_PIC_LIST_0 ); - for ( int iRefIdxTemp = 0; iRefIdxTemp < slice.getNumRefIdx(eRefPicList); iRefIdxTemp++ ) { // Get RefIdx bits @@ -2715,7 +2781,6 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, { xAffineMotionEstimation( pu, origBuf, eRefPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp ); } - // Set best AMVP Index xCopyAffineAMVPInfo( affiAMVPInfoTemp[eRefPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp] ); xCheckBestAffineMVP( pu, affiAMVPInfoTemp[eRefPicList], eRefPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp ); @@ -2866,7 +2931,6 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, iRefStart = 0; iRefEnd = slice.getNumRefIdx(eRefPicList) - 1; - for ( int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++ ) { #if JVET_K0185_AFFINE_6PARA_ENC // reuse refidx of 4-para @@ -2955,6 +3019,7 @@ void InterSearch::xPredAffineInterSearch( PredictionUnit& pu, uiBits[1] = bitsValidList1; uiCost[1] = costValidList1; + // Affine ME result set if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1] ) // Bi { @@ -3184,7 +3249,7 @@ void InterSearch::xCheckBestAffineMVP( PredictionUnit &pu, AffineAMVPInfo &affin if ( iVerIdx != 0 ) { #if JVET_K0185_AFFINE_6PARA_ENC - secondPred = (iVerIdx == 1 ? affineAMVPInfo.mvCandRT[iMVPIdx] : affineAMVPInfo.mvCandLT[iMVPIdx]) + (acMv[0] - affineAMVPInfo.mvCandLT[iMVPIdx]); + secondPred = (iVerIdx == 1 ? 
affineAMVPInfo.mvCandRT[iMVPIdx] : affineAMVPInfo.mvCandLB[iMVPIdx]) + (acMv[0] - affineAMVPInfo.mvCandLT[iMVPIdx]); #else secondPred = affineAMVPInfo.mvCandRT[iMVPIdx] + (acMv[0] - affineAMVPInfo.mvCandLT[iMVPIdx]); #endif @@ -3228,6 +3293,7 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, Distortion& ruiCost, bool bBi ) { + const int width = pu.Y().width; const int height = pu.Y().height; @@ -3245,7 +3311,8 @@ void InterSearch::xAffineMotionEstimation( PredictionUnit& pu, // NOTE: Other buf contains predicted signal from another direction PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)eRefPicList].getBuf( UnitAreaRelative( *pu.cu, pu ) ); origBufTmp.copyFrom(origBuf); - origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs()); + origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->getClipForBiPredMeEnabled(), pu.cu->slice->clpRngs() + ); pBuf = &origBufTmp; fWeight = 0.5; @@ -4184,7 +4251,8 @@ void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &parti #endif } -void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist /*= NULL*/) +void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist /*= NULL*/ +) { const UnitArea& currArea = partitioner.currArea(); const SPS &sps = *cs.sps; @@ -4253,10 +4321,8 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par saveCS.picture = cs.picture; saveCS.area.repositionTo(currArea); saveCS.clearTUs(); - TransformUnit &bestTU = saveCS.addTU( currArea, partitioner.chType ); - for( uint32_t c = 0; c < numTBlocks; c++ ) { const ComponentID compID = ComponentID(c); @@ -4541,7 +4607,6 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par for (uint32_t ch = 0; ch < numValidComp; ch++) { const ComponentID compID = ComponentID(ch); - if (tu.blocks[compID].valid()) { if( 
cs.pps->getPpsRangeExtension().getCrossComponentPredictionEnabledFlag() && isChroma(compID) && uiAbsSum[COMPONENT_Y] ) @@ -4588,7 +4653,8 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par do { - xEstimateInterResidualQT(*csSplit, partitioner, bCheckFull ? nullptr : puiZeroDist); + xEstimateInterResidualQT(*csSplit, partitioner, bCheckFull ? nullptr : puiZeroDist + ); csSplit->cost = m_pcRdCost->calcRdCost( csSplit->fracBits, csSplit->dist ); #if JVET_K1000_SIMPLIFIED_EMT @@ -4656,6 +4722,9 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par { cs.useSubStructure( *csSplit, partitioner.chType, currArea, false, false, false, true ); cs.cost = csSplit->cost; +#if JVET_K1000_SIMPLIFIED_EMT + isSplit = true; +#endif } } @@ -4680,7 +4749,8 @@ void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &par #endif } -void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &partitioner, const bool &skipResidual) +void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &partitioner, const bool &skipResidual +) { CodingUnit &cu = *cs.getCU( partitioner.chType ); @@ -4701,13 +4771,11 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa // add an empty TU cs.addTU(cs.area, partitioner.chType); - Distortion distortion = 0; for (int comp = 0; comp < numValidComponents; comp++) { const ComponentID compID = ComponentID(comp); - CPelBuf reco = cs.getRecoBuf (compID); CPelBuf org = cs.getOrgBuf (compID); #if WCG_EXT @@ -4747,7 +4815,6 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa // Residual coding. 
cs.getResiBuf().copyFrom (cs.getOrgBuf()); cs.getResiBuf().subtract (cs.getPredBuf()); - Distortion zeroDistortion = 0; const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() ); @@ -4755,7 +4822,6 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa cs.getOrgResiBuf().copyFrom(cs.getResiBuf()); xEstimateInterResidualQT(cs, partitioner, &zeroDistortion); - TransformUnit &firstTU = *cs.getTU( partitioner.chType ); cu.rootCbf = false; @@ -4777,7 +4843,7 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa const int numValidTBlocks = ::getNumberValidTBlocks( *cs.pcv ); for (uint32_t i = 0; i < numValidTBlocks; i++) { - cu.rootCbf |= TU::getCbf( firstTU, ComponentID( i ) ); + cu.rootCbf |= TU::getCbfAtDepth(firstTU, ComponentID(i), 0); } // ------------------------------------------------------- @@ -4819,7 +4885,6 @@ void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &pa for (int comp = 0; comp < numValidComponents; comp++) { const ComponentID compID = ComponentID(comp); - CPelBuf reco = cs.getRecoBuf (compID); CPelBuf org = cs.getOrgBuf (compID); @@ -4874,7 +4939,6 @@ uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner & { m_CABACEstimator->cu_transquant_bypass_flag( cu ); } - m_CABACEstimator->cu_skip_flag( cu ); m_CABACEstimator->pred_mode ( cu ); m_CABACEstimator->cu_pred_data( cu ); @@ -4888,4 +4952,3 @@ uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner & return fracBits; } - diff --git a/source/Lib/EncoderLib/InterSearch.h b/source/Lib/EncoderLib/InterSearch.h index 6cf72e338edbd4629b594809270d17edb8db4a16..cbdaaf21a92f2d7af4927affbf219613b26833ff 100644 --- a/source/Lib/EncoderLib/InterSearch.h +++ b/source/Lib/EncoderLib/InterSearch.h @@ -53,7 +53,6 @@ #if JVET_K0367_AFFINE_FIX_POINT #include "CommonLib/AffineGradientSearch.h" #endif - //! \ingroup EncoderLib //! 
\{ @@ -64,7 +63,6 @@ static const uint32_t MAX_NUM_REF_LIST_ADAPT_SR = 2; static const uint32_t MAX_IDX_ADAPT_SR = 33; static const uint32_t NUM_MV_PREDICTORS = 3; - class EncModeCtrl; /// encoder search class @@ -95,7 +93,6 @@ private: ClpRng m_lumaClpRng; - protected: // interface to option EncCfg* m_pcEncCfg; @@ -124,7 +121,6 @@ protected: bool m_isInitialized; - public: InterSearch(); virtual ~InterSearch(); @@ -145,7 +141,6 @@ public: void destroy (); void setTempBuffers (CodingStructure ****pSlitCS, CodingStructure ****pFullCS, CodingStructure **pSaveCS ); - #if ENABLE_SPLIT_PARALLELISM void copyState ( const InterSearch& other ); #endif @@ -178,6 +173,10 @@ protected: int subShiftMode; #if JVET_K0357_AMVR unsigned imvShift; +#endif +#if JVET_K0157 + bool inCtuSearch; + bool zeroMV; #endif } IntTZSearchStruct; @@ -199,7 +198,6 @@ public: /// set ME search range void setAdaptiveSearchRange ( int iDir, int iRefIdx, int iSearchRange) { CHECK(iDir >= MAX_NUM_REF_LIST_ADAPT_SR || iRefIdx>=int(MAX_IDX_ADAPT_SR), "Invalid index"); m_aaiAdaptSR[iDir][iRefIdx] = iSearchRange; } - protected: // ------------------------------------------------------------------------------------------------------------------- @@ -291,6 +289,9 @@ protected: const Mv& cMvPred, const int iSrchRng, SearchRange& sr +#if JVET_K0157 + , IntTZSearchStruct & cStruct +#endif ); void xPatternSearchFast ( const PredictionUnit& pu, @@ -368,7 +369,10 @@ protected: void xCopyAffineAMVPInfo ( AffineAMVPInfo& src, AffineAMVPInfo& dst ); void xCheckBestAffineMVP ( PredictionUnit &pu, AffineAMVPInfo &affineAMVPInfo, RefPicList eRefPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost ); + #endif + + void xExtDIFUpSamplingH ( CPelBuf* pcPattern ); void xExtDIFUpSamplingQ ( CPelBuf* pcPatternKey, Mv halfPelRef ); @@ -378,12 +382,13 @@ protected: void setWpScalingDistParam ( int iRefIdx, RefPicList eRefPicListCur, Slice *slice ); - public: - void encodeResAndCalcRdInterCU 
(CodingStructure &cs, Partitioner &partitioner, const bool &skipResidual); + void encodeResAndCalcRdInterCU (CodingStructure &cs, Partitioner &partitioner, const bool &skipResidual + ); void xEncodeInterResidualQT (CodingStructure &cs, Partitioner &partitioner, const ComponentID &compID); - void xEstimateInterResidualQT (CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist = NULL); + void xEstimateInterResidualQT (CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist = NULL + ); uint64_t xGetSymbolFracBitsInter (CodingStructure &cs, Partitioner &partitioner); };// END CLASS DEFINITION EncSearch diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp index b0696fe49b51d4034356e9f7f0a18fc0c46bff17..eb8519e79dbe930bd7cc2d13f507cce82b8e21ce 100644 --- a/source/Lib/EncoderLib/IntraSearch.cpp +++ b/source/Lib/EncoderLib/IntraSearch.cpp @@ -915,7 +915,8 @@ void IntraSearch::xEncIntraHeader(CodingStructure &cs, Partitioner &partitioner, // CU header if( isFirst ) { - if( !cs.slice->isIntra() ) + if( !cs.slice->isIntra() + ) { if( cs.pps->getTransquantBypassEnabledFlag() ) { diff --git a/source/Lib/EncoderLib/RateCtrl.cpp b/source/Lib/EncoderLib/RateCtrl.cpp index 6c265b9324123e14fa98c0d1d16761cd5100e890..4b4fffb03c458a4b6c7ba89e41b63c7c3cf1b066 100644 --- a/source/Lib/EncoderLib/RateCtrl.cpp +++ b/source/Lib/EncoderLib/RateCtrl.cpp @@ -39,6 +39,10 @@ #include <cmath> +#if JVET_K0390_RATECTRL +#define LAMBDA_PREC 1000000 +#endif + using namespace std; //sequence level @@ -66,6 +70,9 @@ EncRCSeq::EncRCSeq() m_useLCUSeparateModel = false; m_adaptiveBit = 0; m_lastLambda = 0.0; +#if RATECTRL_FIX_FULLNBIT + m_bitDepth = 0; +#endif } EncRCSeq::~EncRCSeq() @@ -138,6 +145,9 @@ void EncRCSeq::create( int totalFrames, int targetBitrate, int frameRate, int GO { m_picPara[i].m_alpha = 0.0; m_picPara[i].m_beta = 0.0; +#if JVET_K0390_RATECTRL + m_picPara[i].m_validPix = -1; +#endif } if ( m_useLCUSeparateModel ) @@ 
-150,6 +160,9 @@ void EncRCSeq::create( int totalFrames, int targetBitrate, int frameRate, int GO { m_LCUPara[i][j].m_alpha = 0.0; m_LCUPara[i][j].m_beta = 0.0; +#if JVET_K0390_RATECTRL + m_LCUPara[i][j].m_validPix = -1; +#endif } } } @@ -217,13 +230,47 @@ void EncRCSeq::initPicPara( TRCParameter* picPara ) { if (i>0) { +#if RATECTRL_FIX_FULLNBIT +#if DISTORTION_LAMBDA_BUGFIX + int bitdepth_luma_scale = + 2 + * (m_bitDepth - 8 + - DISTORTION_PRECISION_ADJUSTMENT(m_bitDepth)); +#else +#if FULL_NBIT + int bitdepth_luma_scale = 2 * (m_bitDepth - 8); +#else + int bitdepth_luma_scale = 0; +#endif +#endif + m_picPara[i].m_alpha = 3.2003 * pow(2.0, bitdepth_luma_scale); + m_picPara[i].m_beta = -1.367; +#else m_picPara[i].m_alpha = 3.2003; m_picPara[i].m_beta = -1.367; +#endif } else { +#if RATECTRL_FIX_FULLNBIT +#if DISTORTION_LAMBDA_BUGFIX + int bitdepth_luma_scale = + 2 + * (m_bitDepth - 8 + - DISTORTION_PRECISION_ADJUSTMENT(m_bitDepth)); +#else +#if FULL_NBIT + int bitdepth_luma_scale = 2 * (m_bitDepth - 8); +#else + int bitdepth_luma_scale = 0; +#endif +#endif + m_picPara[i].m_alpha = pow(2.0, bitdepth_luma_scale) * ALPHA; + m_picPara[i].m_beta = BETA2; +#else m_picPara[i].m_alpha = ALPHA; m_picPara[i].m_beta = BETA2; +#endif } } } @@ -276,7 +323,11 @@ void EncRCSeq::setAllBitRatio( double basicLambda, double* equaCoeffA, double* e int* bitsRatio = new int[m_GOPSize]; for ( int i=0; i<m_GOPSize; i++ ) { +#if JVET_K0390_RATECTRL + bitsRatio[i] = (int)(equaCoeffA[i] * pow(basicLambda, equaCoeffB[i]) * (double)getPicPara(getGOPID2Level(i)).m_validPix); +#else bitsRatio[i] = (int)( equaCoeffA[i] * pow( basicLambda, equaCoeffB[i] ) * m_numberOfPixel ); +#endif } initBitsRatio( bitsRatio ); delete[] bitsRatio; @@ -353,9 +404,75 @@ void EncRCGOP::create( EncRCSeq* encRCSeq, int numPic ) lambdaRatio[7] = 12.3; } } +#if JVET_K0390_RATECTRL + else if (encRCSeq->getAdaptiveBits() == 3) // for GOP size = 16, random access case + { + { +#if RATECTRL_FIX_FULLNBIT +#if 
DISTORTION_LAMBDA_BUGFIX + int bitdepth_luma_scale = + 2 + * (encRCSeq->getbitDepth() - 8 + - DISTORTION_PRECISION_ADJUSTMENT(encRCSeq->getbitDepth())); +#else +#if FULL_NBIT + int bitdepth_luma_scale = 2 * (encRCSeq->getbitDepth() - 8); +#else + int bitdepth_luma_scale = 0; +#endif +#endif + + double hierarQp = 4.2005 * log(encRCSeq->getLastLambda() / pow(2.0, bitdepth_luma_scale)) + 13.7122; // the qp of POC16 + double qpLev2 = (hierarQp + 0.0) + 0.2016 * (hierarQp + 0.0) - 4.8848; + double qpLev3 = (hierarQp + 3.0) + 0.22286 * (hierarQp + 3.0) - 5.7476; + double qpLev4 = (hierarQp + 4.0) + 0.2333 * (hierarQp + 4.0) - 5.9; + double qpLev5 = (hierarQp + 5.0) + 0.3 * (hierarQp + 5.0) - 7.1444; + + double lambdaLev1 = exp((hierarQp - 13.7122) / 4.2005) *pow(2.0, bitdepth_luma_scale); + double lambdaLev2 = exp((qpLev2 - 13.7122) / 4.2005) * pow(2.0, bitdepth_luma_scale); + double lambdaLev3 = exp((qpLev3 - 13.7122) / 4.2005) * pow(2.0, bitdepth_luma_scale); + double lambdaLev4 = exp((qpLev4 - 13.7122) / 4.2005) * pow(2.0, bitdepth_luma_scale); + double lambdaLev5 = exp((qpLev5 - 13.7122) / 4.2005) * pow(2.0, bitdepth_luma_scale); +#else + double hierarQp = 4.2005 * log(encRCSeq->getLastLambda()) + 13.7122; // the qp of POC16 + double qpLev2 = (hierarQp + 0.0) + 0.2016 * (hierarQp + 0.0) - 4.8848; + double qpLev3 = (hierarQp + 3.0) + 0.22286 * (hierarQp + 3.0) - 5.7476; + double qpLev4 = (hierarQp + 4.0) + 0.2333 * (hierarQp + 4.0) - 5.9; + double qpLev5 = (hierarQp + 5.0) + 0.3 * (hierarQp + 5.0) - 7.1444; + + double lambdaLev1 = exp((hierarQp - 13.7122) / 4.2005); + double lambdaLev2 = exp((qpLev2 - 13.7122) / 4.2005); + double lambdaLev3 = exp((qpLev3 - 13.7122) / 4.2005); + double lambdaLev4 = exp((qpLev4 - 13.7122) / 4.2005); + double lambdaLev5 = exp((qpLev5 - 13.7122) / 4.2005); +#endif + + lambdaRatio[0] = 1.0; + lambdaRatio[1] = lambdaLev2 / lambdaLev1; + lambdaRatio[2] = lambdaLev3 / lambdaLev1; + lambdaRatio[3] = lambdaLev4 / lambdaLev1; + lambdaRatio[4] = 
lambdaLev5 / lambdaLev1; + lambdaRatio[5] = lambdaLev5 / lambdaLev1; + lambdaRatio[6] = lambdaLev4 / lambdaLev1; + lambdaRatio[7] = lambdaLev5 / lambdaLev1; + lambdaRatio[8] = lambdaLev5 / lambdaLev1; + lambdaRatio[9] = lambdaLev3 / lambdaLev1; + lambdaRatio[10] = lambdaLev4 / lambdaLev1; + lambdaRatio[11] = lambdaLev5 / lambdaLev1; + lambdaRatio[12] = lambdaLev5 / lambdaLev1; + lambdaRatio[13] = lambdaLev4 / lambdaLev1; + lambdaRatio[14] = lambdaLev5 / lambdaLev1; + lambdaRatio[15] = lambdaLev5 / lambdaLev1; + } + } +#endif xCalEquaCoeff( encRCSeq, lambdaRatio, equaCoeffA, equaCoeffB, encRCSeq->getGOPSize() ); +#if JVET_K0390_RATECTRL + basicLambda = xSolveEqua(encRCSeq, targetBpp, equaCoeffA, equaCoeffB, encRCSeq->getGOPSize()); +#else basicLambda = xSolveEqua( targetBpp, equaCoeffA, equaCoeffB, encRCSeq->getGOPSize() ); +#endif encRCSeq->setAllBitRatio( basicLambda, equaCoeffA, equaCoeffB ); delete []lambdaRatio; @@ -396,7 +513,11 @@ void EncRCGOP::xCalEquaCoeff( EncRCSeq* encRCSeq, double* lambdaRatio, double* e } } +#if JVET_K0390_RATECTRL +double EncRCGOP::xSolveEqua(EncRCSeq* encRCSeq, double targetBpp, double* equaCoeffA, double* equaCoeffB, int GOPSize) +#else double EncRCGOP::xSolveEqua( double targetBpp, double* equaCoeffA, double* equaCoeffB, int GOPSize ) +#endif { double solution = 100.0; double minNumber = 0.1; @@ -406,7 +527,13 @@ double EncRCGOP::xSolveEqua( double targetBpp, double* equaCoeffA, double* equaC double fx = 0.0; for ( int j=0; j<GOPSize; j++ ) { +#if JVET_K0390_RATECTRL + double tmpBpp = equaCoeffA[j] * pow(solution, equaCoeffB[j]); + double actualBpp = tmpBpp * (double)encRCSeq->getPicPara(encRCSeq->getGOPID2Level(j)).m_validPix / (double)encRCSeq->getNumPixel(); + fx += actualBpp; +#else fx += equaCoeffA[j] * pow( solution, equaCoeffB[j] ); +#endif } if ( fabs( fx - targetBpp ) < 0.000001 ) @@ -484,6 +611,10 @@ EncRCPic::EncRCPic() m_picActualBits = 0; m_picQP = 0; m_picLambda = 0.0; +#if JVET_K0390_RATECTRL + m_picMSE = 0.0; + 
m_validPixelsInPic = 0; +#endif } EncRCPic::~EncRCPic() @@ -641,6 +772,10 @@ void EncRCPic::create( EncRCSeq* encRCSeq, EncRCGOP* encRCGOP, int frameLevel, l { LCUIdx = j*picWidthInLCU + i; m_LCUs[LCUIdx].m_actualBits = 0; +#if JVET_K0390_RATECTRL + m_LCUs[LCUIdx].m_actualSSE = 0.0; + m_LCUs[LCUIdx].m_actualMSE = 0.0; +#endif m_LCUs[LCUIdx].m_QP = 0; m_LCUs[LCUIdx].m_lambda = 0.0; m_LCUs[LCUIdx].m_targetBits = 0; @@ -654,6 +789,10 @@ void EncRCPic::create( EncRCSeq* encRCSeq, EncRCGOP* encRCGOP, int frameLevel, l m_picActualBits = 0; m_picQP = 0; m_picLambda = 0.0; +#if JVET_K0390_RATECTRL + m_validPixelsInPic = 0; + m_picMSE = 0.0; +#endif } void EncRCPic::destroy() @@ -673,6 +812,19 @@ double EncRCPic::estimatePicLambda( list<EncRCPic*>& listPreviousPictures, Slice double alpha = m_encRCSeq->getPicPara( m_frameLevel ).m_alpha; double beta = m_encRCSeq->getPicPara( m_frameLevel ).m_beta; double bpp = (double)m_targetBits/(double)m_numberOfPixel; + +#if JVET_K0390_RATECTRL + int lastPicValPix = 0; + if (listPreviousPictures.size() > 0) + { + lastPicValPix = m_encRCSeq->getPicPara(m_frameLevel).m_validPix; + } + if (lastPicValPix > 0) + { + bpp = (double)m_targetBits / (double)lastPicValPix; + } +#endif + double estLambda; if (eSliceType == I_SLICE) { @@ -727,6 +879,10 @@ double EncRCPic::estimatePicLambda( list<EncRCPic*>& listPreviousPictures, Slice estLambda = 0.1; } +#if JVET_K0390_RATECTRL + //Avoid different results in different platforms. The problem is caused by the different results of pow() in different platforms. 
+ estLambda = double(int64_t(estLambda * (double)LAMBDA_PREC + 0.5)) / (double)LAMBDA_PREC; +#endif m_estPicLambda = estLambda; double totalWeight = 0.0; @@ -764,7 +920,24 @@ double EncRCPic::estimatePicLambda( list<EncRCPic*>& listPreviousPictures, Slice int EncRCPic::estimatePicQP( double lambda, list<EncRCPic*>& listPreviousPictures ) { +#if RATECTRL_FIX_FULLNBIT +#if DISTORTION_LAMBDA_BUGFIX + int bitdepth_luma_scale = + 2 + * (m_encRCSeq->getbitDepth() - 8 + - DISTORTION_PRECISION_ADJUSTMENT(m_encRCSeq->getbitDepth())); +#else +#if FULL_NBIT + int bitdepth_luma_scale = 2 * (m_encRCSeq->getbitDepth() - 8); +#else + int bitdepth_luma_scale = 0; +#endif +#endif + + int QP = int(4.2005 * log(lambda / pow(2.0, bitdepth_luma_scale)) + 13.7122 + 0.5); +#else int QP = int( 4.2005 * log( lambda ) + 13.7122 + 0.5 ); +#endif int lastLevelQP = g_RCInvalidQPValue; int lastPicQP = g_RCInvalidQPValue; @@ -895,13 +1068,34 @@ double EncRCPic::getLCUEstLambda( double bpp ) estLambda = 0.1; } +#if JVET_K0390_RATECTRL + //Avoid different results in different platforms. The problem is caused by the different results of pow() in different platforms. 
+ estLambda = double(int64_t(estLambda * (double)LAMBDA_PREC + 0.5)) / (double)LAMBDA_PREC; +#endif return estLambda; } int EncRCPic::getLCUEstQP( double lambda, int clipPicQP ) { int LCUIdx = getLCUCoded(); +#if RATECTRL_FIX_FULLNBIT +#if DISTORTION_LAMBDA_BUGFIX + int bitdepth_luma_scale = + 2 + * (m_encRCSeq->getbitDepth() - 8 + - DISTORTION_PRECISION_ADJUSTMENT(m_encRCSeq->getbitDepth())); +#else +#if FULL_NBIT + int bitdepth_luma_scale = 2 * (m_encRCSeq->getbitDepth() - 8); +#else + int bitdepth_luma_scale = 0; +#endif +#endif + + int estQP = int(4.2005 * log(lambda / pow(2.0, bitdepth_luma_scale)) + 13.7122 + 0.5); +#else int estQP = int( 4.2005 * log( lambda ) + 13.7122 + 0.5 ); +#endif //for Lambda clip, LCU level clip int clipNeighbourQP = g_RCInvalidQPValue; @@ -929,6 +1123,9 @@ void EncRCPic::updateAfterCTU( int LCUIdx, int bits, int QP, double lambda, bool m_LCUs[LCUIdx].m_actualBits = bits; m_LCUs[LCUIdx].m_QP = QP; m_LCUs[LCUIdx].m_lambda = lambda; +#if JVET_K0390_RATECTRL + m_LCUs[LCUIdx].m_actualSSE = m_LCUs[LCUIdx].m_actualMSE * m_LCUs[LCUIdx].m_numberOfPixel; +#endif m_LCULeft--; m_bitsLeft -= bits; @@ -964,7 +1161,34 @@ void EncRCPic::updateAfterCTU( int LCUIdx, int bits, int QP, double lambda, bool TRCParameter rcPara; rcPara.m_alpha = alpha; rcPara.m_beta = beta; +#if JVET_K0390_RATECTRL + if (QP == g_RCInvalidQPValue && m_encRCSeq->getAdaptiveBits() == 1) + { + rcPara.m_validPix = 0; + } + else + { + rcPara.m_validPix = LCUTotalPixels; + } + + double MSE = m_LCUs[LCUIdx].m_actualMSE; + double updatedK = bpp * inputLambda / MSE; + double updatedC = MSE / pow(bpp, -updatedK); + rcPara.m_alpha = updatedC * updatedK; + rcPara.m_beta = -updatedK - 1.0; + + if (bpp > 0 && updatedK > 0.0001) + { + m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara); + } + else + { + rcPara.m_alpha = Clip3(0.0001, g_RCAlphaMaxValue, rcPara.m_alpha); + m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara); + } +#else m_encRCSeq->setLCUPara( m_frameLevel, LCUIdx, rcPara 
); +#endif return; } @@ -981,7 +1205,34 @@ void EncRCPic::updateAfterCTU( int LCUIdx, int bits, int QP, double lambda, bool TRCParameter rcPara; rcPara.m_alpha = alpha; rcPara.m_beta = beta; +#if JVET_K0390_RATECTRL + if (QP == g_RCInvalidQPValue && m_encRCSeq->getAdaptiveBits() == 1) + { + rcPara.m_validPix = 0; + } + else + { + rcPara.m_validPix = LCUTotalPixels; + } + + double MSE = m_LCUs[LCUIdx].m_actualMSE; + double updatedK = bpp * inputLambda / MSE; + double updatedC = MSE / pow(bpp, -updatedK); + rcPara.m_alpha = updatedC * updatedK; + rcPara.m_beta = -updatedK - 1.0; + + if (bpp > 0 && updatedK > 0.0001) + { + m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara); + } + else + { + rcPara.m_alpha = Clip3(0.0001, g_RCAlphaMaxValue, rcPara.m_alpha); + m_encRCSeq->setLCUPara(m_frameLevel, LCUIdx, rcPara); + } +#else m_encRCSeq->setLCUPara( m_frameLevel, LCUIdx, rcPara ); +#endif } @@ -1017,16 +1268,41 @@ double EncRCPic::calAverageLambda() double totalLambdas = 0.0; int numTotalLCUs = 0; +#if JVET_K0390_RATECTRL + double totalSSE = 0.0; + int totalPixels = 0; +#endif int i; for ( i=0; i<m_numberOfLCU; i++ ) { if ( m_LCUs[i].m_lambda > 0.01 ) { +#if JVET_K0390_RATECTRL + if (m_LCUs[i].m_QP > 0 || m_encRCSeq->getAdaptiveBits() != 1) + { + m_validPixelsInPic += m_LCUs[i].m_numberOfPixel; + + totalLambdas += log(m_LCUs[i].m_lambda); + numTotalLCUs++; + } +#else totalLambdas += log( m_LCUs[i].m_lambda ); numTotalLCUs++; +#endif + +#if JVET_K0390_RATECTRL + if (m_LCUs[i].m_QP > 0 || m_encRCSeq->getAdaptiveBits() != 1) + { + totalSSE += m_LCUs[i].m_actualSSE; + totalPixels += m_LCUs[i].m_numberOfPixel; + } +#endif } } +#if JVET_K0390_RATECTRL + setPicMSE(totalPixels > 0 ? 
totalSSE / (double)totalPixels : 1.0); //1.0 is useless in the following process, just to make sure the divisor not be 0 +#endif double avgLambda; if( numTotalLCUs == 0 ) { @@ -1065,7 +1341,11 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do { // update parameters double picActualBits = ( double )m_picActualBits; +#if JVET_K0390_RATECTRL + double picActualBpp = picActualBits / (double)m_validPixelsInPic; +#else double picActualBpp = picActualBits/(double)m_numberOfPixel; +#endif double calLambda = alpha * pow( picActualBpp, beta ); double inputLambda = m_picLambda; @@ -1080,7 +1360,26 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do TRCParameter rcPara; rcPara.m_alpha = alpha; rcPara.m_beta = beta; +#if JVET_K0390_RATECTRL + double avgMSE = getPicMSE(); + double updatedK = picActualBpp * averageLambda / avgMSE; + double updatedC = avgMSE / pow(picActualBpp, -updatedK); + + if (m_frameLevel > 0) //only use for level > 0 + { + rcPara.m_alpha = updatedC * updatedK; + rcPara.m_beta = -updatedK - 1.0; + } + + rcPara.m_validPix = m_validPixelsInPic; + + if (m_validPixelsInPic > 0) + { + m_encRCSeq->setPicPara(m_frameLevel, rcPara); + } +#else m_encRCSeq->setPicPara( m_frameLevel, rcPara ); +#endif return; } @@ -1099,8 +1398,27 @@ void EncRCPic::updateAfterPicture( int actualHeaderBits, int actualTotalBits, do TRCParameter rcPara; rcPara.m_alpha = alpha; rcPara.m_beta = beta; +#if JVET_K0390_RATECTRL + double picActualBpp = (double)m_picActualBits / (double)m_validPixelsInPic; + + double avgMSE = getPicMSE(); + double updatedK = picActualBpp * averageLambda / avgMSE; + double updatedC = avgMSE / pow(picActualBpp, -updatedK); + if (m_frameLevel > 0) //only use for level > 0 + { + rcPara.m_alpha = updatedC * updatedK; + rcPara.m_beta = -updatedK - 1.0; + } + + rcPara.m_validPix = m_validPixelsInPic; + if (m_validPixelsInPic > 0) + { + m_encRCSeq->setPicPara(m_frameLevel, rcPara); + } +#else 
m_encRCSeq->setPicPara( m_frameLevel, rcPara ); +#endif if ( m_frameLevel == 1 ) { @@ -1188,12 +1506,38 @@ double EncRCPic::getLCUEstLambdaAndQP(double bpp, int clipPicQP, int *estQP) minQP = max(clipNeighbourQP - 1, minQP); } +#if RATECTRL_FIX_FULLNBIT +#if DISTORTION_LAMBDA_BUGFIX + int bitdepth_luma_scale = + 2 + * (m_encRCSeq->getbitDepth() - 8 + - DISTORTION_PRECISION_ADJUSTMENT(m_encRCSeq->getbitDepth())); +#else +#if FULL_NBIT + int bitdepth_luma_scale = 2 * (m_encRCSeq->getbitDepth() - 8); +#else + int bitdepth_luma_scale = 0; +#endif +#endif + + double maxLambda = exp(((double)(maxQP + 0.49) - 13.7122) / 4.2005) * pow(2.0, bitdepth_luma_scale); + double minLambda = exp(((double)(minQP - 0.49) - 13.7122) / 4.2005) * pow(2.0, bitdepth_luma_scale); +#else double maxLambda=exp(((double)(maxQP+0.49)-13.7122)/4.2005); double minLambda=exp(((double)(minQP-0.49)-13.7122)/4.2005); +#endif estLambda = Clip3(minLambda, maxLambda, estLambda); +#if JVET_K0390_RATECTRL + //Avoid different results in different platforms. The problem is caused by the different results of pow() in different platforms. 
+ estLambda = double(int64_t(estLambda * (double)LAMBDA_PREC + 0.5)) / (double)LAMBDA_PREC; +#endif +#if RATECTRL_FIX_FULLNBIT + *estQP = int(4.2005 * log(estLambda / pow(2.0, bitdepth_luma_scale)) + 13.7122 + 0.5); +#else *estQP = int( 4.2005 * log(estLambda) + 13.7122 + 0.5 ); +#endif *estQP = Clip3(minQP, maxQP, *estQP); return estLambda; @@ -1231,7 +1575,11 @@ void RateCtrl::destroy() } } +#if RATECTRL_FIX_FULLNBIT +void RateCtrl::init(int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int bitDepth, int keepHierBits, bool useLCUSeparateModel, GOPEntry GOPList[MAX_GOP]) +#else void RateCtrl::init( int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int keepHierBits, bool useLCUSeparateModel, GOPEntry GOPList[MAX_GOP] ) +#endif { destroy(); @@ -1251,7 +1599,11 @@ void RateCtrl::init( int totalFrames, int targetBitrate, int frameRate, int GOPS { numberOfLevel = int( log((double)GOPSize)/log(2.0) + 0.5 ) + 1; } +#if JVET_K0390_RATECTRL + if (!isLowdelay && (GOPSize == 16 || GOPSize == 8)) +#else if ( !isLowdelay && GOPSize == 8 ) +#endif { numberOfLevel = int( log((double)GOPSize)/log(2.0) + 0.5 ) + 1; } @@ -1361,6 +1713,92 @@ void RateCtrl::init( int totalFrames, int targetBitrate, int frameRate, int GOPS adaptiveBit = 2; } } +#if JVET_K0390_RATECTRL + else if (GOPSize == 16 && !isLowdelay) + { + if (bpp > 0.2) + { + bitsRatio[0] = 10; + bitsRatio[1] = 8; + bitsRatio[2] = 4; + bitsRatio[3] = 2; + bitsRatio[4] = 1; + bitsRatio[5] = 1; + bitsRatio[6] = 2; + bitsRatio[7] = 1; + bitsRatio[8] = 1; + bitsRatio[9] = 4; + bitsRatio[10] = 2; + bitsRatio[11] = 1; + bitsRatio[12] = 1; + bitsRatio[13] = 2; + bitsRatio[14] = 1; + bitsRatio[15] = 1; + } + else if (bpp > 0.1) + { + bitsRatio[0] = 15; + bitsRatio[1] = 9; + bitsRatio[2] = 4; + bitsRatio[3] = 2; + bitsRatio[4] = 1; + bitsRatio[5] = 1; + bitsRatio[6] = 2; + bitsRatio[7] = 1; + 
bitsRatio[8] = 1; + bitsRatio[9] = 4; + bitsRatio[10] = 2; + bitsRatio[11] = 1; + bitsRatio[12] = 1; + bitsRatio[13] = 2; + bitsRatio[14] = 1; + bitsRatio[15] = 1; + } + else if (bpp > 0.05) + { + bitsRatio[0] = 40; + bitsRatio[1] = 17; + bitsRatio[2] = 7; + bitsRatio[3] = 2; + bitsRatio[4] = 1; + bitsRatio[5] = 1; + bitsRatio[6] = 2; + bitsRatio[7] = 1; + bitsRatio[8] = 1; + bitsRatio[9] = 7; + bitsRatio[10] = 2; + bitsRatio[11] = 1; + bitsRatio[12] = 1; + bitsRatio[13] = 2; + bitsRatio[14] = 1; + bitsRatio[15] = 1; + } + else + { + bitsRatio[0] = 40; + bitsRatio[1] = 15; + bitsRatio[2] = 6; + bitsRatio[3] = 3; + bitsRatio[4] = 1; + bitsRatio[5] = 1; + bitsRatio[6] = 3; + bitsRatio[7] = 1; + bitsRatio[8] = 1; + bitsRatio[9] = 6; + bitsRatio[10] = 3; + bitsRatio[11] = 1; + bitsRatio[12] = 1; + bitsRatio[13] = 3; + bitsRatio[14] = 1; + bitsRatio[15] = 1; + } + + if (keepHierBits == 2) + { + adaptiveBit = 3; + } + } +#endif else { msg( WARNING, "\n hierarchical bit allocation is not support for the specified coding structure currently.\n" ); @@ -1397,6 +1835,27 @@ void RateCtrl::init( int totalFrames, int targetBitrate, int frameRate, int GOPS GOPID2Level[6] = 4; GOPID2Level[7] = 4; } +#if JVET_K0390_RATECTRL + else if (GOPSize == 16 && !isLowdelay) + { + GOPID2Level[0] = 1; + GOPID2Level[1] = 2; + GOPID2Level[2] = 3; + GOPID2Level[3] = 4; + GOPID2Level[4] = 5; + GOPID2Level[5] = 5; + GOPID2Level[6] = 4; + GOPID2Level[7] = 5; + GOPID2Level[8] = 5; + GOPID2Level[9] = 3; + GOPID2Level[10] = 4; + GOPID2Level[11] = 5; + GOPID2Level[12] = 5; + GOPID2Level[13] = 4; + GOPID2Level[14] = 5; + GOPID2Level[15] = 5; + } +#endif } if ( !isLowdelay && GOPSize == 8 ) @@ -1410,11 +1869,35 @@ void RateCtrl::init( int totalFrames, int targetBitrate, int frameRate, int GOPS GOPID2Level[6] = 4; GOPID2Level[7] = 4; } +#if JVET_K0390_RATECTRL + else if (GOPSize == 16 && !isLowdelay) + { + GOPID2Level[0] = 1; + GOPID2Level[1] = 2; + GOPID2Level[2] = 3; + GOPID2Level[3] = 4; + 
GOPID2Level[4] = 5; + GOPID2Level[5] = 5; + GOPID2Level[6] = 4; + GOPID2Level[7] = 5; + GOPID2Level[8] = 5; + GOPID2Level[9] = 3; + GOPID2Level[10] = 4; + GOPID2Level[11] = 5; + GOPID2Level[12] = 5; + GOPID2Level[13] = 4; + GOPID2Level[14] = 5; + GOPID2Level[15] = 5; + } +#endif m_encRCSeq = new EncRCSeq; m_encRCSeq->create( totalFrames, targetBitrate, frameRate, GOPSize, picWidth, picHeight, LCUWidth, LCUHeight, numberOfLevel, useLCUSeparateModel, adaptiveBit ); m_encRCSeq->initBitsRatio( bitsRatio ); m_encRCSeq->initGOPID2Level( GOPID2Level ); +#if RATECTRL_FIX_FULLNBIT + m_encRCSeq->setBitDepth(bitDepth); +#endif m_encRCSeq->initPicPara(); if ( useLCUSeparateModel ) { diff --git a/source/Lib/EncoderLib/RateCtrl.h b/source/Lib/EncoderLib/RateCtrl.h index 8a6a58c13ebb19ed10b3dfa5449a795772327535..52b44fca853c41ba99692f051005668743e3d5f4 100644 --- a/source/Lib/EncoderLib/RateCtrl.h +++ b/source/Lib/EncoderLib/RateCtrl.h @@ -84,12 +84,19 @@ struct TRCLCU int m_numberOfPixel; double m_costIntra; int m_targetBitsLeft; +#if JVET_K0390_RATECTRL + double m_actualSSE; + double m_actualMSE; +#endif }; struct TRCParameter { double m_alpha; double m_beta; +#if JVET_K0390_RATECTRL + int m_validPix; +#endif }; class EncRCSeq @@ -147,6 +154,10 @@ public: int getAdaptiveBits() { return m_adaptiveBit; } double getLastLambda() { return m_lastLambda; } void setLastLambda( double lamdba ) { m_lastLambda = lamdba; } +#if RATECTRL_FIX_FULLNBIT + void setBitDepth(int bitDepth) { m_bitDepth = bitDepth; } + int getbitDepth() { return m_bitDepth; } +#endif private: int m_totalFrames; @@ -177,6 +188,9 @@ private: int m_adaptiveBit; double m_lastLambda; +#if RATECTRL_FIX_FULLNBIT + int m_bitDepth; +#endif }; class EncRCGOP @@ -193,7 +207,11 @@ public: private: int xEstGOPTargetBits( EncRCSeq* encRCSeq, int GOPSize ); void xCalEquaCoeff( EncRCSeq* encRCSeq, double* lambdaRatio, double* equaCoeffA, double* equaCoeffB, int GOPSize ); +#if JVET_K0390_RATECTRL + double xSolveEqua(EncRCSeq* 
encRCSeq, double targetBpp, double* equaCoeffA, double* equaCoeffB, int GOPSize); +#else double xSolveEqua( double targetBpp, double* equaCoeffA, double* equaCoeffB, int GOPSize ); +#endif public: EncRCSeq* getEncRCSeq() { return m_encRCSeq; } @@ -282,6 +300,10 @@ public: void setPicEstQP( int QP ) { m_estPicQP = QP; } double getPicEstLambda() { return m_estPicLambda; } void setPicEstLambda( double lambda ) { m_picLambda = lambda; } +#if JVET_K0390_RATECTRL + double getPicMSE() { return m_picMSE; } + void setPicMSE(double avgMSE) { m_picMSE = avgMSE; } +#endif private: EncRCSeq* m_encRCSeq; @@ -309,6 +331,10 @@ private: int m_picActualBits; // the whole picture, including header int m_picQP; // in integer form double m_picLambda; +#if JVET_K0390_RATECTRL + double m_picMSE; + int m_validPixelsInPic; +#endif }; class RateCtrl @@ -318,7 +344,11 @@ public: ~RateCtrl(); public: +#if RATECTRL_FIX_FULLNBIT + void init(int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int bitDepth, int keepHierBits, bool useLCUSeparateModel, GOPEntry GOPList[MAX_GOP]); +#else void init( int totalFrames, int targetBitrate, int frameRate, int GOPSize, int picWidth, int picHeight, int LCUWidth, int LCUHeight, int keepHierBits, bool useLCUSeparateModel, GOPEntry GOPList[MAX_GOP] ); +#endif void destroy(); void initRCPic( int frameLevel ); void initRCGOP( int numberOfPictures ); diff --git a/source/Lib/Utilities/CMakeLists.txt b/source/Lib/Utilities/CMakeLists.txt index b8976132ec48f5a9b5d7f8ad0e256ec43888bb54..0b0464411f660fffc5e6a3ab431d0c0331724388 100644 --- a/source/Lib/Utilities/CMakeLists.txt +++ b/source/Lib/Utilities/CMakeLists.txt @@ -17,7 +17,7 @@ add_library( ${LIB_NAME} STATIC ${SRC_FILES} ${INC_FILES} ${NATVIS_FILES} ) target_compile_definitions( ${LIB_NAME} PUBLIC ) if( ENABLE_VTM ) - target_compile_definitions( ${LIB_NAME} PUBLIC JEM_TOOLS=0 ) + target_compile_definitions( ${LIB_NAME} PUBLIC BMS_TOOLS=0 ) 
endif() if( EXTENSION_360_VIDEO ) diff --git a/source/Lib/Utilities/VideoIOYuv.cpp b/source/Lib/Utilities/VideoIOYuv.cpp index 9494596cc472e587a13d9e6c64835454ef427d1a..5f26d44d5aee4afee42ca848573ef2a0233c3900 100644 --- a/source/Lib/Utilities/VideoIOYuv.cpp +++ b/source/Lib/Utilities/VideoIOYuv.cpp @@ -413,24 +413,25 @@ static bool readPlane(Pel* dst, * @param fileBitDepth component bit depth in file * @return true for success, false in case of error */ -static bool writePlane(ostream& fd, const Pel* src, bool is16bit, +static bool writePlane(ostream& fd, const Pel* src, + const bool is16bit, const uint32_t stride_src, uint32_t width444, uint32_t height444, const ComponentID compID, const ChromaFormat srcFormat, const ChromaFormat fileFormat, - const int fileBitDepth) + const uint32_t fileBitDepth, + const uint32_t packedYUVOutputMode = 0) { const uint32_t csx_file =getComponentScaleX(compID, fileFormat); const uint32_t csy_file =getComponentScaleY(compID, fileFormat); const uint32_t csx_src =getComponentScaleX(compID, srcFormat); const uint32_t csy_src =getComponentScaleY(compID, srcFormat); -/* const uint32_t stride_src = stride444>>csx_src;*/ - - const uint32_t stride_file = (width444 * (is16bit ? 2 : 1)) >> csx_file; - const uint32_t width_file = width444 >>csx_file; - const uint32_t height_file = height444>>csy_file; + const uint32_t width_file = width444 >> csx_file; + const uint32_t height_file = height444 >> csy_file; + const bool writePYUV = (packedYUVOutputMode > 0) && (fileBitDepth == 10 || fileBitDepth == 12) && ((width_file & (1 + (fileBitDepth & 3))) == 0); + const uint32_t stride_file = writePYUV ? (width444 * fileBitDepth) >> (csx_file + 3) : (width444 * (is16bit ? 
2 : 1)) >> csx_file; std::vector<uint8_t> bufVec(stride_file); uint8_t *buf=&(bufVec[0]); @@ -438,15 +439,106 @@ static bool writePlane(ostream& fd, const Pel* src, bool is16bit, const Pel *pSrcBuf = src; const int srcbuf_stride = stride_src; + if (writePYUV) + { + const uint32_t mask_y_file = (1 << csy_file) - 1; + const uint32_t mask_y_src = (1 << csy_src ) - 1; + const uint32_t widthS_file = width_file >> (fileBitDepth == 12 ? 1 : 2); + + for (uint32_t y444 = 0; y444 < height444; y444++) + { + if ((y444 & mask_y_file) == 0) // write a new line to file + { + if (csx_file < csx_src) + { + // eg file is 444, source is 422. + const uint32_t sx = csx_src - csx_file; + + if (fileBitDepth == 10) // write 4 values into 5 bytes + { + for (uint32_t x = 0; x < widthS_file; x++) + { + const uint32_t src0 = pSrcBuf[(4*x ) >> sx]; + const uint32_t src1 = pSrcBuf[(4*x+1) >> sx]; + const uint32_t src2 = pSrcBuf[(4*x+2) >> sx]; + const uint32_t src3 = pSrcBuf[(4*x+3) >> sx]; + + buf[5*x ] = ((src0 ) & 0xff); // src0:76543210 + buf[5*x+1] = ((src1 << 2) & 0xfc) + ((src0 >> 8) & 0x03); + buf[5*x+2] = ((src2 << 4) & 0xf0) + ((src1 >> 6) & 0x0f); + buf[5*x+3] = ((src3 << 6) & 0xc0) + ((src2 >> 4) & 0x3f); + buf[5*x+4] = ((src3 >> 2) & 0xff); // src3:98765432 + } + } + else if (fileBitDepth == 12) //...2 values into 3 bytes + { + for (uint32_t x = 0; x < widthS_file; x++) + { + const uint32_t src0 = pSrcBuf[(2*x ) >> sx]; + const uint32_t src1 = pSrcBuf[(2*x+1) >> sx]; + + buf[3*x ] = ((src0 ) & 0xff); // src0:76543210 + buf[3*x+1] = ((src1 << 4) & 0xf0) + ((src0 >> 8) & 0x0f); + buf[3*x+2] = ((src1 >> 4) & 0xff); // src1:BA987654 + } + } + } + else + { + // eg file is 422, source is 444. 
+ const uint32_t sx = csx_file - csx_src; + + if (fileBitDepth == 10) // write 4 values into 5 bytes + { + for (uint32_t x = 0; x < widthS_file; x++) + { + const uint32_t src0 = pSrcBuf[(4*x ) << sx]; + const uint32_t src1 = pSrcBuf[(4*x+1) << sx]; + const uint32_t src2 = pSrcBuf[(4*x+2) << sx]; + const uint32_t src3 = pSrcBuf[(4*x+3) << sx]; + + buf[5*x ] = ((src0 ) & 0xff); // src0:76543210 + buf[5*x+1] = ((src1 << 2) & 0xfc) + ((src0 >> 8) & 0x03); + buf[5*x+2] = ((src2 << 4) & 0xf0) + ((src1 >> 6) & 0x0f); + buf[5*x+3] = ((src3 << 6) & 0xc0) + ((src2 >> 4) & 0x3f); + buf[5*x+4] = ((src3 >> 2) & 0xff); // src3:98765432 + } + } + else if (fileBitDepth == 12) //...2 values into 3 bytes + { + for (uint32_t x = 0; x < widthS_file; x++) + { + const uint32_t src0 = pSrcBuf[(2*x ) << sx]; + const uint32_t src1 = pSrcBuf[(2*x+1) << sx]; + + buf[3*x ] = ((src0 ) & 0xff); // src0:76543210 + buf[3*x+1] = ((src1 << 4) & 0xf0) + ((src0 >> 8) & 0x0f); + buf[3*x+2] = ((src1 >> 4) & 0xff); // src1:BA987654 + } + } + } + fd.write (reinterpret_cast<const char*>(buf), stride_file); + if (fd.eof() || fd.fail()) + { + return false; + } + } + if ((y444 & mask_y_src) == 0) + { + pSrcBuf += srcbuf_stride; + } + } + } + else // !writePYUV if (compID!=COMPONENT_Y && (fileFormat==CHROMA_400 || srcFormat==CHROMA_400)) { if (fileFormat!=CHROMA_400) { - const uint32_t value = 1u << (fileBitDepth - 1); + const uint32_t value = 1 << (fileBitDepth - 1); - for(uint32_t y=0; y< height_file; y++) + for (uint32_t y = 0; y < height_file; y++) { if (!is16bit) { @@ -461,7 +553,7 @@ static bool writePlane(ostream& fd, const Pel* src, bool is16bit, uint16_t val(value); for (uint32_t x = 0; x < width_file; x++) { - buf[2*x+0]= (val>>0) & 0xff; + buf[2*x ]= (val>>0) & 0xff; buf[2*x+1]= (val>>8) & 0xff; } } @@ -541,35 +633,41 @@ static bool writePlane(ostream& fd, const Pel* src, bool is16bit, return true; } -static bool writeField(ostream& fd, const Pel* top, const Pel* bottom, bool is16bit, +static bool 
writeField(ostream& fd, const Pel* top, const Pel* bottom, + const bool is16bit, const uint32_t stride_src, uint32_t width444, uint32_t height444, const ComponentID compID, const ChromaFormat srcFormat, const ChromaFormat fileFormat, - const uint32_t fileBitDepth, const bool isTff) + const uint32_t fileBitDepth, const bool isTff, + const uint32_t packedYUVOutputMode = 0) { const uint32_t csx_file =getComponentScaleX(compID, fileFormat); const uint32_t csy_file =getComponentScaleY(compID, fileFormat); const uint32_t csx_src =getComponentScaleX(compID, srcFormat); const uint32_t csy_src =getComponentScaleY(compID, srcFormat); - /*const uint32_t stride_src = stride444>>csx_src;*/ - - const uint32_t stride_file = (width444 * (is16bit ? 2 : 1)) >> csx_file; - const uint32_t width_file = width444 >>csx_file; - const uint32_t height_file = height444>>csy_file; + const uint32_t width_file = width444 >> csx_file; + const uint32_t height_file = height444 >> csy_file; + const bool writePYUV = (packedYUVOutputMode > 0) && (fileBitDepth == 10 || fileBitDepth == 12) && ((width_file & (1 + (fileBitDepth & 3))) == 0); + const uint32_t stride_file = writePYUV ? (width444 * fileBitDepth) >> (csx_file + 3) : (width444 * (is16bit ? 
2 : 1)) >> csx_file; std::vector<uint8_t> bufVec(stride_file * 2); uint8_t *buf=&(bufVec[0]); + if (writePYUV) + { + // TODO + } + else // !writePYUV if (compID!=COMPONENT_Y && (fileFormat==CHROMA_400 || srcFormat==CHROMA_400)) { if (fileFormat!=CHROMA_400) { - const uint32_t value=1<<(fileBitDepth-1); + const uint32_t value = 1 << (fileBitDepth - 1); - for(uint32_t y=0; y< height_file; y++) + for (uint32_t y = 0; y < height_file; y++) { for (uint32_t field = 0; field < 2; field++) { @@ -588,7 +686,7 @@ static bool writeField(ostream& fd, const Pel* top, const Pel* bottom, bool is16 uint16_t val(value); for (uint32_t x = 0; x < width_file; x++) { - fieldBuffer[2*x+0]= (val>>0) & 0xff; + fieldBuffer[2*x ]= (val>>0) & 0xff; fieldBuffer[2*x+1]= (val>>8) & 0xff; } } @@ -753,7 +851,7 @@ bool VideoIOYuv::read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSp scalePlane( picOrg.get(compID), m_bitdepthShift[chType], minval, maxval); } } - + #if EXTENSION_360_VIDEO if (pic.chromaFormat != NUM_CHROMA_FORMAT) ColourSpaceConvert(picOrg, pic, ipcsc, true); @@ -777,7 +875,9 @@ bool VideoIOYuv::read ( PelUnitBuf& pic, PelUnitBuf& picOrg, const InputColourSp * @return true for success, false in case of error */ bool VideoIOYuv::write( const CPelUnitBuf& pic, - const InputColourSpaceConversion ipCSC, int confLeft, int confRight, int confTop, int confBottom, ChromaFormat format, const bool bClipToRec709 ) + const InputColourSpaceConversion ipCSC, + const bool bPackedYUVOutputMode, + int confLeft, int confRight, int confTop, int confBottom, ChromaFormat format, const bool bClipToRec709 ) { PelStorage interm; @@ -844,12 +944,13 @@ bool VideoIOYuv::write( const CPelUnitBuf& pic, { const ComponentID compID = ComponentID(comp); const ChannelType ch = toChannelType(compID); - const uint32_t csx = ::getComponentScaleX(compID, format); - const uint32_t csy = ::getComponentScaleY(compID, format); + const uint32_t csx = ::getComponentScaleX(compID, format); + const uint32_t csy = 
::getComponentScaleY(compID, format); const CPelBuf area = picO.get(compID); const int planeOffset = (confLeft >> csx) + (confTop >> csy) * area.stride; if (!writePlane (m_cHandle, area.bufAt (0, 0) + planeOffset, is16bit, area.stride, - width444, height444, compID, picO.chromaFormat, format, m_fileBitdepth[ch])) + width444, height444, compID, picO.chromaFormat, format, m_fileBitdepth[ch], + bPackedYUVOutputMode ? 1 : 0)) { retval = false; } @@ -858,7 +959,10 @@ bool VideoIOYuv::write( const CPelUnitBuf& pic, return retval; } -bool VideoIOYuv::write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBottom, const InputColourSpaceConversion ipCSC, int confLeft, int confRight, int confTop, int confBottom, ChromaFormat format, const bool isTff, const bool bClipToRec709 ) +bool VideoIOYuv::write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBottom, + const InputColourSpaceConversion ipCSC, + const bool bPackedYUVOutputMode, + int confLeft, int confRight, int confTop, int confBottom, ChromaFormat format, const bool isTff, const bool bClipToRec709 ) { PelStorage intermTop; PelStorage intermBottom; @@ -934,8 +1038,8 @@ bool VideoIOYuv::write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBottom, const CPelBuf areaTop = picTopO. 
get( compID ); const CPelBuf areaBottom = picBottomO.get( compID ); const CPelBuf areaTopY = picTopO.Y(); - const uint32_t width444 = areaTopY.width - (confLeft + confRight); - const uint32_t height444 = areaTopY.height - (confTop + confBottom); + const uint32_t width444 = areaTopY.width - (confLeft + confRight); + const uint32_t height444 = areaTopY.height - (confTop + confBottom); CHECK(areaTop.width == areaBottom.width , "Incompatible formats"); CHECK(areaTop.height == areaBottom.height, "Incompatible formats"); @@ -950,12 +1054,13 @@ bool VideoIOYuv::write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBottom, const uint32_t csy = ::getComponentScaleY(compID, dstChrFormat ); const int planeOffset = (confLeft>>csx) + ( confTop>>csy) * areaTop.stride; //offset is for entire frame - round up for top field and down for bottom field - if (! writeField(m_cHandle, + if (!writeField (m_cHandle, (areaTop. bufAt(0,0) + planeOffset), (areaBottom.bufAt(0,0) + planeOffset), is16bit, areaTop.stride, - width444, height444, compID, dstChrFormat, format, m_fileBitdepth[ch], isTff)) + width444, height444, compID, dstChrFormat, format, m_fileBitdepth[ch], isTff, + bPackedYUVOutputMode ? 
1 : 0)) { retval=false; } diff --git a/source/Lib/Utilities/VideoIOYuv.h b/source/Lib/Utilities/VideoIOYuv.h index 044d274c9ea2d02d26df263a4e2d8dca80c620c9..bf72925bde43753d9de8028bdd1b6586f4003e66 100644 --- a/source/Lib/Utilities/VideoIOYuv.h +++ b/source/Lib/Utilities/VideoIOYuv.h @@ -78,10 +78,16 @@ public: // If fileFormat=NUM_CHROMA_FORMAT, use the format defined by pPicYuv bool write( const CPelUnitBuf& pic, - const InputColourSpaceConversion ipCSC, int confLeft = 0, int confRight = 0, int confTop = 0, int confBottom = 0, ChromaFormat format = NUM_CHROMA_FORMAT, const bool bClipToRec709 = false ); ///< write one YUV frame with padding parameter + const InputColourSpaceConversion ipCSC, + const bool bPackedYUVOutputMode, + int confLeft = 0, int confRight = 0, int confTop = 0, int confBottom = 0, ChromaFormat format = NUM_CHROMA_FORMAT, const bool bClipToRec709 = false ); ///< write one YUV frame with padding parameter // If fileFormat=NUM_CHROMA_FORMAT, use the format defined by pPicYuvTop and pPicYuvBottom - bool write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBot, const InputColourSpaceConversion ipCSC, int confLeft=0, int confRight=0, int confTop=0, int confBottom=0, ChromaFormat fileFormat=NUM_CHROMA_FORMAT, const bool isTff=false, const bool bClipToRec709=false); + bool write( const CPelUnitBuf& picTop, const CPelUnitBuf& picBot, + const InputColourSpaceConversion ipCSC, + const bool bPackedYUVOutputMode, + int confLeft = 0, int confRight = 0, int confTop = 0, int confBottom = 0, ChromaFormat format = NUM_CHROMA_FORMAT, const bool isTff = false, const bool bClipToRec709 = false ); + static void ColourSpaceConvert(const CPelUnitBuf &src, PelUnitBuf &dest, const InputColourSpaceConversion conversion, bool bIsForwards); bool isEof (); ///< check for end-of-file