software-manual.tex


ADAPT_SR_SCALE &
on &
division factor for adaptive search range
\\

EARLY_SKIP_THRES &
1.5 &
early skip if RD < EARLY_SKIP_THRES*avg[BestSkipRD]
\\

MAX_NUM_REF_PICS &
16 &
\\

MAX_CHROMA_FORMAT_IDC &
3 &
\\
\end{MacroTable}

\subsubsection*{TypeDef.h}
Numerous constants that guard individual adoptions are defined within
\url{source/Lib/TLibCommon/TypeDef.h}.


%%
%%
%%
\clearpage
\section{Using the decoder}
\subsection{General}
\begin{minted}{bash}
DecoderApp -b str.bin -o dec.yuv [options]
\end{minted}

\begin{OptionTableNoShorthand}{Decoder options}{tab:decoder-options}
\Option{(--help)} &
%\ShortOption{\None} &
\Default{\None} &
Prints usage information.
\\

\Option{BitStreamFile (-b)} &
%\ShortOption{-b} &
\Default{\NotSet} &
Defines the input bit stream file name.
\\

\Option{ReconFile (-o)} &
%\ShortOption{-o} &
\Default{\NotSet} &
Defines the reconstructed video file name. If empty, no file is generated. If the bitstream contains multiple layers and no single target layer is specified (i.e. TargetOutputLayerSet=-1), a reconstructed file is written for each layer and the layer index is added as suffix to ReconFile. If one or more dots exist in the file name, the layer id is added before the last dot, e.g. 'decoded.yuv' becomes 'decoded0.yuv' for layer id 0, 'decoded' becomes 'decoded0'. If the file extension is Y4M, picture width, picture height, bitdepth, chroma format and frame rate of the current decoding will be output to the Y4M file. As frame rate information is not mandatory in VVC bitstreams, a best guess will be used. If no frame rate information is available in a bitstream, a default frame rate (50 fps) will be output to the Y4M file.
\\

\Option{OplFile (-opl)} &
%\ShortOption{-o} &
\Default{\NotSet} &
Defines the output log file name (*.opl file). If empty, no file is generated. Each output picture log file contains one row for each output picture in the bitstream, in output order. Each row contains the following information, as CSV:	PicOrderCntVal, pic\_width\_max\_in\_luma\_samples, pic\_height\_max\_in\_luma\_samples, MD5 checksum for the Y component, MD5 checksum for the U component, MD5 checksum for the V component. The format of output log file is specified in JVET-P2008. 
\\

\Option{SkipFrames (-s)} &
%\ShortOption{-s} &
\Default{0} &
Defines the number of pictures in decoding order to skip.
\\

\Option{MaxTemporalLayer (-t)} &
%\ShortOption{-t} &
\Default{\NotSet} &
Defines the maximum temporal layer to be decoded. If -1, then all layers are decoded. When not provided the value may be inferred from the OPI NAL unit or the VPS NAL unit of the bitstream.
\\

\Option{TarDecLayerIdSetFile (-l)} &
%\ShortOption{-t} &
\Default{\NotSet} &
Specifies the targetDecLayerIdSet file name. The file would contain white-space separated LayerId values of the layers that are to be decoded.
Omitting the parameter, or using a value of -1 in the file decodes all layers.
\\

\Option{UpscaleFilterForDisplay} &
%\ShortOption{\None} &
\Default{1} &
Filters used for upscaling reconstruction to full resolution (2: ECM 12-tap luma and 6-tap chroma MC filters, 1: Alternative 12-tap luma and 6-tap chroma filters, 0: VVC 8-tap luma and 4-tap chroma MC filters).
\\

\Option{OutputBitDepth (-d)} &
%\ShortOption{-d} &
\Default{0 \\ (Native)} &
Specifies the luma bit-depth of the reconstructed YUV file (the value 0 indicates
that the native bit-depth is used).
\\

\Option{OutputBitDepthC} &
%\ShortOption{\None} &
\Default{0 \\ (Native)} &
Defines the chroma bit-depth of the reconstructed YUV file (the value 0 indicates
that the native bit-depth is used).
\\

\Option{TargetOutputLayerSet (-p)} &
%\ShortOption{-p} &
\Default{\NotSet} &
Specifies the target bitstream layer to be decoded (the value -1 indicates
that the whole bitstream is decoded). When not provided the value may be inferred from the OPI NAL unit or the VPS NAL unit of the bitstream.
\\

\Option{SEIDecodedPictureHash} &
%\ShortOption{\None} &
\Default{1} &
Enable or disable verification of any Picture hash SEI messages. When
this parameter is set to 0, the feature is disabled and all messages are
ignored. When set to 1 (default), the feature is enabled and the decoder
has the following behaviour:
\begin{itemize}
\item
  If Picture hash SEI messages are included in the bit stream, the same type
  of hash is calculated for each decoded picture and written to the
  log together with an indication whether the calculated value matches
  the value in the SEI message. 
  Decoding will continue even if there is a mismatch.

\item
  After decoding is complete, if any MD5sum comparison failed, a warning
  is printed and the decoder exits with the status EXIT_FAILURE.

\item
  The per-picture MD5 log message has the following formats:
  [MD5:d41d8cd98f00b204e9800998ecf8427e,(OK)],
  [MD5:d41d8cd98f00b204e9800998ecf8427e,(unk)],
  [MD5:d41d8cd98f00b204e9800998ecf8427e,(***ERROR***)] [rxMD5:b9e1...]
  where, "(unk)" implies that no MD5 was signalled for this picture,
  "(OK)" implies that the decoder agrees with the signalled MD5,
  "(***ERROR***)" implies that the decoder disagrees with the signalled
  MD5. "[rxMD5:...]" is the signalled MD5 if different.
\end{itemize}
\\

\Option{OutputDecodedSEIMessagesFilename} &
%\ShortOption{\None} &
\Default{\NotSet} &
When a non-empty file name is specified, information regarding any decoded SEI messages will be output to the indicated file. If the file name is '-', then stdout is used instead.
\\

\Option{SEICTIFilename} &
%\ShortOption{\None} &
\Default{\NotSet} &
Specifies that the colour transform information (CTI) SEI message should be applied to the output video, with the output written to this file.
If no value is specified, the SEI message is ignored and no mapping is applied.
\\

\Option{SEIAnnotatedRegionsInfoFilename} &
%\ShortOption{\None} &
\Default{\NotSet} &
When a non-empty file name is specified, object information using the decoded SEI messages will be output to the indicated file.
If no value is specified, the SEI message will not be output.
\\

\Option{OutputColourSpaceConvert} &
\Default{\NotSet} &
Specifies the colour space conversion to apply to 444 video. Permitted values are:
\par
\begin{tabular}{lp{0.45\textwidth}}
  UNCHANGED   & No colour space conversion is applied \\
  YCrCbToYCbCr & Swap the second and third components \\
  GBRtoRGB     & Reorder the three components \\
\end{tabular}
If no value is specified, no colour space conversion is applied. The list may eventually also include RGB to YCbCr or YCgCo conversions.\\
\\

\Option{PYUV} &
\Default{false} &
When true, output 10-bit and 12-bit YUV data as 5-byte and 3-byte (respectively) packed YUV data. See doc/pyuv_format.pdf for details. Ignored for interlaced output.
\\

\Option{SEINoDisplay} &
\Default{false} &
When true, do not output frames for which there is an SEI NoDisplay message.
\\

\Option{ClipOutputVideoToRec709Range} &
%\ShortOption{\None} &
\Default{0} &
If 1 then clip output video to the Rec. 709 Range on saving when OutputBitDepth is less than InternalBitDepth.
\\

\end{OptionTableNoShorthand}


\subsection{Using the decoder analyser}
If the decoder is compiled with the macro RExt__DECODER_DEBUG_BIT_STATISTICS defined as 1 (either externally, or by editing TypeDef.h), the decoder will gather fractional bit counts associated with the different syntax elements, producing a table of the number of bits per syntax element, and where appropriate, according to block size and colour component/channel.
The Linux makefile will compile both the analyser and standard version when the `all' or `everything' target is used (where the latter will also build  high-bit-depth executables).


\section{Block statistics extension}
\label{sec:block-stat-extens}

The block statistics extension enables straightforward visualization and statistical analysis of coding tool
usage in encoded bitstreams. The extension enables the reference
software encoder and decoder to write out statistics files in a configurable
way, which in turn can be loaded into a suitable YUV player for overlay of the
reconstructed YUV sequence, or can be used for statistical analysis at a
selectable scope (e.g. block/picture/sequence level). An example implementation
for such visualization is available with the open-source YUView player
(\url{https://github.com/IENT/YUView}). 


\subsection{Usage}
\label{sec:usage}

The software has to be compiled with the macros ENABLE_TRACING and
K0149_BLOCK_STATISTICS  defined as 1. The statistics can be written by either
encoder or decoder.

The extension adds additional trace channels to the ``dtrace'' functionality of
the software. The following trace channels were added:
\begin{description}
\item[D_BLOCK_STATISTICS_ALL] All syntax elements are written, no matter whether
  they are actually encoded or derived.
\item[D_BLOCK_STATISTICS_CODED] Tries to write only syntax elements, which have
  also been encoded.
\end{description}

The following additional encoder options are available (part of ``dtrace''). See
the file dtrace_next.h for more details.

\begin{OptionTableNoShorthand}{Decoder options}{tab:decoder-block-statistics}
\Option{TraceFile} &
%\ShortOption{\None} &
\Default{\None} &
File name of the produced trace file.
\\

\Option{TraceRule} &
%\ShortOption{-b} &
\Default{\NotSet} &
Specifies which traces should be saved, and for which POCs.
\\

\end{OptionTableNoShorthand}

Concrete examples of calls for  generating a block statistics file are:
\begin{minted}{bash}
bin/DecoderAppStatic -b str/BasketballDrive_1920x1080_QP37.vvc \
    --TraceFile="stats/BasketballDrive_1920x1080_QP37_coded.vtmbmsstats" \
    --TraceRule="D_BLOCK_STATISTICS_CODED:poc>=0"

bin/DecoderAppStatic -b str/BasketballDrive_1920x1080_QP37.vvc \
    --TraceFile="stats/BasketballDrive_1920x1080_QP37_all.vtmbmsstats" \
    --TraceRule="D_BLOCK_STATISTICS_ALL:poc>=0"   
\end{minted}


\subsection{Block statistics file formats}
\label{sec:block-stat-file}
The trace file will contain a header listing information of all available block
statistics. For each statistic it lists a type and a scale for vectors or range
for integers if applicable: 
\begin{verbatim}
# VTMBMS Block Statistics
# Sequence size: [832x 480]
# Block Statistic Type: PredMode; Flag; 
# Block Statistic Type: MergeFlag; Flag; 
# Block Statistic Type: MVL0; Vector; Scale: 4
# Block Statistic Type: MVL1; Vector; Scale: 4
# Block Statistic Type: IPCM; Flag; 
# Block Statistic Type: Y_IntraMode; Integer; [0, 73]
# Block Statistic Type: Cb_IntraMode; Integer; [0, 73]
\end{verbatim}

Two formats are available for the statistics for each block, a human readable
format and a CSV based format. The header remains the same for both cases. 

For both formats each row contains the information for one block statistic. The
order of the data is: picture order count (POC), location of top left corner of
the block, size of the block, name of the statistic, and value of the
statistic. 
The macro BLOCK_STATS_AS_CSV is available in order to choose the required format.
The human readable format can also be easily processed with other software, for
example YUView, using regular expressions. The CSV based format provides the
universal interface required by spreadsheet applications.

The human readable format is based on the format used for the other dtrace
statistics. Some examples for this format are:
\begin{verbatim}
BlockStat: POC 16 @( 112,   0) [ 8x 8] SkipFlag=1
BlockStat: POC 16 @( 112,   0) [ 8x 8] InterDir=1
BlockStat: POC 16 @( 112,   0) [ 8x 8] MergeFlag=1
BlockStat: POC 16 @( 112,   0) [ 8x 8] MergeIdx=0
BlockStat: POC 16 @( 112,   0) [ 8x 8] MergeType=0
BlockStat: POC 16 @( 112,   0) [ 8x 8] MVPIdxL0=255
BlockStat: POC 16 @( 112,   0) [ 8x 8] MVPNumL0=255
BlockStat: POC 16 @( 112,   0) [ 8x 8] RefIdxL0=0
BlockStat: POC 16 @( 112,   0) [ 8x 8] MVDL0={   0,   0}
BlockStat: POC 16 @( 112,   0) [ 8x 8] MVL0={ -70,  18}
BlockStat: POC 16 @( 112,   8) [ 8x 8] PredMode=0
BlockStat: POC 16 @( 112,   8) [ 8x 8] PartSize=0
\end{verbatim}

Some examples of the CSV based format are:
\begin{verbatim}
BlockStat;16; 112;   0; 8; 8;SkipFlag;1
BlockStat;16; 112;   0; 8; 8;InterDir;1
BlockStat;16; 112;   0; 8; 8;MergeFlag;1
BlockStat;16; 112;   0; 8; 8;MergeIdx;0
BlockStat;16; 112;   0; 8; 8;MergeType;0
BlockStat;16; 112;   0; 8; 8;MVPIdxL0;255
BlockStat;16; 112;   0; 8; 8;MVPNumL0;255
BlockStat;16; 112;   0; 8; 8;RefIdxL0;0
BlockStat;16; 112;   0; 8; 8;MVDL0;   0;   0
BlockStat;16; 112;   0; 8; 8;MVL0; -70;  18
BlockStat;16; 112;   8; 8; 8;PredMode;0
BlockStat;16; 112;   8; 8; 8;PartSize;0
\end{verbatim}

\subsection{Visualization}
\label{sec:visualization}

The block statistics can be viewed with YUView, which is freely available under
GPLv3: \url{https://github.com/IENT/YUView}. The latest releases and the master
branch have the functionality required for  viewing the block statistics. YUView
assumes that the file extension of block statistics file is
``.vtmbmsstats''. However, if a file is not recognized you can choose from a list
of supported file formats.


Statistics can be overlaid with YUV sequences. Some example snapshots are:

\begin{figure}[htpb]
  \centering
  \includegraphics[width=0.8\linewidth]{figures/YUView}
  \caption{YUView}
  \label{fig:yuview}
\end{figure}

\begin{figure}[htpb]
  \centering
  \includegraphics[width=0.5\linewidth]{figures/raceHorsesShot2MotionVectors}
  \caption{Motion vectors}
  \label{fig:motion-vectors}
\end{figure}


\begin{figure}[htpb]
  \centering
  \includegraphics[width=0.5\linewidth]{figures/raceHorsesShot3SkipFlag}
  \caption{Skip flag}
  \label{fig:skip-flag}
\end{figure}

\subsection{Adding statistics}
\label{sec:adding-statistics}

In order to add further block statistics, do the following:


\begin{description}
\item[source/Lib/CommonLib/dtrace_blockstatistics.h]
  Add your statistic to the BlockStatistic enum:	
\begin{minted}{c++}
enum class BlockStatistic {
  // general
  PredMode,
  PartSize,
  Depth,
\end{minted}
  
Further, add your statistic to the map blockstatistic2description:
\begin{minted}{c++}
static const std::map<BlockStatistic, 
  std::tuple<std::string, BlockStatisticType, std::string>> 
  blockstatistic2description =
{
  { BlockStatistic::PredMode, 
    std::tuple<std::string, BlockStatisticType, std::string>
    {"PredMode", BlockStatisticType::Flag, ""}},
  { BlockStatistic::MergeFlag,
    std::tuple<std::string, BlockStatisticType, std::string>
    {"MergeFlag", BlockStatisticType::Flag, ""}},
  { BlockStatistic::MVL0,
    std::tuple<std::string, BlockStatisticType, std::string>
    {"MVL0", BlockStatisticType::Vector, "Scale: 4"}},
  YOURS
\end{minted}


\item[source/Lib/CommonLib/dtrace_blockstatistics.cpp] All code for
  writing syntax elements is kept in this file in
  getAndStoreBlockStatistics. This function is called once for each
  CTU, after it has been en/decoded. The following macros have been
  defined to facilitate writing of block statistics:
\begin{minted}{c++}
DTRACE_BLOCK_SCALAR(ctx,channel,cs_cu_pu,stat_type,val)   
DTRACE_BLOCK_SCALAR_CHROMA(ctx,channel,cs_cu_pu,stat_type,val)
DTRACE_BLOCK_VECTOR(ctx,channel,cu_pu,stat_type,v_x,v_y)    
DTRACE_BLOCK_AFFINETF(ctx,channel,pu,stat_type,v_x0,v_y0,v_x1,v_y1,v_x2,v_y2) 
\end{minted}

An example:
\begin{minted}{c++}
DTRACE_BLOCK_SCALAR(g_trace_ctx, D_BLOCK_STATISTICS_ALL, 
  cu, GetBlockStatisticName(BlockStatistic::PredMode), cu.predMode);
\end{minted}


\item[Block statistics for debugging] The statistics can also be used
  to write out other data, not just syntax elements. Add your
  statistics to dtrace_blockstatistics.h. Where it should be used the
  following headers have to be included:
\begin{minted}{c++}
#include "dtrace_next.h"
#include "dtrace_blockstatistics.h"
\end{minted}
\end{description}


\section{Coding tool statistics extension for green metadata}
\label{sec:green-meta-sei}

The encoder and the decoder include an extension that generates coding tool statistics. In the encoder, the extension calculates green metadata for encoding green SEI messages, in particular complexity metrics for decoder power reduction. The decoder extension can be used for cross-checking the correct functionality of the encoding extension.

The output of the analyzer can be enabled with the option 'GMFA' (Green Metadata Feature Analyzer). The output file name is specified with the flag 'GMFAFile'.
Furthermore, it is possible to generate a framewise analysis with the option 'GMFAFramewise'. The output file is generated in a Matlab-readable way. Here is an example for both the encoder and the decoder:

\begin{minted}{bash}
bin/EncoderAppStatic -b bitstream.vvc  --GMFA 1 --GMFAFramewise=1 --GMFAFile="bitstream.m" [encoder options]

bin/DecoderAppStatic -b bitstream.vvc  --GMFA 1 --GMFAFramewise=1 --GMFAFile="bitstream.m" [decoder options]
\end{minted}

The output file contains arrays with statistics on the use of coding tools on block-size level. As an example, the number of intra-coded blocks is returned as:

\begin{minted}{bash}
n.intraBlocks = [...
0  0  0  0  0  0  0  0 ;...
0  0  0  16412  2142  54  0  0 ;...
0  0  41654  41906  9780  665  27  0 ;...
0  0  23494  22855  8641  906  26  0 ;...
0  0  4670  4797  4030  1215  60  0 ;...
0  0  433  507  881  1104  84  0 ;...
0  0  38  48  43  122  131  0 ;...
0  0  0  0  0  0  0  0 ];
\end{minted}

The horizontal position indicates the base-2 logarithm of the block width (1, 2, 4, \dots, 128) and the vertical position that of the block height. In this example, the bit stream contains $16{,}412$ intra-coded blocks of size $8\times 2$.

More information can be found in JVET-P0085 and \url{https://doi.org/10.1109/ICIP40778.2020.9190840}.


\section{Using the stream merge tool}
\label{sec:stream-merge-tool}

The StreamMergeApp tool takes multiple single-layer (single nuh_layer_id) bitstreams
as inputs and merges them into a multi-layer bitstream by interleaving the Picture Units
from the input single-layer bitstreams. During the merge, the tool assigns a new unique
nuh_layer_id for each input bitstream as well as unique parameter set identifiers for each layer.
The decoder can then specify which layer is to be decoded through the command line option ``-p nuh_layer_id''.

Some current limitations of the tool:
\begin{itemize}
\item All input bitstreams are single layer and thus all layers in the output bitstream are independent layers.
\item Each layer in the output bitstream is arbitrarily put in an individual OLS and is also an output layer.
\item All parameter sets from the input bitstreams are treated as different parameter sets. There is thus no parameter set sharing in the output bitstream.
\item The slice header in the input bitstreams shall contain no picture header structure and no ALF information.
\end{itemize}


\subsection{Usage}
\label{sec:stream-merge-usage}

\begin{minted}{bash}
StreamMergeApp 	<bitstream1> <bitstream2> [<bitstream3> ...] <outfile>
\end{minted}

The command line options bitstreamX specify the file names of the input single-layer
bitstreams. At least two input bitstreams need to be specified. The merged multi-layer
bitstream will be stored into the outfile.


\section{Using the subpicture merge tool}
\label{sec:subpicture-merge-tool}

The SubpicMergeApp takes multiple bitstreams as inputs and merges them into one output bitstream where each input bitstream forms a single subpicture. Subpicture layout and input bitstreams are defined in a subpicture list file. Sequence parameter set and picture parameter set are modified accordingly based on the layout.

The merge tool has an alternative mode for merging YUV files. This mode can be used for verifying YUV output after decoding merged bitstream.

If the VTM encoder is used for encoding input bitstreams, it is recommended that ALF, CCALF, joint chroma coding, LMCS and AMaxBT are disabled. This prevents those tools having parameters with different values in different subpictures, which would result in the merged bitstream being non-conformant.

\subsection{Usage}
\label{sec:subpicture-merge-usage}

\begin{minted}{bash}
SubpicMergeApp [-l <subpiclistfile>] [-o <outfile>] [-m 0|1] [-yuv 0|1] [-d <bitdepth>] [-f 400|420|422|444]
\end{minted}

\begin{table}[ht]
\footnotesize
\centering
\begin{tabular}{lp{0.5\textwidth}}
\hline
 \thead{Option} &
 \thead{Description} \\
\hline
\texttt{--help} & Prints parameter usage. \\
\texttt{-l} & File containing list of input pictures to be merged \\
\texttt{-o} & Output file name \\
\texttt{-m} & Enable mixed NALU type bitstreams merging \\
\texttt{-yuv} & Perform YUV merging (instead of bitstream merging) \\
\texttt{-d} & Bitdepth for YUV merging \\
\texttt{-f} & Chroma format for YUV merging, 420 (default), 400, 422 or 444 \\
\hline
\end{tabular}
\end{table}

Format of the subpicture list file given with '-l' command is as follows:

\begin{minted}{bash}
subpic1_width  subpic1_height  subpic1_x  subpic1_y  subpic1_bitstream_file
subpic2_width  subpic2_height  subpic2_x  subpic2_y  subpic2_bitstream_file
...
subpicN_width  subpicN_height  subpicN_x  subpicN_y  subpicN_bitstream_file
\end{minted}

Coordinates x and y define the location of top-left corner of the subpicture in the merged picture. Parameters width, height, x and y are given in units of luma samples.

YUV merging uses the same file format, the only difference being that a YUV file name is supplied instead of a bitstream file name.

\end{document}