\documentclass[a4paper,11pt]{jvetdoc}

\usepackage{geometry}[2010/02/12]

\usepackage{hyperref}
\hypersetup{colorlinks=true,
            linkcolor=black,      % color of internal links (change box color with linkbordercolor)
            citecolor=black,      % color of links to bibliography
            filecolor=black,      % color of file links
            urlcolor=blue}
\usepackage{color,soul}

\usepackage[position=bottom]{subfig}
\captionsetup[subfloat]{position=top}
\usepackage{multirow}
\usepackage{dcolumn}
\newcolumntype{.}{D{.}{.}{-1}}
\usepackage{colortbl}
\usepackage{makecell}
\usepackage{longtable}
\usepackage{array}
\usepackage{algorithm2e}
\usepackage{amsmath}

\urlstyle{same}

% code highlighting
\usepackage{minted,xcolor}
\definecolor{bggray}{gray}{0.95}
\setminted{
bgcolor=bggray,
xleftmargin=3ex,
breaklines=true,
fontsize=\footnotesize}


\usepackage[strings]{underscore}
\usepackage{csquotes}
\MakeOuterQuote{"}
\EnableQuotes

% Placeholders for table cells without a shorthand / default value.
% Both expand to nothing.
\newcommand{\None}{}
\newcommand{\NotSet}{}
\makeatletter
% \opt@collect{<accumulator>}{<text>}
% Globally stores <text> in <accumulator> if the accumulator is currently
% empty; otherwise globally appends " \\ <text>", so that several entries
% given for one table row end up on separate lines.
\newcommand{\opt@collect}[2]{%
  \ifx#1\@empty
    \gdef#1{#2}%
  \else
    \g@addto@macro#1{ \\ #2}%
  \fi}
% User-level collectors: one accumulator per table column.
\newcommand{\Option}[1]{\opt@collect\optOption{#1}}
\newcommand{\ShortOption}[1]{\opt@collect\optShortOption{#1}}
\newcommand{\Default}[1]{\opt@collect\optDefault{#1}}
% Globally resets all three accumulators to empty.
\newcommand{\clearOptions}{%
  \gdef\optOption{}%
  \gdef\optShortOption{}%
  \gdef\optDefault{}%
}
\makeatother
% OptionTable: multi-page (longtable) parameter table with four columns:
% Option, Shorthand, Default and Description.
%   #1 - caption text (repeated with " (Continued)" on continuation pages).
% Rows are written as
%   \Option{..} & \ShortOption{..} & \Default{..} & description \\
% The \Option/\ShortOption/\Default calls only accumulate text; the
% accumulated text is typeset by the <{...} hooks of the column
% specification below, and the last column's hook clears the accumulators
% again for the next row.
\newenvironment{OptionTable}[1]{%
	\footnotesize
	\def\arraystretch{1.8}
	\clearOptions
	% Column hooks: each <{\makecell...} prints the accumulated entries of
	% that column as a stacked cell. The shorthand column is wrapped in
	% \texttt via \bgroup ... \egroup.
	\begin{longtable}{l<{\makecell[tl]{\optOption}}%
	                  >{\texttt\bgroup}l<{\makecell[tl]{\optShortOption}\egroup}%
	                  c<{\makecell[tc]{\optDefault}}%
	                  >{\def\arraystretch{1.0}}p{0.5\textwidth}<{\clearOptions}}
	\caption{#1} \\
	% \kill row: contributes to the column width calculation only.
	\hspace*{12em}&&\hspace*{8em}&\kill
	\hline
	 \thead{Option} &
	% NOTE(review): the \egroup ... \bgroup pair appears intended to balance
	% the \bgroup/\egroup inserted by the shorthand column hooks so that the
	% heading is not affected by them - confirm.
	 \egroup\thead{Shorthand}\bgroup &
	 \thead{Default} &
	 \thead{Description} \\
	\hline
	\endfirsthead
	% Head used on continuation pages.
	\caption[]{#1 (Continued)} \\
	\hspace*{12em}&&\hspace*{8em}&\kill
	\hline
	 \thead{Option} &
	 \egroup\thead{Shorthand}\bgroup &
	 \thead{Default} &
	 \thead{Description} \\
	\hline
	\endhead
	% Foot used on every page except the last one.
	 \multicolumn{4}{r}{Continued...}\\
	 \hline
	\endfoot
	% Foot used on the last page.
	 \hline
	\endlastfoot
}{%
	\hline
	\end{longtable}
}

% OptionTableNoShorthand: multi-page (longtable) parameter table with three
% columns: Option, Default and Description.
%   #1 - caption text (repeated with " (Continued)" on continuation pages).
%   #2 - label key attached to the caption of the first page.
% Rows are written as
%   \Option{..} & \Default{..} & description \\
% \Option/\Default only accumulate text; the <{...} column hooks typeset the
% accumulated entries and the last column's hook clears them for the next row.
\newenvironment{OptionTableNoShorthand}[2]{%
	\scriptsize
	\def\arraystretch{1.8}
	\clearOptions
	\begin{longtable}{l<{\makecell[tl]{\optOption}}%
	                  c<{\makecell[tc]{\optDefault}}%
	                  >{\def\arraystretch{1.0}}p{0.5\textwidth}<{\clearOptions}}
	\caption{#1} \label{#2} \\
	% \kill row: contributes to the column width calculation only.
	\hspace*{12em}&\hspace*{8em}&\kill
	\hline
	 \thead{Option} &
	 \thead{Default} &
	 \thead{Description} \\
	\hline
	\endfirsthead
	% Head used on continuation pages.
	\caption[]{#1 (Continued)} \\
	\hspace*{12em}&\hspace*{8em}&\kill
	\hline
	 \thead{Option} &
	 \thead{Default} &
	 \thead{Description} \\
	\hline
	\endhead
	% Foot used on every page except the last one.
	 \multicolumn{3}{r}{Continued...}\\
	 \hline
	\endfoot
	% Foot used on the last page.
	 \hline
	\endlastfoot
}{%
	\hline
	\end{longtable}
}

% SEIListTable: multi-page (longtable) table listing SEI messages with three
% columns: SEI Number, SEI Name and the table number of the corresponding
% encoder controls.
%   #1 - caption text (repeated with " (Continued)" on continuation pages).
% The column specification reuses the \optOption / \optDefault accumulators
% (filled by \Option / \Default) for the first two columns; the last
% column's hook clears them after each row.
\newenvironment{SEIListTable}[1]{%
	\scriptsize
	\def\arraystretch{1.8}
	\clearOptions
	\begin{longtable}{c<{\makecell[tl]{\optOption}}%
	                  l<{\makecell[tc]{\optDefault}}%
	                  >{\def\arraystretch{1.0}}p{0.3\textwidth}<{\clearOptions}}
	\caption{#1} \\
	% \kill row: contributes to the column width calculation only.
	\hspace*{12em}&\hspace*{8em}&\kill
	\hline
	 \thead{SEI Number} &
	 \thead{SEI Name} &
	 \thead{Table number of encoder controls, if available} \\
	\hline
	\endfirsthead
	% Head used on continuation pages.
	\caption[]{#1 (Continued)} \\
	\hspace*{12em}&\hspace*{8em}&\kill
	\hline
	 \thead{SEI Number} &
	 \thead{SEI Name} &
	 \thead{Table number of encoder controls, if available} \\
	\hline
	\endhead
	% Foot used on every page except the last one.
	 \multicolumn{3}{r}{Continued...}\\
	 \hline
	\endfoot
	% Foot used on the last page.
	 \hline
	\endlastfoot
}{%
	\hline
	\end{longtable}
}

% MacroTable: multi-page (longtable) table with three plain columns:
% Option, Default and Description.
%   #1 - caption text (repeated with " (Continued)" on continuation pages).
% Unlike the option tables above, the column specification has no hooks, so
% cell contents are written directly. \clearOptions is still called on
% entry, and the closing rule comes from the \endlastfoot block only.
\newenvironment{MacroTable}[1]{%
	\scriptsize
	\def\arraystretch{1.3}
	\clearOptions
	\begin{longtable}{lcp{0.5\textwidth}}
	 \caption{#1} \\
	%\hspace*{12em}&&\hspace*{8em}&\kill
	 \hline
	  \thead{Option} &
	  \thead{Default} &
	  \thead{Description} \\
	 \hline
	\endfirsthead
	% Head used on continuation pages.
	 \caption[]{#1 (Continued)} \\
	 \hline
	  \thead{Option} &
	  \thead{Default} &
	  \thead{Description} \\
	 \hline
	\endhead
	% Foot used on every page except the last one.
	 \multicolumn{3}{r}{Continued...}\\
	 \hline
	\endfoot
	% Foot used on the last page.
	 \hline
	\endlastfoot
}{%
	\end{longtable}
}

\title{VTM Software Manual}
\author{%
	Frank Bossen
	\email{frank@bossentech.com}
	\and
	David Flynn
	\and
	Xiang Li
	\email{xlxiangli@google.com}
	\and
	Karl Sharman
	\email{karl.sharman@eu.sony.com}
	\and
	Karsten S\"uhring
	\email{karsten.suehring@hhi.fraunhofer.de}
}

\jvetmeeting{}
\jvetdocnum{Software Manual}
\jvetdocstatus{Software AHG working document}
\jvetdocpurpose{Information}
\jvetdocsource{AHG chairs}

\begin{document}
\maketitle
\begin{abstract}
This document is a user manual describing usage of the VTM reference software
for the VVC project. It applies to version 23.7 of the software.
\end{abstract}

\tableofcontents
\listoftables


\section{General Information}
Reference software is being made available to provide a reference
implementation of the VVC standard being developed by the Joint
Video Experts Team (JVET) regrouping experts from
ITU-T SG 16 and ISO/IEC SC29 WG5. One of the main goals of the
reference software is to provide a basis upon which to conduct
experiments in order to determine which coding tools provide desired
coding performance. It is not meant to be a particularly efficient
implementation of anything, and one may notice its apparent
unsuitability for a particular use. It should not be construed to be a
reflection of how complex a production-quality implementation of a
future VVC standard would be.

This document aims to provide guidance on the usage of the reference
software. It is widely suspected to be incomplete and suggestions for
improvements are welcome. Such suggestions and general inquiries may be
sent to the general JVET email reflector on
\url{https://lists.rwth-aachen.de/postorius/lists/jvet.lists.rwth-aachen.de/} 
(registration required).

\subsection*{Bug reporting}
Bugs should be reported on the issue tracker set up at:

\url{https://jvet.hhi.fraunhofer.de/trac/vvc/}

\section{Installation and compilation}
The software may be retrieved from the GitLab server located at:

\url{https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM}

Table~\ref{tab:project-files} lists the compiler environments and versions 
for which building the software is tested.

Note that the software makes use of C++14 language features, which may not
be available in older compilers.

\begin{table}[ht]
\caption{Supported compilers}
\label{tab:project-files}
\centering
\begin{tabular}{ll}
\hline
 \thead{Compiler environment} &
 \thead{Versions} \\
\hline
MS Visual Studio  & 2017 and 2019 \\
GCC               & 7.3, 8.3 and 9.3\\
Xcode/clang       & latest \\
\hline
\end{tabular}
\end{table}

By default the software is built as 64-bit binaries to be used on a 64-bit OS. 
This allows the software to use more than 2GB of RAM.

The software uses CMake to create platform-specific build files. 

\subsection {Dependencies}

For generating and verifying cryptographic signatures using digitally signed content SEI messages, OpenSSL is required in version 1.1.1 or greater.
Testing is performed on OpenSSL 3.
If OpenSSL is not found or the version is too low, only parsing of digitally signed content SEI messages will be available.

Detection of OpenSSL can be disabled using the CMake option "-DENABLE_SEARCH_OPENSSL=off".

\subsection {Build instructions for plain CMake (suggested)}

\textbf{Note:} A working CMake installation is required for building the software.

CMake generates configuration files for the compiler environment/development
environment on each platform. The following is a list of examples for Windows
(MS Visual Studio), macOS (Xcode) and Linux (make).

Open a command prompt on your system and change into the root directory
of this project.

Create a build directory in the root directory:
\begin{minted}{bash}
mkdir build 
\end{minted}
Use one of the following CMake commands, based on your platform. Feel free to change the 
commands to satisfy your needs.

\textbf{Windows Visual Studio 2015 64 Bit:}
\begin{minted}{bash}
cd build
cmake .. -G "Visual Studio 14 2015 Win64"
\end{minted}
Then open the generated solution file in MS Visual Studio.

\textbf{macOS Xcode:}
\begin{minted}{bash}
cd build
cmake .. -G "Xcode"
\end{minted}
Then open the generated work space in Xcode.

\textbf{Linux}

For generating Linux Release Makefile:
\begin{minted}{bash}
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release
\end{minted}
For generating Linux Debug Makefile:
\begin{minted}{bash}
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug
\end{minted}
Then type
\begin{minted}{bash}
make -j
\end{minted}
to build the software.

For more details, refer to the CMake documentation: \url{https://cmake.org/cmake/help/latest/}

\subsection {Build instructions for make}

\textbf{Note:}
The build instructions in this section require the make tool and Python
to be installed, which are part of usual Linux and macOS environments. 
See Section~\ref{windowsinstall} for installation instructions for Python
and GnuWin32 on Windows.

Open a command prompt on your system and change into the root directory
of this project.

To use the default system compiler simply call:
\begin{minted}{bash}
make all
\end{minted}
For MSYS2 and MinGW:
Open an MSYS MinGW 64-Bit terminal and change into the root directory
of this project.

Call:
\begin{minted}{bash}
make all toolset=gcc
\end{minted}

\subsection{Tool Installation on Windows}
\label{windowsinstall}

Download CMake: \url{http://www.cmake.org/} and install it.

Python and GnuWin32 are not mandatory, but they simplify the build process for the user.

\begin{table}[ht]
\footnotesize
\centering
\begin{tabular}{ll}
\hline
Python     &    \url{https://www.python.org/downloads/release/python-371/} \\
GnuWin32   &    \url{https://sourceforge.net/projects/getgnuwin32/files/getgnuwin32/0.6.30/GetGnuWin32-0.6.3.exe/download} \\
\hline
\end{tabular}
\end{table}

To use MinGW, install MSYS2:
\url{http://repo.msys2.org/distrib/msys2-x86_64-latest.exe}

Installation instructions:
\url{https://www.msys2.org/}

Install the needed toolchains:
\begin{minted}{bash}
pacman -S --needed base-devel mingw-w64-i686-toolchain mingw-w64-x86_64-toolchain git subversion mingw-w64-i686-cmake mingw-w64-x86_64-cmake
\end{minted}

%%%%
%%%%
%%%%
\section{Using the encoder}

\begin{minted}{bash}
EncoderApp [--help] [-li -c config.cfg] [-li --parameter=value]
\end{minted}

\begin{table}[ht]
\footnotesize
\centering
\begin{tabular}{lp{0.5\textwidth}}
\hline
 \thead{Option} &
 \thead{Description} \\
\hline
\texttt{--help} & Prints parameter usage. \\
\texttt{-li} & Applies only to the next configuration file or command line parameter and defines the encoding options of the i-th layer. If omitted, the configuration file or parameter applies to all layers. \\
\texttt{-c} & Defines configuration file to use.  Multiple configuration files
     may be used with repeated \texttt{-c} options. \\
\texttt{--}\emph{parameter}\texttt{=}\emph{value}
    & Assigns value to a given parameter as further described below.
      Some parameters are also supported by shorthand
      "\texttt{-}\emph{opt}~\emph{value}". These are shown in brackets after the parameter
      name in the tables of this document. \\
\hline
\end{tabular}
\end{table}

Sample configuration files are provided in the cfg/ folder.
Parameters are defined by the last value encountered on the command line.
Therefore if a setting is set via a configuration file, and then a subsequent
command line parameter changes that same setting, the command line parameter
value will be used.

\subsection{GOP structure table}
\label{sec:gop-structure}
Defines the cyclic GOP structure that will be used repeatedly
throughout the sequence. The table should contain GOPSize lines,
named Frame1, Frame2, etc. The frames are listed in decoding
order, so Frame1 is the first frame in decoding order, Frame2 is
the second and so on. Among other things, the table specifies all
reference pictures kept by the decoder for each frame. This
includes pictures that are used for reference for the current
picture as well as pictures that will be used for reference in
the future. The encoder will not automatically calculate which
pictures have to be kept for future references, they must
be specified. Note that some specified reference frames for
pictures encoded in the very first GOP after an IDR frame might
not be available. This is handled automatically by the encoder,
so the reference pictures can be given in the GOP structure table
as if there were infinitely many identical GOPs before the
current one. Each line in the table contains the parameters used
for the corresponding frame, separated by whitespace:

\begin{itemize}
\item[]\textbf{Type}: Slice type, can be either I, P or B.

\item[]\textbf{POC}: Display order of the frame within a GOP, ranging
from 1 to GOPSize.

\item[]\textbf{QPOffset}: QP offset is added to the QP parameter to set
the final QP value to use for this frame.

\item[]\textbf{QPOffsetModelOff}: Offset parameter to a linear model to adjust final QP based on QP + QPoffset.

\item[]\textbf{QPOffsetModelScale}: Scale parameter to a linear model to adjust final QP based on QP + QPoffset.

\item[]\textbf{SliceCbQPOffset}: The slice-level Cb QP offset.

\item[]\textbf{SliceCrQPOffset}: The slice-level Cr QP offset.

\item[]\textbf{QPFactor}: Weight used during rate distortion
optimization. Higher values mean lower quality and fewer bits. The typical
range is between
0.3 and 1.

\item[]\textbf{tcOffsetDiv2}: An in-loop deblocking filter parameter for the luma component. tcOffsetDiv2
is added to the base parameter DeblockingFilterTcOffset_div2 to set the final tc_offset_div2
parameter for this picture signalled in the slice segment header. The final
value of tc_offset_div2 shall be an integer number in the range $-12..12$.

\item[]\textbf{betaOffsetDiv2}: An in-loop deblocking filter parameter for the luma component. betaOffsetDiv2
is added to the base parameter DeblockingFilterBetaOffset_div2 to set the final beta_offset_div2
parameter for this picture signalled in the slice segment header. The final
value of beta_offset_div2 shall be an integer number in the range $-12..12$.

\item[]\textbf{CbTcOffsetDiv2}: An in-loop deblocking filter parameter for Cb component, CbTcOffsetDiv2 
is added to the base parameter DeblockingFilterCbTcOffset_div2 to set the final tc_offset_div2 
parameter for this picture signalled in the slice segment header. The final 
value of tc_offset_div2 shall be an integer number in the range $-12..12$.

\item[]\textbf{CbBetaOffsetDiv2}: An in-loop deblocking filter parameter for Cb component, CbBetaOffsetDiv2 
is added to the base parameter DeblockingFilterCbBetaOffset_div2 to set the final beta_offset_div2 
parameter for this picture signalled in the slice segment header. The final 
value of beta_offset_div2 shall be an integer number in the range $-12..12$.

\item[]\textbf{CrTcOffsetDiv2}: An in-loop deblocking filter parameter for Cr component, CrTcOffsetDiv2 
is added to the base parameter DeblockingFilterCrTcOffset_div2 to set the final tc_offset_div2 
parameter for this picture signalled in the slice segment header. The final 
value of tc_offset_div2 shall be an integer number in the range $-12..12$.

\item[]\textbf{CrBetaOffsetDiv2}: An in-loop deblocking filter parameter for Cr component, CrBetaOffsetDiv2 
is added to the base parameter DeblockingFilterCrBetaOffset_div2 to set the final beta_offset_div2 
parameter for this picture signalled in the slice segment header. The final 
value of beta_offset_div2 shall be an integer number in the range $-12..12$.

\item[]\textbf{temporal_id}: Temporal layer of the frame. A frame cannot
predict from a frame with a higher temporal ID. If a frame with a higher
temporal ID is listed among a frame's reference pictures, it is
not used, but is kept for possible use in future frames.

\item[]\textbf{num_ref_pics_active_L0}: Number of reference pictures in list L0
that are used during coding.

\item[]\textbf{num_ref_pics_L0}: Size of reference picture list L0.
This includes pictures that are used for reference for the
current picture as well as pictures that will be used for reference in
the future.

\item[]\textbf{reference_pictures_L0}: A space-separated list of
num_ref_pics_L0 integers, specifying the POC of the reference pictures
kept, relative to the POC of the current frame. The pictures shall be
listed in their intended order in L0.
Note that any picture supplied in neither this list nor the L1 list will be discarded and
is therefore not available as a reference picture later.

When ExplicitILRP is true, a layer-specific GOP structure configuration can be provided (using the -li encoder
parameter), in which inter-layer reference pictures are specified using a 0.x syntax, with 0 meaning
zero POC difference and x being the layer of the reference picture.

\item[]\textbf{num_ref_pics_active_L1}: Number of reference pictures in list L1
that are used during coding.

\item[]\textbf{num_ref_pics_L1}: Size of reference picture list L1.
This includes pictures that are used for reference for the
current picture as well as pictures that will be used for reference in
the future.

\item[]\textbf{reference_pictures_L1}: A space-separated list of
num_ref_pics_L1 integers, specifying the POC of the reference pictures
kept, relative to the POC of the current frame. The pictures shall be
listed in their intended order in L1.
Note that any picture supplied in neither this list nor the L0 list will be discarded and
is therefore not available as a reference picture later.

When ExplicitILRP is true, the same syntax as the one described for L0 can be used to specify
inter-layer reference pictures.

For example, consider the coding structure of Figure~\ref{fig:gop-example}.
This coding structure is of size 4. The pictures are listed in decoding
order. Frame1 shall therefore describe the picture with $\textrm{POC}=4$. It
references picture 0, and therefore has 4 (the POC difference) as its reference picture entry.
Similarly, Frame2 has a POC of 2, and since it references pictures 0 and
4, its reference pictures are listed as \verb|2 -2|. Frame3 is a special
case: even though it only references pictures with POC 0 and 2, it also
needs to include the picture with POC 4, which must be kept in order to
be used as a reference picture in the future. Note that the picture with POC 4 can be
included in either L0 or L1. The reference picture list for Frame3 therefore becomes \verb|1 -1 -3|.
Frame4 has a POC of 3 and its list of reference pictures is \verb|1 -1|.
\end{itemize}

\begin{figure}[h]
\caption{A GOP structure}
\label{fig:gop-example}
\centering
\includegraphics[width=0.7\textwidth]{figures/gop-structure-example}
\end{figure}

In order to specify this to the encoder, the parameters in
Table~\ref{tab:gop-example} could be used.

\begin{table}[ht]
\footnotesize
\caption{GOP structure example}
\label{tab:gop-example}
\centering
\begin{tabular}{lrrrr}
\hline
 \thead{} &
 \thead{Frame1} &
 \thead{Frame2} &
 \thead{Frame3} &
 \thead{Frame4} \\
\hline
Type                   &   P  &    B   &         B   &       B \\
POC                    &   4  &    2   &         1   &       3 \\
QPOffset               &   1  &    2   &         3   &       3 \\
QPOffsetModelOff       & 0.0  &  0.0   &       0.0   &     0.0 \\
QPOffsetModelScale     & 0.0  &  0.0   &       0.0   &     0.0 \\
SliceCbQPOffset        &   0  &    0   &         0   &       0 \\
SliceCrQPOffset        &   0  &    0   &         0   &       0 \\
QPFactor               & 0.5  &  0.5   &       0.5   &     0.5 \\
tcOffsetDiv2           &   0  &    1   &         2   &       2 \\  
betaOffsetDiv2         &   0  &    0   &         0   &       0 \\
CbTcOffsetDiv2         &   0  &    0   &         0   &       0 \\  
CbBetaOffsetDiv2       &   0  &    0   &         0   &       0 \\
CrTcOffsetDiv2         &   0  &    0   &         0   &       0 \\  
CrBetaOffsetDiv2       &   0  &    0   &         0   &       0 \\
temporal_id            &   0  &    1   &         2   &       2 \\
num_ref_pics_active_L0 &   1  &    1   &         1   &       1 \\
num_ref_pics_L0        &   1  &    1   &         1   &       1 \\
reference_pictures_L0  &   4  &    2   &         1   &       1 \\
num_ref_pics_active_L1 &   0  &    1   &         1   &       1 \\
num_ref_pics_L1        &   0  &    1   &         2   &       1 \\
reference_pictures_L1  &      & $-$2   & $-$1 $-$3   &    $-$1 \\
\hline
\end{tabular}
\end{table}

Here, the frames used for prediction have been given higher
quality by assigning a lower QP offset. Also, the non-reference
frames have been marked as belonging to a higher temporal layer,
to make it possible to decode only every other frame. Note: each
line should contain information for one frame, so this
configuration would be specified as:

\begin{verbatim}
Frame1: P 4 1 0.0 0.0 0 0 0.5 0 0 0 0 0 0 0 1 1  4 1 1  4
Frame2: B 2 2 0.0 0.0 0 0 0.5 1 0 0 0 0 0 1 1 1  2 1 1 -2
Frame3: B 1 3 0.0 0.0 0 0 0.5 2 0 0 0 0 0 2 1 1  1 1 2 -1 -3
Frame4: B 3 3 0.0 0.0 0 0 0.5 2 0 0 0 0 0 2 1 1  1 1 1 -1
\end{verbatim}




%%%%
%%%%
%%%%
\newgeometry{tmargin=1.6cm,lmargin=1cm,rmargin=1cm,bmargin=1in,nohead}
\subsection{Encoder parameters}

%%
%% File, I/O and source parameters
%%
Shorthand alternatives for the parameter that can be used on the command line are shown in brackets after the parameter name.

\begin{OptionTableNoShorthand}{File, I/O and source parameters.}{tab:fileIO}
\Option{InputFile (-i)} &
%\ShortOption{-i} &
\Default{\NotSet} &
Specifies the input video file. If the file extension is Y4M, the picture width, picture height, input bit depth, chroma format and frame rate read from the Y4M file override the values given in the configuration file and on the command line.

Video data must be in a raw 4:2:0 or 4:2:2 planar format, a 4:4:4 planar format (Y$'$CbCr, RGB or GBR), or a raw 4:0:0 format.

Note: When the bit depth of samples is larger than 8, each sample is encoded in
2 bytes (little endian, LSB-justified).
\\

\Option{BitstreamFile (-b)} &
%\ShortOption{-b} &
\Default{\NotSet} &
Specifies the output coded bit stream file.
\\

\Option{ReconFile (-o)} &
%\ShortOption{-o} &
\Default{\NotSet} &
Specifies the output locally reconstructed video file. If more than one layer is encoded (i.e.\ MaxLayers $>$ 1), a reconstructed file is written for each layer and the layer index is added as a suffix to ReconFile. If one or more dots exist in the file name, the layer id is added before the last dot, e.g.\ `reconst.yuv' becomes `reconst0.yuv' for layer id 0, and `reconst' becomes `reconst0'. If the file extension is Y4M, the picture width, picture height, bit depth, chroma format and frame rate of the current encoding are written to the Y4M file.
\\

\Option{SourceWidth (-wdt)}%
\Option{SourceHeight (-hgt)} &
%\ShortOption{-wdt}%
%\ShortOption{-hgt} &
\Default{0}%
\Default{0} &
Specifies the width and height of the input video in luma samples.
\\


\Option{SourceScalingRatioHor}%
\Option{SourceScalingRatioVer} &
\Default{1.0}%
\Default{1.0} &
Specifies the scaling ratios to apply in the horizontal and vertical directions to the pictures read from the input video file.
Note: The SourceWidth and SourceHeight are multiplied by these scaling factors. This option is useful for spatial scalability in a multi layer scenario to use enhancement layer source when base layer source is not available. 
\\


\Option{InputBitDepth}
 &
%\ShortOption{\None} &
\Default{8} &
Specifies the bit depth of the input video.
\\

\Option{MSBExtendedBitDepth} &
%\ShortOption{\None} &
\Default{0} &
Extends the input video by adding MSBs of value 0. When 0, no extension is applied and the InputBitDepth is used.

The MSBExtendedBitDepth becomes the effective file InputBitDepth for subsequent processing.
\\

\Option{InternalBitDepth} &
%\ShortOption{\None} &
\Default{0} &
Specifies the bit depth used for coding. When 0, the setting defaults to the
value of the MSBExtendedBitDepth.

If the input video has a bit depth different from InternalBitDepth, it is
automatically converted by:
\begin{displaymath}
\left\lfloor
 \frac{\mathrm{Pel} \cdot 2^{\mathrm{InternalBitDepth}}}{
     2^{\mathrm{MSBExtendedBitDepth}}}
\right\rfloor
\end{displaymath}

Note: The effect of this option is as if the input video is externally
converted to the MSBExtendedBitDepth and then to the InternalBitDepth
and then coded with this value as InputBitDepth. The codec has no
notion of different bit depths.
\\

\Option{OutputBitDepth} &
%\ShortOption{\None} &
\Default{0} &
Specifies the bit depth of the output locally reconstructed video file.
When 0, the setting defaults to the value of InternalBitDepth.
Note: This option has no effect on the decoding process.
\\

\Option{InputBitDepthC}%
\Option{MSBExtendedBitDepthC}%
\Option{OutputBitDepthC} &
%\ShortOption{\None} &
\Default{0}%
\Default{0}%
\Default{0} &
Specifies the various bit-depths for chroma components.  These only need
to be specified if non-equal luma and chroma bit-depth processing is
required. When 0, the setting defaults to the corresponding non-Chroma value.
\\

\Option{InputColourSpaceConvert} &
%\ShortOption{\None} &
\Default{\NotSet} &
The colour space conversion to apply to input video. Permitted values are:
\par
\begin{tabular}{lp{0.3\textwidth}}
  UNCHANGED & No colour space conversion is applied \\
  YCbCrToYCrCb & Swap the second and third components \\
  YCbCrtoYYY & Set the second and third components to the values in the first \\
  RGBtoGBR & Reorder the three components \\
\end{tabular}
\par
If no value is specified, no colour space conversion is applied. The list may eventually also include RGB to YCbCr or YCgCo conversions.
\\

\Option{SNRInternalColourSpace} &
%\ShortOption{\None} &
\Default{false} &
When this is set true, then no colour space conversion is applied prior to PSNR calculation, otherwise the inverse of InputColourSpaceConvert is applied.
\\

\Option{OutputInternalColourSpace} &
%\ShortOption{\None} &
\Default{false} &
When this is set true, then no colour space conversion is applied to the reconstructed video, otherwise the inverse of InputColourSpaceConvert is applied.
\\

\Option{InputChromaFormat} &
%\ShortOption{\None} &
\Default{420} &
Specifies the chroma format used in the input file. Permitted values (depending on the profile) are 400, 420, 422 or 444.
\\

\Option{ChromaFormatIDC (-cf)} &
%\ShortOption{-cf} &
\Default{0} &
Specifies the chroma format to use for processing. Permitted values (depending on the profile) are 400, 420, 422 or 444; the value of 0 indicates that the value of InputChromaFormat should be used instead.
\\

\Option{MSEBasedSequencePSNR} &
%\ShortOption{\None} &
\Default{false} &
When 0, the PSNR output is a linear average of the frame PSNRs; when 1, additional PSNRs are output which are formed from the average MSE of all the frames. The latter is useful when coding near-losslessly, where occasional frames become lossless.
\\

\Option{PrintFrameMSE} &
%\ShortOption{\None} &
\Default{false} &
When 1, the Mean Square Error (MSE) values of each frame will also be output alongside the default PSNR values.
\\

\Option{PrintSequenceMSE} &
%\ShortOption{\None} &
\Default{false} &
When 1, the Mean Square Error (MSE) values of the entire sequence will also be output alongside the default PSNR values.
\\

\Option{PrintWPSNR} &
%\ShortOption{\None} &
\Default{false} &
When 1, weighted PSNR (wPSNR) values of the entire sequence will also be output.
\\

\Option{PrintHighPrecEncTime} &
%\ShortOption{\None} &
\Default{false} &
When 1, prints per-frame encoding time in floating-point format. Otherwise prints an integer number of seconds.
\\

\Option{PrintRefLayerMetrics} &
%\ShortOption{\None} &
\Default{false} &
When 1, PSNR between current layer and the first reference layer (rescaled to the current layer size if needed) of the entire sequence will also be output. Only the first reference layer is processed for this metric.
\\


\Option{SummaryOutFilename} &
%\ShortOption{\None} &
\Default{\NotSet} &
Filename to use for producing summary output file. If empty, do not produce a file.
\\

\Option{SummaryPicFilenameBase} &
%\ShortOption{\None} &
\Default{\NotSet} &
Base filename to use for producing summary picture output files. The actual filenames used will have I.txt, P.txt and B.txt appended. If empty, do not produce a file.
\\

\Option{SummaryVerboseness} &
%\ShortOption{\None} &
\Default{0} &
Specifies the level of the verboseness of the text output.
\\

\Option{CabacZeroWordPaddingEnabled} &
%\ShortOption{\None} &
\Default{false} &
When 1, CABAC zero word padding will be enabled. This is currently not the default value for the setting.
\\

\Option{ConformanceWindowMode} &
%\ShortOption{\None} &
\Default{0} &
Specifies how the parameters related to the conformance window are interpreted (cropping/padding).
The following modes are available:
\par
\begin{tabular}{cp{0.43\textwidth}}
0 & No cropping / padding \\
1 & Automatic padding to the next minimum CU size \\
2 & Padding according to parameters HorizontalPadding and VerticalPadding \\
3 & Cropping according to parameters ConfWinLeft, ConfWinRight, ConfWinTop and ConfWinBottom \\
\end{tabular}
\\

\Option{HorizontalPadding (-pdx)}%
\Option{VerticalPadding (-pdy)} &
%\ShortOption{-pdx}%
%\ShortOption{-pdy} &
\Default{0} &
Specifies the horizontal and vertical padding to be applied to the input
video in luma samples when ConformanceWindowMode is 2.  Must be a multiple of
the chroma resolution (e.g. a multiple of two for 4:2:0).
\\

\Option{ConfWinLeft}%
\Option{ConfWinRight}%
\Option{ConfWinTop}%
\Option{ConfWinBottom} &
%\ShortOption{\None} &
\Default{0} &
Specifies the horizontal and vertical cropping to be applied to the
input video in luma samples when ConformanceWindowMode is 3.
Must be a multiple of the chroma resolution (e.g. a multiple of
two for 4:2:0).
\\

\Option{ScalingWindow} &
\Default{0} &
Enable scaling window.
\\

\Option{ScalWinLeft (-swl)}%
\Option{ScalWinRight (-swr)}%
\Option{ScalWinTop (-swt)}%
\Option{ScalWinBottom (-swb)} &
\Default{0} &
Specifies the horizontal and vertical offset for the scaling window.
Must be a multiple of the chroma resolution (e.g. a multiple of two for 4:2:0).
\\

\Option{FrameRate (-fr)} &
%\ShortOption{-fr} &
\Default{0} &
Specifies the frame rate of the input video. A frame rate may be specified by two numbers
such as 30000:1001 to define a non-integer value (e.g., 29.97).

Note: This option affects the reported bit rates.
\\

\Option{FrameSkip (-fs)} &
%\ShortOption{-fs} &
\Default{0} &
Specifies a number of frames to skip at beginning of input video file.
\\

\Option{FramesToBeEncoded (-f)} &
%\ShortOption{-f} &
\Default{0} &
Specifies the number of frames to be encoded (see note regarding TemporalSubsampleRatio). When 0, all frames are coded.
\\

\Option{TemporalSubsampleRatio (-ts)} &
%\ShortOption{-fs} &
\Default{1} &
Temporally subsamples the input video sequence. A value of $N$ will skip $(N-1)$ frames of input video after each coded input video frame. Note the FramesToBeEncoded does not account for the temporal skipping of frames, which will reduce the number of frames encoded accordingly. The reported bit rates will be reduced and VUI information is scaled so as to present the video at the correct speed. The minimum and default value is 1.
\\

\Option{FieldCoding} &
%\ShortOption{\None} &
\Default{false} &
When 1, indicates that field-based coding is to be applied.
\\

\Option{TopFieldFirst (-Tff)} &
%\ShortOption{\None} &
\Default{0} &
Indicates the order of the fields packed into the input frame. When 1, the top field is temporally first.
\\

\Option{ClipInputVideoToRec709Range} &
%\ShortOption{\None} &
\Default{0} &
If 1 then clip input video to the Rec. 709 Range on loading when InternalBitDepth is less than MSBExtendedBitDepth.
\\

\Option{ClipOutputVideoToRec709Range} &
%\ShortOption{\None} &
\Default{0} &
If 1 then clip output video to the Rec. 709 Range on saving when OutputBitDepth is less than InternalBitDepth.
\\

\Option{EfficientFieldIRAPEnabled} &
%\ShortOption{\None} &
\Default{1} &
Enable to code fields in a specific, potentially more efficient, order.
\\

\Option{HarmonizeGopFirstFieldCoupleEnabled} &
%\ShortOption{\None} &
\Default{1} &
Enables harmonization of Gop first field couple.
\\

\Option{AccessUnitDelimiter} &
%\ShortOption{\None} &
\Default{false} &
Add Access Unit Delimiter NAL units between all Access Units.
\\

\Option{EnablePictureHeaderInSliceHeader} &
%\ShortOption{\None} &
\Default{1} &
Enable Picture Header to be signalled in Slice Header when encoding with single slice per picture.
\\

\Option{RPR} &
%\ShortOption{\None} &
\Default{true} &
Specifies the value of sps_ref_pic_resampling_enabled_flag.
\\

\Option{ScalingRatioHor} &
%\ShortOption{\None} &
\Default{1.0} &
Scaling ratio in the horizontal direction for reference picture resampling. When GOPBasedRPR is true and no ratio is specified, the ratio is set to 2.0.
\\

\Option{ScalingRatioVer} &
%\ShortOption{\None} &
\Default{1.0} &
Scaling ratio in the vertical direction for reference picture resampling. When GOPBasedRPR is true and no ratio is specified, the ratio is set to 2.0.
\\

\Option{GOPBasedRPR} &
%\ShortOption{\None} &
\Default{false} &
Enables a decision to encode the pictures in a GOP either in full resolution or in one of three downscaled resolutions (the defaults are $1/2$, $2/3$ and $4/5$ in both dimensions).
The first picture in the GOP is rescaled to half resolution and then upscaled to full resolution. The luma PSNR of the rescaled picture relative to the source picture is compared with the
PSNR threshold for the respective resolution: $(\mathrm{PsnrThresholdRPR} - (\mathrm{QP} - 37) \cdot 0.5) < \mathrm{upscaledPSNR}$.
The smallest resolution whose PSNR is above the threshold is selected.
\\

\Option{GOPBasedRPRQPTh} &
%\ShortOption{\None} &
\Default{32} &
QP threshold for GOP-based RPR: GOP-based RPR is invoked only when $\mathrm{QP} \geq \mathrm{GOPBasedRPRQPTh}$.
\\

\Option{ScalingRatioHor2} &
%\ShortOption{\None} &
\Default{1.5} &
Scaling ratio in the horizontal direction for GOP-based RPR ($2/3$).
\\

\Option{ScalingRatioVer2} &
%\ShortOption{\None} &
\Default{1.5} &
Scaling ratio in the vertical direction for GOP-based RPR ($2/3$).
\\

\Option{ScalingRatioHor3} &
%\ShortOption{\None} &
\Default{1.25} &
Scaling ratio in the horizontal direction for GOP-based RPR ($4/5$).
\\

\Option{ScalingRatioVer3} &