Newer
Older
\documentclass[a4paper,11pt]{jvetdoc}
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
% Page layout (used by \newgeometry further down in the document).
\usepackage{geometry}[2010/02/12]
% Hyperlinks: coloured text instead of boxes; only URLs are shown in blue.
\usepackage{hyperref}
\hypersetup{colorlinks=true,
linkcolor=black, % color of internal links (change box color with linkbordercolor)
citecolor=black, % color of links to bibliography
filecolor=black, % color of file links
urlcolor=blue}
\usepackage{color,soul}
% Sub-floats; the \captionsetup line below overrides the package option for
% sub-float captions.
\usepackage[position=bottom]{subfig}
\captionsetup[subfloat]{position=top}
% Table helpers. The column type "." defined below is a dcolumn column that
% aligns numbers on the decimal point.
\usepackage{multirow}
\usepackage{dcolumn}
\newcolumntype{.}{D{.}{.}{-1}}
\usepackage{colortbl}
\usepackage{makecell}
\usepackage{longtable}
\usepackage{array}
\usepackage{algorithm2e}
\usepackage{amsmath}
% Typeset \url{...} in the surrounding text font.
\urlstyle{same}
% code highlighting
\usepackage{minted,xcolor}
\definecolor{bggray}{gray}{0.95}
\setminted{
bgcolor=bggray,
xleftmargin=3ex,
breaklines=true,
fontsize=\footnotesize}
% Allow "_" to be typed directly in text (parameter names such as
% tc_offset_div2 appear unescaped throughout the manual).
\usepackage[strings]{underscore}
% Make the straight double quote an active character that produces proper
% opening/closing quotation marks.
\usepackage{csquotes}
\MakeOuterQuote{"}
\EnableQuotes
% Placeholders for table cells that have no shorthand or no default value;
% both expand to nothing.
\newcommand\None{}
\newcommand\NotSet{}
\makeatletter
% \Option{<text>}, \ShortOption{<text>} and \Default{<text>} collect the
% option name(s), shorthand(s) and default value(s) of the current table row
% in the global macros \optOption, \optShortOption and \optDefault.
% The first call after a reset stores its argument; every further call appends
% " \\ <text>", so several entries end up on separate lines when the table
% environments typeset the accumulator inside a \makecell.
\newcommand{\Option}[1]{\ifx\optOption\@empty\gdef\optOption{#1}\else\g@addto@macro\optOption{ \\ #1}\fi}
\newcommand{\ShortOption}[1]{\ifx\optShortOption\@empty\gdef\optShortOption{#1}\else\g@addto@macro\optShortOption{ \\ #1}\fi}
\newcommand{\Default}[1]{\ifx\optDefault\@empty\gdef\optDefault{#1}\else\g@addto@macro\optDefault{ \\ #1}\fi}
% \clearOptions globally resets all three accumulators to empty.
\newcommand{\clearOptions}{\gdef\optOption{}\gdef\optShortOption{}\gdef\optDefault{}}
\makeatother
% Define the accumulators once at load time. Without this, a \Option,
% \ShortOption or \Default used before any table has called \clearOptions
% would compare an undefined macro against \@empty, take the "append" branch
% and fail inside \g@addto@macro.
\clearOptions
% OptionTable{<caption>}: multi-page (longtable) parameter table with the
% columns Option, Shorthand, Default and Description.
% Rows are written as
%   \Option{...} & \ShortOption{...} & \Default{...} & description text \\
% The first three cells only fill the accumulators; the text is printed by the
% column templates below.
\newenvironment{OptionTable}[1]{%
\footnotesize
\def\arraystretch{1.8}
\clearOptions
% Column templates: the "<{...}" part of columns 1-3 prints the collected
% \optOption / \optShortOption / \optDefault in a top-aligned \makecell.
% Column 2 is wrapped in \texttt\bgroup ... \egroup. Column 4 resets
% \arraystretch for the description text and clears the accumulators at the
% end of the row.
\begin{longtable}{l<{\makecell[tl]{\optOption}}%
>{\texttt\bgroup}l<{\makecell[tl]{\optShortOption}\egroup}%
c<{\makecell[tc]{\optDefault}}%
>{\def\arraystretch{1.0}}p{0.5\textwidth}<{\clearOptions}}
\caption{#1} \\
% \kill row: not printed, but used by longtable when computing column widths.
\hspace*{12em}&&\hspace*{8em}&\kill
\hline
\thead{Option} &
% \egroup ... \bgroup closes and re-opens the group started by the column's
% \texttt\bgroup prefix, presumably so the heading is not set in typewriter.
\egroup\thead{Shorthand}\bgroup &
\thead{Default} &
\thead{Description} \\
\hline
\endfirsthead
% Head for continuation pages; \caption[] keeps it out of the list of tables.
\caption[]{#1 (Continued)} \\
\hspace*{12em}&&\hspace*{8em}&\kill
\hline
\thead{Option} &
\egroup\thead{Shorthand}\bgroup &
\thead{Default} &
\thead{Description} \\
\hline
\endhead
% Foot printed on every page except the last.
\multicolumn{4}{r}{Continued...}\\
\hline
\endfoot
% Foot printed on the last page.
\hline
\endlastfoot
}{%
% NOTE(review): this \hline comes in addition to the \hline in \endlastfoot,
% so the table appears to end with two rules - confirm this is intended.
\hline
\end{longtable}
}
% OptionTableNoShorthand{<caption>}{<label>}: multi-page (longtable) parameter
% table with the columns Option, Default and Description.
% Rows are written as
%   \Option{...} & \Default{...} & description text \\
% #1 is the caption text, #2 the label attached to the first caption.
\newenvironment{OptionTableNoShorthand}[2]{%
\scriptsize
\def\arraystretch{1.8}
\clearOptions
% Column templates: columns 1 and 2 print the collected \optOption and
% \optDefault in a top-aligned \makecell; column 3 resets \arraystretch for
% the description text and clears the accumulators at the end of the row.
\begin{longtable}{l<{\makecell[tl]{\optOption}}%
c<{\makecell[tc]{\optDefault}}%
>{\def\arraystretch{1.0}}p{0.5\textwidth}<{\clearOptions}}
\caption{#1} \label{#2} \\
% \kill row: not printed, but used by longtable when computing column widths.
\hspace*{12em}&\hspace*{8em}&\kill
\hline
\thead{Option} &
\thead{Default} &
\thead{Description} \\
\hline
\endfirsthead
% Head for continuation pages; \caption[] keeps it out of the list of tables.
\caption[]{#1 (Continued)} \\
\hspace*{12em}&\hspace*{8em}&\kill
\hline
\thead{Option} &
\thead{Default} &
\thead{Description} \\
\hline
\endhead
% Foot printed on every page except the last.
\multicolumn{3}{r}{Continued...}\\
\hline
\endfoot
% Foot printed on the last page.
\hline
\endlastfoot
}{%
% NOTE(review): this \hline comes in addition to the \hline in \endlastfoot,
% so the table appears to end with two rules - confirm this is intended.
\hline
\end{longtable}
}
% SEIListTable{<caption>}: multi-page (longtable) table listing SEI messages
% with the columns SEI Number, SEI Name and the table number of the related
% encoder controls. It reuses the option accumulators: column 1 prints
% \optOption and column 2 prints \optDefault.
\newenvironment{SEIListTable}[1]{%
\scriptsize
\def\arraystretch{1.8}
\clearOptions
% Column templates: column 1 is centred, column 2 left-aligned; column 3 is a
% paragraph column that resets \arraystretch and clears the accumulators at
% the end of the row.
\begin{longtable}{c<{\makecell[tl]{\optOption}}%
l<{\makecell[tc]{\optDefault}}%
>{\def\arraystretch{1.0}}p{0.3\textwidth}<{\clearOptions}}
\caption{#1} \\
% \kill row: not printed, but used by longtable when computing column widths.
\hspace*{12em}&\hspace*{8em}&\kill
\hline
\thead{SEI Number} &
\thead{SEI Name} &
\thead{Table number of encoder controls, if available} \\
\hline
\endfirsthead
% Head for continuation pages; \caption[] keeps it out of the list of tables.
\caption[]{#1 (Continued)} \\
\hspace*{12em}&\hspace*{8em}&\kill
\hline
\thead{SEI Number} &
\thead{SEI Name} &
\thead{Table number of encoder controls, if available} \\
\hline
\endhead
% Foot printed on every page except the last.
\multicolumn{3}{r}{Continued...}\\
\hline
\endfoot
% Foot printed on the last page.
\hline
\endlastfoot
}{%
% NOTE(review): this \hline comes in addition to the \hline in \endlastfoot,
% so the table appears to end with two rules - confirm this is intended.
\hline
\end{longtable}
}
% MacroTable{<caption>}: multi-page (longtable) table with the columns Option,
% Default and Description. Unlike the option tables it uses plain "l", "c" and
% "p" columns, so the cell text is written directly in each row.
\newenvironment{MacroTable}[1]{%
\scriptsize
\def\arraystretch{1.3}
% The accumulators are reset here although the column templates of this table
% do not print them.
\clearOptions
\begin{longtable}{lcp{0.5\textwidth}}
\caption{#1} \\
%\hspace*{12em}&&\hspace*{8em}&\kill
\hline
\thead{Option} &
\thead{Default} &
\thead{Description} \\
\hline
\endfirsthead
% Head for continuation pages; \caption[] keeps it out of the list of tables.
\caption[]{#1 (Continued)} \\
\hline
\thead{Option} &
\thead{Default} &
\thead{Description} \\
\hline
\endhead
% Foot printed on every page except the last.
\multicolumn{3}{r}{Continued...}\\
\hline
\endfoot
% Foot printed on the last page; it supplies the closing rule, so the end code
% below adds none.
\hline
\endlastfoot
}{%
\end{longtable}
}
\title{VTM Software Manual}
\author{%
Frank Bossen
\email{frank@bossentech.com}
\and
David Flynn
\and
Xiang Li
\and
Karl Sharman
\email{karl.sharman@eu.sony.com}
\and
Karsten S\"uhring
\email{karsten.suehring@hhi.fraunhofer.de}
}
\jvetmeeting{}
\jvetdocnum{Software Manual}
\jvetdocstatus{Software AHG working document}
\jvetdocpurpose{Information}
\jvetdocsource{AHG chairs}
\begin{document}
\maketitle
\begin{abstract}
This document is a user manual describing usage of the VTM reference software
for the VVC project. It applies to version 23.7 of the software.
\end{abstract}
\tableofcontents
\listoftables
\section{General Information}
Reference software is being made available to provide a reference
implementation of the VVC standard being developed by the Joint
Video Experts Team (JVET) regrouping experts from
ITU-T SG 16 and ISO/IEC SC29 WG5. One of the main goals of the
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
reference software is to provide a basis upon which to conduct
experiments in order to determine which coding tools provide desired
coding performance. It is not meant to be a particularly efficient
implementation of anything, and one may notice its apparent
unsuitability for a particular use. It should not be construed to be a
reflection of how complex a production-quality implementation of a
future VVC standard would be.
This document aims to provide guidance on the usage of the reference
software. It is widely suspected to be incomplete and suggestions for
improvements are welcome. Such suggestions and general inquiries may be
sent to the general JVET email reflector on
\url{https://lists.rwth-aachen.de/postorius/lists/jvet.lists.rwth-aachen.de/}
(registration required).
\subsection*{Bug reporting}
Bugs should be reported on the issue tracker set up at:
\url{https://jvet.hhi.fraunhofer.de/trac/vvc/}
\section{Installation and compilation}
The software may be retrieved from the GitLab server located at:
\url{https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM}
Table~\ref{tab:project-files} lists the compiler environments and versions
for which building the software is tested.
Note that the software makes use of C++14 language features, which may not
be available in older compilers.
\begin{table}[ht]
\caption{Supported compilers}
\label{tab:project-files}
\centering
\begin{tabular}{ll}
\hline
\thead{Compiler environment} &
\thead{Versions} \\
\hline
Xcode/clang & latest \\
\hline
\end{tabular}
\end{table}
By default the software is built as 64-bit binaries to be used on a 64-bit OS.
This allows the software to use more than 2GB of RAM.
The software uses CMake to create platform-specific build files.
\subsection {Dependencies}
For generating and verifying cryptographic signatures using digitally signed content SEI messages, OpenSSL is required in version 1.1.1 or greater.
Testing is performed on OpenSSL 3.
If OpenSSL is not found or the version is too low, only parsing of digitally signed content SEI messages will be available.
Detection of OpenSSL can be disabled using the CMake option "-DENABLE_SEARCH_OPENSSL=off".
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
\subsection {Build instructions for plain CMake (suggested)}
\textbf{Note:} A working CMake installation is required for building the software.
CMake generates configuration files for the compiler environment/development
environment on each platform. The following is a list of examples for Windows
(MS Visual Studio), macOS (Xcode) and Linux (make).
Open a command prompt on your system and change into the root directory
of this project.
Create a build directory in the root directory:
\begin{minted}{bash}
mkdir build
\end{minted}
Use one of the following CMake commands, based on your platform. Feel free to change the
commands to satisfy your needs.
\textbf{Windows Visual Studio 2015 64 Bit:}
\begin{minted}{bash}
cd build
cmake .. -G "Visual Studio 14 2015 Win64"
\end{minted}
Then open the generated solution file in MS Visual Studio.
\textbf{macOS Xcode:}
\begin{minted}{bash}
cd build
cmake .. -G "Xcode"
\end{minted}
Then open the generated work space in Xcode.
\textbf{Linux}
For generating Linux Release Makefile:
\begin{minted}{bash}
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release
\end{minted}
For generating Linux Debug Makefile:
\begin{minted}{bash}
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug
\end{minted}
Then type
\begin{minted}{bash}
make -j
\end{minted}
to build the software.
For more details, refer to the CMake documentation: \url{https://cmake.org/cmake/help/latest/}
\subsection {Build instructions for make}
\textbf{Note:}
The build instructions in this section require the make tool and Python
to be installed, which are part of usual Linux and macOS environments.
See Section~\ref{windowsinstall} for installation instructions for Python
and GnuWin32 on Windows.
Open a command prompt on your system and change into the root directory
of this project.
To use the default system compiler simply call:
\begin{minted}{bash}
make all
\end{minted}
For MSYS2 and MinGW:
Open an MSYS MinGW 64-Bit terminal and change into the root directory
of this project.
Call:
\begin{minted}{bash}
make all toolset=gcc
\end{minted}
\subsection{Tool Installation on Windows}
\label{windowsinstall}
Download CMake: \url{http://www.cmake.org/} and install it.
Python and GnuWin32 are not mandatory, but they simplify the build process for the user.
\begin{table}[ht]
\footnotesize
\centering
\begin{tabular}{ll}
\hline
Python & \url{https://www.python.org/downloads/release/python-371/} \\
GnuWin32 & \url{https://sourceforge.net/projects/getgnuwin32/files/getgnuwin32/0.6.30/GetGnuWin32-0.6.3.exe/download} \\
\hline
\end{tabular}
\end{table}
To use MinGW, install MSYS2:
\url{http://repo.msys2.org/distrib/msys2-x86_64-latest.exe}
Installation instructions:
\url{https://www.msys2.org/}
Install the needed toolchains:
\begin{minted}{bash}
pacman -S --needed base-devel mingw-w64-i686-toolchain mingw-w64-x86_64-toolchain git subversion mingw-w64-i686-cmake mingw-w64-x86_64-cmake
\end{minted}
%%%%
%%%%
%%%%
\section{Using the encoder}
\begin{minted}{bash}
EncoderApp [--help] [-li -c config.cfg] [-li --parameter=value]
\end{minted}
\begin{table}[ht]
\footnotesize
\centering
\begin{tabular}{lp{0.5\textwidth}}
\hline
\thead{Option} &
\thead{Description} \\
\hline
\texttt{--help} & Prints parameter usage. \\
\texttt{-li} & Applies to its next config file or command line parameter only to define i-th layer encoding option. If empty, the configuration file applies to all layers\\
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
\texttt{-c} & Defines configuration file to use. Multiple configuration files
may be used with repeated \texttt{-c} options. \\
\texttt{--}\emph{parameter}\texttt{=}\emph{value}
& Assigns value to a given parameter as further described below.
Some parameters are also supported by shorthand
"\texttt{--}\emph{opt}~\emph{value}". These are shown in brackets after the parameter
name in the tables of this document. \\
\hline
\end{tabular}
\end{table}
Sample configuration files are provided in the cfg/ folder.
Parameters are defined by the last value encountered on the command line.
Therefore if a setting is set via a configuration file, and then a subsequent
command line parameter changes that same setting, the command line parameter
value will be used.
\subsection{GOP structure table}
\label{sec:gop-structure}
Defines the cyclic GOP structure that will be used repeatedly
throughout the sequence. The table should contain GOPSize lines,
named Frame1, Frame2, etc. The frames are listed in decoding
order, so Frame1 is the first frame in decoding order, Frame2 is
the second and so on. Among other things, the table specifies all
reference pictures kept by the decoder for each frame. This
includes pictures that are used for reference for the current
picture as well as pictures that will be used for reference in
the future. The encoder will not automatically calculate which
pictures have to be kept for future references, they must
be specified. Note that some specified reference frames for
pictures encoded in the very first GOP after an IDR frame might
not be available. This is handled automatically by the encoder,
so the reference pictures can be given in the GOP structure table
as if there were infinitely many identical GOPs before the
current one. Each line in the table contains the parameters used
for the corresponding frame, separated by whitespace:
\begin{itemize}
\item[]\textbf{Type}: Slice type, can be either I, P or B.
\item[]\textbf{POC}: Display order of the frame within a GOP, ranging
from 1 to GOPSize.
\item[]\textbf{QPOffset}: QP offset is added to the QP parameter to set
the final QP value to use for this frame.
\item[]\textbf{QPOffsetModelOff}: Offset parameter of a linear model used to adjust the final QP based on QP + QPOffset.
\item[]\textbf{QPOffsetModelScale}: Scale parameter of a linear model used to adjust the final QP based on QP + QPOffset.
\item[]\textbf{SliceCbQPOffset}: The slice-level Cb QP offset.
\item[]\textbf{SliceCrQPOffset}: The slice-level Cr QP offset.
\item[]\textbf{QPFactor}: Weight used during rate distortion
optimization. Higher values mean lower quality and fewer bits. The typical
range is between 0.3 and 1.
\item[]\textbf{tcOffsetDiv2}: An in-loop deblocking filter parameter for luma component, tcOffsetDiv2
is added to the base parameter DeblockingFilterTcOffset_div2 to set the final tc_offset_div2
parameter for this picture signalled in the slice segment header. The final
value of tc_offset_div2 shall be an integer number in the range $-12..12$.
\item[]\textbf{betaOffsetDiv2}: An in-loop deblocking filter parameter for luma component, betaOffsetDiv2
is added to the base parameter DeblockingFilterBetaOffset_div2 to set the final beta_offset_div2
parameter for this picture signalled in the slice segment header. The final
value of beta_offset_div2 shall be an integer number in the range $-12..12$.
\item[]\textbf{CbTcOffsetDiv2}: An in-loop deblocking filter parameter for Cb component, CbTcOffsetDiv2
is added to the base parameter DeblockingFilterCbTcOffset_div2 to set the final tc_offset_div2
parameter for this picture signalled in the slice segment header. The final
value of tc_offset_div2 shall be an integer number in the range $-12..12$.
\item[]\textbf{CbBetaOffsetDiv2}: An in-loop deblocking filter parameter for Cb component, CbBetaOffsetDiv2
is added to the base parameter DeblockingFilterCbBetaOffset_div2 to set the final beta_offset_div2
parameter for this picture signalled in the slice segment header. The final
value of beta_offset_div2 shall be an integer number in the range $-12..12$.
\item[]\textbf{CrTcOffsetDiv2}: An in-loop deblocking filter parameter for Cr component, CrTcOffsetDiv2
is added to the base parameter DeblockingFilterCrTcOffset_div2 to set the final tc_offset_div2
parameter for this picture signalled in the slice segment header. The final
value of tc_offset_div2 shall be an integer number in the range $-12..12$.
\item[]\textbf{CrBetaOffsetDiv2}: An in-loop deblocking filter parameter for Cr component, CrBetaOffsetDiv2
is added to the base parameter DeblockingFilterCrBetaOffset_div2 to set the final beta_offset_div2
parameter for this picture signalled in the slice segment header. The final
value of beta_offset_div2 shall be an integer number in the range $-12..12$.
\item[]\textbf{temporal_id}: Temporal layer of the frame. A frame cannot
predict from a frame with a higher temporal ID. If a frame with a higher
temporal ID is listed among a frame's reference pictures, it is
not used, but is kept for possible use in future frames.
\item[]\textbf{num_ref_pics_active_L0}: Number of reference pictures in list L0
that are used during coding.
\item[]\textbf{num_ref_pics_L0}: Size of reference picture list L0.
This includes pictures that are used for reference for the
current picture as well as pictures that will be used for reference in
the future.
\item[]\textbf{reference_pictures_L0}: A space-separated list of
num_ref_pics_L0 integers, specifying the POC of the reference pictures
kept, relative to the POC of the current frame. The pictures shall be
listed in their intended order in L0.
Note that any pictures not supplied in this list and in the list of L1 will be discarded and
therefore not available as reference pictures later.
When ExplicitILRP is true, a layer-specific GOP structure configuration can be provided (using the -li encoder
parameter), in which inter-layer reference pictures are specified using a 0.x syntax, with 0 meaning
zero POC difference, and x is the layer of the reference picture.
\item[]\textbf{num_ref_pics_active_L1}: Number of reference pictures in list L1
that are used during coding.
\item[]\textbf{num_ref_pics_L1}: Size of reference picture list L1.
This includes pictures that are used for reference for the
current picture as well as pictures that will be used for reference in
the future.
\item[]\textbf{reference_pictures_L1}: A space-separated list of
num_ref_pics_L1 integers, specifying the POC of the reference pictures
kept, relative to the POC of the current frame. The pictures shall be
listed in their intended order in L1.
Note that any pictures not supplied in this list and in the list of L0 will be discarded and
therefore not available as reference pictures later.
When ExplicitILRP is true, the same syntax as the one described for L0 can be used to specify
inter-layer reference pictures.
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
For example, consider the coding structure of Figure~\ref{fig:gop-example}.
This coding structure is of size 4. The pictures are listed in decoding
order. Frame1 shall therefore describe picture with $\textrm{POC}=4$. It
references picture 0, and therefore has 4 as a reference picture.
Similarly, Frame2 has a POC of 2, and since it references pictures 0 and
4, its reference pictures are listed as \verb|2 -2|. Frame3 is a special
case: even though it only references pictures with POC 0 and 2, it also
needs to include the picture with POC 4, which must be kept in order to
be used as a reference picture in the future. Note that picture with POC 4 can be
included in the L0 or L1. The reference picture list for Frame3 therefore becomes \verb|1 -1 -3|.
Frame4 has a POC of 3 and its list of reference pictures is \verb|1 -1|.
\end{itemize}
\begin{figure}[h]
\caption{A GOP structure}
\label{fig:gop-example}
\centering
\includegraphics[width=0.7\textwidth]{figures/gop-structure-example}
\end{figure}
In order to specify this to the encoder, the parameters in
Table~\ref{tab:gop-example} could be used.
\begin{table}[ht]
\footnotesize
\caption{GOP structure example}
\label{tab:gop-example}
\centering
\begin{tabular}{lrrrr}
\hline
\thead{} &
\thead{Frame1} &
\thead{Frame2} &
\thead{Frame3} &
\thead{Frame4} \\
\hline
Type & P & B & B & B \\
POC & 4 & 2 & 1 & 3 \\
QPOffset & 1 & 2 & 3 & 3 \\
QPOffsetModelOff & 0.0 & 0.0 & 0.0 & 0.0 \\
QPOffsetModelScale & 0.0 & 0.0 & 0.0 & 0.0 \\
SliceCbQPOffset & 0 & 0 & 0 & 0 \\
SliceCrQPOffset & 0 & 0 & 0 & 0 \\
QPFactor & 0.5 & 0.5 & 0.5 & 0.5 \\
tcOffsetDiv2 & 0 & 1 & 2 & 2 \\
betaOffsetDiv2 & 0 & 0 & 0 & 0 \\
CbTcOffsetDiv2 & 0 & 0 & 0 & 0 \\
CbBetaOffsetDiv2 & 0 & 0 & 0 & 0 \\
CrTcOffsetDiv2 & 0 & 0 & 0 & 0 \\
CrBetaOffsetDiv2 & 0 & 0 & 0 & 0 \\
temporal_id & 0 & 1 & 2 & 2 \\
num_ref_pics_active_L0 & 1 & 1 & 1 & 1 \\
num_ref_pics_L0 & 1 & 1 & 1 & 1 \\
reference_pictures_L0 & 4 & 2 & 1 & 1 \\
num_ref_pics_active_L1 & 0 & 1 & 1 & 1 \\
num_ref_pics_L1 & 0 & 1 & 2 & 1 \\
reference_pictures_L1 & & $-$2 & $-$1 $-$3 & $-$1 \\
\hline
\end{tabular}
\end{table}
Here, the frames used for prediction have been given higher
quality by assigning a lower QP offset. Also, the non-reference
frames have been marked as belonging to a higher temporal layer,
to make it possible to decode only every other frame. Note: each
line should contain information for one frame, so this
configuration would be specified as:
\begin{verbatim}
Frame1: P 4 1 0.0 0.0 0 0 0.5 0 0 0 0 0 0 0 1 1 4 0 0
Frame2: B 2 2 0.0 0.0 0 0 0.5 1 0 0 0 0 0 1 1 1 2 1 1 -2
Frame3: B 1 3 0.0 0.0 0 0 0.5 2 0 0 0 0 0 2 1 1 1 1 2 -1 -3
Frame4: B 3 3 0.0 0.0 0 0 0.5 2 0 0 0 0 0 2 1 1 1 1 1 -1
\end{verbatim}
%%%%
%%%%
%%%%
\newgeometry{tmargin=1.6cm,lmargin=1cm,rmargin=1cm,bmargin=1in,nohead}
\subsection{Encoder parameters}
%%
%% File, I/O and source parameters
%%
Shorthand alternatives for the parameter that can be used on the command line are shown in brackets after the parameter name.
\begin{OptionTableNoShorthand}{File, I/O and source parameters.}{tab:fileIO}
\Option{InputFile (-i)} &
%\ShortOption{-i} &
\Default{\NotSet} &
Specifies the input video file. If the file extension is Y4M, picture width, picture height, input bitdepth, chroma format and frame rate from Y4M will override the input from cfg and command line options.
Video data must be in a raw 4:2:0, or 4:2:2 planar format, 4:4:4 planar format (Y$'$CbCr, RGB or GBR), or in a raw 4:0:0 format.
Note: When the bit depth of samples is larger than 8, each sample is encoded in
2 bytes (little endian, LSB-justified).
\\
\Option{BitstreamFile (-b)} &
%\ShortOption{-b} &
\Default{\NotSet} &
Specifies the output coded bit stream file.
\\
\Option{ReconFile (-o)} &
%\ShortOption{-o} &
\Default{\NotSet} &
Specifies the output locally reconstructed video file. If more than one layer is encoded (i.e. MaxLayers > 1), a reconstructed file is written for each layer and the layer index is added as suffix to ReconFile. If one or more dots exist in the file name, the layer id is added before the last dot, e.g. 'reconst.yuv' becomes 'reconst0.yuv' for layer id 0, 'reconst' becomes 'reconst0'. If the file extension is Y4M, picture width, picture height, bitdepth, chroma format and frame rate of the current encoding will be output to the Y4M file.
\\
\Option{SourceWidth (-wdt)}%
\Option{SourceHeight (-hgt)} &
%\ShortOption{-wdt}%
%\ShortOption{-hgt} &
\Default{0}%
\Default{0} &
Specifies the width and height of the input video in luma samples.
\\
\Option{SourceScalingRatioHor}%
\Option{SourceScalingRatioVer} &
\Default{1.0}%
\Default{1.0} &
Specifies a scaling ratio to apply in the horizontal and vertical directions to the pictures read from the input video file.
Note: The SourceWidth and SourceHeight are multiplied by these scaling factors. This option is useful for spatial scalability in a multi layer scenario to use enhancement layer source when base layer source is not available.
\\
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
\Option{InputBitDepth}
&
%\ShortOption{\None} &
\Default{8} &
Specifies the bit depth of the input video.
\\
\Option{MSBExtendedBitDepth} &
%\ShortOption{\None} &
\Default{0} &
Extends the input video by adding MSBs of value 0. When 0, no extension is applied and the InputBitDepth is used.
The MSBExtendedBitDepth becomes the effective file InputBitDepth for subsequent processing.
\\
\Option{InternalBitDepth} &
%\ShortOption{\None} &
\Default{0} &
Specifies the bit depth used for coding. When 0, the setting defaults to the
value of the MSBExtendedBitDepth.
If the input video is a different bit depth to InternalBitDepth, it is
automatically converted by:
\begin{displaymath}
\left\lfloor
\frac{\mathrm{Pel} * 2^{\mathrm{InternalBitDepth}}}{
2^{\mathrm{MSBExtendedBitDepth}}}
\right\rfloor
\end{displaymath}
Note: The effect of this option is as if the input video is externally
converted to the MSBExtendedBitDepth and then to the InternalBitDepth
and then coded with this value as InputBitDepth. The codec has no
notion of different bit depths.
\\
\Option{OutputBitDepth} &
%\ShortOption{\None} &
\Default{0} &
Specifies the bit depth of the output locally reconstructed video file.
When 0, the setting defaults to the value of InternalBitDepth.
Note: This option has no effect on the decoding process.
\\
\Option{InputBitDepthC}%
\Option{MSBExtendedBitDepthC}%
\Option{OutputBitDepthC} &
%\ShortOption{\None} &
\Default{0}%
\Default{0}%
\Default{0} &
Specifies the various bit-depths for chroma components. These only need
to be specified if non-equal luma and chroma bit-depth processing is
required. When 0, the setting defaults to the corresponding non-Chroma value.
\\
\Option{InputColourSpaceConvert} &
%\ShortOption{\None} &
\Default{\NotSet} &
The colour space conversion to apply to input video. Permitted values are:
\par
\begin{tabular}{lp{0.3\textwidth}}
UNCHANGED & No colour space conversion is applied \\
YCbCrtoYCrCb & Swap the second and third components \\
YCbCrtoYYY & Set the second and third components to the values in the first \\
RGBtoGBR & Reorder the three components \\
\end{tabular}
\par
If no value is specified, no colour space conversion is applied. The list may eventually also include RGB to YCbCr or YCgCo conversions.
\\
\Option{SNRInternalColourSpace} &
%\ShortOption{\None} &
\Default{false} &
When this is set true, then no colour space conversion is applied prior to PSNR calculation, otherwise the inverse of InputColourSpaceConvert is applied.
\\
\Option{OutputInternalColourSpace} &
%\ShortOption{\None} &
\Default{false} &
When this is set true, then no colour space conversion is applied to the reconstructed video, otherwise the inverse of InputColourSpaceConvert is applied.
\\
\Option{InputChromaFormat} &
%\ShortOption{\None} &
\Default{420} &
Specifies the chroma format used in the input file. Permitted values (depending on the profile) are 400, 420, 422 or 444.
\\
\Option{ChromaFormatIDC (-cf)} &
%\ShortOption{-cf} &
\Default{0} &
Specifies the chroma format to use for processing. Permitted values (depending on the profile) are 400, 420, 422 or 444; the value of 0 indicates that the value of InputChromaFormat should be used instead.
\\
\Option{MSEBasedSequencePSNR} &
%\ShortOption{\None} &
\Default{false} &
When 0, the PSNR output is a linear average of the frame PSNRs; when 1, additional PSNRs are output which are formed from the average MSE of all the frames. The latter is useful when coding near-losslessly, where occasional frames become lossless.
\\
\Option{PrintFrameMSE} &
%\ShortOption{\None} &
\Default{false} &
When 1, the Mean Square Error (MSE) values of each frame will also be output alongside the default PSNR values.
\\
\Option{PrintSequenceMSE} &
%\ShortOption{\None} &
\Default{false} &
When 1, the Mean Square Error (MSE) values of the entire sequence will also be output alongside the default PSNR values.
\\
\Option{PrintWPSNR} &
%\ShortOption{\None} &
\Default{false} &
When 1, weighted PSNR (wPSNR) values of the entire sequence will also be output.
\\
\Option{PrintHighPrecEncTime} &
%\ShortOption{\None} &
\Default{false} &
When 1, prints per-frame encoding time in floating-point format. Otherwise prints an integer number of seconds.
\\
\Option{PrintRefLayerMetrics} &
%\ShortOption{\None} &
\Default{false} &
When 1, PSNR between current layer and the first reference layer (rescaled to the current layer size if needed) of the entire sequence will also be output. Only the first reference layer is processed for this metric.
\\
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
\Option{SummaryOutFilename} &
%\ShortOption{\None} &
\Default{\NotSet} &
Filename to use for producing summary output file. If empty, do not produce a file.
\\
\Option{SummaryPicFilenameBase} &
%\ShortOption{\None} &
\Default{\NotSet} &
Base filename to use for producing summary picture output files. The actual filenames used will have I.txt, P.txt and B.txt appended. If empty, do not produce a file.
\\
\Option{SummaryVerboseness} &
%\ShortOption{\None} &
\Default{false} &
Specifies the level of the verboseness of the text output.
\\
\Option{CabacZeroWordPaddingEnabled} &
%\ShortOption{\None} &
\Default{false} &
When 1, CABAC zero word padding will be enabled. This is currently not the default value for the setting.
\\
\Option{ConformanceWindowMode} &
%\ShortOption{\None} &
\Default{1} &
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
Specifies how the parameters related to the conformance window are interpreted (cropping/padding).
The following modes are available:
\par
\begin{tabular}{cp{0.43\textwidth}}
0 & No cropping / padding \\
1 & Automatic padding to the next minimum CU size \\
2 & Padding according to parameters HorizontalPadding and VerticalPadding \\
3 & Cropping according to parameters ConfWinLeft, ConfWinRight, ConfWinTop and ConfWinBottom \\
\end{tabular}
\\
\Option{HorizontalPadding (-pdx)}%
\Option{VerticalPadding (-pdy)} &
%\ShortOption{-pdx}%
%\ShortOption{-pdy} &
\Default{0} &
Specifies the horizontal and vertical padding to be applied to the input
video in luma samples when ConformanceWindowMode is 2. Must be a multiple of
the chroma resolution (e.g. a multiple of two for 4:2:0).
\\
\Option{ConfWinLeft}%
\Option{ConfWinRight}%
\Option{ConfWinTop}%
\Option{ConfWinBottom} &
%\ShortOption{\None} &
\Default{0} &
Specifies the horizontal and vertical cropping to be applied to the
input video in luma samples when ConformanceWindowMode is 3.
Must be a multiple of the chroma resolution (e.g. a multiple of
two for 4:2:0).
\\
\Option{ScalingWindow} &
\Default{0} &
Enable scaling window.
\\
\Option{ScalWinLeft (-swl)}%
\Option{ScalWinRight (-swr)}%
\Option{ScalWinTop (-swt)}%
\Option{ScalWinBottom (-swb)} &
\Default{0} &
Specifies the horizontal and vertical offset for the scaling window.
Must be a multiple of the chroma resolution (e.g. a multiple of two for 4:2:0).
\\
\Option{FrameRate (-fr)} &
%\ShortOption{-fr} &
\Default{0} &
Specifies the frame rate of the input video. A frame rate may be specified by two numbers
such as 30000:1001 to define a non-integer value (e.g., 29.97).
Note: This option affects the reported bit rates.
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
\\
\Option{FrameSkip (-fs)} &
%\ShortOption{-fs} &
\Default{0} &
Specifies a number of frames to skip at beginning of input video file.
\\
\Option{FramesToBeEncoded (-f)} &
%\ShortOption{-f} &
\Default{0} &
Specifies the number of frames to be encoded (see note regarding TemporalSubsampleRatio). When 0, all frames are coded.
\\
\Option{TemporalSubsampleRatio (-ts)} &
%\ShortOption{-fs} &
\Default{1} &
Temporally subsamples the input video sequence. A value of $N$ will skip $(N-1)$ frames of input video after each coded input video frame. Note the FramesToBeEncoded does not account for the temporal skipping of frames, which will reduce the number of frames encoded accordingly. The reported bit rates will be reduced and VUI information is scaled so as to present the video at the correct speed. The minimum and default value is 1.
\\
\Option{FieldCoding} &
%\ShortOption{\None} &
\Default{false} &
When 1, indicates that field-based coding is to be applied.
\\
\Option{TopFieldFirst (-Tff)} &
%\ShortOption{\None} &
\Default{0} &
Indicates the order of the fields packed into the input frame. When 1, the top field is temporally first.
\\
\Option{ClipInputVideoToRec709Range} &
%\ShortOption{\None} &
\Default{0} &
If 1 then clip input video to the Rec. 709 Range on loading when InternalBitDepth is less than MSBExtendedBitDepth.
\\
\Option{ClipOutputVideoToRec709Range} &
%\ShortOption{\None} &
\Default{0} &
If 1 then clip output video to the Rec. 709 Range on saving when OutputBitDepth is less than InternalBitDepth.
\\
\Option{EfficientFieldIRAPEnabled} &
%\ShortOption{\None} &
\Default{1} &
Enable to code fields in a specific, potentially more efficient, order.
\\
\Option{HarmonizeGopFirstFieldCoupleEnabled} &
%\ShortOption{\None} &
\Default{1} &
Enables harmonization of the GOP first field couple.
\\
\Option{AccessUnitDelimiter} &
%\ShortOption{\None} &
\Default{false} &
Add Access Unit Delimiter NAL units between all Access Units.
\\
\Option{EnablePictureHeaderInSliceHeader} &
%\ShortOption{\None} &
\Default{1} &
Enable Picture Header to be signalled in Slice Header when encoding with single slice per picture.
\\
\Option{RPR} &
%\ShortOption{\None} &
\Default{true} &
Specifies the value of sps_ref_pic_resampling_enabled_flag.
\\
\Option{ScalingRatioHor} &
%\ShortOption{\None} &
\Default{1.0} &
Scaling ratio in horizontal direction for reference picture resampling. When GOPBasedRPR is true, the ratio is set to 2.0 unless a ratio is explicitly specified.
\\
\Option{ScalingRatioVer} &
%\ShortOption{\None} &
\Default{1.0} &
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
Scaling ratio in vertical direction for reference picture resampling. When GOPBasedRPR is true, the ratio is set to 2.0 unless a ratio is explicitly specified.
\\
\Option{GOPBasedRPR} &
%\ShortOption{\None} &
\Default{false} &
Enables decision to encode pictures in GOP in full resolution or one of three downscaled resolutions (default is $1/2$, $2/3$ and $4/5$ in both dimensions).
First picture in GOP is rescaled to half resolution and then upscaled to full resolution. The luma PSNR of the rescaled picture compared to the source picture is compared with
PSNR thresholds for respective resolution: $(\mathit{PsnrThresholdRPR} - (\mathit{QP} - 37) \cdot 0.5) < \mathit{upscaledPSNR}$.
The smallest resolution that has PSNR above the threshold is selected.
\\
\Option{GOPBasedRPRQPTh} &
%\ShortOption{\None} &
\Default{32} &
QP threshold for GOP-based RPR: GOP-based RPR is invoked when $\mathit{QP} \geq \mathit{GOPBasedRPRQPTh}$.
\\
\Option{ScalingRatioHor2} &
%\ShortOption{\None} &
\Default{1.5} &
Scaling ratio in horizontal direction for GOP based RPR ($2/3$).
\\
\Option{ScalingRatioVer2} &
%\ShortOption{\None} &
\Default{1.5} &
Scaling ratio in vertical direction for GOP based RPR ($2/3$).
\\
\Option{ScalingRatioHor3} &
%\ShortOption{\None} &
\Default{1.25} &
Scaling ratio in horizontal direction for GOP based RPR ($4/5$).
\\
\Option{ScalingRatioVer3} &