From 218ef7e6f80336d72ba2dae941ae60f367ec4247 Mon Sep 17 00:00:00 2001 From: Thierry Dumas <thierry.dumas@interdigital.com> Date: Tue, 7 Feb 2023 10:42:55 +0000 Subject: [PATCH] Update the documentation of the neural network-based intra prediction and a training script. --- README.md | 46 ++--------- .../training_intra_prediction/CMakeLists.txt | 14 ---- training/training_intra_prediction/README.md | 82 +++++++++++++++++-- .../generating_data_intra.py | 2 + .../run_setups_vtm.sh | 38 --------- .../run_setups_vtm_regular.sh | 13 +++ .../run_setups_vtm_search.sh | 11 +++ .../run_setups_vtm_tm_inf.sh | 11 +++ 8 files changed, 118 insertions(+), 99 deletions(-) delete mode 100644 training/training_intra_prediction/run_setups_vtm.sh create mode 100644 training/training_intra_prediction/run_setups_vtm_regular.sh create mode 100644 training/training_intra_prediction/run_setups_vtm_search.sh create mode 100644 training/training_intra_prediction/run_setups_vtm_tm_inf.sh diff --git a/README.md b/README.md index 60beddecfa..a702cb37a1 100644 --- a/README.md +++ b/README.md @@ -411,43 +411,11 @@ To specify model paths, use e.g. following command lines. Note that model paths Low-complexity version of the neural network-based intra prediction mode ------------------------------------------------------------------------ - -Below, it is assumed that the current directory is the root directory of the VTM-11-NNVC repository. - -1. Initialize and update SADL if not already done. - -```sh -git submodule init -git submodule update -``` - -2. Download the models of the low-complexity version of the neural network-based intra prediction mode. - -```sh -curl --user "ri-nextgen_r:0Hl5X4yy" -o "models_ee1_2023_january.tgz" "sftp://sftp.interdigital.com/JVET-EE1-3.2/models_ee1_2023_january.tgz" -``` - -If the above command does not work, perhaps because of the `curl` version, please try the command below. 
- -```sh -sftp ri-nextgen_r@sftp:JVET-EE1-3.2/models_ee1_2023_january.tgz ./ -``` - -using "0Hl5X4yy" as password. - -3. Uncompress the obtained "models_ee1_2023_january.tgz". - -```sh -tar -xvf models_ee1_2023_january.tgz && mv models_ee2_2023_january models_intra_prediction -``` - -4. Rate-distortion tests with the low-complexity version of the neural network-based intra prediction mode in 16-bit signed integer. - * VTM-11-NNVC can be compiled as usual, i.e. without any additional macro passed as argument to `cmake`. - * To run VTM-11-NNVC with the low-complexity version of the neural network-based intra prediction mode, the neural networks being in 16-bit signed integer, please provide the following two arguments to the encoder and the decoder: - * --DescriptionPairHeightWidthPathToGraphOutput=4,4,graph_output_4_4_int16.sadl;4,8,graph_output_4_8_int16.sadl;4,16,graph_output_4_16_int16.sadl;4,32,graph_output_4_32_int16.sadl;8,8,graph_output_8_8_int16.sadl;8,16,graph_output_8_16_int16.sadl;16,16,graph_output_16_16_int16.sadl; - * --PrefixAbsolutePathsToGraphsOutput=models_intra_prediction - -Note that, in 4., `--PrefixAbsolutePathsToGraphsOutput` gives the encoder and the decoder the path -to the directory storing the neural network models, i.e. the files ".sadl". - +When building VTM-11-NNVC, if the macro `JVET_AB0149_INTRA_PRED` is equal to 1 (default), the low-complexity version of the NN-based +intra prediction mode is activated. By default, the encoder and decoder parameter `PrefixAbsolutePathsToGraphsOutput` is equal to +"models/intra" as it is assumed that the VTM-11-NNVC encoder/decoder executable is run from the root directory of the project. 
If +the VTM-11-NNVC encoder/decoder executable is run from a directory different from the root directory of the project, please add +the following argument when running the VTM-11-NNVC encoder/decoder executable: `--PrefixAbsolutePathsToGraphsOutput=path_to_directory_models_intra`, +where `path_to_directory_models_intra` is the path to the directory "models/intra" relative to the directory from which the +VTM-11-NNVC encoder/decoder executable is run. diff --git a/training/training_intra_prediction/CMakeLists.txt b/training/training_intra_prediction/CMakeLists.txt index ef78d8173b..34ed05a076 100644 --- a/training/training_intra_prediction/CMakeLists.txt +++ b/training/training_intra_prediction/CMakeLists.txt @@ -142,27 +142,13 @@ if(USE_SEARCH) if(USE_TM_INF) message(FATAL_ERROR "When `USE_SEARCH` is 1, `USE_TM_INF` has to be 0.") endif() - add_compile_definitions(JVET_AB0149_INTRA_PRED=1) - add_compile_definitions(JVET_AB0149_SEARCH_PNN=1) - add_compile_definitions(JVET_AB0149_TM_INF_PNN=0) - add_compile_definitions(NN_FIXED_POINT_IMPLEMENTATION=0) - add_compile_definitions(SPARSE_SUPPORT=1) set(TAG_SUFFIX "/search") elseif(USE_TM_INF) if(USE_SEARCH) message(FATAL_ERROR "When `USE_TM_INF` is 1, `USE_SEARCH` has to be 0.") endif() - add_compile_definitions(JVET_AB0149_INTRA_PRED=1) - add_compile_definitions(JVET_AB0149_SEARCH_PNN=0) - add_compile_definitions(JVET_AB0149_TM_INF_PNN=1) - add_compile_definitions(NN_FIXED_POINT_IMPLEMENTATION=0) - add_compile_definitions(SPARSE_SUPPORT=1) set(TAG_SUFFIX "/tm_inf") else() - add_compile_definitions(JVET_AB0149_INTRA_PRED=0) - add_compile_definitions(JVET_AB0149_SEARCH_PNN=0) - add_compile_definitions(JVET_AB0149_TM_INF_PNN=0) - add_compile_definitions(NN_FIXED_POINT_IMPLEMENTATION=1) set(TAG_SUFFIX "/regular") endif() if(MSVC) diff --git a/training/training_intra_prediction/README.md b/training/training_intra_prediction/README.md index 47be48e6d8..cf610de16f 100644 --- a/training/training_intra_prediction/README.md
+++ b/training/training_intra_prediction/README.md @@ -12,6 +12,13 @@ It is assumed that the current directory is the root directory of the VTM-11-NNV ## I. Setups +If the SADL submodule has not been initialized yet, please run the following commands. + +```sh +git submodule init +git submodule update +``` + The first setup consists in building three different versions of VTM-11-NNVC. The first version, stored in the directory at "training/training_intra_prediction/bin/regular", corresponds to VTM-11-NNVC without Filter-Set, without the neural network-based intra prediction mode. It is used to generate @@ -23,15 +30,68 @@ at "training/training_intra_prediction/bin/tm_inf", corresponds to VTM-11-NNVC w in floats, with the neural network-based intra prediction mode in floats. It is used for the final rate-distortion tests. -In the file at "training/training_intra_prediction/run_setups_vtm.sh", please set the two variables -`PATH_TO_GPLUSPLUS` and `PATH_TO_GCC` following the associated comments. Then, - ```sh -git submodule init -git submodule update cd training/training_intra_prediction -chmod 755 run_setups_vtm.sh -./run_setups_vtm.sh +``` + +### I.1. Building the first version of VTM-11-NNVC + +In "../../source/Lib/CommonLib/TypeDef.h", please set the first six macros as follows. + +```sh +#define JVET_AB0149_ACTIVATE_WRITER_BLOCK_STATS 1 +#define JVET_AB0149_INTRA_PRED 0 +#define JVET_AB0149_SEARCH_PNN 0 +#define JVET_AB0149_TM_INF_PNN 0 +#define NN_FIXED_POINT_IMPLEMENTATION 0 +#define SPARSE_SUPPORT 0 +``` + +Then, + +```sh +chmod 755 run_setups_vtm_regular.sh +./run_setups_vtm_regular.sh +``` + +### I.2. Building the second version of VTM-11-NNVC + +In "../../source/Lib/CommonLib/TypeDef.h", please set the first six macros as follows.
+ +```sh +#define JVET_AB0149_ACTIVATE_WRITER_BLOCK_STATS 1 +#define JVET_AB0149_INTRA_PRED 1 +#define JVET_AB0149_SEARCH_PNN 1 +#define JVET_AB0149_TM_INF_PNN 0 +#define NN_FIXED_POINT_IMPLEMENTATION 0 +#define SPARSE_SUPPORT 1 +``` + +Then, + +```sh +chmod 755 run_setups_vtm_search.sh +./run_setups_vtm_search.sh +``` + +### I.3. Building the third version of VTM-11-NNVC + +In "../../source/Lib/CommonLib/TypeDef.h", please set the first six macros as follows. + +```sh +#define JVET_AB0149_ACTIVATE_WRITER_BLOCK_STATS 1 +#define JVET_AB0149_INTRA_PRED 1 +#define JVET_AB0149_SEARCH_PNN 0 +#define JVET_AB0149_TM_INF_PNN 1 +#define NN_FIXED_POINT_IMPLEMENTATION 0 +#define SPARSE_SUPPORT 1 +``` + +Then, + +```sh +chmod 755 run_setups_vtm_tm_inf.sh +./run_setups_vtm_tm_inf.sh ``` ## II. Dispatching the paths to raw data @@ -231,6 +291,12 @@ The argument `--is_last_cycle` indicates that, at the end of the current trainin The neural network graphs used by the low-complexity version of the neural network-based intra prediction mode in VTM-11-NNVC are now stored in the directory at "../../models_intra_prediction". Please use these neural network graphs for the rate-distortion tests of VTM-11-NNVC with the low-complexity version of the neural network-based -intra prediction mode, the neural networks being in floats. +intra prediction mode, the neural networks being in floats. Please use the VTM-11-NNVC encoder and decoder executables +stored in the directory at "bin/tm_inf" for these rate-distortion tests. 
+ +Given the above explanation, when running either the VTM-11-NNVC encoder executable or the VTM-11-NNVC decoder executable, +the following arguments must be specified: `--PrefixAbsolutePathsToGraphsOutput=models_intra_prediction` and +`--DescriptionPairHeightWidthPathToGraphOutput=4,4,target_4_4/float/graph_output.pb;4,8,target_4_8/float/graph_output.pb;4,16,target_4_16/float/graph_output.pb;4,32,target_4_32/float/graph_output.pb;8,8,target_8_8/float/graph_output.pb;8,16,target_8_16/float/graph_output.pb;16,16,target_16_16/float/graph_output.pb;`. +Here, it is assumed that the two executables are run from the root directory of the VTM-11-NNVC project. diff --git a/training/training_intra_prediction/generating_data_intra.py b/training/training_intra_prediction/generating_data_intra.py index 53a71c404f..60b9daf155 100644 --- a/training/training_intra_prediction/generating_data_intra.py +++ b/training/training_intra_prediction/generating_data_intra.py @@ -336,6 +336,7 @@ class GeneratorDataPart(object): args_encoding = [dict_picked['path_to_exe_encoder'],'-c',dict_picked['path_to_cfg'],'-i',dict_chr_own['pbfe'],'-b',dict_picked['path_to_bitstream'],'-wdt',str(width_video),'-hgt',str(height_video),'--InputBitDepth={}'.format(input_bit_depth),'--OutputBitDepth={}'.format(input_bit_depth),'--InputChromaFormat={}'.format(input_chroma_format),'--SEIDecodedPictureHash=1','--ReconFile=','--FramesToBeEncoded={}'.format(nb_frames),'--FrameSkip={}'.format(nb_fixed),'--QP={}'.format(numpy.random.choice(dict_picked['qps_int']).item())] if dict_picked['descr_stacked_up']: args_encoding.append('--DescriptionPairHeightWidthPathToGraphOutput={}'.format(dict_picked['descr_stacked_up'])) + args_encoding.append('--PrefixAbsolutePathsToGraphsOutput=') if pcetx: args_encoding += ['-c', pcetx] else: @@ -352,6 +353,7 @@ class GeneratorDataPart(object): args_decoding = 
[dict_picked['path_to_exe_decoder'],'-b',dict_picked['path_to_bitstream'],'-o',dict_picked['path_provided_2'],'--OutputBitDepth={}'.format(input_bit_depth)] if dict_picked['descr_stacked_up']: args_decoding.append('--DescriptionPairHeightWidthPathToGraphOutput={}'.format(dict_picked['descr_stacked_up'])) + args_decoding.append('--PrefixAbsolutePathsToGraphsOutput=') if dict_picked['path_provided_1']: args_decoding.append('--PathToQuadtreeRecords={}'.format(dict_picked['path_provided_1'])) if self._are_csts_mapgsym: diff --git a/training/training_intra_prediction/run_setups_vtm.sh b/training/training_intra_prediction/run_setups_vtm.sh deleted file mode 100644 index ce5a61fe83..0000000000 --- a/training/training_intra_prediction/run_setups_vtm.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -############ Parameters to be set ################################# - -# Path to the G++ executable. -PATH_TO_GPLUSPLUS="" - -# Path to the GCC executable. -PATH_TO_GCC="" - -################################################################### - -cd ../.. -mkdir build -cd build - -# Step 0: VTM-11-NNVC without Filter-Set, without the neural network-based intra prediction mode is built (for writing training data). -mkdir regular -cd regular -cmake3 ../../training/training_intra_prediction -DCMAKE_CXX_COMPILER=${PATH_TO_GPLUSPLUS} -DCMAKE_C_COMPILER=${PATH_TO_GCC} -DCMAKE_CXX_FLAGS_RELEASE="-O3 -DNDEBUG" -make -j 8 -cd .. - -# Step 1: VTM-11-NNVC without Filter-Set, with the neural network-based intra prediction mode in floating-point is built (for writing training data). -mkdir search -cd search -cmake3 ../../training/training_intra_prediction -DCMAKE_CXX_COMPILER=${PATH_TO_GPLUSPLUS} -DCMAKE_C_COMPILER=${PATH_TO_GCC} -DCMAKE_CXX_FLAGS_RELEASE="-O3 -DNDEBUG" -DUSE_SEARCH=1 -make -j 8 -cd .. - -# Step 2: VTM-11-NNVC without Filter-Set, with the neural network-based intra prediction mode in floating-point is built (for final rate-distortion tests). 
-mkdir tm_inf -cd tm_inf -cmake3 ../../training/training_intra_prediction -DCMAKE_CXX_COMPILER=${PATH_TO_GPLUSPLUS} -DCMAKE_C_COMPILER=${PATH_TO_GCC} -DCMAKE_CXX_FLAGS_RELEASE="-O3 -DNDEBUG" -DUSE_TM_INF=1 -make -j 8 -cd ../.. - - diff --git a/training/training_intra_prediction/run_setups_vtm_regular.sh b/training/training_intra_prediction/run_setups_vtm_regular.sh new file mode 100644 index 0000000000..c6d619080c --- /dev/null +++ b/training/training_intra_prediction/run_setups_vtm_regular.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +cd ../.. +mkdir build +cd build + +# Step 0: VTM-11-NNVC without Filter-Set, without the neural network-based intra prediction mode is built (for writing training data). +mkdir regular +cd regular +cmake ../../training/training_intra_prediction -DCMAKE_CXX_FLAGS_RELEASE="-O3 -DNDEBUG" +make -j 8 +cd ../.. + diff --git a/training/training_intra_prediction/run_setups_vtm_search.sh b/training/training_intra_prediction/run_setups_vtm_search.sh new file mode 100644 index 0000000000..f369a2b5c9 --- /dev/null +++ b/training/training_intra_prediction/run_setups_vtm_search.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +cd ../../build + +# Step 1: VTM-11-NNVC without Filter-Set, with the neural network-based intra prediction mode in floating-point is built (for writing training data). +mkdir search +cd search +cmake ../../training/training_intra_prediction -DCMAKE_CXX_FLAGS_RELEASE="-O3 -DNDEBUG" -DUSE_SEARCH=1 +make -j 8 +cd ../.. + diff --git a/training/training_intra_prediction/run_setups_vtm_tm_inf.sh b/training/training_intra_prediction/run_setups_vtm_tm_inf.sh new file mode 100644 index 0000000000..ca53304ac5 --- /dev/null +++ b/training/training_intra_prediction/run_setups_vtm_tm_inf.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +cd ../../build + +# Step 2: VTM-11-NNVC without Filter-Set, with the neural network-based intra prediction mode in floating-point is built (for final rate-distortion tests). 
+mkdir tm_inf +cd tm_inf +cmake ../../training/training_intra_prediction -DCMAKE_CXX_FLAGS_RELEASE="-O3 -DNDEBUG" -DUSE_TM_INF=1 +make -j 8 +cd ../.. + -- GitLab