Commit 98f34f48 authored by Yun Li

Merge branch 'hop4_dev' into 'VTM-11.0_nnvc'

Hop4 dev

See merge request jvet-ahg-nnvc/VVCSoftware_VTM!223
parents 4d4af117 e51ad858
{
"stage1": {
"training": {
"mse_epoch": 37,
"max_epochs": 40,
"component_loss_weightings": [
6,
2
],
"dataloader": {
"batch_size": 64
},
"optimizer": {
"lr": 0.0001
},
"lr_scheduler": {
"milestones": [
30,
34
]
}
}
},
"stage2": {
"encdec_bvi": {
"vtm_option": "--NnlfHopDebugOption=1 --NnlfOption=4 --NnlfModelName=[stage1/conversion/full_path_filename]",
"vtm_dec_option": "--NnlfModelName=[stage1/conversion]"
},
"encdec_bvi_valid": {
"vtm_option": "--NnlfHopDebugOption=1 --NnlfOption=4 --NnlfModelName=[stage1/conversion/full_path_filename]",
"vtm_dec_option": "--NnlfModelName=[stage1/conversion]"
},
"encdec_tvd": {
"vtm_option": "--NnlfHopDebugOption=1 --NnlfOption=4 --NnlfModelName=[stage1/conversion/full_path_filename]",
"vtm_dec_option": "--NnlfModelName=[stage1/conversion]"
},
"encdec_tvd_valid": {
"vtm_option": "--NnlfHopDebugOption=1 --NnlfOption=4 --NnlfModelName=[stage1/conversion/full_path_filename]",
"vtm_dec_option": "--NnlfModelName=[stage1/conversion]"
},
"training": {
"mse_epoch": 18,
"max_epochs": 20,
"component_loss_weightings": [
6,
2
],
"dataloader": {
"batch_size": 64
},
"optimizer": {
"lr": 0.0001
},
"lr_scheduler": {
"milestones": [
15,
17
]
}
}
},
"stage3": {
"encdec_bvi": {
"vtm_option": "--NnlfOption=1 --NnlfModelName=[stage2/quantize/full_path_filename]",
"vtm_dec_option": "--NnlfModelName=[stage2/conversion]"
},
"encdec_bvi_valid": {
"vtm_option": "--NnlfOption=1 --NnlfModelName=[stage2/quantize/full_path_filename]",
"vtm_dec_option": "--NnlfModelName=[stage2/conversion]"
},
"encdec_tvd": {
"vtm_option": "--NnlfOption=1 --NnlfModelName=[stage2/quantize/full_path_filename]",
"vtm_dec_option": "--NnlfModelName=[stage2/conversion]"
},
"encdec_tvd_valid": {
"vtm_option": "--NnlfOption=1 --NnlfModelName=[stage2/quantize/full_path_filename]",
"vtm_dec_option": "--NnlfModelName=[stage2/conversion]"
},
"training": {
"mse_epoch": 18,
"max_epochs": 20,
"component_loss_weightings": [
16,
2
],
"dataloader": {
"batch_size": 32
},
"optimizer": {
"lr": 0.0002
},
"lr_scheduler": {
"milestones": [
15,
17,
18
]
}
}
}
}
{ "model" : {
"path": "../../HOP/model/model.py",
"class" : "Net",
"input_channels" : [
[
"rec_before_dbf_Y",
"rec_before_dbf_U",
"rec_before_dbf_V"
],
[
"pred_Y",
"pred_U",
"pred_V"
],
[
"bs_Y",
"bs_U",
"bs_V"
],
[ "qp_base" ],
[ "qp_slice" ],
[ "ipb_Y" ]
],
"input_kernels" : [
3,
3,
1,
1,
1,
1
],
"D1" : 192,
"D2" : 32,
"D3" : 16,
"D4" : 16,
"D5" : 16,
"D6" : 48,
"N" : 24,
"C" : 64,
"C1" : 192,
"C21" : 32,
"C22" : 80,
"C31" : 48
} }
"""
/* The copyright in this software is being made available under the BSD
* License, included below. This software may be subject to other third party
* and contributor rights, including patent rights, and no such rights are
* granted under this license.
*
* Copyright (c) 2010-2024, ITU/ISO/IEC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
"""
from typing import Union, Tuple, Optional, Type, Iterable, List, Dict
import torch
from torch import nn
from torch.nn import functional as F
# ugly global to avoid putting the export model inside the model
model_for_export = None
class Conv(nn.Sequential):
def __init__(
self,
in_channels: int,
out_channels: int,
kernel_size: Union[int, Tuple[int, int]],
stride: Union[int, Tuple[int, int]] = 1,
padding: Optional[Union[int, Tuple[int, int]]] = None,
is_separable: bool = False,
hidden_separable_channels: Optional[int] = None,
post_activation: Optional[Type] = nn.PReLU,
index: int = 0,
groups: int = 1,
**kwargs,
):
"""
Args:
in_channels: the number of input channels
out_channels: the number of output channels
kernel_size: the convolution's kernel size
stride: the convolution's stride(s)
padding: the convolution's padding
is_separable: whether to implement convolution separably
hidden_separable_channels: If is_separable, the number of hidden channels between convolutions. If None, use out_channels
post_activation: activation function to use after convolution. If None, no activation after convolution
index: when is_separable, its parity selects whether the vertical or the horizontal 1-D convolution is applied first
groups: when is_separable, the number of groups used for the first 1-D convolution
**kwargs: additional kwargs to pass to nn.Conv2d
"""
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = (
(kernel_size, kernel_size) if isinstance(kernel_size, int) else kernel_size
)
self.stride = (stride, stride) if isinstance(stride, int) else stride
if padding is not None:
self.padding = (padding, padding) if isinstance(padding, int) else padding
else:
self.padding = tuple([k // 2 for k in self.kernel_size])
self.is_separable = is_separable
self.post_activation = post_activation
if self.is_separable:
self.hidden_separable_channels = hidden_separable_channels or out_channels
if index % 2 == 0:
modules = [
nn.Conv2d(
self.in_channels,
self.hidden_separable_channels,
(self.kernel_size[0], 1),
(self.stride[0], 1),
(self.padding[0], 0),
groups=groups,
**kwargs,
),
nn.Conv2d(
self.hidden_separable_channels,
self.out_channels,
(1, self.kernel_size[1]),
(1, self.stride[1]),
(0, self.padding[1]),
**kwargs,
),
]
else:
modules = [
nn.Conv2d(
self.in_channels,
self.hidden_separable_channels,
(1, self.kernel_size[1]),
(1, self.stride[1]),
(0, self.padding[1]),
groups=groups,
**kwargs,
),
nn.Conv2d(
self.hidden_separable_channels,
self.out_channels,
(self.kernel_size[0], 1),
(self.stride[0], 1),
(self.padding[0], 0),
**kwargs,
),
]
else:
modules = [
nn.Conv2d(
self.in_channels,
self.out_channels,
self.kernel_size,
self.stride,
self.padding,
**kwargs,
)
]
if self.post_activation is not None:
modules.append(self.post_activation())
super(Conv, self).__init__(*modules)
class MultiBranchModule(nn.Module):
"""A module representing multple, parallel branches. If the input is a list, each element in the list is fed into the corresponding branch,
otherwise the input is fed into every branch. The outputs of each branch are then merged."""
def __init__(self, *branch_modules, merge_dimension: int = -3):
"""
Args:
branch_modules: modules to run in parallel
merge_dimension: the dimension to merge outputs from each branch
"""
super().__init__()
self.branches = nn.ModuleList(branch_modules)
self.merge_dimension = merge_dimension
def forward(self, args: Union[torch.Tensor, List[torch.Tensor]]) -> torch.Tensor:
inputs = args if isinstance(args, list) else len(self.branches) * [args]
branch_outputs = [branch(input) for branch, input in zip(self.branches, inputs)]
return torch.cat(branch_outputs, dim=self.merge_dimension)
class Attn(nn.Module):
def __init__(self, to_train, c, n_h):
super(Attn, self).__init__()
self.n_h = n_h
self.c = c
self.to_train = to_train
self.conv1_1 = nn.Conv2d(c, c, kernel_size=1, bias=False)
self.conv1_2 = nn.Conv2d(c, c, kernel_size=1, bias=False)
self.conv1_3 = nn.Conv2d(c, c, kernel_size=1, bias=False)
self.conv3_1 = nn.Conv2d(
c, c, kernel_size=(3, 3), padding=(1, 1), groups=c, bias=False
)
self.conv3_2 = nn.Conv2d(
c, c, kernel_size=(3, 3), padding=(1, 1), groups=c, bias=False
)
self.conv3_3 = nn.Conv2d(
c, c, kernel_size=(3, 3), padding=(1, 1), groups=c, bias=False
)
self.conv2 = nn.Conv2d(c, c, kernel_size=1, bias=False)
def forward(self, x):
q = self.conv3_1(self.conv1_1(x))
k = self.conv3_2(self.conv1_2(x))
v = self.conv3_3(self.conv1_3(x))
s = q.shape
if self.to_train:
q = q.reshape(s[0], self.n_h, self.c // self.n_h, -1)
k = k.reshape(s[0], self.n_h, self.c // self.n_h, -1)
v = v.reshape(s[0], self.n_h, self.c // self.n_h, -1)
else:
q = q.reshape(1, self.n_h, self.c // self.n_h, -1)
k = k.reshape(1, self.n_h, self.c // self.n_h, -1)
v = v.reshape(1, self.n_h, self.c // self.n_h, -1)
map = torch.matmul(q, k.transpose(-2, -1))
p = torch.matmul(map, v)
p = p.reshape(s)
p = self.conv2(p)
return p
class TFBlock(nn.Module):
def __init__(self, to_train, c=64, n_h=2, ext=2.66):
super(TFBlock, self).__init__()
self.attention = Attn(to_train, c, n_h)
def forward(self, x):
x = x + self.attention(x)
return x
class ResidualBlock(nn.Sequential):
def __init__(
self,
C: int = 64,
C1: int = 160,
C21: int = 32,
C22: int = 32,
C31: int = 64,
index: int = 0,
):
super(ResidualBlock, self).__init__(
MultiBranchModule(
Conv(C, C1, kernel_size=1),
Conv(
C,
C22,
kernel_size=3,
is_separable=True,
hidden_separable_channels=C21,
index=index,
groups=2,
),
),
Conv(C1 + C22, C, kernel_size=1, post_activation=None),
Conv(
C,
C,
kernel_size=3,
post_activation=None,
is_separable=True,
hidden_separable_channels=C31,
index=index,
groups=2,
),
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
return x + super(ResidualBlock, self).forward(x)
class ResidualBlock_TF(nn.Sequential):
def __init__(
self,
to_train: bool,
C: int = 64,
C1: int = 160,
C21: int = 32,
C22: int = 32,
C31: int = 64,
index: int = 0,
):
super(ResidualBlock_TF, self).__init__(
MultiBranchModule(
Conv(C, C1, kernel_size=1),
Conv(
C,
C22,
kernel_size=3,
is_separable=True,
hidden_separable_channels=C21,
index=index,
groups=2,
),
),
Conv(C1 + C22, C, kernel_size=1, post_activation=None),
Conv(
C,
C,
kernel_size=3,
post_activation=None,
is_separable=True,
hidden_separable_channels=C31,
index=index,
groups=2,
),
TFBlock(to_train, C, 2, 2.66),
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
return x + super(ResidualBlock_TF, self).forward(x)
class SADLNet(nn.Sequential):
"""The network used during SADL inference"""
def __init__(
self,
to_train: bool = True,
input_channels: Iterable[int] = [3, 3, 3, 1, 1, 1],
input_kernels: Iterable[int] = [3, 3, 3, 1, 1, 3],
D1: int = 192,
D2: int = 32,
D3: int = 16,
D4: int = 16,
D5: int = 16,
D6: int = 48,
N: int = 20,
C: int = 64,
C1: int = 160,
C21: int = 32,
C22: int = 32,
C31: int = 64,
output_channels: int = 6,
):
"""
Args:
input_channels: the number of channels expected for each input
input_kernels: the kernel size for each input convolution
output_channels: the number of output channels
"""
self.input_channels = input_channels
self.input_kernels = input_kernels
self.input_features = [D1, D2, D3, D4, D4, D5]
super(SADLNet, self).__init__(
MultiBranchModule(
*[
Conv(c, d, kernel_size=k)
for c, d, k in zip(
self.input_channels, self.input_features, self.input_kernels
)
]
),
Conv(sum(self.input_features), D6, kernel_size=1),
Conv(D6, C, kernel_size=3, stride=2),
*[ResidualBlock(C, C1, C21, C22, C31, index=i) for i in range(7)],
ResidualBlock_TF(to_train, C, C1, C21, C22, C31, index=7),
*[ResidualBlock(C, C1, C21, C22, C31, index=i) for i in range(8, 14)],
ResidualBlock_TF(to_train, C, C1, C21, C22, C31, index=14),
*[ResidualBlock(C, C1, C21, C22, C31, index=i) for i in range(15, N)],
Conv(C, C, kernel_size=3),
Conv(C, output_channels, kernel_size=3, post_activation=None),
)
# model for export: batch size=1 and no dynamic axis on reshape
if to_train:
global model_for_export
model_for_export = SADLNet(
False,
input_channels,
input_kernels,
D1,
D2,
D3,
D4,
D5,
D6,
N,
C,
C1,
C21,
C22,
C31,
output_channels,
)
def get_example_inputs(
self, patch_size: Union[int, Tuple[int, int]] = 144, batch_size: int = 1
):
patch_size = (
(patch_size, patch_size) if isinstance(patch_size, int) else patch_size
)
return [
torch.rand(
batch_size, conv.in_channels, *patch_size, device=conv[0].weight.device
)
for conv in self[0].branches
]
def to_onnx(
self,
filename: str,
patch_size: int = 144,
batch_size: int = 1,
opset: int = 10,
**kwargs,
) -> None:
mode = self.training
self.eval()
global model_for_export
model_for_export.load_state_dict(self.state_dict())
model_for_export.eval()
torch.onnx.export(
model_for_export,
self.get_example_inputs(patch_size, batch_size),
filename,
input_names=["in"],
dynamic_axes={"in": {2: "h", 3: "w"}},
opset_version=opset,
**kwargs,
)
self.train(mode)
class Net(nn.Module):
"""Wrapper for SADL model that implements input pre- and post-processing for training."""
def __init__(
self,
input_channels: Iterable[Iterable[str]] = [
["rec_before_dbf_Y", "rec_before_dbf_U", "rec_before_dbf_V"],
["pred_Y", "pred_U", "pred_V"],
["bs_Y", "bs_U", "bs_V"],
["qp_base"],
["qp_slice"],
["ipb_Y"],
],
input_kernels: Iterable[int] = [3, 3, 1, 1, 1, 1],
D1: int = 192,
D2: int = 32,
D3: int = 16,
D4: int = 16,
D5: int = 16,
D6: int = 48,
N: int = 25,
C: int = 64,
C1: int = 192,
C21: int = 32,
C22: int = 64,
C31: int = 48,
path: str = None,
):
super(Net, self).__init__()
assert len(input_channels) == len(
input_kernels
), "[ERROR] input size and kernels size not equal"
self.input_channels = input_channels
sizes = [len(a) for a in input_channels]
self.SADL_model = SADLNet(
True,
sizes,
input_kernels,
D1,
D2,
D3,
D4,
D5,
D6,
N,
C,
C1,
C21,
C22,
C31,
6,
)
self.chroma_upsampler = nn.Upsample(scale_factor=2, mode="nearest")
def preprocess_args(
self, batch: Dict[str, torch.Tensor]
) -> Dict[str, torch.Tensor]:
return [
torch.cat([batch[name] for name in input_], dim=1)
for input_ in self.input_channels
]
def postprocess_outputs(
self, batch: Dict[str, torch.Tensor], out: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
Y_res, UV_res = out.split([4, 2], dim=1)
return (
batch["rec_before_dbf_Y"] + F.pixel_shuffle(Y_res, 2),
torch.cat((batch["rec_before_dbf_U"], batch["rec_before_dbf_V"]), dim=1)[
..., ::2, ::2
]
+ UV_res,
)
def forward(
self, batch: Dict[str, torch.Tensor]
) -> Tuple[torch.Tensor, torch.Tensor]:
args = self.preprocess_args(batch)
out = self.SADL_model(args)
return self.postprocess_outputs(batch, out)
{
"binaries": {
"sadl_path": "/path/to/src/sadl"
},
"model" : {
"path": "/path/to/src/training/training_scripts/NN_Filtering/HOP/model/model.py"
},
"dataset": {
"div2k_train": {
"//": "dataset of png to convert",
"path": "/path/to/DIV2K/DIV2K_train_HR"
},
"div2k_valid": {
"path": "/path/to/DIV2K/DIV2K_valid_HR"
},
"bvi": {
"//": "dataset of yuv",
"path": "/path/to/bviorg",
"dataset_file": "/path/to/src/training/training_scripts/NN_Filtering/common/datasets/bvi.json"
},
"bvi_valid": {
"//": "dataset of yuv",
"path": "/path/to/bviorg",
"dataset_file": "/path/to/src/training/training_scripts/NN_Filtering/common/datasets/bvi_valid.json"
},
"tvd": {
"//": "dataset of yuv",
"path": "/path/to/tvdorg",
"dataset_file": "/path/to/src/training/training_scripts/NN_Filtering/common/datasets/tvd.json"
},
"tvd_valid": {
"//": "dataset of yuv",
"path": "/path/to/tvdorg",
"dataset_file": "/path/to/src/training/training_scripts/NN_Filtering/common/datasets/tvd_valid.json"
}
},
"stage1": {
"yuv": {
"//": "path to store yuv files dataset",
"path": "/path/to/stage1/yuv"
},
"yuv_valid": {
"//": "path to store yuv files dataset",
"path": "/path/to/stage1/yuv"
},
"encdec": {
"//": "path to store the shell script and all the generated files by the encoder/decoder",
"path": "/path/to/stage1/encdec",
"vtm_enc": "/path/to/src/bin/EncoderAppStatic",
"vtm_dec": "/path/to/src/bin/DecoderAppStatic",
"vtm_cfg": "/path/to/src/cfg/encoder_intra_vtm.cfg"
},
"encdec_valid": {
"//": "path to store the shell script and all the generated files by the encoder/decoder",
"path": "/path/to/stage1/encdec",
"vtm_enc": "/path/to/src/bin/EncoderAppStatic",
"vtm_dec": "/path/to/src/bin/DecoderAppStatic",
"vtm_cfg": "/path/to/src/cfg/encoder_intra_vtm.cfg"
},
"dataset": {
"//": "path to store the full dataset which will be used by the training",
"path": "/path/to/stage1/dataset"
},
"dataset_valid": {
"//": "path to store the full dataset which will be used by the training",
"path": "/path/to/stage1/dataset"
},
"training": {
"path": "/path/to/stage1/train"
},
"conversion": {
"//": "full path to output the model. input model is taken in training/ckpt_dir",
"full_path_filename": "/path/to/stage1/train/model_float.sadl"
}
},
"stage2": {
"yuv_tvd": {
"//": "path to store yuv files dataset",
"path": "/path/to/stage2/yuv"
},
"yuv_tvd_valid": {
"//": "path to store yuv files dataset",
"path": "/path/to/stage2/yuv"
},
"yuv_bvi": {
"//": "path to store yuv files dataset",
"path": "/path/to/stage2/yuv"
},
"yuv_bvi_valid": {
"//": "path to store yuv files dataset",
"path": "/path/to/stage2/yuv"
},
"encdec_bvi": {
"//": "path to store the shell script and all the generated files by the encoder/decoder",
"path": "/path/to/stage2/encdec",
"vtm_enc": "/path/to/src/bin/EncoderAppStatic",
"vtm_dec": "/path/to/src/bin/DecoderAppStatic",
"vtm_cfg": "/path/to/src/cfg/encoder_randomaccess_vtm.cfg"
},
"encdec_bvi_valid": {
"//": "path to store the shell script and all the generated files by the encoder/decoder",
"path": "/path/to/stage2/encdec",
"vtm_enc": "/path/to/src/bin/EncoderAppStatic",
"vtm_dec": "/path/to/src/bin/DecoderAppStatic",
"vtm_cfg": "/path/to/src/cfg/encoder_randomaccess_vtm.cfg"
},
"encdec_tvd": {
"//": "path to store the shell script and all the generated files by the encoder/decoder",
"path": "/path/to/stage2/encdec",
"vtm_enc": "/path/to/src/bin/EncoderAppStatic",
"vtm_dec": "/path/to/src/bin/DecoderAppStatic",
"vtm_cfg": "/path/to/src/cfg/encoder_randomaccess_vtm.cfg"
},
"encdec_tvd_valid": {
"//": "path to store the shell script and all the generated files by the encoder/decoder",
"path": "/path/to/stage2/encdec",
"vtm_enc": "/path/to/src/bin/EncoderAppStatic",
"vtm_dec": "/path/to/src/bin/DecoderAppStatic",
"vtm_cfg": "/path/to/src/cfg/encoder_randomaccess_vtm.cfg"
},
"dataset": {
"//": "path to store the full dataset which will be used by the training",
"path": "/path/to/stage2/dataset"
},
"dataset_valid": {
"//": "path to store the full dataset which will be used by the training",
"path": "/path/to/stage2/dataset"
},
"training": {
"path": "/path/to/stage2/train"
},
"conversion": {
"//": "full path to output the model. input model is taken in training/ckpt_dir",
"full_path_filename": "/path/to/stage2/train/model_float.sadl"
},
"quantize": {
"//": "full path to output the quantized model",
"full_path_filename": "/path/to/stage2/train/model_int16.sadl"
}
},
"stage3": {
"encdec_bvi": {
"//": "path to store the shell script and all the generated files by the encoder/decoder",
"path": "/path/to/stage3/encdec",
"vtm_enc": "/path/to/src/bin/EncoderAppStatic",
"vtm_dec": "/path/to/src/bin/DecoderAppStatic",
"vtm_cfg": "/path/to/src/cfg/encoder_randomaccess_vtm.cfg"
},
"encdec_bvi_valid": {
"//": "path to store the shell script and all the generated files by the encoder/decoder",
"path": "/path/to/stage3/encdec",
"vtm_enc": "/path/to/src/bin/EncoderAppStatic",
"vtm_dec": "/path/to/src/bin/DecoderAppStatic",
"vtm_cfg": "/path/to/src/cfg/encoder_randomaccess_vtm.cfg"
},
"encdec_tvd": {
"//": "path to store the shell script and all the generated files by the encoder/decoder",
"path": "/path/to/stage3/encdec",
"vtm_enc": "/path/to/src/bin/EncoderAppStatic",
"vtm_dec": "/path/to/src/bin/DecoderAppStatic",
"vtm_cfg": "/path/to/src/cfg/encoder_randomaccess_vtm.cfg"
},
"encdec_tvd_valid": {
"//": "path to store the shell script and all the generated files by the encoder/decoder",
"path": "/path/to/stage3/encdec",
"vtm_enc": "/path/to/src/bin/EncoderAppStatic",
"vtm_dec": "/path/to/src/bin/DecoderAppStatic",
"vtm_cfg": "/path/to/src/cfg/encoder_randomaccess_vtm.cfg"
},
"dataset": {
"//": "path to store the full dataset which will be used by the training",
"path": "/path/to/stage3/dataset"
},
"dataset_valid": {
"//": "path to store the full dataset which will be used by the training",
"path": "/path/to/stage3/dataset"
},
"training": {
"path": "/path/to/stage3/train"
},
"conversion": {
"//": "full path to output the model. input model is taken in training/ckpt_dir",
"full_path_filename": "/path/to/stage3/train/model_float.sadl"
},
"quantize": {
"//": "full path to output the quantized model",
"full_path_filename": "/path/to/stage3/train/model_int16.sadl"
}
}
}
"0 11 1 11 2 11 3 11 4 11 5 11 6 11 7 0 8 11 10 11 12 11 13 0 14 11 16 11 18 11 19 0 20 11 22 11 24 11 25 0 26 11 28 11 30 11 31 0 32 11 34 11 36 11 37 0 38 11 40 11 42 0 44 11 45 0 46 11 48 11 50 11 51 0 52 11 54 11 56 11 57 0 58 11 60 11 62 11 63 0 64 11 66 11 67 0 68 11 70 11 72 0 74 11 75 0 76 11 78 11 79 0 80 11 82 11 83 0 84 11 87 11 88 0 89 11 91 11 93 11 94 0 95 11 97 11 98 0 99 11 101 11 103 0 105 11 106 0 107 11 109 11 110 0 111 11 113 11 114 0 115 11 118 11 119 0 120 11 122 11 124 11 125 0 126 11 128 11 129 0 130 11 132 11 134 0 136 11 137 0 138 11 140 11 141 0 142 11 144 11 145 0 146 11 149 11 150 0 151 11 153 11 155 11 156 0 157 11 159 11 160 0 161 11 163 11 165 0 167 11 168 0 169 11 171 11 172 0 173 11 175 11 176 0 177 11 180 11 181 0 182 11 184 11 186 11 187 0 188 11 190 11 191 0 192 11 194 11 196 0 198 11 199 0 200 11 202 11 203 0 204 11 206 11 207 0 208 11 211 11 212 0 213 11 215 11 217 11 218 0 219 11 221 11 222 0 223 11 225 11 227 0 229 11 230 0 231 11 233 11 234 0 235 11 237 11 238 0 239 11 242 11 243 0 244 11 246 11 248 11 249 0 250 11 252 11 253 0 254 11 256 11 258 0 260 11 261 0 262 11 264 11 265 0 266 11 268 11 269 0 270 11 273 11 274 0 275 11 277 11 279 11 280 0 281 11 283 11 284 0 285 11 287 11 289 0 291 11 292 0 293 11 295 11 296 0 297 11 299 11 300 0 301 11 303 11 304 0 305 11 306 0 307 11 308 0 309 11 310 0 311 11 312 0 313 11 314 0 315 0 317 0 319 0 321 0 323 0 325 0 327 0 329 0 331 0 333 0 335 0 337 0 338 0 340 0 342 0 343 0 345 0 347 0 349 0 351 11 352 0 355 11 356 0 357 11 359 11 361 11 362 0 363 11 365 11 366 0 367 11 369 11 371 0 373 11 374 0 375 11 377 11 378 0 379 11 381 11 382 0 383 11 386 11 387 0 388 11 390 11 392 11 393 0 394 11 396 11 397 0 398 11 400 11 402 0 404 11 405 0 406 11 408 11 409 0 410 11 412 11 413 0 414 11 417 11 418 0 419 11 421 11 423 11 424 0 425 11 427 11 428 0 429 11 431 11 433 0 435 11 436 0 437 11 439 11 440 0 441 11 443 11 444 0 445 11 448 11 449 0 450 11 452 11 454 11 455 0 456 11 458 11 459 0 460 11 462 11 464 0 466 11 467 0 468 11 470 11 471 0 472 11 474 11 475 0 476 11 479 11 480 0 481 11 483 11 485 11 486 0 487 11 489 11 490 0 491 11 493 11 495 0 497 11 498 0 499 11 501 11 502 0 503 11 505 11 506 0 507 11 510 11 511 0 512 11 514 11 516 11 517 0 518 11 520 11 521 0 522 11 524 11 526 0 528 11 529 0 530 11 532 11 533 0 534 11 536 11 537 0 538 11 541 11 542 0 543 11 545 11 547 11 548 0 549 11 551 11 552 0 553 11 555 11 557 0 559 11 560 0 561 11 563 11 564 0 565 11 567 11 568 0 569 11 571 11 572 0 573 11 574 0 575 11 576 0 577 11 578 0 579 11 580 0 581 11 582 0 583 0 585 0 587 0 589 0 591 0 593 0 595 0 597 0 599 0 601 0 603 0 605 0 606 0 608 0 610 0 611 0 613 0 615 0 617 0 619 11 620 0 623 11 624 0 625 11 627 11 629 11 630 0 631 11 633 11 634 0 635 11 637 11 639 0 641 11 642 0 643 11 645 11 646 0 647 11 649 11 650 0 651 11 654 11 655 0 656 11 658 11 660 11 661 0 662 11 664 11 665 0 666 11 668 11 670 0 672 11 673 0 674 11 676 11 677 0 678 11 680 11 681 0 682 11 685 11 686 0 687 11 689 11 691 11 692 0 693 11 695 11 696 0 697 11 699 11 701 0 703 11 704 0 705 11 707 11 708 0 709 11 711 11 712 0 713 11 716 11 717 0 718 11 720 11 722 11 723 0 724 11 726 11 727 0 728 11 730 11 732 0 734 11 735 0 736 11 738 11 739 0 740 11 742 11 743 0 744 11 747 11 748 0 749 11 751 11 753 11 754 0 755 11 757 11 758 0 759 11 761 11 763 0 765 11 766 0 767 11 769 11 770 0 771 11 773 11 774 0 775 11 778 11 779 0 780 11 782 11 784 11 785 0 786 11 788 11 789 0 790 11 792 11 794 0 796 11 797 0 798 11 800 11 801 0 802 11 804 11 805 0 806 11 809 11 810 0 811 
11 813 11 815 11 816 0 817 11 819 11 820 0 821 11 823 11 825 0 827 11 828 0 829 11 831 11 832 0 833 11 835 11 836 0 837 11 840 11 841 0 842 11 844 11 846 11 847 0 848 11 850 11 851 0 852 11 854 11 856 0 858 11 859 0 860 11 862 11 863 0 864 11 866 11 867 0 868 11 871 11 872 0 873 11 875 11 877 11 878 0 879 11 881 11 882 0 883 11 885 11 887 0 889 11 890 0 891 11 893 11 894 0 895 11 897 11 898 0 899 11 902 11 903 0 904 11 906 11 908 11 909 0 910 11"
# High Operating Point model training
## Overview
### Notes
The following training stages should be performed using the specified NNVC version in order to reproduce the current models. Note that some options or performance results might differ when using the latest NNVC version.
### Preparation of the directory
At a minimum, the paths should first be set. The file ``training/training_scripts/NN_Filtering/HOP/paths.json`` should be copied and edited to match your environment.
All keys named ``path`` should be edited to fit your particular environment.
Additionally, edit the ``vtm_xx`` variables to point to the VTM binaries and configuration files, and ``sadl_path`` to point to the SADL repository.
Assuming that all data are on the same storage, the following directory structure can be used:
```
- src [ contains the NNVC repository]
- DIV2K [ contains the original div2k dataset]
- bviorg [ contains the original BVI dataset ]
- tvdorg [ contains the original TVD dataset ]
- stage1 [ will be created by the scripts]
- stage2 [ will be created by the scripts]
- stage3 [ will be created by the scripts]
```
To create ``src`` the following commands can be used:
```sh
git clone https://vcgit.hhi.fraunhofer.de/jvet-ahg-nnvc/VVCSoftware_VTM.git src;
cd src;
git checkout VTM-11.0_nnvc
git submodule init
git submodule update
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j
cd ../..
```
To create the DIV2K directory:
```sh
mkdir DIV2K;
cd DIV2K;
wget http://data.vision.ee.ethz.ch/cvl/DIV2K/DIV2K_train_HR.zip
unzip DIV2K_train_HR.zip
wget http://data.vision.ee.ethz.ch/cvl/DIV2K/DIV2K_valid_HR.zip
unzip DIV2K_valid_HR.zip
cd ..
```
To create the bviorg directory:
```sh
mkdir bviorg;
cd bviorg;
wget https://data.bris.ac.uk/datasets/tar/3h0hduxrq4awq2ffvhabjzbzi1.zip
unzip 3h0hduxrq4awq2ffvhabjzbzi1.zip
cd ..
```
To create the tvdorg directory:
```sh
mkdir tvdorg;
# download TVD dataset from https://multimedia.tencent.com/resources/tvd
```
With this file structure, ``paths.json`` can be edited by simply replacing ``/path/to`` with the absolute path of the root directory of the experiment, for example as sketched below.
If your datasets are stored in different locations, just edit the relevant lines in ``paths.json``.
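A minimal sketch of that substitution on your copy of ``paths.json`` (the root directory ``/data/hop_exp`` is only a placeholder for your own path):
```sh
# Replace the /path/to placeholder with the experiment root directory (placeholder value).
sed -i 's#/path/to#/data/hop_exp#g' paths.json
```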
### Creation of the consolidated configuration file
The main configuration file is ``common/common_config.json``.
The HOP-specific parameters are in ``HOP/hop_config.json``.
The variables specific to your environment are in ``HOP/paths.json``.
The model is described in ``HOP/model/model.json``.
A single consolidated configuration is created by merging all these files with the following command:
```sh
cp src/training/training_scripts/NN_Filtering/HOP/paths.json .
# edit paths.json
python3 src/training/tools/create_config.py src/training/training_scripts/NN_Filtering/common/common_config.json src/training/training_scripts/NN_Filtering/HOP/hop_config.json src/training/training_scripts/NN_Filtering/HOP/model/model.json paths.json > my_config.json
```
This file is then used in the command lines below; you should be able to run the whole process simply by copy/pasting the shell lines below.
Other keys should not be edited, except for testing purposes.
### Advanced customization
The following keys can be edited to adapt the process to your needs without affecting the results (an example override is sketched after the list):
```
/verbose
/stageX/training/dataloader/num_workers
/stageX/training/loggers/*
```
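For instance, a sketch of an edit of the stage 1 dataloader worker count in ``my_config.json`` (the value 8 is only an example; only the relevant portion of the file is shown):
```
{
  "stage1": {
    "training": {
      "dataloader": {
        "num_workers": 8
      }
    }
  }
}
```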
## I- Model Stage 1
Total size required for stage1 (without deleting intermediate data) is about 3312GB.
### A- Data extraction for intra from vanilla VTM
#### 1. Dataset preparation - div2k conversion
Convert div2k (4:4:4 RGB -> YUV420 10 bits):
```sh
python3 tools/convert_dataset.py --json_config my_config.json --input_dataset dataset/div2k_train --augmentation --output_location stage1/yuv
python3 tools/convert_dataset.py --json_config my_config.json --input_dataset dataset/div2k_valid --output_location stage1/yuv_valid
```
Dataset files are placed in the target directory (as set in the config ["stage1"]["yuv"]["path"]), and a json file named ["stage1"]["yuv"]["dataset_filename"] is updated with the new data.
**Note:** after conversion, the dataset size is about 21GB.
#### 2. Prepare scripts for encoding/decoding of the dataset
Please note that a VTM without NN tools is used. NNVC-4.0 to NNVC-6.0 tags can be used to generate the binaries and cfg file. The configuration file is the vanilla VTM one (see config.json).
The macros for data dump should be:
```
// which data are used for inference/dump
#define NNVC_USE_REC_BEFORE_DBF 1 // reconstruction before DBF
#define NNVC_USE_PRED 1 // prediction
#define NNVC_USE_BS 1 // BS of DBF
#define NNVC_USE_QP 1 // QP slice
#define JVET_AC0089_NNVC_USE_BPM_INFO 1 // JVET-AC0089: dump Block Prediction Mode
```
Other macros can be set to 0.
Extract cfg files and encoding/decoding script:
```sh
python3 tools/dataset_to_encoding_script.py --json_config my_config.json --input_dataset stage1/yuv --output_location stage1/encdec
python3 tools/dataset_to_encoding_script.py --json_config my_config.json --input_dataset stage1/yuv_valid --output_location stage1/encdec_valid
```
It will generate the cfg files for the dataset and a shell script to encode and decode all sequences of the dataset, placed in the directory ["stage1"]["encdec"]["path"].
#### 3. Encode/decode the sequences:
Loop on all sequences to encode:
```sh
cd stage1/encdec;
N1=32000;
for((i=0;i<N1;i++)); do
./encode_decode_dataset.sh $i;
done
N2=1000;
for((i=0;i<N2;i++)); do
./encode_decode_dataset_valid.sh $i;
done
```
Alternatively, you can use the script to encode on your cluster (an example submission is sketched below). N1 and N2 are the numbers of sequences in the training and validation sets (run ./encode_decode_dataset.sh to get the value).
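A minimal sketch of a cluster submission, assuming a SLURM scheduler is available (adapt the array bounds and the submission command to your own job system):
```sh
# Training set: one array task per sequence (32000 sequences, as in the loop above).
sbatch --array=0-31999 --wrap './encode_decode_dataset.sh $SLURM_ARRAY_TASK_ID'
# Validation set (1000 sequences).
sbatch --array=0-999 --wrap './encode_decode_dataset_valid.sh $SLURM_ARRAY_TASK_ID'
```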
The script will perform the following steps for each sequence:
1. encodes the yuv and produces an encoding log
2. decodes the bitstream, produces a decoding log and dumps the data in ["stage1"]["encdec"]["path"]/["dump_dir"]
**Note:** the size requirement is about 815GB for the training set and about 32GB for the validation set.
#### 4. Create a consolidated dataset
```sh
python3 tools/concatenate_dataset.py --json_config my_config.json --input_dir_json stage1/encdec --output_json stage1/encdec
python3 tools/concatenate_dataset.py --json_config my_config.json --input_dir_json stage1/encdec_valid --output_json stage1/encdec_valid
```
It will generate a unique dataset in ["stage1"]["encdec"]["path"] from all individual datasets in ["stage1"]["encdec"]["path"]/["dump_dir"] and encoder logs in ["stage1"]["encdec"]["enc_dir"].
#### 5a. Create an offline dataset with all batches
```sh
python3 tools/create_unified_dataset.py --json_config my_config.json \
--nb_patches -1 --patch_size 128 --border_size 8 --input_dataset stage1/encdec \
--components org_Y,org_U,org_V,pred_Y,pred_U,pred_V,rec_before_dbf_Y,rec_before_dbf_U,rec_before_dbf_V,bs_Y,bs_U,bs_V,qp_base,qp_slice,ipb_Y \
--output_location stage1/dataset
python3 tools/create_unified_dataset.py --json_config my_config.json \
--nb_patches -1 --patch_size 128 --border_size 8 --input_dataset stage1/encdec_valid \
--components org_Y,org_U,org_V,pred_Y,pred_U,pred_V,rec_before_dbf_Y,rec_before_dbf_U,rec_before_dbf_V,bs_Y,bs_U,bs_V,qp_base,qp_slice,ipb_Y \
--output_location stage1/dataset_valid
```
It will generate a unique dataset of patches ready for training in ["stage1"]["dataset"]["path"] from the dataset in ["stage1"]["encdec"]["path"].
**Note:** the directories in encdec can now be deleted if there is no need to regenerate an offline dataset.
The dataset can be visualized using:
```sh
python3 tools/example/display_patches_dataset.py stage1/dataset/dataset.json
```
**Note:** the size requirement is about 2300GB for the training set and about 91GB for the validation set.
#### 5b. Create a dataset for on-the-fly batch extraction
TODO
### B- Training stage
#### 1. Train HOP model on intra frames
If you need to adapt the settings of your device for training, please edit the file ``my_config.json`` (default parameters). You can also change the loggers verbosity in this file.
When ready, simply run:
```sh
python3 training_scripts/NN_Filtering/common/training/main.py --json_config my_config.json --stage 1
```
**Note:** training is expected to take about 7h per epoch on an A100.
#### 2. Convert model to SADL
The last ONNX model is converted into float SADL format.
```sh
python3 training_scripts/NN_Filtering/common/convert/to_sadl.py \
--json_config my_config.json \
--input_model stage1/training --output_model stage1/conversion/full_path_filename
```
The converter:
* uses the json config file used for training,
* takes the file model_onnx_filename of the training section as input (usually last.onnx),
* outputs the file model_filename of the conversion section.
**Note:** the directory in dataset can now be deleted if there is no need to retrain.
#### 3. Test model
To test the float model, the type should be set to float in ``TypeDef.h``:
```
#if NN_LF_UNIFIED
using TypeSadlLFUnified=float;
#endif
```
and the correct data macros should be set:
```
// which data are used for inference/dump
#define NNVC_USE_REC_BEFORE_DBF 1 // reconstruction before DBF
#define NNVC_USE_PRED 1 // prediction
#define NNVC_USE_BS 1 // BS of DBF
#define NNVC_USE_QP 1 // QP slice
#define JVET_AC0089_NNVC_USE_BPM_INFO 1 // JVET-AC0089: dump Block Prediction Mode
```
##### 3.1 Test on intra only
The model is tested with NNVC-5.1 using the following parameters:
```
--NnlfOption=4 --NnlfModelName=stage1/train/model_float.sadl --NnlfDebugOption=1
```
The configuration file is ``encoder_xxx_vtm.cfg`` and the anchor is VTM-11.0\_NNVC. An illustrative encoder call is sketched below.
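A minimal sketch of a full encoder call under these settings (the sequence configuration and input/output names are placeholders; ``encoder_intra_vtm.cfg`` stands for the relevant ``encoder_xxx_vtm.cfg``):
```sh
# Placeholder sequence/IO names; the NNVC options follow the parameters above.
/path/to/src/bin/EncoderAppStatic -c /path/to/src/cfg/encoder_intra_vtm.cfg -c sequence.cfg \
  -i input.yuv -b str.bin -o rec.yuv \
  --NnlfOption=4 --NnlfModelName=stage1/train/model_float.sadl --NnlfDebugOption=1
```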
##### 3.2 Test with forced usage
The model is tested with NNVC-5.1 using the following parameters:
```
--NnlfOption=4 --NnlfModelName=stage1/train/model_float.sadl --NnlfDebugOption=2
```
The configuration file is ``encoder_xxx_vtm.cfg`` and the anchor is VTM-11.0\_NNVC.
The flag ``NnlfDebugOption`` is also needed at the decoder, since it forces the usage of the model on inter slices using the intra "part" of the model. A matching decoder call is sketched below.
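A minimal sketch of such a decoder call (bitstream and output names are placeholders):
```sh
# The debug option must match the encoder so that the forced usage is replayed at decoding.
/path/to/src/bin/DecoderAppStatic -b str.bin -o rec.yuv \
  --NnlfModelName=stage1/train/model_float.sadl --NnlfDebugOption=2
```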
## II- Model Stage 2
Total size required for stage2 (without deleting intermediate data) is about 5TB.
### A- Data extraction
#### 0. Training from stage II
An official model from the end of stage I is available in the directory ``models``, so that training can start directly from stage II (skipping stage I).
To do so, the model ``HOP_float.I.sadl`` is used to generate the dataset for stage II. Replace the model path in your ``my_config.json`` with the path of ``HOP_float.I.sadl`` in order to use the official model, as sketched below.
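A sketch of the corresponding edit, assuming the relevant key is the stage 1 conversion output referenced by the stage 2 encoding options (the ``models`` directory location is a placeholder):
```
{
  "stage1": {
    "conversion": {
      "full_path_filename": "/path/to/src/models/HOP_float.I.sadl"
    }
  }
}
```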
#### 1. Dataset preparation - bvi/tvd conversion
Convert the 64-frame BVI-DVC sequences into 65-frame sequences, and create the TVD dataset file (YUVs are still taken from the source).
```sh
python3 tools/convert_dataset.py --json_config my_config.json --input_dataset dataset/bvi --output_location stage2/yuv_bvi --extend 65
python3 tools/convert_dataset.py --json_config my_config.json --input_dataset dataset/bvi_valid --output_location stage2/yuv_bvi_valid --extend 65
python3 tools/convert_dataset.py --json_config my_config.json --input_dataset dataset/tvd --output_location stage2/yuv_tvd
python3 tools/convert_dataset.py --json_config my_config.json --input_dataset dataset/tvd_valid --output_location stage2/yuv_tvd_valid
```
Dataset files are placed in the target directory (as set in the config ["stage2"]["yuv"]["path"]), and a json file named ["stage2"]["yuv"]["dataset_filename"] is updated with the new data.
**Note:** the size requirement is about 130 GB for the extended BVI dataset.
#### 2. Prepare scripts for encoding/decoding of the dataset
Please note that a VTM with the model of stage 1 is used. NNVC-5.1 is used to generate the binaries and cfg file. The configuration file is the vanilla VTM one (see config.json); the input model is added automatically.
The macros for data dump should be:
```
// which data are used for inference/dump
#define NNVC_USE_REC_BEFORE_DBF 1 // reconstruction before DBF
#define NNVC_USE_PRED 1 // prediction
#define NNVC_USE_BS 1 // BS of DBF
#define NNVC_USE_QP 1 // QP slice
#define JVET_AC0089_NNVC_USE_BPM_INFO 1 // JVET-AC0089: dump Block Prediction Mode
```
Other macros can be set to 0.
Extract cfg files and encoding/decoding script:
```sh
python3 tools/dataset_to_encoding_script.py --json_config my_config.json --input_dataset stage2/yuv_tvd --output_location stage2/encdec_tvd
python3 tools/dataset_to_encoding_script.py --json_config my_config.json --input_dataset stage2/yuv_tvd_valid --output_location stage2/encdec_tvd_valid
python3 tools/dataset_to_encoding_script.py --json_config my_config.json --input_dataset stage2/yuv_bvi --output_location stage2/encdec_bvi
python3 tools/dataset_to_encoding_script.py --json_config my_config.json --input_dataset stage2/yuv_bvi_valid --output_location stage2/encdec_bvi_valid
```
It will generate the cfg files for the dataset and a shell script to encode and decode all sequences in the dataset in the directory ["stage2"]["encdec"]["path"].
#### 3. Encode/decode the sequences:
Loop on all sequences to encode, for example:
```sh
cd stage2/encdec;
for((i=0;i<90;i++)); do
./encode_decode_dataset_tvd.sh $i;
done
for((i=0;i<10;i++)); do
./encode_decode_dataset_tvd_valid.sh $i;
done
for((i=0;i<3025;i++)); do
./encode_decode_dataset_bvi.sh $i;
done
for((i=0;i<75;i++)); do
./encode_decode_dataset_bvi_valid.sh $i;
done
```
Alternatively, you can use the script to encode on your cluster. The loop bounds above are the numbers of sequences in each dataset (run the corresponding ./encode_decode_dataset_xxx.sh script to get the value).
**Note:** the size requirement is about 3.3TB for the dumped data.
#### 4. Create consolidated datasets
```sh
LIST="bvi bvi_valid tvd tvd_valid";
for DS in $LIST; do
python3 tools/concatenate_dataset.py --json_config my_config.json --input_dir_json stage2/encdec_${DS} --input_dir_dataset_json stage2/yuv_${DS} --output_json stage2/encdec_${DS}
done
```
It will generate a unique dataset for each dataset in ["stage2"]["encdec"]["path"] from all individual datasets in ["stage2"]["encdec_xxx"]["path"]/["dump_dir"] and encoder logs in ["stage2"]["encdec_xxx"]["enc_dir"].
**Note:** the size requirement is about 1.6TB for the datasets.
#### 5a. Create offline datasets with all batches
```sh
LIST="bvi tvd";
for DS in $LIST; do
python3 tools/create_unified_dataset.py --json_config my_config.json \
--nb_patches -1 --patch_size 128 --border_size 8 --input_dataset stage2/encdec_${DS} \
--components org_Y,org_U,org_V,pred_Y,pred_U,pred_V,rec_before_dbf_Y,rec_before_dbf_U,rec_before_dbf_V,bs_Y,bs_U,bs_V,qp_base,qp_slice,ipb_Y \
--output_location stage2/dataset/dataset_files/${DS}
python3 tools/create_unified_dataset.py --json_config my_config.json \
--nb_patches -1 --patch_size 128 --border_size 8 --input_dataset stage2/encdec_${DS}_valid \
--components org_Y,org_U,org_V,pred_Y,pred_U,pred_V,rec_before_dbf_Y,rec_before_dbf_U,rec_before_dbf_V,bs_Y,bs_U,bs_V,qp_base,qp_slice,ipb_Y \
--output_location stage2/dataset_valid/dataset_files/${DS}
done
```
The div2k dataset is also reused. The json files are copied into the stage2/dataset directory:
```sh
cp stage1/dataset/*.json stage2/dataset/;
```
**Note:** the directories in encdec can now be deleted if there is no need to regenerate an offline dataset.
#### 5b. Create a dataset for on-the-fly batch extraction
TODO
### B- Training stage
#### 1. Train HOP model
If you need to adapt the settings of your device for training, please edit the file ``my_config.json``. You can also change the loggers verbosity in this file.
When ready, simply run:
```sh
python3 training_scripts/NN_Filtering/common/training/main.py --json_config my_config.json --stage 2
```
#### 2. Convert model
The last ONNX model is converted into float SADL format.
```sh
python3 training_scripts/NN_Filtering/common/convert/to_sadl.py --json_config my_config.json \
--input_model stage2/training --output_model stage2/conversion/full_path_filename
```
**Note:** the directory in dataset can now be deleted if there is no need to retrain.
#### 3. Integerize model
The float model is integerized into int16 SADL format using a naive quantization (all quantizers set to 2^11).
First, the ``naive_quantization`` software should be built; please refer to the SADL documentation (a build sketch is given below).
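A minimal build sketch, assuming the standard CMake layout of the SADL repository (target and binary names may differ; the SADL documentation is authoritative):
```sh
cd /path/to/src/sadl
mkdir -p build && cd build
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j   # the naive_quantization tool should be built among the sample binaries
```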
```sh
python3 training_scripts/NN_Filtering/HOP/quantize/quantize.py --json_config my_config.json --input_model stage2/conversion/full_path_filename --output_model stage2/quantize/full_path_filename
```
Please note that the command should be run in the directory
#### 4. Test model
To test the int16 model, the type should be set to int16 in ``TypeDef.h``:
```
#if NN_LF_UNIFIED
using TypeSadlLFUnified=int16_t;
#endif
```
and the correct data macros should be set:
```
// which data are used for inference/dump
#define NNVC_USE_REC_BEFORE_DBF 1 // reconstruction before DBF
#define NNVC_USE_PRED 1 // prediction
#define NNVC_USE_BS 1 // BS of DBF
#define NNVC_USE_QP 1 // QP slice
#define JVET_AC0089_NNVC_USE_BPM_INFO 1 // JVET-AC0089: dump Block Prediction Mode
```
The model is tested with NNVC-5.1 using the following parameters:
```
--NnlfOption=4 --NnlfModelName=stage2/train/model_int16.sadl
```
The configuration file is ``encoder_xxx_nnvc.cfg`` and the anchor is VTM-11.0\_NNVC v5.1.
## III- Model Stage 3
### A- Data extraction
#### 1. Dataset preparation
##### 1.1 Dataset reuse
All previously prepared datasets will be reused for stage 3.
##### 1.2 Training from stage III
An official model from the end of stage II is available in the directory ``models``, so that training can start directly from stage III.
To do so, the model ``HOP_int16.II.sadl`` is used to generate the dataset for stage III. Replace the model path in your ``my_config.json`` with the path of ``HOP_int16.II.sadl`` in order to use the official model, as sketched below. Checksums for the resulting dataset are also available in the nnvc-ctc repository.
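A sketch of the corresponding edit, assuming the relevant key is the stage 2 quantization output referenced by the stage 3 encoding options (the ``models`` directory location is a placeholder):
```
{
  "stage2": {
    "quantize": {
      "full_path_filename": "/path/to/src/models/HOP_int16.II.sadl"
    }
  }
}
```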
#### 2. Prepare script for encoding/decoding of the dataset
This step is similar to II-A-2; only the model used is different. Please note that the int16 model is used to generate the dataset.
Please note that a VTM with the model of stage 2 is used. NNVC-5.1 can be used to generate the binaries and cfg file. The configuration file is the vanilla VTM one (see config.json); the input model is added automatically.
The macros for data dump should be:
```
// which data are used for inference/dump
#define NNVC_USE_REC_BEFORE_DBF 1 // reconstruction before DBF
#define NNVC_USE_PRED 1 // prediction
#define NNVC_USE_BS 1 // BS of DBF
#define NNVC_USE_QP 1 // QP slice
#define JVET_AC0089_NNVC_USE_BPM_INFO 1 // JVET-AC0089: dump Block Prediction Mode
```
Other macros can be set to 0.
Extract cfg files and encoding/decoding script:
```sh
python3 tools/dataset_to_encoding_script.py --json_config my_config.json --input_dataset stage2/yuv_tvd --output_location stage3/encdec_tvd
python3 tools/dataset_to_encoding_script.py --json_config my_config.json --input_dataset stage2/yuv_tvd_valid --output_location stage3/encdec_tvd_valid
python3 tools/dataset_to_encoding_script.py --json_config my_config.json --input_dataset stage2/yuv_bvi --output_location stage3/encdec_bvi
python3 tools/dataset_to_encoding_script.py --json_config my_config.json --input_dataset stage2/yuv_bvi_valid --output_location stage3/encdec_bvi_valid
```
It will generate the cfg files for the dataset and a shell script to encode and decode all sequences in the dataset in the directory ["stage3"]["encdec"]["path"].
#### 3. Encode/decode the sequences:
This step is similar to II-A-3.
Loop on all sequences to encode:
```sh
cd stage3/encdec;
for((i=0;i<N1;i++)); do
./encode_decode_dataset_tvd.sh $i;
done
for((i=0;i<N2;i++)); do
./encode_decode_dataset_tvd_valid.sh $i;
done
for((i=0;i<N3;i++)); do
./encode_decode_dataset_bvi.sh $i;
done
for((i=0;i<N4;i++)); do
./encode_decode_dataset_bvi_valid.sh $i;
done
```
Alternatively, you can use the script to encode on your cluster. N1 to N4 are the numbers of sequences in each dataset (run the corresponding ./encode_decode_dataset_xxx.sh script to get the value).
#### 4. Create consolidated datasets
This step is similar to II-A-4.
```sh
LIST="bvi bvi_valid tvd tvd_valid";
for DS in $LIST; do
python3 tools/concatenate_dataset.py --json_config my_config.json --input_dir_json stage3/encdec_${DS} --input_dir_dataset_json stage2/yuv_${DS} --output_json stage3/encdec_${DS}
done
```
It will generate a unique dataset for each dataset in ["stage3"]["encdec"]["path"] from all individual datasets in ["stage3"]["encdec_xxx"]["path"]/["dump_dir"] and encoder logs in ["stage3"]["encdec_xxx"]["enc_dir"].
#### 5a. Create offline datasets with all batches
This step is similar to II-A-5a.
```sh
LIST="bvi tvd";
for DS in $LIST; do
python3 tools/create_unified_dataset.py --json_config my_config.json \
--nb_patches -1 --patch_size 128 --border_size 8 --input_dataset stage3/encdec_${DS} \
--components org_Y,org_U,org_V,pred_Y,pred_U,pred_V,rec_before_dbf_Y,rec_before_dbf_U,rec_before_dbf_V,bs_Y,bs_U,bs_V,qp_base,qp_slice,ipb_Y \
--output_location stage3/dataset/dataset_files/${DS}
python3 tools/create_unified_dataset.py --json_config my_config.json \
--nb_patches -1 --patch_size 128 --border_size 8 --input_dataset stage3/encdec_${DS}_valid \
--components org_Y,org_U,org_V,pred_Y,pred_U,pred_V,rec_before_dbf_Y,rec_before_dbf_U,rec_before_dbf_V,bs_Y,bs_U,bs_V,qp_base,qp_slice,ipb_Y \
--output_location stage3/dataset_valid/dataset_files/${DS}
done
```
**Note:** the directories in encdec can now be deleted if there is no need to regenerate an offline dataset.
#### 5b. Create a dataset for on-the-fly batch extraction
TODO
### B- Training stage
#### 1. Train HOP model
If you need to adapt the settings of your device for training, please edit the file ``my_config.json``. You can also change the loggers verbosity in this file.
When ready, simply run:
```sh
python3 training_scripts/NN_Filtering/common/training/main.py --json_config my_config.json --stage 3
```
#### 2. Convert model
The last ONNX model is converted into float SADL format.
```sh
python3 training_scripts/NN_Filtering/common/convert/to_sadl.py --json_config my_config.json \
--input_model stage3/training --output_model stage3/conversion/full_path_filename
```
#### 3. Integerize model
The float model is integerized into int16 SADL format using a naive quantization (all quantizers set to 2^11).
First, the ``naive_quantization`` software should be built; please refer to the SADL documentation (see the build sketch in II-B-3).
```sh
python3 training_scripts/NN_Filtering/HOP/quantize/quantize.py --json_config my_config.json --input_model stage3/conversion/full_path_filename --output_model stage3/quantize/full_path_filename
```
**Note:** the directory in dataset can now be deleted if there is no need to retrain.
#### 4. Test model
To test the int16 model, the type should be set to int16 in ``TypeDef.h``:
```
#if NN_LF_UNIFIED
using TypeSadlLFUnified=int16_t;
#endif
```
and the correct data macros should be set:
```
// which data are used for inference/dump
#define NNVC_USE_REC_BEFORE_DBF 1 // reconstruction before DBF
#define NNVC_USE_PRED 1 // prediction
#define NNVC_USE_BS 1 // BS of DBF
#define NNVC_USE_QP 1 // QP slice
#define JVET_AC0089_NNVC_USE_BPM_INFO 1 // JVET-AC0089: dump Block Prediction Mode
```
The model is tested with the latest NNVC version using the following parameters:
```
--NnlfOption=1 --NnlfModelName=stage3/train/model_int16.sadl
```
The configuration file is ``encoder_xxx_nnvc.cfg`` and the anchor is the latest NNVC version.
Note: ``NnlfOption`` was changed to 1 starting with NNVC-7.0.
## IV-Advanced
### A-Dataset
#### 1- Format
A dataset entry in the configuration can take one of two forms, as illustrated below:
* either ``start``/``count``/``path``,
* or ``path``/``dataset_file`` (the ``path`` variable is prepended to ``dataset_file`` when ``dataset_file`` is just a file name).

If a dataset entry contains ``path_yuv``, it is prepended to the yuv file names; otherwise the yuv files are assumed to be in the same location as the json file.
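A sketch of the second form, mirroring the ``bvi`` entry of ``paths.json`` above (entry name and values are placeholders):
```
"my_dataset": {
  "path": "/path/to/yuv_sequences",
  "dataset_file": "/path/to/my_dataset.json"
}
```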
### B- Parameters