From 80a2d1c9bc1f35257dbb0ee9d1a0663a8e5aaaaf Mon Sep 17 00:00:00 2001
From: Franck Galpin <franck.galpin@interdigital.com>
Date: Wed, 27 Jul 2022 17:49:32 +0200
Subject: [PATCH] first version data dumper + data loader + sample program +
 documentation

---
 README.md                                   | 222 ++++++++++++++-
 source/App/DecoderApp/DecApp.cpp            | 282 ++++++++++++++++++--
 source/App/DecoderApp/DecApp.h              |  26 ++
 source/App/DecoderApp/DecAppCfg.cpp         |   5 +-
 source/App/DecoderApp/DecAppCfg.h           |   3 +
 source/App/EncoderApp/EncAppCfg.cpp         |   4 +
 source/Lib/CommonLib/CodingStructure.cpp    |  43 ++-
 source/Lib/CommonLib/CodingStructure.h      |  25 ++
 source/Lib/CommonLib/LoopFilter.cpp         |  53 ++++
 source/Lib/CommonLib/Picture.cpp            | 121 +++++++++
 source/Lib/CommonLib/Picture.h              |  42 +++
 source/Lib/CommonLib/TypeDef.h              |  11 +
 source/Lib/DecoderLib/DecCu.cpp             |  17 ++
 source/Lib/DecoderLib/DecLib.cpp            |  28 ++
 source/Lib/DecoderLib/DecSlice.cpp          |   4 +-
 training/data_loader/data_loader.py         | 185 +++++++++++++
 training/example/create_unified_dataset.py  |  72 +++++
 training/example/display_patches_dataset.py |  25 ++
 training/example/sample_test.sh             |  48 ++++
 training/tools/concatenate_dataset.py       | 104 ++++++++
 20 files changed, 1299 insertions(+), 21 deletions(-)
 create mode 100644 training/data_loader/data_loader.py
 create mode 100644 training/example/create_unified_dataset.py
 create mode 100755 training/example/display_patches_dataset.py
 create mode 100644 training/example/sample_test.sh
 create mode 100644 training/tools/concatenate_dataset.py

diff --git a/README.md b/README.md
index d2853b793a..015c31c044 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-VTM reference software for VVC
+AhG 11 reference software for NNVC
 ==============================
 
-This software package is the reference software for Versatile Video Coding (VVC). The reference software includes both encoder and decoder functionality.
+This software package is the reference software for AhG11. The reference software includes both encoder and decoder functionality.
 
 Reference software is useful in aiding users of a video coding standard to establish and test conformance and interoperability, and to educate users and demonstrate the capabilities of the standard. For these purposes, this software is provided as an aid for the study and implementation of Versatile Video Coding.
 
@@ -9,9 +9,26 @@ The software has been jointly developed by the ITU-T Video Coding Experts Group
 
 A software manual, which contains usage instructions, can be found in the "doc" subdirectory of this software package.
 
+Content
+==================
+The package contains the following components:
+- a base codec based on VTM-11.0_nnvc (VTM-11.0 + JVET-V0056 patch)
+- a data dumping feature at the decoder, activated with the macro NNVC\_DUMP\_DATA in TypeDef.h. Encoder log is also slightly changed in order to log some useful information.
+- a data loading feature in python, compatible with the data dumped by the decoder
+- training scripts xxxx
+- an inference feature based on SADL supporting both float and int16 model
+
 Build instructions
 ==================
 
+Submodule 
+----------------------------------------------
+The SADL submodule needs to be initialized the first time, using the following commands:
+```bash
+git submodule init
+git submodule update
+```
+
 The CMake tool is used to create platform-specific build files. 
 
 Although CMake may be able to generate 32-bit binaries, **it is generally suggested to build 64-bit binaries**. 32-bit binaries are not able to access more than 2GB of RAM, which will not be sufficient for coding larger image formats. Building in 32-bit environments is not tested and will not be supported.
@@ -132,3 +149,204 @@ Install the needed toolchains:
 pacman -S --needed base-devel mingw-w64-i686-toolchain mingw-w64-x86_64-toolchain git subversion mingw-w64-i686-cmake mingw-w64-x86_64-cmake
 ```
 
+
+Data dumping
+==================
+
+The dumping of a dataset for a particular bitstream is obtained using:
+```bash
+DecoderAppStatic -b D_BlowingBubbles_416x240_50Hz_8bit_P420_37.bin --DumpBasename D_BlowingBubbles_416x240_50Hz_8bit_P420_37
+```
+It will produce several data files and a json description of the dump.
+We assume an encoder log file is also associated with each produced bitstream. The encoder log files should have necessary information (using the --Verbosity=6). The link between the decoder dump and the encoder configuration is done using the bitstream name. 
+The provided scripts will automatically create a json file containing all information (dumped data, original YUV, etc.) providing the correct directories.
+
+Data format
+----------------------------------------------
+The data dumper uses the following format for each component. 
+Note: a set of macros allows to enable/disable the data to be dumped/used.
+
+- basename\_rec\_before\_dbf.yuv:  reconstruction before deblocking filter (macro NNVC\_USE\_REC\_BEFORE\_DBF): YUV format, 10bits
+- basename\_rec\_after\_dbf.yuv:  reconstruction after deblocking filter (macro NNVC\_USE\_REC\_AFTER\_DBF): YUV format, 10bits
+- basename\_pred.yuv: prediction (macro NNVC\_USE\_PRED): YUV format, 10bits
+- basename\_bs.yuv: boundaries strength (macro NNVC\_USE\_BS): YUV format, 10bits. Boundaries strength of the deblocking filter, normalized to 10 bits
+- basename\_cu\_average.yuv: cu average (macro NNVC\_USE\_CU\_AVERAGE): YUV format, 10 bits. Contains the average of each CU.
+- basename\_qp.yuv: QP slice (macro NNVC\_USE\_QP): int32, binary format 
+- basename\_slicetype.yuv: slice type (macro NNVC\_USE\_SLICETYPE): int32, binary format 
+
+
+Json file 
+----------------------------------------------
+During data dump, a json file is produced for each bitstream, with the name basename.json.
+The format of the json file is as follows:
+```json
+{
+ "suffix_rec_after_dbf": "_rec_after_dbf.yuv", 
+ "suffix_pred": "_pred.yuv",
+ "suffix_cu_average": "_cu_average.yuv",
+ "suffix_bs": "_bs.yuv",
+ "suffix_qp": "_qp.dat",
+ "suffix_slicetype": "_slicetype.dat",
+ "data": [
+  {
+    "qp_base" : 32,
+    "bsname": "D_BlowingBubbles_416x240_50Hz_8bit_P420_37.bin",
+    "basename": "D_BlowingBubbles_416x240_50Hz_8bit_P420_37",
+    "width": 416,
+    "height": 240,
+    "data_count": 65
+  }
+ ]
+}
+```
+If a particular data is present, the corresponding suffix variable is defined.
+An array of one element is described:
+- the width and height of the frames
+- the number of frames dumped (data\_count)
+- the concatenation of a suffix and basename gives the full filename for this data
+- bsname: helps to find the correspondence between the dataset and the original encoded sequence
+- qp\_base: QP base to encode the sequence
+
+
+Build the database 
+----------------------------------------------
+The database building is done in 2 passes.
+
+First all individual dataset files are concatenated into one dataset file:
+```bash
+training/tools/concatenate_dataset.py --input_dir_json dir1 --input_dir_json dir2 --output_json pre_dataset.json
+```
+It will give a file with all individual json files concatenated:
+```json
+{
+ "suffix_rec_after_dbf": "_rec_after_dbf.yuv",
+ "suffix_pred": "_pred.yuv",
+ "suffix_cu_average": "_cu_average.yuv",
+ "suffix_bs": "_bs.yuv",
+ "suffix_qp": "_qp.dat",
+ "suffix_slicetype": "_slicetype.dat",
+ "data": [
+  {
+   "bsname": "D_RaceHorses_416x240_30Hz_8bit_P420_37_P2.bin",
+   "qp_base": 37,
+   "basename": "D_RaceHorses_416x240_30Hz_8bit_P420_37_P2",
+   "width": 416,
+   "height": 240,
+   "data_count": 33,
+   "dirname": "db"
+  },
+  {
+   "bsname": "D_BlowingBubbles_416x240_50Hz_8bit_P420_32_P1.bin",
+   "qp_base": 32,
+   "basename": "D_BlowingBubbles_416x240_50Hz_8bit_P420_32_P1",
+   "width": 416,
+   "height": 240,
+   "data_count": 65,
+   "dirname": "db"
+  },
+...]
+}
+```
+For each dataset, the directory _dirname_ is added to the data field.
+
+
+
+The file is then consolidated with information from the encoder log or the encoder configuration file used to encode the sequence in order to extract the YUV original file and other information:
+```bash
+training/tools/concatenate_dataset.py --input_json pre_dataset.json --input_dir_encoder direnc1 --input_dir_encoder direnc2 --output_json dataset.json --log_extension log
+```
+Note: To use the encoder configuration files, use the correct extension (for example --log_extension cfg).
+
+It results in a file with all information needed by the data loader.
+```json
+{
+ "suffix_rec_after_dbf": "_rec_after_dbf.yuv",
+ "suffix_pred": "_pred.yuv",
+ "suffix_cu_average": "_cu_average.yuv",
+ "suffix_bs": "_bs.yuv",
+ "suffix_qp": "_qp.dat",
+ "suffix_slicetype": "_slicetype.dat",
+ "data": [
+  {
+   "bsname": "D_RaceHorses_416x240_30Hz_8bit_P420_37_P2.bin",
+   "qp_base": 37,
+   "basename": "D_RaceHorses_416x240_30Hz_8bit_P420_37_P2",
+   "width": 416,
+   "height": 240,
+   "data_count": 33,
+   "dirname": "db",
+   "original_yuv": "/home/library01/VCL_VIDEO/JVET/Linux/ClassD/D_RaceHorses_416x240_30Hz_8bit_P420.yuv",
+   "original_temporal_subsample": 1,
+   "original_frame_skip": 32,
+   "original_bitdepth": 8
+  },
+  {
+   "bsname": "D_BlowingBubbles_416x240_50Hz_8bit_P420_32_P1.bin",
+   "qp_base": 32,
+   "basename": "D_BlowingBubbles_416x240_50Hz_8bit_P420_32_P1",
+   "width": 416,
+   "height": 240,
+   "data_count": 65,
+   "dirname": "db",
+   "original_yuv": "/home/library01/VCL_VIDEO/JVET/Linux/ClassD/D_BlowingBubbles_416x240_50Hz_8bit_P420.yuv",
+   "original_temporal_subsample": 1,
+   "original_frame_skip": 0,
+   "original_bitdepth": 8
+  },
+  ...
+  ]
+}
+```
+The following fields are added to each data:
+- original\_yuv: location of the original yuv file
+- original\_bitdepth: bitdepth of the original yuv file
+- original\_frame\_skip: frame to skip in original yuv file (offset from the beginning to find the data)
+- original\_temporal\_subsample: correspond to TemporalSubsampleRatio in VTM (used for AI configuration)
+
+Note: the last 2 fields are present in the encoder logs only if the NNVC encoder has been used to generate the sequence. If not present, it is assumed that frame\_skip=0 and temporal\_subsample=1.
+
+
+Data loading
+==================
+The module data_loader shows an example of data loading. The basic usage is as follows (see also the example program create\_unified\_dataset.py for an example of usage): 
+```python
+import data_loader
+patch_size=128
+border_size=8
+
+dl=data_loader.DataLoader(input_json,patch_size)
+print("Nb samples available: {}".format(dl.nb_patches()))
+print("Available components: {}".format(dl.components))
+
+# list of components to extract
+comps=["org_Y", "pred_Y", "qp_base"]
+p = dl.getPatchData(42,comps,border_size)
+# p now contains the 42nd patch in the database
+```
+
+On the fly loading
+----------------------------------------------
+Call getPatchData on random index to create a batch of data. 
+
+Pro: do not use more disk space because data is created on the fly from original dataset.
+
+Cons: slower to generate a batch. The loading function should be put in an async function to avoid I/O latency.
+
+
+Offline loading
+----------------------------------------------
+One can use the create\_unified\_dataset.py program to create a large dataset of already prepared patches.
+
+Pro: faster: only open one file and perform random reading inside.
+
+Cons: consume more disk space (float patches, duplication of the data).
+
+
+Full example of data dumping and data loading
+==================
+The script training/example/sample_test.sh shows a full example of data dumping, dataset creation and data loading. 
+The loaded data are put into one binary file with all the patches inside. This file can be used for fast I/O during training.
+Finally, a sample program allows to visualize the resulting dumped patches.
+
+
+
diff --git a/source/App/DecoderApp/DecApp.cpp b/source/App/DecoderApp/DecApp.cpp
index 85f63bb0f7..c01e3c7a86 100644
--- a/source/App/DecoderApp/DecApp.cpp
+++ b/source/App/DecoderApp/DecApp.cpp
@@ -322,9 +322,76 @@ uint32_t DecApp::decode()
         }
         if( ( m_cDecLib.getVPS() != nullptr && ( m_cDecLib.getVPS()->getMaxLayers() == 1 || xIsNaluWithinTargetOutputLayerIdSet( &nalu ) ) ) || m_cDecLib.getVPS() == nullptr )
         {
-          m_cVideoIOYuvReconFile[nalu.m_nuhLayerId].open( reconFileName, true, m_outputBitDepth, m_outputBitDepth, bitDepths.recon ); // write mode
+          m_cVideoIOYuvReconFile[nalu.m_nuhLayerId].open( reconFileName, true, m_outputBitDepth, m_outputBitDepth, bitDepths.recon ); // write mode          
         }
       }
+#if NNVC_DUMP_DATA
+      if( !m_dumpBasename.empty() && m_dumpDataCnt<0 ) {
+        if( ( m_cDecLib.getVPS() != nullptr && ( m_cDecLib.getVPS()->getMaxLayers() == 1 || xIsNaluWithinTargetOutputLayerIdSet( &nalu ) ) ) || m_cDecLib.getVPS() == nullptr )
+        {
+          const BitDepths &bitDepths=pcListPic->front()->cs->sps->getBitDepths(); // use bit depths of first reconstructed picture.
+          for( uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++ )
+          {
+            if( m_outputBitDepth[channelType] == 0 )
+            {
+              m_outputBitDepth[channelType] = bitDepths.recon[channelType];
+            }
+          }
+#if NNVC_DUMP_DATA
+          m_jsonFile.open(m_dumpBasename + ".json");
+          m_jsonFile << "{\n";
+#if NNVC_USE_REC_BEFORE_DBF
+          m_cVideoIOYuvReconBeforeDbfFile[nalu.m_nuhLayerId].open(m_dumpBasename + "_rec_before_dbf.yuv", true,
+                                                                 m_outputBitDepth, m_outputBitDepth,
+                                                                 bitDepths.recon);   // write mode
+          m_jsonFile << " \"suffix_rec_before_dbf\": \"_rec_before_dbf.yuv\",\n";
+#endif
+#if NNVC_USE_REC_AFTER_DBF
+          m_cVideoIOYuvReconAfterDbfFile[nalu.m_nuhLayerId].open(m_dumpBasename + "_rec_after_dbf.yuv", true,
+                                                                 m_outputBitDepth, m_outputBitDepth,
+                                                                 bitDepths.recon);   // write mode
+          m_jsonFile << " \"suffix_rec_after_dbf\": \"_rec_after_dbf.yuv\",\n";
+#endif
+#if NNVC_USE_PRED
+          m_cVideoIOYuvPredFile[nalu.m_nuhLayerId].open(m_dumpBasename + "_pred.yuv", true, m_outputBitDepth,
+                                                        m_outputBitDepth, bitDepths.recon);   // write mode
+          m_jsonFile << " \"suffix_pred\": \"_pred.yuv\",\n";
+#endif
+#if NNVC_USE_CU_AVERAGE
+          m_cVideoIOYuvCUAverageFile[nalu.m_nuhLayerId].open(m_dumpBasename + "_cu_average.yuv", true, m_outputBitDepth,
+                                                             m_outputBitDepth,
+                                                             bitDepths.recon);   // write mode
+          m_jsonFile << " \"suffix_cu_average\": \"_cu_average.yuv\",\n";
+#endif
+#if NNVC_USE_BS
+          m_cVideoIOYuvBsMapFile[nalu.m_nuhLayerId].open(m_dumpBasename + "_bs.yuv", true, m_outputBitDepth,
+                                                         m_outputBitDepth, bitDepths.recon);   // write mode
+          m_jsonFile << " \"suffix_bs\": \"_bs.yuv\",\n";
+#endif
+#if NNVC_USE_QP
+          m_qpFile.open(m_dumpBasename + "_qp.dat", std::ios::binary);   // write mode
+          m_jsonFile << " \"suffix_qp\": \"_qp.dat\",\n";
+#endif
+#if NNVC_USE_SLICETYPE
+          m_sliceTypeFile.open(m_dumpBasename + "_slicetype.dat", std::ios::binary);   // write mode
+          m_jsonFile << " \"suffix_slicetype\": \"_slicetype.dat\",\n";
+#endif
+          const SPS *       activeSPS = (pcListPic->front()->cs->sps);
+          const PPS *       activePPS = (pcListPic->front()->cs->pps);
+          const std::string basename  = m_dumpBasename.substr(m_dumpBasename.find_last_of("/\\") + 1);
+          const std::string bsname  = m_bitstreamFileName.substr(m_bitstreamFileName.find_last_of("/\\") + 1);
+          m_jsonFile << " \"data\": [\n"
+                        "  {\n"
+                     << "    \"bsname\": \""<<bsname<<"\",\n"
+                     << "    \"qp_base\": "<<activePPS->getPicInitQPMinus26()+26<<",\n"
+                     << "    \"basename\": \"" << basename << "\",\n"
+                     << "    \"width\": " << activeSPS->getMaxPicWidthInLumaSamples() << ",\n"
+                     << "    \"height\": " << activeSPS->getMaxPicHeightInLumaSamples() << ",\n";
+          m_dumpDataCnt = 0;
+#endif
+        }
+      }
+#endif
       // update file bitdepth shift if recon bitdepth changed between sequences
       for( uint32_t channelType = 0; channelType < MAX_NUM_CHANNEL_TYPE; channelType++ )
       {
@@ -535,6 +602,53 @@ void DecApp::xDestroyDecLib()
     }
   }
 
+#if NNVC_DUMP_DATA
+  if (!m_dumpBasename.empty())
+  {
+#if NNVC_USE_REC_AFTER_DBF
+    for (auto &recAfterDbfFile: m_cVideoIOYuvReconAfterDbfFile)
+    {
+      recAfterDbfFile.second.close();
+    }
+#endif
+#if NNVC_USE_REC_BEFORE_DBF
+    for (auto &recBeforeDbfFile: m_cVideoIOYuvReconBeforeDbfFile)
+    {
+      recBeforeDbfFile.second.close();
+    }
+#endif
+#if NNVC_USE_PRED
+    for (auto &predFile: m_cVideoIOYuvPredFile)
+    {
+      predFile.second.close();
+    }
+#endif
+#if NNVC_USE_CU_AVERAGE
+    for (auto &cuAverageFile: m_cVideoIOYuvCUAverageFile)
+    {
+      cuAverageFile.second.close();
+    }
+#endif
+#if NNVC_USE_BS
+    for (auto &bsMapFile: m_cVideoIOYuvBsMapFile)
+    {
+      bsMapFile.second.close();
+    }
+#endif
+#if NNVC_USE_QP
+    m_qpFile.close();
+#endif
+#if NNVC_USE_SLICETYPE
+    m_sliceTypeFile.close();
+#endif
+    m_jsonFile << "    \"data_count\": " << m_dumpDataCnt << "\n"
+               << "  }\n"
+               << " ]\n"
+               << "}";
+    m_jsonFile.close();
+  }
+#endif
+
   // destroy decoder class
   m_cDecLib.destroy();
 }
@@ -683,15 +797,82 @@ void DecApp::xWriteOutput( PicList* pcListPic, uint32_t tId )
           else
           {
             m_cVideoIOYuvReconFile[pcPic->layerId].write( pcPic->getRecoBuf().get( COMPONENT_Y ).width, pcPic->getRecoBuf().get( COMPONENT_Y ).height, pcPic->getRecoBuf(),
-                                        m_outputColourSpaceConvert,
-                                        m_packedYUVMode,
-                                        conf.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ),
-                                        conf.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ),
-                                        conf.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ),
-                                        conf.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ),
-                                        NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
-            }
+                                                         m_outputColourSpaceConvert,
+                                                         m_packedYUVMode,
+                                                         conf.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+                                                         conf.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+                                                         conf.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ),
+                                                         conf.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ),
+                                                         NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
+          }
+        }
+#if  NNVC_DUMP_DATA
+        if (!m_dumpBasename.empty())
+        {
+          const Window &conf = pcPic->getConformanceWindow();
+          const SPS* sps = pcPic->cs->sps;
+          ChromaFormat chromaFormatIDC = sps->getChromaFormatIdc();
+          m_dumpDataCnt++;
+#if NNVC_USE_REC_AFTER_DBF
+          m_cVideoIOYuvReconAfterDbfFile[pcPic->layerId].write(
+            pcPic->getRecAfterDbfBuf().get(COMPONENT_Y).width, pcPic->getRecAfterDbfBuf().get(COMPONENT_Y).height,
+            pcPic->getRecAfterDbfBuf(), m_outputColourSpaceConvert, m_packedYUVMode,
+            conf.getWindowLeftOffset() * SPS::getWinUnitX(chromaFormatIDC),
+            conf.getWindowRightOffset() * SPS::getWinUnitX(chromaFormatIDC),
+            conf.getWindowTopOffset() * SPS::getWinUnitY(chromaFormatIDC),
+            conf.getWindowBottomOffset() * SPS::getWinUnitY(chromaFormatIDC), NUM_CHROMA_FORMAT,
+            m_bClipOutputVideoToRec709Range);
+#endif
+#if NNVC_USE_REC_BEFORE_DBF
+          m_cVideoIOYuvReconBeforeDbfFile[pcPic->layerId].write(
+            pcPic->getRecBeforeDbfBuf().get(COMPONENT_Y).width, pcPic->getRecBeforeDbfBuf().get(COMPONENT_Y).height,
+            pcPic->getRecBeforeDbfBuf(), m_outputColourSpaceConvert, m_packedYUVMode,
+            conf.getWindowLeftOffset() * SPS::getWinUnitX(chromaFormatIDC),
+            conf.getWindowRightOffset() * SPS::getWinUnitX(chromaFormatIDC),
+            conf.getWindowTopOffset() * SPS::getWinUnitY(chromaFormatIDC),
+            conf.getWindowBottomOffset() * SPS::getWinUnitY(chromaFormatIDC), NUM_CHROMA_FORMAT,
+            m_bClipOutputVideoToRec709Range);
+#endif
+#if NNVC_USE_PRED
+          m_cVideoIOYuvPredFile[pcPic->layerId].write(
+            pcPic->getPredBufCustom().get(COMPONENT_Y).width, pcPic->getPredBufCustom().get(COMPONENT_Y).height,
+            pcPic->getPredBufCustom(), m_outputColourSpaceConvert, m_packedYUVMode,
+            conf.getWindowLeftOffset() * SPS::getWinUnitX(chromaFormatIDC),
+            conf.getWindowRightOffset() * SPS::getWinUnitX(chromaFormatIDC),
+            conf.getWindowTopOffset() * SPS::getWinUnitY(chromaFormatIDC),
+            conf.getWindowBottomOffset() * SPS::getWinUnitY(chromaFormatIDC), NUM_CHROMA_FORMAT,
+            m_bClipOutputVideoToRec709Range);
+#endif
+#if NNVC_USE_CU_AVERAGE
+          m_cVideoIOYuvCUAverageFile[pcPic->layerId].write(
+            pcPic->getCuAverageBuf().get(COMPONENT_Y).width, pcPic->getCuAverageBuf().get(COMPONENT_Y).height,
+            pcPic->getCuAverageBuf(), m_outputColourSpaceConvert, m_packedYUVMode,
+            conf.getWindowLeftOffset() * SPS::getWinUnitX(chromaFormatIDC),
+            conf.getWindowRightOffset() * SPS::getWinUnitX(chromaFormatIDC),
+            conf.getWindowTopOffset() * SPS::getWinUnitY(chromaFormatIDC),
+            conf.getWindowBottomOffset() * SPS::getWinUnitY(chromaFormatIDC), NUM_CHROMA_FORMAT,
+            m_bClipOutputVideoToRec709Range);
+#endif
+#if NNVC_USE_BS
+          m_cVideoIOYuvBsMapFile[pcPic->layerId].write(
+            pcPic->getBsMapBuf().get(COMPONENT_Y).width, pcPic->getBsMapBuf().get(COMPONENT_Y).height,
+            pcPic->getBsMapBuf(), m_outputColourSpaceConvert, m_packedYUVMode,
+            conf.getWindowLeftOffset() * SPS::getWinUnitX(chromaFormatIDC),
+            conf.getWindowRightOffset() * SPS::getWinUnitX(chromaFormatIDC),
+            conf.getWindowTopOffset() * SPS::getWinUnitY(chromaFormatIDC),
+            conf.getWindowBottomOffset() * SPS::getWinUnitY(chromaFormatIDC), NUM_CHROMA_FORMAT,
+            m_bClipOutputVideoToRec709Range);
+#endif
+#if NNVC_USE_QP
+          int qp = pcPic->cs->slice->getSliceQp();
+          m_qpFile.write((const char *) &qp, sizeof(qp));
+#endif
+#if NNVC_USE_SLICETYPE
+          int st = pcPic->cs->slice->getSliceType();
+          m_sliceTypeFile.write((const char *) &st, sizeof(st));
+#endif
         }
+#endif
         writeLineToOutputLog(pcPic);
 
         // update POC of display order
@@ -830,15 +1011,84 @@ void DecApp::xFlushOutput( PicList* pcListPic, const int layerId )
             else
             {
               m_cVideoIOYuvReconFile[pcPic->layerId].write( pcPic->getRecoBuf().get( COMPONENT_Y ).width, pcPic->getRecoBuf().get( COMPONENT_Y ).height, pcPic->getRecoBuf(),
-                                        m_outputColourSpaceConvert,
-                                        m_packedYUVMode,
-                                        conf.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ),
-                                        conf.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ),
-                                        conf.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ),
-                                        conf.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ),
-                                        NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
+                                                           m_outputColourSpaceConvert,
+                                                           m_packedYUVMode,
+                                                           conf.getWindowLeftOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+                                                           conf.getWindowRightOffset() * SPS::getWinUnitX( chromaFormatIDC ),
+                                                           conf.getWindowTopOffset() * SPS::getWinUnitY( chromaFormatIDC ),
+                                                           conf.getWindowBottomOffset() * SPS::getWinUnitY( chromaFormatIDC ),
+                                                           NUM_CHROMA_FORMAT, m_bClipOutputVideoToRec709Range );
               }
           }
+          // write to file
+#if NNVC_DUMP_DATA
+          if (!m_dumpBasename.empty())
+          {
+            const Window &conf = pcPic->getConformanceWindow();
+            const SPS* sps = pcPic->cs->sps;
+            ChromaFormat chromaFormatIDC = sps->getChromaFormatIdc();
+            ++m_dumpDataCnt;
+#if NNVC_USE_REC_BEFORE_DBF
+            m_cVideoIOYuvReconBeforeDbfFile[pcPic->layerId].write(
+              pcPic->getRecBeforeDbfBuf().get(COMPONENT_Y).width, pcPic->getRecBeforeDbfBuf().get(COMPONENT_Y).height,
+              pcPic->getRecBeforeDbfBuf(), m_outputColourSpaceConvert, m_packedYUVMode,
+              conf.getWindowLeftOffset() * SPS::getWinUnitX(chromaFormatIDC),
+              conf.getWindowRightOffset() * SPS::getWinUnitX(chromaFormatIDC),
+              conf.getWindowTopOffset() * SPS::getWinUnitY(chromaFormatIDC),
+              conf.getWindowBottomOffset() * SPS::getWinUnitY(chromaFormatIDC), NUM_CHROMA_FORMAT,
+              m_bClipOutputVideoToRec709Range);
+#endif
+#if NNVC_USE_REC_AFTER_DBF
+            m_cVideoIOYuvReconAfterDbfFile[pcPic->layerId].write(
+              pcPic->getRecAfterDbfBuf().get(COMPONENT_Y).width, pcPic->getRecAfterDbfBuf().get(COMPONENT_Y).height,
+              pcPic->getRecAfterDbfBuf(), m_outputColourSpaceConvert, m_packedYUVMode,
+              conf.getWindowLeftOffset() * SPS::getWinUnitX(chromaFormatIDC),
+              conf.getWindowRightOffset() * SPS::getWinUnitX(chromaFormatIDC),
+              conf.getWindowTopOffset() * SPS::getWinUnitY(chromaFormatIDC),
+              conf.getWindowBottomOffset() * SPS::getWinUnitY(chromaFormatIDC), NUM_CHROMA_FORMAT,
+              m_bClipOutputVideoToRec709Range);
+#endif
+#if NNVC_USE_PRED
+            m_cVideoIOYuvPredFile[pcPic->layerId].write(
+              pcPic->getPredBufCustom().get(COMPONENT_Y).width, pcPic->getPredBufCustom().get(COMPONENT_Y).height,
+              pcPic->getPredBufCustom(), m_outputColourSpaceConvert, m_packedYUVMode,
+              conf.getWindowLeftOffset() * SPS::getWinUnitX(chromaFormatIDC),
+              conf.getWindowRightOffset() * SPS::getWinUnitX(chromaFormatIDC),
+              conf.getWindowTopOffset() * SPS::getWinUnitY(chromaFormatIDC),
+              conf.getWindowBottomOffset() * SPS::getWinUnitY(chromaFormatIDC), NUM_CHROMA_FORMAT,
+              m_bClipOutputVideoToRec709Range);
+#endif
+#if NNVC_USE_CU_AVERAGE
+            m_cVideoIOYuvCUAverageFile[pcPic->layerId].write(
+              pcPic->getCuAverageBuf().get(COMPONENT_Y).width, pcPic->getCuAverageBuf().get(COMPONENT_Y).height,
+              pcPic->getCuAverageBuf(), m_outputColourSpaceConvert, m_packedYUVMode,
+              conf.getWindowLeftOffset() * SPS::getWinUnitX(chromaFormatIDC),
+              conf.getWindowRightOffset() * SPS::getWinUnitX(chromaFormatIDC),
+              conf.getWindowTopOffset() * SPS::getWinUnitY(chromaFormatIDC),
+              conf.getWindowBottomOffset() * SPS::getWinUnitY(chromaFormatIDC), NUM_CHROMA_FORMAT,
+              m_bClipOutputVideoToRec709Range);
+#endif
+#if NNVC_USE_BS
+            m_cVideoIOYuvBsMapFile[pcPic->layerId].write(
+              pcPic->getBsMapBuf().get(COMPONENT_Y).width, pcPic->getBsMapBuf().get(COMPONENT_Y).height,
+              pcPic->getBsMapBuf(), m_outputColourSpaceConvert, m_packedYUVMode,
+              conf.getWindowLeftOffset() * SPS::getWinUnitX(chromaFormatIDC),
+              conf.getWindowRightOffset() * SPS::getWinUnitX(chromaFormatIDC),
+              conf.getWindowTopOffset() * SPS::getWinUnitY(chromaFormatIDC),
+              conf.getWindowBottomOffset() * SPS::getWinUnitY(chromaFormatIDC), NUM_CHROMA_FORMAT,
+              m_bClipOutputVideoToRec709Range);
+#endif
+#if NNVC_USE_QP
+            int qp = pcPic->cs->slice->getSliceQp();
+            m_qpFile.write((const char *) &qp, sizeof(qp));
+#endif
+#if NNVC_USE_SLICETYPE
+            int st = pcPic->cs->slice->getSliceType();
+            m_sliceTypeFile.write((const char *) &st, sizeof(st));
+#endif
+          }
+#endif
+
           writeLineToOutputLog(pcPic);
 #if JVET_S0078_NOOUTPUTPRIORPICFLAG
         }
diff --git a/source/App/DecoderApp/DecApp.h b/source/App/DecoderApp/DecApp.h
index 11f88ed5ee..784f034851 100644
--- a/source/App/DecoderApp/DecApp.h
+++ b/source/App/DecoderApp/DecApp.h
@@ -62,6 +62,32 @@ private:
   DecLib          m_cDecLib;                     ///< decoder class
   std::unordered_map<int, VideoIOYuv>      m_cVideoIOYuvReconFile;        ///< reconstruction YUV class
 
+#if NNVC_DUMP_DATA
+  std::ofstream                            m_jsonFile;
+  int                                      m_dumpDataCnt=-1; // counter for data dump
+#if NNVC_USE_REC_BEFORE_DBF
+  std::unordered_map<int, VideoIOYuv>     m_cVideoIOYuvReconBeforeDbfFile;        ///< reconstruction YUV
+#endif
+#if NNVC_USE_REC_AFTER_DBF
+   std::unordered_map<int, VideoIOYuv>     m_cVideoIOYuvReconAfterDbfFile;        ///< reconstruction YUV
+#endif
+#if NNVC_USE_PRED
+  std::unordered_map<int, VideoIOYuv>      m_cVideoIOYuvPredFile;         ///< prediction
+#endif
+#if NNVC_USE_CU_AVERAGE
+  std::unordered_map<int, VideoIOYuv>      m_cVideoIOYuvCUAverageFile;    ///< partition
+#endif
+#if NNVC_USE_BS
+  std::unordered_map<int, VideoIOYuv>      m_cVideoIOYuvBsMapFile;        ///< bs map
+#endif
+#if NNVC_USE_QP
+  std::ofstream                            m_qpFile;    ///< qp slice
+#endif
+#if NNVC_USE_SLICETYPE
+  std::ofstream                            m_sliceTypeFile;    ///< slice type
+#endif
+#endif
+
   // for output control
   int             m_iPOCLastDisplay;              ///< last POC in display order
   std::ofstream   m_seiMessageFileStream;         ///< Used for outputing SEI messages.
diff --git a/source/App/DecoderApp/DecAppCfg.cpp b/source/App/DecoderApp/DecAppCfg.cpp
index d96c20493f..801c24335b 100644
--- a/source/App/DecoderApp/DecAppCfg.cpp
+++ b/source/App/DecoderApp/DecAppCfg.cpp
@@ -77,6 +77,10 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
   ("BitstreamFile,b",           m_bitstreamFileName,                   string(""), "bitstream input file name")
   ("ReconFile,o",               m_reconFileName,                       string(""), "reconstructed YUV output file name\n")
 
+#if NNVC_DUMP_DATA
+  ("DumpBasename",                  m_dumpBasename,                        string(""), "basename for data dumping\n")
+#endif
+    
   ("OplFile,-opl",              m_oplFilename ,                        string(""), "opl-file name without extension for conformance testing\n")
 
 #if ENABLE_SIMD_OPT
@@ -250,7 +254,6 @@ DecAppCfg::DecAppCfg()
 : m_bitstreamFileName()
 , m_reconFileName()
 , m_oplFilename()
-
 , m_iSkipFrame(0)
 // m_outputBitDepth array initialised below
 , m_outputColourSpaceConvert(IPCOLOURSPACE_UNCHANGED)
diff --git a/source/App/DecoderApp/DecAppCfg.h b/source/App/DecoderApp/DecAppCfg.h
index ba7c0338ef..35fef707cc 100644
--- a/source/App/DecoderApp/DecAppCfg.h
+++ b/source/App/DecoderApp/DecAppCfg.h
@@ -59,6 +59,9 @@ protected:
   std::string   m_bitstreamFileName;                    ///< input bitstream file name
   std::string   m_reconFileName;                        ///< output reconstruction file name
 
+#if NNVC_DUMP_DATA
+  std::string   m_dumpBasename;                         ///< output basename for data
+#endif  
   std::string   m_oplFilename;                        ///< filename to output conformance log.
 
   int           m_iSkipFrame;                           ///< counter for frames prior to the random access point to skip
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index b38001ebc7..4ceb924d80 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -3811,6 +3811,10 @@ void EncAppCfg::xPrintParameter()
   //msg( DETAILS, "\n" );
   msg( DETAILS, "Input          File                    : %s\n", m_inputFileName.c_str() );
   msg( DETAILS, "Bitstream      File                    : %s\n", m_bitstreamFileName.c_str() );
+#if NNVC_INFO_ENCODER
+  msg( DETAILS, "TemporalSubsampleRatio                 : %d\n", m_temporalSubsampleRatio );
+  msg( DETAILS, "FrameSkip                              : %d\n", m_FrameSkip);
+#endif
   msg( DETAILS, "Reconstruction File                    : %s\n", m_reconFileName.c_str() );
   msg( DETAILS, "Real     Format                        : %dx%d %gHz\n", m_iSourceWidth - m_confWinLeft - m_confWinRight, m_iSourceHeight - m_confWinTop - m_confWinBottom, (double)m_iFrameRate / m_temporalSubsampleRatio );
   msg( DETAILS, "Internal Format                        : %dx%d %gHz\n", m_iSourceWidth, m_iSourceHeight, (double)m_iFrameRate / m_temporalSubsampleRatio );
diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp
index b655d445fa..2f0fc04fa1 100644
--- a/source/Lib/CommonLib/CodingStructure.cpp
+++ b/source/Lib/CommonLib/CodingStructure.cpp
@@ -107,6 +107,9 @@ void CodingStructure::destroy()
   parent    = nullptr;
 
   m_pred.destroy();
+#if NNVC_USE_PRED
+  m_predCustom.destroy();
+#endif
   m_resi.destroy();
   m_reco.destroy();
   m_orgr.destroy();
@@ -895,6 +898,9 @@ void CodingStructure::create(const ChromaFormat &_chromaFormat, const Area& _are
 
   m_reco.create( area );
   m_pred.create( area );
+#if NNVC_USE_PRED
+  m_predCustom.create( area );
+#endif
   m_resi.create( area );
   m_orgr.create( area );
 }
@@ -910,6 +916,9 @@ void CodingStructure::create(const UnitArea& _unit, const bool isTopLayer, const
 
   m_reco.create( area );
   m_pred.create( area );
+#if NNVC_USE_PRED
+  m_predCustom.create( area );
+#endif
   m_resi.create( area );
   m_orgr.create( area );
 }
@@ -1082,6 +1091,16 @@ void CodingStructure::rebindPicBufs()
   {
     m_pred.destroy();
   }
+#if NNVC_USE_PRED
+  if( !picture->M_BUFS( 0, PIC_PREDICTION_CUSTOM ).bufs.empty() )
+  {
+    m_predCustom.createFromBuf( picture->M_BUFS( 0, PIC_PREDICTION_CUSTOM ) );
+  }
+  else
+  {
+    m_predCustom.destroy();
+  }
+#endif
   if (!picture->M_BUFS(0, PIC_RESIDUAL).bufs.empty())
   {
     m_resi.createFromBuf(picture->M_BUFS(0, PIC_RESIDUAL));
@@ -1240,12 +1259,15 @@ void CodingStructure::useSubStructure( const CodingStructure& subStruct, const C
   if( parent )
   {
     // copy data to picture
+#if NNVC_USE_PRED
+    getPredBufCustom              ( clippedArea ).copyFrom( subStruct.getPredBuf( clippedArea ) );
+    getPredBuf                    ( clippedArea ).copyFrom( subStruct.getPredBuf( clippedArea ) );
+#endif
     if( cpyPred )    getPredBuf   ( clippedArea ).copyFrom( subPredBuf );
     if( cpyResi )    getResiBuf   ( clippedArea ).copyFrom( subResiBuf );
     if( cpyReco )    getRecoBuf   ( clippedArea ).copyFrom( subRecoBuf );
     if( cpyOrgResi ) getOrgResiBuf( clippedArea ).copyFrom( subStruct.getOrgResiBuf( clippedArea ) );
   }
-
   if( cpyPred ) picture->getPredBuf( clippedArea ).copyFrom( subPredBuf );
   if( cpyResi ) picture->getResiBuf( clippedArea ).copyFrom( subResiBuf );
   if( cpyReco ) picture->getRecoBuf( clippedArea ).copyFrom( subRecoBuf );
@@ -1562,6 +1584,13 @@ const CPelBuf     CodingStructure::getPredBuf(const CompArea &blk)     const { r
        PelUnitBuf CodingStructure::getPredBuf(const UnitArea &unit)          { return getBuf(unit, PIC_PREDICTION); }
 const CPelUnitBuf CodingStructure::getPredBuf(const UnitArea &unit)    const { return getBuf(unit, PIC_PREDICTION); }
 
+#if NNVC_USE_PRED
+       PelBuf     CodingStructure::getPredBufCustom(const CompArea &blk)           { return getBuf(blk,  PIC_PREDICTION_CUSTOM); }
+const CPelBuf     CodingStructure::getPredBufCustom(const CompArea &blk)     const { return getBuf(blk,  PIC_PREDICTION_CUSTOM); }
+       PelUnitBuf CodingStructure::getPredBufCustom(const UnitArea &unit)          { return getBuf(unit, PIC_PREDICTION_CUSTOM); }
+const CPelUnitBuf CodingStructure::getPredBufCustom(const UnitArea &unit)    const { return getBuf(unit, PIC_PREDICTION_CUSTOM); }
+#endif
+
        PelBuf     CodingStructure::getResiBuf(const CompArea &blk)           { return getBuf(blk,  PIC_RESIDUAL); }
 const CPelBuf     CodingStructure::getResiBuf(const CompArea &blk)     const { return getBuf(blk,  PIC_RESIDUAL); }
        PelUnitBuf CodingStructure::getResiBuf(const UnitArea &unit)          { return getBuf(unit, PIC_RESIDUAL); }
@@ -1602,6 +1631,12 @@ PelBuf CodingStructure::getBuf( const CompArea &blk, const PictureType &type )
   const ComponentID compID = blk.compID;
 
   PelStorage* buf = type == PIC_PREDICTION ? &m_pred : ( type == PIC_RESIDUAL ? &m_resi : ( type == PIC_RECONSTRUCTION ? &m_reco : ( type == PIC_ORG_RESI ? &m_orgr : nullptr ) ) );
+#if NNVC_USE_PRED
+  if (type == PIC_PREDICTION_CUSTOM)
+  {
+    buf = &m_predCustom;
+  }
+#endif
 
   CHECK( !buf, "Unknown buffer requested" );
 
@@ -1636,6 +1671,12 @@ const CPelBuf CodingStructure::getBuf( const CompArea &blk, const PictureType &t
   const ComponentID compID = blk.compID;
 
   const PelStorage* buf = type == PIC_PREDICTION ? &m_pred : ( type == PIC_RESIDUAL ? &m_resi : ( type == PIC_RECONSTRUCTION ? &m_reco : ( type == PIC_ORG_RESI ? &m_orgr : nullptr ) ) );
+#if NNVC_USE_PRED
+  if (type == PIC_PREDICTION_CUSTOM)
+  {
+    buf = &m_predCustom;
+  }
+#endif
 
   CHECK( !buf, "Unknown buffer requested" );
 
diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h
index b5ae7ac630..085af75296 100644
--- a/source/Lib/CommonLib/CodingStructure.h
+++ b/source/Lib/CommonLib/CodingStructure.h
@@ -62,6 +62,21 @@ enum PictureType
   PIC_ORIGINAL_INPUT,
   PIC_TRUE_ORIGINAL_INPUT,
   PIC_FILTERED_ORIGINAL_INPUT,
+#if NNVC_USE_CU_AVERAGE
+  PIC_CU_AVERAGE,
+#endif
+#if NNVC_USE_BS
+  PIC_BS_MAP,
+#endif
+#if NNVC_USE_PRED
+  PIC_PREDICTION_CUSTOM,
+#endif
+#if NNVC_USE_REC_BEFORE_DBF
+  PIC_REC_BEFORE_DBF,
+#endif
+#if NNVC_USE_REC_AFTER_DBF
+  PIC_REC_AFTER_DBF,
+#endif
   NUM_PIC_TYPES
 };
 extern XUCache g_globalUnitCache;
@@ -228,6 +243,9 @@ private:
   std::vector<SAOBlkParam> m_sao;
 
   PelStorage m_pred;
+#if NNVC_USE_PRED
+  PelStorage m_predCustom;
+#endif
   PelStorage m_resi;
   PelStorage m_reco;
   PelStorage m_orgr;
@@ -268,6 +286,13 @@ public:
          PelUnitBuf   getPredBuf(const UnitArea &unit);
   const CPelUnitBuf   getPredBuf(const UnitArea &unit) const;
 
+#if NNVC_USE_PRED
+         PelBuf       getPredBufCustom(const CompArea &blk);
+  const CPelBuf       getPredBufCustom(const CompArea &blk) const;
+         PelUnitBuf   getPredBufCustom(const UnitArea &unit);
+  const CPelUnitBuf   getPredBufCustom(const UnitArea &unit) const;
+#endif
+
          PelBuf       getResiBuf(const CompArea &blk);
   const CPelBuf       getResiBuf(const CompArea &blk) const;
          PelUnitBuf   getResiBuf(const UnitArea &unit);
diff --git a/source/Lib/CommonLib/LoopFilter.cpp b/source/Lib/CommonLib/LoopFilter.cpp
index 2dd4e854ec..c61fe502f3 100644
--- a/source/Lib/CommonLib/LoopFilter.cpp
+++ b/source/Lib/CommonLib/LoopFilter.cpp
@@ -394,6 +394,48 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
   }
 
   const unsigned uiPelsInPart = pcv.minCUWidth;
+#if NNVC_USE_BS
+  auto storeBS = [this, cu, uiPelsInPart, edgeDir]( char bs, Position pos, ComponentID comp) -> void
+  {
+    Size sz;
+    Position pos_pq;
+
+    int edge_longside = uiPelsInPart;
+    int edge_shortside = 2;
+
+    int scale_x = getComponentScaleX( comp, cu.chromaFormat );
+    int scale_y = getComponentScaleY( comp, cu.chromaFormat );
+
+    if(edgeDir == EDGE_HOR)
+    {
+      sz.width = edge_longside;
+      sz.height= edge_shortside << scale_y;
+      pos_pq = Position( pos.x, pos.y- ( 1<< scale_y) );
+    }
+    else
+    {
+      sz.height= edge_longside;
+      sz.width = edge_shortside << scale_x;
+      pos_pq = Position( pos.x - ( 1<< scale_x) , pos.y);
+    }
+
+    CompArea edge_area(comp, cu.chromaFormat, pos_pq, sz, true);
+    auto bs_comp = BsGet(bs, comp);
+    Pel to_fill = bs_comp == 2 ? 1023 : ( bs_comp == 1 ? 512 : 0);
+    auto target_buff = cu.slice->getPic()->getBsMapBuf(edge_area);
+    for( auto i = 0; i < target_buff.height; ++i)
+    {
+      for( auto j = 0; j < target_buff.width; ++j)
+      {
+        target_buff.at( j, i ) = std::max(target_buff.at( j, i ), to_fill );
+        
+      }
+      
+    }
+    
+
+  };
+#endif
 
   for( int y = 0; y < area.height; y += uiPelsInPart )
   {
@@ -409,10 +451,21 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
         if(cu.treeType != TREE_C)
         {
           bS |= xGetBoundaryStrengthSingle( cu, edgeDir, localPos, CHANNEL_TYPE_LUMA );
+#if NNVC_USE_BS
+          if(cu.blocks[COMPONENT_Y].valid())
+            storeBS(bS, Position(area.x + x, area.y + y), COMPONENT_Y);
+#endif
         }
         if(cu.treeType != TREE_L && cu.chromaFormat != CHROMA_400 && cu.blocks[COMPONENT_Cb].valid())
         {
           bS |= xGetBoundaryStrengthSingle( cu, edgeDir, localPos, CHANNEL_TYPE_CHROMA );
+#if NNVC_USE_BS
+          if ( pcv.chrFormat != CHROMA_400 && cu.blocks[COMPONENT_Cb].valid() )
+          {
+            storeBS(bS, Position(area.x + x, area.y + y), COMPONENT_Cb);
+            storeBS(bS, Position(area.x + x, area.y + y), COMPONENT_Cr);
+          }
+#endif
         }
         m_aapucBS[edgeDir][rasterIdx] = bS;
       }
diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp
index a7205badc3..011913ecf9 100644
--- a/source/Lib/CommonLib/Picture.cpp
+++ b/source/Lib/CommonLib/Picture.cpp
@@ -207,6 +207,18 @@ void Picture::create( const ChromaFormat &_chromaFormat, const Size &size, const
   const Area a      = Area( Position(), size );
   M_BUFS( 0, PIC_RECONSTRUCTION ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE );
   M_BUFS( 0, PIC_RECON_WRAP ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE );
+#if NNVC_USE_CU_AVERAGE
+  M_BUFS( 0, PIC_CU_AVERAGE ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE );
+#endif
+#if NNVC_USE_BS
+  M_BUFS( 0, PIC_BS_MAP ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE );
+#endif
+#if NNVC_USE_REC_BEFORE_DBF
+  M_BUFS( 0, PIC_REC_BEFORE_DBF ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE );
+#endif
+#if NNVC_USE_REC_AFTER_DBF
+  M_BUFS( 0, PIC_REC_AFTER_DBF ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE );
+#endif
 
   if( !_decoder )
   {
@@ -277,6 +289,11 @@ void Picture::createTempBuffers( const unsigned _maxCUSize )
   {
     M_BUFS( jId, PIC_PREDICTION                   ).create( chromaFormat, a,   _maxCUSize );
     M_BUFS( jId, PIC_RESIDUAL                     ).create( chromaFormat, a,   _maxCUSize );
+#if NNVC_USE_PRED
+    const Area aTemp( Position{ 0, 0 }, lumaSize() );
+    if (getPredBufCustom().bufs.empty())
+      M_BUFS( jId, PIC_PREDICTION_CUSTOM          ).create( chromaFormat, aTemp,   _maxCUSize );
+#endif
 #if ENABLE_SPLIT_PARALLELISM
     if (jId > 0)
     {
@@ -305,6 +322,12 @@ void Picture::destroyTempBuffers()
       {
         M_BUFS(jId, t).destroy();
       }
+#if NNVC_USE_PRED && !NNVC_DUMP_DATA
+      if (t == PIC_PREDICTION_CUSTOM )
+      {
+        M_BUFS( jId, t ).destroy();
+      }
+#endif
 #if ENABLE_SPLIT_PARALLELISM
       if (t == PIC_RECONSTRUCTION && jId > 0)
       {
@@ -349,6 +372,51 @@ const CPelBuf     Picture::getResiBuf(const CompArea &blk)  const { return getBu
        PelUnitBuf Picture::getResiBuf(const UnitArea &unit)       { return getBuf(unit, PIC_RESIDUAL); }
 const CPelUnitBuf Picture::getResiBuf(const UnitArea &unit) const { return getBuf(unit, PIC_RESIDUAL); }
 
+#if NNVC_USE_CU_AVERAGE
+       PelBuf     Picture::getCuAverageBuf(const ComponentID compID, bool /*wrap*/)   { return getBuf(compID,               PIC_CU_AVERAGE); }
+       PelUnitBuf Picture::getCuAverageBuf(bool /*wrap*/)                             { return M_BUFS(scheduler.getSplitPicId(), PIC_CU_AVERAGE); }
+       PelUnitBuf Picture::getCuAverageBuf(const UnitArea &unit)                  { return getBuf(unit, PIC_CU_AVERAGE); }
+const CPelUnitBuf Picture::getCuAverageBuf(const UnitArea &unit) const            { return getBuf(unit, PIC_CU_AVERAGE); }
+#endif
+#if NNVC_USE_BS
+       PelBuf     Picture::getBsMapBuf(const ComponentID compID, bool /*wrap*/)       { return getBuf(compID,               PIC_BS_MAP); }
+       PelUnitBuf Picture::getBsMapBuf(bool /*wrap*/)                                 { return M_BUFS(scheduler.getSplitPicId(), PIC_BS_MAP); }
+       PelUnitBuf Picture::getBsMapBuf(const UnitArea &unit)                      { return getBuf(unit, PIC_BS_MAP); }
+const CPelUnitBuf Picture::getBsMapBuf(const UnitArea &unit) const                { return getBuf(unit, PIC_BS_MAP); }
+       PelBuf     Picture::getBsMapBuf(const CompArea &blk)                       { return getBuf(blk, PIC_BS_MAP); }
+const CPelBuf     Picture::getBsMapBuf(const CompArea &blk) const                 { return getBuf(blk, PIC_BS_MAP); }
+#endif
+
+#if NNVC_USE_REC_BEFORE_DBF
+       PelBuf     Picture::getRecBeforeDbfBuf(const ComponentID compID, bool /*wrap*/)       { return getBuf(compID,               PIC_REC_BEFORE_DBF); }
+       PelUnitBuf Picture::getRecBeforeDbfBuf(bool /*wrap*/)                                 { return M_BUFS(scheduler.getSplitPicId(), PIC_REC_BEFORE_DBF); }
+       PelBuf     Picture::getRecBeforeDbfBuf(const CompArea &blk)        { return getBuf(blk,  PIC_REC_BEFORE_DBF); }
+const CPelBuf     Picture::getRecBeforeDbfBuf(const CompArea &blk)  const { return getBuf(blk,  PIC_REC_BEFORE_DBF); }
+       PelUnitBuf Picture::getRecBeforeDbfBuf(const UnitArea &unit)       { return getBuf(unit, PIC_REC_BEFORE_DBF); }
+const CPelUnitBuf Picture::getRecBeforeDbfBuf(const UnitArea &unit) const { return getBuf(unit, PIC_REC_BEFORE_DBF);}
+#endif
+
+
+#if NNVC_USE_REC_AFTER_DBF
+PelBuf     Picture::getRecAfterDbfBuf(const ComponentID compID, bool /*wrap*/)       { return getBuf(compID,               PIC_REC_AFTER_DBF); }
+PelUnitBuf Picture::getRecAfterDbfBuf(bool /*wrap*/)                                 { return M_BUFS(scheduler.getSplitPicId(), PIC_REC_AFTER_DBF); }
+PelBuf     Picture::getRecAfterDbfBuf(const CompArea &blk)        { return getBuf(blk,  PIC_REC_AFTER_DBF); }
+const CPelBuf     Picture::getRecAfterDbfBuf(const CompArea &blk)  const { return getBuf(blk,  PIC_REC_AFTER_DBF); }
+PelUnitBuf Picture::getRecAfterDbfBuf(const UnitArea &unit)       { return getBuf(unit, PIC_REC_AFTER_DBF); }
+const CPelUnitBuf Picture::getRecAfterDbfBuf(const UnitArea &unit) const { return getBuf(unit, PIC_REC_AFTER_DBF);}
+#endif
+
+
+#if NNVC_USE_PRED
+    //   PelUnitBuf Picture::getPredBuf(bool /*wrap*/)                                  { return M_BUFS(scheduler.getSplitPicId(), PIC_PREDICTION); }
+       PelBuf     Picture::getPredBufCustom(const ComponentID compID, bool /*wrap*/)       { return getBuf(compID,               PIC_PREDICTION_CUSTOM); }
+       PelUnitBuf Picture::getPredBufCustom(bool /*wrap*/)                                 { return M_BUFS(scheduler.getSplitPicId(), PIC_PREDICTION_CUSTOM); }
+       PelBuf     Picture::getPredBufCustom(const CompArea &blk)        { return getBuf(blk,  PIC_PREDICTION_CUSTOM); }
+const CPelBuf     Picture::getPredBufCustom(const CompArea &blk)  const { return getBuf(blk,  PIC_PREDICTION_CUSTOM); }
+       PelUnitBuf Picture::getPredBufCustom(const UnitArea &unit)       { return getBuf(unit, PIC_PREDICTION_CUSTOM); }
+const CPelUnitBuf Picture::getPredBufCustom(const UnitArea &unit) const { return getBuf(unit, PIC_PREDICTION_CUSTOM);}
+#endif
+
        PelBuf     Picture::getRecoBuf(const ComponentID compID, bool wrap)       { return getBuf(compID,                    wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); }
 const CPelBuf     Picture::getRecoBuf(const ComponentID compID, bool wrap) const { return getBuf(compID,                    wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); }
        PelBuf     Picture::getRecoBuf(const CompArea &blk, bool wrap)            { return getBuf(blk,                       wrap ? PIC_RECON_WRAP : PIC_RECONSTRUCTION); }
@@ -403,7 +471,60 @@ void Picture::finalInit( const VPS* vps, const SPS& sps, const PPS& pps, PicHead
     memset(m_spliceIdx, 0, m_ctuNums * sizeof(int));
   }
 }
+#if NNVC_USE_CU_AVERAGE
+void Picture::dumpCUAverage(CodingUnit* cu, int stride, Pel* p, ComponentID compID)
+{
+  const CompArea&  a = cu->block(compID);
+  int cuX = a.x;
+  int cuY = a.y;
+  int cuH = a.height;
+  int cuW = a.width;
+  int idx;
+
+  double avg = 0;
+  for (int i = 0; i < cuH; i++)
+  {
+    for (int j = 0; j < cuW; j++)
+    {
+      idx = (i + cuY) * stride + cuX + j;
+      avg += p[idx];
+    }
+  }
+  avg /= cuH * cuW;
+  for (int i = 0; i < cuH; i++)
+  {
+    for (int j = 0; j < cuW; j++)
+    {
+      idx = (i + cuY) * stride + cuX + j;
+      p[idx] = Pel(avg);
+    }
+  }
 
+
+}
+void Picture::dumpPicCuAverage()
+{
+  CodingStructure&  cs            = *(this->cs);
+  PelBuf bufY = getCuAverageBuf(COMPONENT_Y);
+  PelBuf bufCb = getCuAverageBuf(COMPONENT_Cb);
+  PelBuf bufCr = getCuAverageBuf(COMPONENT_Cr);
+  getCuAverageBuf().copyFrom(getRecoBuf());
+  uint64_t culength=cs.cus.size();
+  for(uint64_t n=0;n<culength;n++)
+  {
+    CodingUnit* cu=cs.cus.at(n);
+    if (cu->chType == CHANNEL_TYPE_LUMA)
+    {
+      dumpCUAverage(cu, bufY.stride, bufY.buf, COMPONENT_Y);
+    }
+    if ((cu->chType == CHANNEL_TYPE_CHROMA) || (cs.slice->getSliceType() != I_SLICE))
+    {
+      dumpCUAverage(cu, bufCb.stride, bufCb.buf, COMPONENT_Cb);
+      dumpCUAverage(cu, bufCr.stride, bufCr.buf, COMPONENT_Cr);
+    }
+  }
+}
+#endif
 void Picture::allocateNewSlice()
 {
   slices.push_back(new Slice);
diff --git a/source/Lib/CommonLib/Picture.h b/source/Lib/CommonLib/Picture.h
index 66073bf619..ab95ce3028 100644
--- a/source/Lib/CommonLib/Picture.h
+++ b/source/Lib/CommonLib/Picture.h
@@ -133,6 +133,48 @@ struct Picture : public UnitArea
          PelUnitBuf getResiBuf(const UnitArea &unit);
   const CPelUnitBuf getResiBuf(const UnitArea &unit) const;
 
+#if NNVC_USE_CU_AVERAGE
+         PelBuf     getCuAverageBuf(const ComponentID compID, bool wrap=false);
+         PelUnitBuf getCuAverageBuf(bool wrap=false);
+         PelUnitBuf getCuAverageBuf(const UnitArea &unit);
+  const CPelUnitBuf getCuAverageBuf(const UnitArea &unit) const;
+  void              dumpCUAverage(CodingUnit* cu, int stride, Pel* p, ComponentID compID);
+  void              dumpPicCuAverage();
+#endif
+#if NNVC_USE_BS
+         PelBuf     getBsMapBuf(const ComponentID compID, bool wrap=false);
+         PelUnitBuf getBsMapBuf(bool wrap=false);
+         PelUnitBuf getBsMapBuf(const UnitArea &unit);
+const   CPelUnitBuf getBsMapBuf(const UnitArea &unit) const;
+         PelBuf     getBsMapBuf(const CompArea &blk);
+const   CPelBuf     getBsMapBuf(const CompArea &blk) const;
+#endif
+#if NNVC_USE_PRED
+     //    PelUnitBuf getPredBuf(bool wrap=false);
+         PelBuf     getPredBufCustom(const ComponentID compID, bool wrap=false);
+         PelUnitBuf getPredBufCustom(bool wrap=false);
+         PelBuf     getPredBufCustom(const CompArea &blk);
+  const CPelBuf     getPredBufCustom(const CompArea &blk)  const;
+         PelUnitBuf getPredBufCustom(const UnitArea &unit);
+  const CPelUnitBuf getPredBufCustom(const UnitArea &unit) const;
+#endif
+#if NNVC_USE_REC_BEFORE_DBF
+  PelBuf     getRecBeforeDbfBuf(const ComponentID compID, bool wrap=false);
+  PelUnitBuf getRecBeforeDbfBuf(bool wrap=false);
+  PelBuf     getRecBeforeDbfBuf(const CompArea &blk);
+  const CPelBuf     getRecBeforeDbfBuf(const CompArea &blk)  const;
+  PelUnitBuf getRecBeforeDbfBuf(const UnitArea &unit);
+  const CPelUnitBuf getRecBeforeDbfBuf(const UnitArea &unit) const;
+#endif
+#if NNVC_USE_REC_AFTER_DBF
+         PelBuf     getRecAfterDbfBuf(const ComponentID compID, bool wrap=false);
+         PelUnitBuf getRecAfterDbfBuf(bool wrap=false);
+         PelBuf     getRecAfterDbfBuf(const CompArea &blk);
+  const CPelBuf     getRecAfterDbfBuf(const CompArea &blk)  const;
+         PelUnitBuf getRecAfterDbfBuf(const UnitArea &unit);
+  const CPelUnitBuf getRecAfterDbfBuf(const UnitArea &unit) const;
+#endif
+  
          PelBuf     getRecoBuf(const ComponentID compID, bool wrap=false);
   const CPelBuf     getRecoBuf(const ComponentID compID, bool wrap=false) const;
          PelBuf     getRecoBuf(const CompArea &blk, bool wrap=false);
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 8af59c7fab..8a7bd6df4e 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -51,6 +51,17 @@
 #include <cassert>
 
 // clang-format off
+#define NNVC_INFO_ENCODER                                 1 // add some info in encoder logs necessary to extract data
+#define NNVC_DUMP_DATA                                    1
+
+// which data are used for inference/dump
+#define NNVC_USE_REC_BEFORE_DBF                           1 // reconstruction before DBF
+#define NNVC_USE_REC_AFTER_DBF                            1 // reconstruction after DBF
+#define NNVC_USE_PRED                                     1 // prediction
+#define NNVC_USE_BS                                       1 // BS of DBF
+#define NNVC_USE_CU_AVERAGE                               1 // average on the CU
+#define NNVC_USE_QP                                       1 // QP slice
+#define NNVC_USE_SLICETYPE                                1 // slice type
 
 //########### place macros to be removed in next cycle below this line ###############
 
diff --git a/source/Lib/DecoderLib/DecCu.cpp b/source/Lib/DecoderLib/DecCu.cpp
index eeec347435..35705a9914 100644
--- a/source/Lib/DecoderLib/DecCu.cpp
+++ b/source/Lib/DecoderLib/DecCu.cpp
@@ -183,6 +183,10 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID )
 
         PelBuf piPred       = cs.getPredBuf( area );
 
+#if NNVC_USE_PRED
+        PelBuf piPredCustom = cs.getPredBufCustom(area);
+#endif
+  
   const PredictionUnit &pu  = *tu.cs->getPU( area.pos(), chType );
   const uint32_t uiChFinalMode  = PU::getFinalIntraMode( pu, chType );
   PelBuf pReco              = cs.getRecoBuf(area);
@@ -311,11 +315,18 @@ void DecCu::xIntraRecBlk( TransformUnit& tu, const ComponentID compID )
   }
 #if KEEP_PRED_AND_RESI_SIGNALS
   pReco.reconstruct( piPred, piResi, tu.cu->cs->slice->clpRng( compID ) );
+#else
+#if NNVC_USE_PRED
+  piPredCustom.copyFrom( piPred );
+  pReco.reconstruct( piPred, piResi, tu.cu->cs->slice->clpRng( compID ) );
 #else
   piPred.reconstruct( piPred, piResi, tu.cu->cs->slice->clpRng( compID ) );
 #endif
+#endif
 #if !KEEP_PRED_AND_RESI_SIGNALS
+#if !NNVC_USE_PRED
   pReco.copyFrom( piPred );
+#endif
 #endif
   if (slice.getLmcsEnabledFlag() && (m_pcReshape->getCTUFlag() || slice.isIntra()) && compID == COMPONENT_Y)
   {
@@ -696,6 +707,9 @@ void DecCu::xReconInter(CodingUnit &cu)
     const CompArea &area = cu.blocks[COMPONENT_Y];
     CompArea    tmpArea(COMPONENT_Y, area.chromaFormat, Position(0, 0), area.size());
     PelBuf tmpPred;
+#endif
+#if NNVC_USE_PRED
+    cs.getPredBufCustom(cu).copyFrom(cs.getPredBuf(cu));
 #endif
     if (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
     {
@@ -727,6 +741,9 @@ void DecCu::xReconInter(CodingUnit &cu)
   }
   else
   {
+#if NNVC_USE_PRED
+    cs.getPredBufCustom(cu).copyClip(cs.getPredBuf(cu), cs.slice->clpRngs());
+#endif
     cs.getRecoBuf(cu).copyClip(cs.getPredBuf(cu), cs.slice->clpRngs());
     if (cs.slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag() && !cu.firstPU->ciipFlag && !CU::isIBC(cu))
     {
diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp
index 759931f1d1..555cfd7b08 100644
--- a/source/Lib/DecoderLib/DecLib.cpp
+++ b/source/Lib/DecoderLib/DecLib.cpp
@@ -633,14 +633,42 @@ void DecLib::executeLoopFilters()
             const uint32_t height = (yPos + pcv.maxCUHeight > pcv.lumaHeight) ? (pcv.lumaHeight - yPos) : pcv.maxCUHeight;
             const UnitArea area(cs.area.chromaFormat, Area(xPos, yPos, width, height));
             cs.getRecoBuf(area).get(COMPONENT_Y).rspSignal(m_cReshaper.getInvLUT());
+#if NNVC_USE_CU_AVERAGE
+            m_pcPic->getCuAverageBuf(area).get(COMPONENT_Y).rspSignal(m_cReshaper.getInvLUT());
+#endif
+          }
+        }
+      }
+#if NNVC_USE_PRED
+      uint64_t culength=cs.cus.size();
+      for(uint64_t n=0;n<culength;n++)
+      {
+        CodingUnit* cu=cs.cus.at(n);
+        if (cu->slice->getLmcsEnabledFlag())
+        {
+          if (((cu->predMode == MODE_INTRA || cu->predMode == MODE_IBC) && cu->chType != CHANNEL_TYPE_CHROMA) || (cu->predMode == MODE_INTER && m_cReshaper.getCTUFlag() && cu->firstPU->ciipFlag))
+          {
+            m_pcPic->getPredBufCustom(cu->block(COMPONENT_Y)).rspSignal(m_cReshaper.getInvLUT());
           }
         }
       }
+#endif
+
       m_cReshaper.setRecReshaped(false);
       m_cSAO.setReshaper(&m_cReshaper);
   }
+#if NNVC_USE_BS
+  m_pcPic->getBsMapBuf().fill(0);
+#endif
+#if NNVC_USE_REC_BEFORE_DBF
+  m_pcPic->getRecBeforeDbfBuf().copyFrom(m_pcPic->getRecoBuf());
+#endif
+
   // deblocking filter
   m_cLoopFilter.loopFilterPic( cs );
+#if NNVC_USE_REC_AFTER_DBF
+  m_pcPic->getRecAfterDbfBuf().copyFrom(m_pcPic->getRecoBuf());
+#endif
   CS::setRefinedMotionField(cs);
   if( cs.sps->getSAOEnabledFlag() )
   {
diff --git a/source/Lib/DecoderLib/DecSlice.cpp b/source/Lib/DecoderLib/DecSlice.cpp
index 57c1c92c42..51125336a7 100644
--- a/source/Lib/DecoderLib/DecSlice.cpp
+++ b/source/Lib/DecoderLib/DecSlice.cpp
@@ -286,7 +286,9 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb
       }
     }
   }
-
+#if NNVC_USE_CU_AVERAGE
+  pic->dumpPicCuAverage();
+#endif
   // deallocate all created substreams, including internal buffers.
   for( auto substr: ppcSubstreams )
   {
diff --git a/training/data_loader/data_loader.py b/training/data_loader/data_loader.py
new file mode 100644
index 0000000000..26736b8927
--- /dev/null
+++ b/training/data_loader/data_loader.py
@@ -0,0 +1,185 @@
+import json
+import math
+import sys
+from typing import NamedTuple
+import numpy as np
+import struct
+
+class PatchInfo(NamedTuple):
+    data_index: int
+    frame_index: int
+    patch_x0: int
+    patch_y0: int
+    
+    
+def readData(patch_size,border_size,norm,fn,off,ctype,h,w,x0,y0):
+    t = np.zeros((patch_size+2*border_size,patch_size+2*border_size),dtype='float32') # implicit zero padding outside frame borders
+    with open(fn,"rb") as file:
+         frame = np.fromfile(file,dtype=ctype,count=h*w,offset=off).reshape(h,w)
+         cropc = [ y0-border_size, y0+patch_size+border_size, x0-border_size, x0+patch_size+border_size ]
+         srcc = [max(cropc[0], 0), min(cropc[1], frame.shape[0]), max(cropc[2], 0), min(cropc[3], frame.shape[1])]
+         dstc = [srcc[0] - cropc[0], srcc[1] - cropc[0], srcc[2] - cropc[2], srcc[3] - cropc[2]]
+         t[dstc[0]:dstc[1], dstc[2]:dstc[3]] = frame[srcc[0]:srcc[1], srcc[2]:srcc[3]]
+         return t.astype('float32')/norm
+    
+def readOne(patch_size,border_size,norm,fn,off,ctype):
+    with open(fn,"rb") as file:
+         if ctype == 'int32':
+             file.seek(off)
+             v = float(struct.unpack("i",file.read(4))[0])/norm
+         else:
+             sys.exit("readOne todo")
+         t = np.full((patch_size+2*border_size,patch_size+2*border_size),v,dtype='float32') 
+         return t
+    
+              
+  
+class DataLoader:
+    normalizer_rec  = 1024.0
+    normalizer_pred = 1024.0
+    normalizer_bs   = 1024.0
+    normalizer_cu_average = 1024.0
+    normalizer_org8bits = 256.0
+    normalizer_org10bits = 1024.0
+    normalizer_qp   = 256.0
+    components=[]
+    database=None # contains the whole database
+    patch_info=None # contains the address of each patch in the database: dataset index, frame index in the dataset, patch x0/y0 coordinates in the frame
+    suffix={} # suffix for each file
+    
+    # patch_size in luma sample
+    def __init__(self, jsonfile, patch_size, qp_filter=-1, slice_type_filter=-1):
+        self.patch_size=patch_size
+        self.patch_info=[]
+        with open(jsonfile, "r") as file:
+         content = file.read()
+         dcontent = json.loads(content)
+         if qp_filter>0 and 'suffix_qp' not in dcontent:
+             sys.exit("Filtering on qp impossible: no qp data in the dataset")
+         if slice_type_filter>0 and 'suffix_slicetype' not in dcontent:
+             sys.exit("Filtering on slice type impossible: no slice data in the dataset")
+         if qp_filter>0 or slice_type_filter>0:
+             sys.exit("todo")
+         self.components.append("org_Y")
+         self.components.append("org_U")
+         self.components.append("org_V")
+         if  'suffix_rec_after_dbf' in dcontent: 
+             self.suffix['rec_after_dbf']=dcontent['suffix_rec_after_dbf']
+             self.components.append("rec_after_dbf_Y")
+             self.components.append("rec_after_dbf_U")
+             self.components.append("rec_after_dbf_V")
+         if  'suffix_rec_before_dbf' in dcontent: 
+             self.suffix['rec_before_dbf']=dcontent['suffix_rec_before_dbf']
+             self.components.append("rec_before_dbf_Y")
+             self.components.append("rec_before_dbf_U")
+             self.components.append("rec_before_dbf_V")
+         if  'suffix_pred' in dcontent: 
+             self.suffix['pred']=dcontent['suffix_pred']
+             self.components.append("pred_Y")
+             self.components.append("pred_U")
+             self.components.append("pred_V")
+         if  'suffix_bs' in dcontent: 
+             self.suffix['bs']=dcontent['suffix_bs']             
+             self.components.append("bs_Y")
+             self.components.append("bs_U")
+             self.components.append("bs_V")
+         if  'suffix_cu_average' in dcontent: 
+             self.suffix['cu_average']=dcontent['suffix_cu_average']    
+             self.components.append("cu_average_Y")
+             self.components.append("cu_average_U")
+             self.components.append("cu_average_V")
+         if  'suffix_qp' in dcontent: 
+             self.components.append("qp_slice")
+             self.suffix['qp_slice']=dcontent['suffix_qp']    
+         self.components.append("qp_base") # always here
+         if  'suffix_slicetype' in dcontent: 
+             self.components.append("slice_type")
+             self.suffix['slice_type']=dcontent['suffix_slicetype']    
+             
+         self.database=dcontent['data']
+         # build the array of patch addresses
+         for didx in range(len(self.database)):
+             d=self.database[didx]
+             nb_w=int(math.ceil(float(d['width'])/patch_size))
+             nb_h=int(math.ceil(float(d['height'])/patch_size))
+             for fidx in range(int(d['data_count'])):
+                 for y0 in range(nb_h):
+                     for x0 in range(nb_w):
+                        self.patch_info.append(PatchInfo(didx,fidx,x0,y0))
+                     
+    def nb_patches(self):
+         return len(self.patch_info)
+     
+
+    def getPatchData(self,idx,comp,border_size=0):
+        assert(idx<len(self.patch_info))
+        pinfo=self.patch_info[idx]
+        d=self.database[pinfo.data_index]
+        psize=self.patch_size
+        bsize=border_size
+        # print(pinfo,d)
+        chroma_block=('_U' in comp[0] or '_V' in comp[0])
+        w=int(d['width'])
+        h=int(d['height'])
+        frame_size_Y=w*h
+        if chroma_block:
+            psize//=2
+            bsize//=2
+            w//=2
+            h//=2
+        tsize=bsize+psize+bsize
+        x0 = pinfo.patch_x0*psize
+        y0 = pinfo.patch_y0*psize
+        t = np.zeros((1,tsize,tsize,len(comp)),dtype='float32')
+        
+        for idx, c in enumerate(comp):
+            assert(c in self.components)
+                           
+            if 'org' in c:
+                fn=d['original_yuv']
+                off_frame=d['original_frame_skip']+pinfo.frame_index
+                if d['original_bitdepth'] == 8: # 8bits
+                    norm=self.normalizer_org8bits
+                    b='uint8' 
+                    nbb = 1
+                else: # 10bits
+                    norm=self.normalizer_org10bits
+                    b='uint16'                
+                    nbb = 2
+                off = off_frame*(frame_size_Y*nbb*3//2)
+                if c == 'org_U': 
+                    off+=frame_size_Y*nbb                  
+                elif c == 'org_V': 
+                    off+=frame_size_Y*nbb+(frame_size_Y*nbb)//4
+                v = readData(psize,bsize,norm,fn,off,b,h,w,x0,y0)
+                
+            elif 'rec_after_dbf' in c or 'rec_before_dbf' in c or 'pred' in c or 'cu_average' in c or 'bs' in c:
+                fn=d['dirname']+'/'+d['basename']+self.suffix[c[:-2]]
+                nbb=2 # 16 bits data
+                off=pinfo.frame_index*(frame_size_Y*nbb*3//2)
+                if '_U' in c: 
+                    off+=frame_size_Y*nbb
+                elif '_V' in c: 
+                    off+=frame_size_Y*nbb+(frame_size_Y*nbb)//4
+                if   'rec_after_dbf' in c or 'rec_before_dbf' in c: norm = self.normalizer_rec
+                elif 'pred' in c :          norm = self.normalizer_pred
+                elif 'bs' in c :            norm = self.normalizer_bs
+                elif 'cu_average' in c :     norm = self.normalizer_cu_average
+                               
+                v = readData(psize,bsize,norm,fn,off,'uint16',h,w,x0,y0)                
+                
+            elif c == 'qp_slice':
+                fn=d['dirname']+'/'+d['basename']+self.suffix['qp_slice']
+                norm=self.normalizer_qp
+                off=pinfo.frame_index*4
+                v = readOne(psize,bsize,norm,fn,off,'int32')
+
+            elif c == 'qp_base':
+                norm=self.normalizer_qp
+                f = float(d['qp_base'])/norm                
+                v = np.full((tsize,tsize),f,dtype='float32')                 
+            else:
+                 sys.exit("Unknown component {}".format(c))
+            t[0,:,:,idx]=v
+        return t
+            
diff --git a/training/example/create_unified_dataset.py b/training/example/create_unified_dataset.py
new file mode 100644
index 0000000000..c692dcdd0e
--- /dev/null
+++ b/training/example/create_unified_dataset.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import argparse
+import sys, os
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'data_loader'))
+import data_loader
+import random
+import json
+
+parser = argparse.ArgumentParser(prog='create_unified_dataset', usage='sample program to demonstrate the use of the dataloader. It creates a raw binary file containing data patches in float.', 
+                                  formatter_class=argparse.RawDescriptionHelpFormatter,
+                                 epilog=
+'''Typical usage:
+   create_unified_dataset.py --input_json dataset.json --nb_samples 1000 --patch_size 128 --border_size 8 --output_file patches.bin''')
+parser.add_argument("--input_json", action="store", nargs='?', type=str, help="input json database.")
+parser.add_argument("--patch_size", action="store", nargs='?', default=128, type=int, help="patch size to extract")
+parser.add_argument("--border_size", action="store", nargs='?', default=0, type=int, help="border size of the patch")
+parser.add_argument("--components", action="store", nargs='?', type=str, help="comma separated list of components to put in each patch")
+parser.add_argument("--nb_patches", action="store", default=1, nargs='?', type=int, help="nb patches to extract")
+parser.add_argument("--output_file", action="store", nargs='?', type=str, help="output binary file (patches in float format)")
+args = parser.parse_args()
+
+
+dl=data_loader.DataLoader(args.input_json,args.patch_size)
+
+print("Nb samples available: {}".format(dl.nb_patches()))
+print("Available components: {}".format(dl.components))
+
+if args.components is None:
+    sys.exit("Please choose components to use")
+
+comps = args.components.split(",")
+
+luma_only=False
+for c in comps:
+    if '_Y' in c:
+        luma_only = True
+
+chroma_only=False
+for c in comps:
+    if '_U' in c or '_V' in c:
+        chroma_only = True
+
+if luma_only and chroma_only:
+    sys.exit("Cannot dump mix of luma and chroma patches (different size)")
+
+        
+if args.output_file:
+    with open(args.output_file,"wb") as file:
+        print("Dump {} patches in {}".format(args.nb_patches,args.output_file))
+        print(" Patch: {}x{} + {} border, components={}".format(args.patch_size,args.patch_size,args.border_size,comps))
+        for i in range(args.nb_patches):
+            idx = random.randint(0,dl.nb_patches())
+            p = dl.getPatchData(idx,comps,args.border_size)
+            p.tofile(file)
+    with open(args.output_file+'.json',"w") as file:
+      jout={}  
+      jout['data']=args.output_file
+      jout["components"]=comps
+      if '_U' in comps[0] or '_V' in comps[0]:
+          jout["patch_size"]=args.patch_size//2
+          jout["border_size"]=args.border_size//2
+      else:
+          jout["patch_size"]=args.patch_size
+          jout["border_size"]=args.border_size
+      jout["nb_patches"]=args.nb_patches
+      jout["original_db"]=args.input_json
+      s = json.dumps(jout,indent=1)
+      file.write(s)
+      print("Wrote json description in {}".format(args.output_file+'.json'))
+    
+
diff --git a/training/example/display_patches_dataset.py b/training/example/display_patches_dataset.py
new file mode 100755
index 0000000000..9b2756a552
--- /dev/null
+++ b/training/example/display_patches_dataset.py
@@ -0,0 +1,25 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import json
+import sys
+
+
+with open(sys.argv[1], "r") as file:
+  content = file.read()
+  dcontent = json.loads(content)
+         
+file=open(dcontent['data'],"r")
+s=dcontent['patch_size']
+b=dcontent['border_size']
+d=len(dcontent['components'])
+c=dcontent['nb_patches']
+sb=s+2*b
+
+for c in range(c):
+ t=np.fromfile(file,dtype='float32',count=sb*sb*d).reshape((sb,sb,d))
+ for i in range(d):
+   plt.title(dcontent['components'][i])
+   plt.imshow(t[:,:,i],cmap='gray',vmin=0.,vmax=1.)
+   plt.show()
+   
+
diff --git a/training/example/sample_test.sh b/training/example/sample_test.sh
new file mode 100644
index 0000000000..a8c436cdd0
--- /dev/null
+++ b/training/example/sample_test.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# assume these directories contain bitstream and encoding logs
+# for encoding, it assumes that --Verbosity=6 has been used during encoding and that the encoding logs are put in a file.
+LISTDIR_BS="bs/BVI bs/UVG bs/DIV2K";
+
+DEC=bin/DecoderAppStatic;
+CREATE_DB=training/tools/concatenate_dataset.py;
+EXAMPLE=training/example/create_unified_dataset.py;
+DISPLAYPATCH=training/example/display_patches_dataset.py;
+DB_DIR=db; # put all files in the same directory, assume no name clash
+mkdir -p "${DB_DIR}";
+
+# example of encoding:
+# cd bs/BVI
+# for all file in BVI
+#   EncoderAppStatic --Verbosity=6 -i $file ... > file.log
+# done
+
+# LISTDIR_BS is intentionally left unquoted: word splitting yields one directory per word
+for D in $LISTDIR_BS; do
+ echo "Process $D bitstream";
+ for F in "${D}"/*.bin; do
+  BN=$(basename "${F}");
+  BN2=${BN/.bin/}; # strip extension to get a suitable name for each dataset
+  "$DEC" -b "$F" --DumpBasename="${DB_DIR}/${BN2}";
+ done
+done
+
+echo "Process 1 dataset";
+python3 "${CREATE_DB}" --input_dir_json "${DB_DIR}" --output_json pre_db.json
+echo "Consolidate with encoder info";
+python3 "${CREATE_DB}" --input_dir_encoder ${LISTDIR_BS} --input_json pre_db.json --output_json db.json
+
+echo "Generate a binary dataset using luma info"
+python3 "$EXAMPLE" --input_json db.json --output_file patches_luma.bin --nb_patches 4  --components "org_Y,pred_Y,rec_before_dbf_Y,rec_after_dbf_Y,bs_Y,cu_average_Y,qp_slice,qp_base" --border_size 8
+
+echo "Display the result"
+python3 "$DISPLAYPATCH" patches_luma.bin.json
+
+echo "Generate a binary dataset using chroma info"
+python3 "$EXAMPLE" --input_json db.json --output_file patches_chroma.bin --nb_patches 4  --components "org_U,org_V,pred_U,pred_V,rec_after_dbf_U,rec_after_dbf_V,bs_U,bs_V,cu_average_U" --border_size 8
+
+echo "Display the result"
+python3 "$DISPLAYPATCH" patches_chroma.bin.json
diff --git a/training/tools/concatenate_dataset.py b/training/tools/concatenate_dataset.py
new file mode 100644
index 0000000000..af36525c24
--- /dev/null
+++ b/training/tools/concatenate_dataset.py
@@ -0,0 +1,104 @@
+import argparse
+import glob
+import sys
+import json
+import re
+import os
+
+parser = argparse.ArgumentParser(prog='concatenate dataset', usage='create a global dataset from all the json file in a given directory. ', 
+                                  formatter_class=argparse.RawDescriptionHelpFormatter,
+                                 epilog=
+'''2 modes available:
+   concatenate_dataset.py --input_dir dir1 --input_dir dir2 --output_json pre_dataset.json
+   concatenate_dataset.py --input_json pre_dataset.json --input_dir_encoder direnc1 --input_dir_encoder direnc2 --output_json dataset.json''')
+parser.add_argument("--input_dir_json", action="append", nargs='+', type=str, help="directory containing individual json files. Multiple options possible.")
+parser.add_argument("--input_json", action="store", nargs='?', type=str, help="input json database.")
+parser.add_argument("--input_dir_encoder", action="append", nargs='+', type=str, help="directory containing individual encoder log files or encoder cfg files. Multiple options possible.")
+parser.add_argument("--log_extension", default="log", action="store", nargs='?', type=str, help="encoder log extension")
+parser.add_argument("--output_json", action="store", nargs='?', type=str, help="name of the output file with concatenated files", required=True)
+args=parser.parse_args()
+
+# mode 1: concatenate all indiviual dataset into 1 file, setting the dirname to find the data
+if args.input_dir_json is not None:
+    header={}
+    lastheader=None
+    db=[]
+    flat=[d for d1 in args.input_dir_json for d in d1]
+    for d in flat:
+        files = glob.glob(d+'/*.json')
+        print("Processing directory {}: {} files".format(d,len(files)))
+        for f in files:
+           with open(f, "r") as file:
+               content = file.read()
+               dcontent = json.loads(content)
+               header={}
+               for key in dcontent:
+                   if "suffix_" in key:
+                       header[key]=dcontent[key]
+               if lastheader is not None and not lastheader == header:
+                   sys.exit("File {} does not contain the same data as other files".format(f))
+               lastheader = header
+               for data in dcontent['data']:
+                   if 'dirname' not in data: # no dirname yet
+                      data['dirname']=d
+                   db.append(data)
+    
+    jout=header
+    jout["data"]=db
+    s = json.dumps(jout,indent=1)
+    with open(args.output_json, "w") as file:
+      file.write(s)
+
+
+# mode 2: consolidate a dataset file by adding information on original yuv from encoder logs information     
+if args.input_json is not None:
+    db_logs={}
+    flat=[d for d1 in args.input_dir_encoder for d in d1]
+    for d in flat:
+        files = glob.glob(d+'/*.'+args.log_extension)
+        print("Processing directory {}: {} files".format(d,len(files)))
+        for f in files:
+           with open(f, "r") as file:
+              info={"FrameSkip": 0, "TemporalSubsampleRatio": 1} # default              
+              name=None
+              for line in file:
+                  m = re.match("^Input\s*File\s*:\s*([^\s]+)", line)
+                  if m:
+                      info['InputFile']=m.group(1)
+                  m = re.match("^Bitstream\s*File\s*:\s*([^\s]+)", line)
+                  if m:
+                      name=os.path.basename(m.group(1))
+                  m = re.match("^TemporalSubsampleRatio\s*:\s*([0-9]+)", line)
+                  if m:
+                      info['TemporalSubsampleRatio']=m.group(1)
+#                  m = re.match("^QP\s*:\s*([0-9]+)", line)
+ #                 if m:
+  #                    info['QP']=m.group(1)
+                  m = re.match("^FrameSkip\s*:\s*([0-9]+)", line)
+                  if m:
+                      info['FrameSkip']=m.group(1)
+                  m = re.match("^Input\s+bit\s+depth\s*:\s*\(Y:([0-9]+),", line)
+                  if m:
+                      info['InputBitDepth']=m.group(1)
+                  m = re.match("^InputBitDepth\s*:\s*([0-9]+)", line)
+                  if m:
+                       info['InputBitDepth']=m.group(1)
+              if name is not None:
+                  if len(info) != 4:
+                    sys.exit("Not enough information extracted for bitstream {}".format(name))
+                  db_logs[name]=info        
+    print(db_logs)
+    with open(args.input_json, "r") as file:
+      content = file.read()
+      dcontent = json.loads(content)
+      for d in dcontent['data']:
+          if d['bsname'] in db_logs:
+              info=db_logs[d['bsname']]
+              d['original_yuv']=info['InputFile']
+              d['original_temporal_subsample']=int(info['TemporalSubsampleRatio'])
+              d['original_frame_skip']=int(info['FrameSkip'])
+#              d['qp_base']=int(info['QP'])
+              d['original_bitdepth']=int(info['InputBitDepth'])
+      s = json.dumps(dcontent,indent=1)
+      with open(args.output_json, "w") as file:
+        file.write(s)
-- 
GitLab