diff --git a/.gitattributes b/.gitattributes
index d09fb877fee831265087f35c552ac7b3ab9dea11..2403eed923487ecd622f213a8c3c278007db1880 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -509,3 +509,6 @@ models/NnlfSet1_CombinedIntraInter/NnlfSet1_LumaCNNFilter_float.sadl filter=lfs
 models/NnlfSet1_CombinedIntraInter/NnlfSet1_LumaCNNFilter_int16.sadl filter=lfs diff=lfs merge=lfs -text
 models/NnlfSet1_AdditionalInter/NnlfSet1_LumaCNNFilter_InterSlice_MultipleFrames_int16.sadl filter=lfs diff=lfs merge=lfs -text
 models/NnlfSet1_AdditionalInter/NnlfSet1_LumaCNNFilter_InterSlice_MultipleFrames_float.sadl filter=lfs diff=lfs merge=lfs -text
+models/NnlfSetLC/*.sadl filter=lfs diff=lfs merge=lfs -text
+models/NnlfSetLC/* filter=lfs diff=lfs merge=lfs -text
+training/training_scripts/Nn_Filtering_Set_LC/3_training/*.json filter=lfs diff=lfs merge=lfs -text
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 09fb01a630912aa6bf98052c6e081b5738d173e8..6500b6890317cfedec1bfc275805782b0b84a644 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -76,8 +76,9 @@ endif()
 # bb_enable_warnings( gcc warnings-as-errors -Wno-sign-compare )
 # bb_enable_warnings( gcc -Wno-unused-variable )
 # bb_enable_warnings( gcc-4.8 warnings-as-errors -Wno-unused-variable )
+# -Wno-ignored-attributes is for tensorflow
 # for gcc 8.2:
-bb_enable_warnings( gcc warnings-as-errors -Wno-sign-compare -Wno-class-memaccess)
+bb_enable_warnings( gcc warnings-as-errors -Wno-sign-compare -Wno-class-memaccess -Wno-ignored-attributes)
 
 if( XCODE )
   bb_enable_warnings( clang warnings-as-errors
diff --git a/README.md b/README.md
index 3d54053ab7fb2d8dd1d3eab18fbbb66936743f5f..58d2a3a3d9d3582cd10c9a20244a06d14b9070dd 100644
--- a/README.md
+++ b/README.md
@@ -399,6 +399,21 @@ To specify model paths, use e.g. following command lines. Note that model paths
 --NnlfSet1IntraLumaModel="models/NnlfSet1_LumaCNNFilter_IntraSlice_int16.sadl"
 --NnlfSet1IntraChromaModel="models/NnlfSet1_ChromaCNNFilter_IntraSlice_int16.sadl"
 
+LC NN-based loop filter set
+----------------------------------------------
+To activate the LC (low complexity) NN-based loop filter set, use --NnlfOption=3, or equivalently -c cfg/nn-based/NnlfOption\_3.cfg  
+
+The LC NNLF consists of the CP Decomposed & Fused (CPDF) LC models proposed in JVET-AD0156.  
+
+The LC NNLF models are realized in SADL float and int16 frameworks. This can be set with the help of the macro ``NN_FIXED_POINT_IMPLEMENTATION`` in the file ``source/Lib/CommonLib/TypeDef.h``. The macro needs to be set to either 0 or 1 for float or int16 respectively.
+
+To specify LC model paths, refer to the following examples on the command line. Note that model paths should be specified at both the encoder and the decoder. The path needs to be set based on the framework that the software is built for.
+
+SADL float: ``--LCModelPath=models/NnlfSetLC/LC_float_model0.sadl,models/NnlfSetLC/LC_float_model1.sadl,models/NnlfSetLC/LC_float_model2.sadl,models/NnlfSetLC/LC_float_model3.sadl``
+
+SADL int16: ``--LCModelPath=models/NnlfSetLC/LC_int16_model0.sadl,models/NnlfSetLC/LC_int16_model1.sadl,models/NnlfSetLC/LC_int16_model2.sadl,models/NnlfSetLC/LC_int16_model3.sadl``
+
+Details of the training and of the model conversion to SADL can be found at ``training/training_scripts/Nn_Filtering_Set_LC/README.pdf``.
 
 NN-based loop filter encoder optimization
 ----------------------------------------------
diff --git a/cfg/nn-based/NnlfOption_3.cfg b/cfg/nn-based/NnlfOption_3.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..3972a8fad5e8f65811056bef5e20a49e372b7240
--- /dev/null
+++ b/cfg/nn-based/NnlfOption_3.cfg
@@ -0,0 +1 @@
+NnlfOption                 : 3   # NN-based loop filter, 0: disable NN filter; 1: Use NN-based loop filter set 0; 2: Use NN-based loop filter set 1; 3: Use LC NN-based loop filter set
\ No newline at end of file
diff --git a/models/NnlfSetLC/LC_float_model0.sadl b/models/NnlfSetLC/LC_float_model0.sadl
new file mode 100644
index 0000000000000000000000000000000000000000..3b3df55f24942e22bcdeddcc20a6b9d61c5f5355
--- /dev/null
+++ b/models/NnlfSetLC/LC_float_model0.sadl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a0cf99d11bbe0ed193e4208cdc14c359761c6379e0d1066a0c3942febf49ed4
+size 223231
diff --git a/models/NnlfSetLC/LC_float_model1.sadl b/models/NnlfSetLC/LC_float_model1.sadl
new file mode 100644
index 0000000000000000000000000000000000000000..ef608515a38ac1ec64bcdfbe096c4e3deb519ccf
--- /dev/null
+++ b/models/NnlfSetLC/LC_float_model1.sadl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a99a35d468751f7a93666214f19b3e017c47150c2611f1758288390a90bdc6c0
+size 223231
diff --git a/models/NnlfSetLC/LC_float_model2.sadl b/models/NnlfSetLC/LC_float_model2.sadl
new file mode 100644
index 0000000000000000000000000000000000000000..7369ce56bc632372d27ffaf78e78c6743c79cc1a
--- /dev/null
+++ b/models/NnlfSetLC/LC_float_model2.sadl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c21fbc1b175f0e578a8fa76b4c52772813a121516b8fdb2689445284f301694
+size 223231
diff --git a/models/NnlfSetLC/LC_float_model3.sadl b/models/NnlfSetLC/LC_float_model3.sadl
new file mode 100644
index 0000000000000000000000000000000000000000..210a7cda501aaa29d369e51c91d4906dc4f6aced
--- /dev/null
+++ b/models/NnlfSetLC/LC_float_model3.sadl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e344d52eb539c41d64373a3914677c38c2e74539f19b52ef54949372ebbb458c
+size 223231
diff --git a/models/NnlfSetLC/LC_int16_model0.sadl b/models/NnlfSetLC/LC_int16_model0.sadl
new file mode 100644
index 0000000000000000000000000000000000000000..64d2bc40283d803ef838d2643ad990137bfe284f
--- /dev/null
+++ b/models/NnlfSetLC/LC_int16_model0.sadl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dde1b10c41e8bd9762ca9cfce03017fa92e93f758cdf36c6eda55f08b146a137
+size 118787
diff --git a/models/NnlfSetLC/LC_int16_model1.sadl b/models/NnlfSetLC/LC_int16_model1.sadl
new file mode 100644
index 0000000000000000000000000000000000000000..1ed9897b64b35b9c381cdff201d7e365d5a81c91
--- /dev/null
+++ b/models/NnlfSetLC/LC_int16_model1.sadl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e686d36830af5da326bf096de283bf485d607b436bb4d8701176cc92b0127abc
+size 118787
diff --git a/models/NnlfSetLC/LC_int16_model2.sadl b/models/NnlfSetLC/LC_int16_model2.sadl
new file mode 100644
index 0000000000000000000000000000000000000000..ec59bd45d60cb86a2be33794bb2275cfd125917c
--- /dev/null
+++ b/models/NnlfSetLC/LC_int16_model2.sadl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27b2de502f26a2f245cd4b13eecd89c7f87efbc2b1789df7e84386f468da46a6
+size 118787
diff --git a/models/NnlfSetLC/LC_int16_model3.sadl b/models/NnlfSetLC/LC_int16_model3.sadl
new file mode 100644
index 0000000000000000000000000000000000000000..2ed315c71497361bd5a3e96b9faf003628df96c3
--- /dev/null
+++ b/models/NnlfSetLC/LC_int16_model3.sadl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:711be695081f0d0ad815de49c8cd592f14ce57ed458cc2bf4a40af7954bf4a9a
+size 118787
diff --git a/source/App/BitstreamExtractorApp/CMakeLists.txt b/source/App/BitstreamExtractorApp/CMakeLists.txt
index 52458f61d33845ece13b941e57b09c065d600c10..cf7dacaa283109db931f28df4f34c2bbc5562e1e 100644
--- a/source/App/BitstreamExtractorApp/CMakeLists.txt
+++ b/source/App/BitstreamExtractorApp/CMakeLists.txt
@@ -60,7 +60,7 @@ if( CMAKE_COMPILER_IS_GNUCC AND BUILD_STATIC )
   target_compile_definitions( ${EXE_NAME} PUBLIC ENABLE_WPP_STATIC_LINK=1 )
 endif()
 
-target_link_libraries( ${EXE_NAME} CommonLib EncoderLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS} )
+target_link_libraries( ${EXE_NAME} CommonLib EncoderLib DecoderLib Utilities Threads::Threads ${ADDITIONAL_LIBS})
 
 # lldb custom data formatters
 if( XCODE )
diff --git a/source/App/DecoderApp/DecAppCfg.cpp b/source/App/DecoderApp/DecAppCfg.cpp
index 21dcb5fc5ec8f38550a174c4cdef813b29b59b5a..23f6143ac55cb035830a8d719316a3824f7f6b4e 100644
--- a/source/App/DecoderApp/DecAppCfg.cpp
+++ b/source/App/DecoderApp/DecAppCfg.cpp
@@ -106,6 +106,11 @@ bool DecAppCfg::parseCfg( int argc, char* argv[] )
   ("NnpfModelPath", m_nnpfModelPath, string(""), "paths to post-filter models: <path_0>,[<path_1>,...,<path_n>]\n")
   ("NnpfReconFile",  m_postReconFileName, string(""), "path to post-filtered reconstruction")
 #endif
+
+#if NN_FILTERING_SET_LC
+  ( "LCModelPath",              m_lcModelPath,                 default_lc_model_path, "LC model path\n")
+#endif
+
   ("OplFile,-opl",              m_oplFilename ,                        string(""), "opl-file name without extension for conformance testing\n")
 
 #if ENABLE_SIMD_OPT
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 6ed57bdb3f89211e2e28834d7f1942856e727a63..25a0edcb262610e77db3b53c107aea1c9c93b5e0 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -1118,6 +1118,10 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setUseNnlfSet0                                       (m_nnlfSet0);
 #endif
 
+#if NN_FILTERING_SET_LC
+  m_cEncLib.setUseNnlfSetLC                                       (m_nnlfSetLC);
+#endif
+
   m_cEncLib.setUseALF                                            ( m_alf );
 #if JVET_T0064
   m_cEncLib.setALFStrength                                       (m_alfStrength);
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index a35d50009dae539b5c9d4f1f5567d6c06d35928f..4feda3d1a1bf7595b64a70b814754e7abc7f77cc 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -736,6 +736,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("c",    po::parseConfigFile, "configuration file name")
 #if NN_FILTERING_SET_0
   ("ModelPath,-mp",                                   m_ModelPath,                         default_model_path, "model path\n")
+#endif
+#if NN_FILTERING_SET_LC
+  ( "LCModelPath",                                    m_lcModelPath,                       default_lc_model_path, "LC model path\n")
 #endif
   ("WarnUnknowParameter,w",                           warnUnknowParameter,                                  0, "warn for unknown configuration parameters instead of failing")
   ("isSDR",                                           sdr,                                              false, "compatibility")
@@ -1156,7 +1159,7 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("GolombRiceParameterAdaptation",                   m_persistentRiceAdaptationEnabledFlag,            false, "Enable the adaptation of the Golomb-Rice parameter over the course of each slice")
   ("AlignCABACBeforeBypass",                          m_cabacBypassAlignmentEnabledFlag,                false, "Align the CABAC engine to a defined fraction of a bit prior to coding bypass data. Must be 1 in high bit rate profile, 0 otherwise")
 #if NN_COMMON_SPS
-  ("NnlfOption",                                      m_nnlfOption,                                         0, "NN-based in-loop filter option (0:disable nnlf, 1: enable nnlf-0, 2: enable nnlf-1)")
+  ("NnlfOption",                                      m_nnlfOption,                                         0, "NN-based in-loop filter option (0:disable nnlf, 1: enable nnlf-0, 2: enable nnlf-1, 3: enable nnlf-LC)")
 #endif
   ("SAO",                                             m_bUseSAO,                                         true, "Enable Sample Adaptive Offset")
   ("TestSAODisableAtPictureLevel",                    m_bTestSAODisableAtPictureLevel,                  false, "Enables the testing of disabling SAO at the picture level after having analysed all blocks")
@@ -1521,6 +1524,11 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #if NN_FILTERING_SET_0
   ("NnlfSet0",                                        m_nnlfSet0,                               true,             "NN-based loop filter set 0\n")
 #endif
+
+#if NN_FILTERING_SET_LC
+  ("NnlfSetLC",                                        m_nnlfSetLC,                               true,             "NN-based loop filter set 2\n")
+#endif
+
     ("TemporalFilter",                                m_gopBasedTemporalFilterEnabled,          false,            "Enable GOP based temporal filter. Disabled per default")
     ("TemporalFilterFutureReference",                 m_gopBasedTemporalFilterFutureReference,   true,            "Enable referencing of future frames in the GOP based temporal filter. This is typically disabled for Low Delay configurations.")
     ("TemporalFilterStrengthFrame*",                  m_gopBasedTemporalFilterStrengths, std::map<int, double>(), "Strength for every * frame in GOP based temporal filter, where * is an integer. E.g. --TemporalFilterStrengthFrame8 0.95 will enable GOP based temporal filter at every 8th frame with strength 0.95.")
@@ -2444,12 +2452,15 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   }
 
 #if NN_COMMON_SPS
-  CHECK(m_nnlfOption != 0 && m_nnlfOption != 1 && m_nnlfOption != 2, "Undefined NN-based in-loop filter option");
+  CHECK(m_nnlfOption != 0 && m_nnlfOption != 1 && m_nnlfOption != 2 && m_nnlfOption != 3, "Undefined NN-based in-loop filter option");
 #if NN_FILTERING_SET_0
   m_nnlfSet0 = true;
 #endif
 #if NN_FILTERING_SET_1
   m_nnlfSet1 = true;
+#endif
+#if NN_FILTERING_SET_LC
+  m_nnlfSetLC = true;
 #endif
   if (m_nnlfOption == 0)
   {
@@ -2459,6 +2470,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #endif
 #if NN_FILTERING_SET_1
     m_nnlfSet1 = false;
+#endif
+#if NN_FILTERING_SET_LC
+    m_nnlfSetLC = false;
 #endif
   }
   else
@@ -2471,6 +2485,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #if NN_FILTERING_SET_1
     m_nnlfSet1 = m_nnlfSet == 1 ? true : false;
     m_encDbOpt = m_nnlfSet1 ? true : m_encDbOpt;
+#endif
+#if NN_FILTERING_SET_LC
+    m_nnlfSetLC = m_nnlfSet == 2 ? true : false;
 #endif
   }
 #endif
@@ -4228,6 +4245,9 @@ void EncAppCfg::xPrintParameter()
 #if NN_FILTERING_SET_1
   msg( VERBOSE, "NNLFSET1:%d ", (m_nnlfSet1)?(1):(0));
 #endif
+#if NN_FILTERING_SET_LC
+  msg( VERBOSE, "NnlfSetLC:%d ", m_nnlfSetLC?1:0);
+#endif
 #if JVET_AB0068_AC0328_NNLF_RDO
   msg( VERBOSE, "EncNnlfOpt:%d ", m_encNnlfOpt ? 1 : 0);
 #endif
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index eda98c8ff6202d5f85c06a092764b673ee5fa9c4..7208274c9115c63c8acf9f089cadf27dbf71a571 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -758,6 +758,10 @@ protected:
   bool        m_nnlfSet0;                                     ///< CNN Loop Filter
 #endif
 
+#if NN_FILTERING_SET_LC
+  bool        m_nnlfSetLC;                                     ///< LC CNN Loop Filter
+#endif
+
   bool        m_alf;                                          ///< Adaptive Loop Filter
 #if JVET_T0064
   double      m_alfStrength;
diff --git a/source/Lib/CommonLib/CMakeLists.txt b/source/Lib/CommonLib/CMakeLists.txt
index 1feff1e9633292271aa8b679e1b56974796405ff..515dea48473c7ab7ceb4760fd0afd877c7a7c164 100644
--- a/source/Lib/CommonLib/CMakeLists.txt
+++ b/source/Lib/CommonLib/CMakeLists.txt
@@ -111,6 +111,7 @@ if( MSVC )
   set_property( SOURCE NNInference.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ")
   set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ")
   set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ")
+  set_property( SOURCE NNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ")
 elseif( UNIX OR MINGW )
   if( NNLF_BUILD_WITH_AVX512 STREQUAL "1" )
     set_property( SOURCE NNSuperResolution.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math")
@@ -118,12 +119,14 @@ elseif( UNIX OR MINGW )
     set_property( SOURCE NNInference.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math")
     set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math")
     set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math")
+    set_property( SOURCE NNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math")
   else()
     set_property( SOURCE NNSuperResolution.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math")
     set_property( SOURCE NNPostFilter.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math")
     set_property( SOURCE NNInference.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math")
     set_property( SOURCE NNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math")
     set_property( SOURCE NNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math")
+    set_property( SOURCE NNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math")
   endif()
 endif()
 if(UNIX)
diff --git a/source/Lib/CommonLib/CodingStatistics.h b/source/Lib/CommonLib/CodingStatistics.h
index 0cde637774645429049e67804d6143178cdf8f4b..2e08daf20837767a972a7806be788011ddd73400 100644
--- a/source/Lib/CommonLib/CodingStatistics.h
+++ b/source/Lib/CommonLib/CodingStatistics.h
@@ -103,6 +103,9 @@ enum CodingStatisticsType
 
   STATS__CABAC_BITS__LFNST,
   STATS__CABAC_BITS__ALF,
+#if NN_FILTERING_SET_LC
+  STATS__CABAC_BITS__LCNNLF,
+#endif
   STATS__CABAC_TRM_BITS,
   STATS__CABAC_FIXED_BITS,
   STATS__BYTE_ALIGNMENT_BITS,
diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp
index 92b4741aa80c2d9d8ca18bbe22f1697d24b18413..7a744617d3b06d781b66b3c48bef5ab0fb3b0870 100644
--- a/source/Lib/CommonLib/CodingStructure.cpp
+++ b/source/Lib/CommonLib/CodingStructure.cpp
@@ -1118,6 +1118,16 @@ void CodingStructure::rebindPicBufs()
   {
     m_nnPostFiltered.destroy();
   }
+#endif
+#if NN_FILTERING_SET_LC
+  if (!picture->M_BUFS(0, PIC_DEC_LCNN_FILTERED).bufs.empty())
+  {
+    m_lcnn_filtered.createFromBuf(picture->M_BUFS(0, PIC_DEC_LCNN_FILTERED));
+  }
+  else
+  {
+    m_lcnn_filtered.destroy();
+  }
 #endif
   if( pcv->isEncoder )
   {
@@ -1618,6 +1628,12 @@ const CPelBuf     CodingStructure::getResiBuf(const CompArea &blk)     const { r
        PelUnitBuf CodingStructure::getResiBuf(const UnitArea &unit)          { return getBuf(unit, PIC_RESIDUAL); }
 const CPelUnitBuf CodingStructure::getResiBuf(const UnitArea &unit)    const { return getBuf(unit, PIC_RESIDUAL); }
 
+#if NN_FILTERING_SET_LC
+       PelBuf     CodingStructure::getLCnnlfBuf(const CompArea &blk)           { return getBuf(blk,  PIC_DEC_LCNN_FILTERED); }
+const CPelBuf     CodingStructure::getLCnnlfBuf(const CompArea &blk)     const { return getBuf(blk,  PIC_DEC_LCNN_FILTERED); }
+       PelUnitBuf CodingStructure::getLCnnlfBuf(const UnitArea &unit)          { return getBuf(unit, PIC_DEC_LCNN_FILTERED); }
+const CPelUnitBuf CodingStructure::getLCnnlfBuf(const UnitArea &unit)    const { return getBuf(unit, PIC_DEC_LCNN_FILTERED); }
+#endif
        PelBuf     CodingStructure::getRecoBuf(const CompArea &blk)           { return getBuf(blk,  PIC_RECONSTRUCTION); }
 const CPelBuf     CodingStructure::getRecoBuf(const CompArea &blk)     const { return getBuf(blk,  PIC_RECONSTRUCTION); }
        PelUnitBuf CodingStructure::getRecoBuf(const UnitArea &unit)          { return getBuf(unit, PIC_RECONSTRUCTION); }
@@ -1659,8 +1675,12 @@ PelBuf CodingStructure::getBuf( const CompArea &blk, const PictureType &type )
 
   const ComponentID compID = blk.compID;
 
-#if JVET_AC0055_NN_POST_FILTERING
+#if JVET_AC0055_NN_POST_FILTERING && NN_FILTERING_SET_LC
+  PelStorage* buf = type == PIC_PREDICTION ? &m_pred : ( type == PIC_RESIDUAL ? &m_resi : ( type == PIC_RECONSTRUCTION ? &m_reco : ( type == PIC_ORG_RESI ? &m_orgr : (type == PIC_NN_POST_FILTERED ? &m_nnPostFiltered : (type == PIC_DEC_LCNN_FILTERED ? &m_lcnn_filtered : nullptr ) ) ) ) );
+#elif JVET_AC0055_NN_POST_FILTERING
   PelStorage* buf = type == PIC_PREDICTION ? &m_pred : ( type == PIC_RESIDUAL ? &m_resi : ( type == PIC_RECONSTRUCTION ? &m_reco : ( type == PIC_ORG_RESI ? &m_orgr : (type == PIC_NN_POST_FILTERED ? &m_nnPostFiltered : nullptr ) ) ) );
+#elif NN_FILTERING_SET_LC
+  PelStorage* buf = type == PIC_PREDICTION ? &m_pred : ( type == PIC_RESIDUAL ? &m_resi : ( type == PIC_RECONSTRUCTION ? &m_reco : ( type == PIC_ORG_RESI ? &m_orgr : (type == PIC_DEC_LCNN_FILTERED ? &m_lcnn_filtered : nullptr ) ) ) );
 #else
   PelStorage* buf = type == PIC_PREDICTION ? &m_pred : ( type == PIC_RESIDUAL ? &m_resi : ( type == PIC_RECONSTRUCTION ? &m_reco : ( type == PIC_ORG_RESI ? &m_orgr : nullptr ) ) );
 #endif
@@ -1702,9 +1722,12 @@ const CPelBuf CodingStructure::getBuf( const CompArea &blk, const PictureType &t
   }
 
   const ComponentID compID = blk.compID;
-
-#if JVET_AC0055_NN_POST_FILTERING
+#if JVET_AC0055_NN_POST_FILTERING && NN_FILTERING_SET_LC
+  const PelStorage* buf = type == PIC_PREDICTION ? &m_pred : ( type == PIC_RESIDUAL ? &m_resi : ( type == PIC_RECONSTRUCTION ? &m_reco : ( type == PIC_ORG_RESI ? &m_orgr : ( type == PIC_NN_POST_FILTERED ? &m_nnPostFiltered : ( type == PIC_DEC_LCNN_FILTERED ? &m_lcnn_filtered : nullptr ) ) ) ) );
+#elif JVET_AC0055_NN_POST_FILTERING
   const PelStorage* buf = type == PIC_PREDICTION ? &m_pred : ( type == PIC_RESIDUAL ? &m_resi : ( type == PIC_RECONSTRUCTION ? &m_reco : ( type == PIC_ORG_RESI ? &m_orgr : ( type == PIC_NN_POST_FILTERED ? &m_nnPostFiltered : nullptr ) ) ) );
+#elif NN_FILTERING_SET_LC
+  const PelStorage* buf = type == PIC_PREDICTION ? &m_pred : ( type == PIC_RESIDUAL ? &m_resi : ( type == PIC_RECONSTRUCTION ? &m_reco : ( type == PIC_ORG_RESI ? &m_orgr : ( type == PIC_DEC_LCNN_FILTERED ? &m_lcnn_filtered : nullptr ) ) ) );
 #else
   const PelStorage* buf = type == PIC_PREDICTION ? &m_pred : ( type == PIC_RESIDUAL ? &m_resi : ( type == PIC_RECONSTRUCTION ? &m_reco : ( type == PIC_ORG_RESI ? &m_orgr : nullptr ) ) );
 #endif
diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h
index ca6ab60882f7dc8245544fabf356f24e95ed7447..8522dba625bed0db96ccadbd756cfa0c40cec717 100644
--- a/source/Lib/CommonLib/CodingStructure.h
+++ b/source/Lib/CommonLib/CodingStructure.h
@@ -68,6 +68,9 @@ enum PictureType
 #if NNVC_USE_BS
   PIC_BS_MAP,
 #endif
+#if NN_FILTERING_SET_LC
+  PIC_DEC_LCNN_FILTERED,
+#endif
 #if NNVC_USE_PRED
   PIC_PREDICTION_CUSTOM,
 #endif
@@ -266,6 +269,9 @@ private:
 #if JVET_AC0055_NN_POST_FILTERING
   PelStorage m_nnPostFiltered;
 #endif
+#if NN_FILTERING_SET_LC
+  PelStorage m_lcnn_filtered;
+#endif
 
   TCoeff *m_coeffs [ MAX_NUM_COMPONENT ];
   Pel    *m_pcmbuf [ MAX_NUM_COMPONENT ];
@@ -357,6 +363,13 @@ public:
          PelUnitBuf   getResiBuf()                                { return m_resi; }
   const CPelUnitBuf   getResiBuf()                          const { return m_resi; }
 
+#if NN_FILTERING_SET_LC
+         PelBuf       getLCnnlfBuf(const CompArea &blk);
+  const CPelBuf       getLCnnlfBuf(const CompArea &blk) const;
+         PelUnitBuf   getLCnnlfBuf(const UnitArea &unit);
+  const CPelUnitBuf   getLCnnlfBuf(const UnitArea &unit) const;
+#endif
+
   // org-resi buffer
          PelBuf       getOrgResiBuf(const ComponentID &compID)       { return m_orgr.get(compID); }
   const CPelBuf       getOrgResiBuf(const ComponentID &compID) const { return m_orgr.get(compID); }
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index 945ae33f84f657a02d7c538f9cee4c28de27c9dc..167ada2a5b222599ea4613db86b4081356106284 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -120,6 +120,14 @@ typedef enum
   AFFINE_MODEL_NUM
 } EAffineModel;
 
+#if NN_FILTERING_SET_LC
+enum LCnnlfMode
+{
+  LCNNLF_OFF,
+  LCNNLF_ALL_ON,
+  LCNNLF_CTU_ONOFF
+};
+#endif
 static const int AFFINE_ME_LIST_SIZE =                             4;
 static const int AFFINE_ME_LIST_SIZE_LD =                          3;
 static const double AFFINE_ME_LIST_MVP_TH =                        1.0;
diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp
index 876a59fd1f01ae5b261e9af97bffdaa8756adc05..8cb56bc50049e628388ef45b500954334d6e1917 100644
--- a/source/Lib/CommonLib/Contexts.cpp
+++ b/source/Lib/CommonLib/Contexts.cpp
@@ -828,6 +828,18 @@ const CtxSet ContextSetCfg::nnlfSet1ParamIdx = ContextSetCfg::addCtxSet
 });
 #endif
 
+#if NN_FILTERING_SET_LC
+const CtxSet ContextSetCfg::ctbLCnnlfFlag =
+{
+  ContextSetCfg::addCtxSet
+  ({
+  {  33,  52,  46,  25,  61,  54,  25,  61,  54, },
+  {  13,  23,  46,   4,  61,  54,  19,  46,  54, },
+  {  62,  39,  39,  54,  39,  39,  31,  39,  39, },
+  {   0,   0,   0,   4,   0,   0,   1,   0,   0, },
+  })
+};
+#endif
 const CtxSet ContextSetCfg::ctbAlfAlternative = ContextSetCfg::addCtxSet
 ({
   {  11,  26, },
diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h
index aedbfa3ac54094855bfbfbba9feeb37eee54b9ce..1f6814389b73caf39dbc3d1df9be883e18483eb0 100644
--- a/source/Lib/CommonLib/Contexts.h
+++ b/source/Lib/CommonLib/Contexts.h
@@ -265,6 +265,9 @@ public:
   static const CtxSet   BcwIdx;
 #if NN_FILTERING_SET_0
   static const CtxSet   ctbCnnlfFlag;
+#endif
+#if NN_FILTERING_SET_LC
+  static const CtxSet   ctbLCnnlfFlag;
 #endif
   static const CtxSet   ctbAlfFlag;
   static const CtxSet   ctbAlfAlternative;
diff --git a/source/Lib/CommonLib/NNFilterSetLC.cpp b/source/Lib/CommonLib/NNFilterSetLC.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8e435343f5f72eb2f0b4b9364286eb99dc54bdf4
--- /dev/null
+++ b/source/Lib/CommonLib/NNFilterSetLC.cpp
@@ -0,0 +1,463 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2023, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     NNFilterSetLC.cpp
+    \brief    CNN loop filter class
+*/
+#include "NNFilterSetLC.h"
+
+#define HAVE_INTTYPES_H 1
+#define __STDC_FORMAT_MACROS
+#include <sadl/model.h>
+#include "NNInference.h"
+
+#if NN_FILTERING_SET_LC
+#include "CodingStructure.h"
+#include "Picture.h"
+
+NNFilterSetLC::NNFilterSetLC()
+{
+  for(auto &p: m_Module) p=std::make_unique<sadl::Model<TypeSadl>>();
+  
+  for (int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++)
+  {
+    m_ctuEnableFlag[compIdx] = nullptr;
+  }
+  m_loadFlag = false;
+}
+
+NNFilterSetLC::~NNFilterSetLC() = default;
+
+void NNFilterSetLC::initNumCTUs(int m_numCTUsInPic)
+{ 
+  m_numCtusInFrame = m_numCTUsInPic;
+}
+void NNFilterSetLC::destroy() 
+{
+  for (int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++)
+  {
+    if (m_ctuEnableFlag[compIdx]) 
+    {
+      delete[] m_ctuEnableFlag[compIdx];
+      m_ctuEnableFlag[compIdx] = nullptr;
+    }
+  }
+}
+
+
+void NNFilterSetLC::filterPicture( PelUnitBuf in, PelUnitBuf bs_map, PelUnitBuf out, int model_selector /*=MAX_INT*/, int qp /*=MAX_INT*/)
+{
+  if(m_ModelPaths.empty())
+  {
+    out.copyFrom(in);
+    return;
+  }
+  int    blockSize = 64;
+
+  int    boundary_ext = BOUNDARY_EXT;
+  int    boundary_ext_interleave = boundary_ext/2;
+
+  PelBuf bsMapBufY  = bs_map.get(COMPONENT_Y);
+  PelBuf bsMapBufCb = bs_map.get(COMPONENT_Cb);
+  PelBuf bsMapBufCr = bs_map.get(COMPONENT_Cr);
+
+  PelBuf reconBufY  = in.get(COMPONENT_Y);
+  PelBuf reconBufCb = in.get(COMPONENT_Cb);
+  PelBuf reconBufCr = in.get(COMPONENT_Cr);
+  PelBuf nnFilteredBufY  = out.get(COMPONENT_Y);
+  PelBuf nnFilteredBufCb = out.get(COMPONENT_Cb);
+  PelBuf nnFilteredBufCr = out.get(COMPONENT_Cr);
+
+  int pic_w = reconBufY.width;
+  int pic_h = reconBufY.height;
+
+#if NN_FIXED_POINT_IMPLEMENTATION
+  const int org_quantizers_in   = ORG_QUANTIZERS_IN;
+  const int sadl_quantizers_in  = SADL_QUANTIZERS_IN;
+  const int out_quantizers_in   = OUT_QUANTIZERS_IN;
+  const int in_left_shift       = sadl_quantizers_in - org_quantizers_in;
+  const int out_right_shift     = out_quantizers_in - org_quantizers_in;
+#else
+    float in_maxValue = 1023;
+#endif
+
+  sadl::Model<TypeSadl>& model = *m_Module[model_selector];
+
+
+  sadl::Tensor<TypeSadl> map_cu;
+  map_cu = m_Input[model_selector][0];
+  for( int y = 0; y < pic_h; y+=blockSize*2)
+  {
+    for( int x = 0; x < pic_w; x+=blockSize*2)
+    {
+      for( int i = - boundary_ext_interleave; i < blockSize + boundary_ext_interleave; ++i)
+      {
+        for( int j = - boundary_ext_interleave; j < blockSize + boundary_ext_interleave; ++j)
+        {
+          int pos_x = std::max(0, std::min( x/2 + j, pic_w / 2 - 1 ));
+          int pos_y = std::max(0, std::min( y/2 + i, pic_h / 2 - 1 ));
+
+          int pos_x_luma_tl = std::max(0, std::min(x + j*2, pic_w - 1));
+          int pos_y_luma_tl = std::max(0, std::min(y + i*2, pic_h - 1));
+          int pos_x_luma_br = std::max(0, std::min(pos_x_luma_tl + 1, pic_w - 1));
+          int pos_y_luma_br = std::max(0, std::min(pos_y_luma_tl + 1, pic_h - 1));
+
+#if NN_FIXED_POINT_IMPLEMENTATION
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 0 ) = reconBufY.at(  pos_x_luma_tl, pos_y_luma_tl) << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 1 ) = reconBufY.at(  pos_x_luma_br, pos_y_luma_tl) << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 2 ) = reconBufY.at(  pos_x_luma_tl, pos_y_luma_br) << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 3 ) = reconBufY.at(  pos_x_luma_br, pos_y_luma_br) << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 4 ) = reconBufCb.at( pos_x,     pos_y)     << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 5 ) = reconBufCr.at( pos_x,     pos_y)     << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 6 ) = round(std::pow(2, (qp-42.)/6.) * std::pow(2, sadl_quantizers_in));
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 7  ) = bsMapBufY.at(  pos_x_luma_tl, pos_y_luma_tl)   << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 8 ) = bsMapBufCb.at( pos_x,     pos_y)     << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 9 ) = bsMapBufCr.at( pos_x,     pos_y)     << in_left_shift;
+#else
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 0 ) = reconBufY.at(  pos_x_luma_tl, pos_y_luma_tl)   / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 1 ) = reconBufY.at(  pos_x_luma_br, pos_y_luma_tl)   / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 2 ) = reconBufY.at(  pos_x_luma_tl, pos_y_luma_br) / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 3 ) = reconBufY.at(  pos_x_luma_br, pos_y_luma_br) / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 4 ) = reconBufCb.at( pos_x,     pos_y)     / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 5 ) = reconBufCr.at( pos_x,     pos_y)     / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 6 ) = std::pow(2, (qp-42.)/6.);
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 7  ) = bsMapBufY.at(  pos_x_luma_tl, pos_y_luma_tl)   / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 8 ) = bsMapBufCb.at( pos_x,     pos_y)     / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 9 ) = bsMapBufCr.at( pos_x,     pos_y)     / in_maxValue;
+#endif
+        }
+      }
+
+      m_Input[model_selector][0] = map_cu;
+      NNInference::infer<TypeSadl>(model, m_Input[model_selector]);
+      sadl::Tensor<TypeSadl> map_out = model.result(0);
+      for( int i = 0; i < blockSize; ++i)
+      {
+        for( int j = 0; j < blockSize; ++j)
+        {
+          int pos_x = x/2 + j;
+          int pos_y = y/2 + i;
+          if( pos_x >= pic_w/2 || pos_y >= pic_h / 2)
+          {
+            continue;
+          }
+          int i_slice = i + boundary_ext_interleave;
+          int j_slice = j + boundary_ext_interleave;
+#if NN_FIXED_POINT_IMPLEMENTATION
+          nnFilteredBufY.at(pos_x * 2, pos_y * 2)         = ( int(map_out(0, i_slice, j_slice, 0) + (map_cu(0, i_slice, j_slice, 0) << (out_right_shift - in_left_shift)))) * (1 << (NN_RESIDUE_ADDITIONAL_SHIFT - out_right_shift));
+          nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2)     = ( int(map_out(0, i_slice, j_slice, 1) + (map_cu(0, i_slice, j_slice, 1) << (out_right_shift - in_left_shift)))) * (1 << (NN_RESIDUE_ADDITIONAL_SHIFT - out_right_shift));
+          nnFilteredBufY.at(pos_x * 2, pos_y * 2 + 1)     = ( int(map_out(0, i_slice, j_slice, 2) + (map_cu(0, i_slice, j_slice, 2) << (out_right_shift - in_left_shift)))) * (1 << (NN_RESIDUE_ADDITIONAL_SHIFT - out_right_shift));
+          nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2 + 1) = ( int(map_out(0, i_slice, j_slice, 3) + (map_cu(0, i_slice, j_slice, 3) << (out_right_shift - in_left_shift)))) * (1 << (NN_RESIDUE_ADDITIONAL_SHIFT - out_right_shift));
+          nnFilteredBufCb.at(pos_x, pos_y)                = ( int(map_out(0, i_slice, j_slice, 4) + (map_cu(0, i_slice, j_slice, 4) << (out_right_shift - in_left_shift)))) * (1 << (NN_RESIDUE_ADDITIONAL_SHIFT - out_right_shift));
+          nnFilteredBufCr.at(pos_x, pos_y)                = ( int(map_out(0, i_slice, j_slice, 5) + (map_cu(0, i_slice, j_slice, 5) << (out_right_shift - in_left_shift)))) * (1 << (NN_RESIDUE_ADDITIONAL_SHIFT - out_right_shift));
+#else
+          nnFilteredBufY.at(pos_x * 2, pos_y * 2)         = int( (map_out(0, i_slice, j_slice, 0) + map_cu(0, i_slice, j_slice, 0)) * in_maxValue * (1 << NN_RESIDUE_ADDITIONAL_SHIFT) + 0.5);
+          nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2)     = int( (map_out(0, i_slice, j_slice, 1) + map_cu(0, i_slice, j_slice, 1)) * in_maxValue * (1 << NN_RESIDUE_ADDITIONAL_SHIFT) + 0.5);
+          nnFilteredBufY.at(pos_x * 2, pos_y * 2 + 1)     = int( (map_out(0, i_slice, j_slice, 2) + map_cu(0, i_slice, j_slice, 2)) * in_maxValue * (1 << NN_RESIDUE_ADDITIONAL_SHIFT) + 0.5);
+          nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2 + 1) = int( (map_out(0, i_slice, j_slice, 3) + map_cu(0, i_slice, j_slice, 3)) * in_maxValue * (1 << NN_RESIDUE_ADDITIONAL_SHIFT) + 0.5);
+          nnFilteredBufCb.at(pos_x, pos_y)                = int( (map_out(0, i_slice, j_slice, 4) + map_cu(0, i_slice, j_slice, 4)) * in_maxValue * (1 << NN_RESIDUE_ADDITIONAL_SHIFT) + 0.5);
+          nnFilteredBufCr.at(pos_x, pos_y)                = int( (map_out(0, i_slice, j_slice, 5) + map_cu(0, i_slice, j_slice, 5)) * in_maxValue * (1 << NN_RESIDUE_ADDITIONAL_SHIFT) + 0.5);
+#endif
+        }
+      }
+    }
+  }
+
+  
+}
+
+
+void NNFilterSetLC::filterPictureDecoder(CodingStructure& cs,  PelUnitBuf in, PelUnitBuf out, PelUnitBuf recon, int model_selector )
+{
+  if(m_ModelPaths.empty())
+  {
+    out.copyFrom(in);
+    return;
+  }
+  int    blockSize = 64;
+  int    boundary_ext = BOUNDARY_EXT;
+  int    boundary_ext_interleave = boundary_ext/2;
+  PelBuf inBufY  = in.get(COMPONENT_Y);
+  PelBuf inBufCb = in.get(COMPONENT_Cb);
+  PelBuf inBufCr = in.get(COMPONENT_Cr);
+  PelBuf bsMapBufY  = cs.picture->getBsMapBuf().get(COMPONENT_Y);
+  PelBuf bsMapBufCb = cs.picture->getBsMapBuf().get(COMPONENT_Cb);
+  PelBuf bsMapBufCr = cs.picture->getBsMapBuf().get(COMPONENT_Cr);
+  PelBuf nnFilteredBufY  = out.get(COMPONENT_Y);
+  PelBuf nnFilteredBufCb = out.get(COMPONENT_Cb);
+  PelBuf nnFilteredBufCr = out.get(COMPONENT_Cr);
+  PelBuf reconBufY  = recon.get(COMPONENT_Y);
+  PelBuf reconBufCb = recon.get(COMPONENT_Cb);
+  PelBuf reconBufCr = recon.get(COMPONENT_Cr);
+
+  // get CTU enable flags
+  for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
+  {
+    m_ctuEnableFlag[compIdx] = cs.picture->getLCnnlfCtuEnableFlag( compIdx );
+  }
+
+  int pic_w = inBufY.width;
+  int pic_h = inBufY.height;
+
+#if NN_FIXED_POINT_IMPLEMENTATION
+  const int org_quantizers_in   = ORG_QUANTIZERS_IN;
+  const int sadl_quantizers_in  = SADL_QUANTIZERS_IN;
+  const int out_quantizers_in   = OUT_QUANTIZERS_IN;
+  const int in_left_shift       = sadl_quantizers_in - org_quantizers_in;
+  const int out_right_shift     = out_quantizers_in - org_quantizers_in;
+#else
+    float in_maxValue = 1023;
+#endif
+
+  sadl::Model<TypeSadl>& model = *m_Module[model_selector];
+  sadl::Tensor<TypeSadl> map_cu;
+  map_cu = m_Input[model_selector][0];
+
+  int ctuIdx = 0;
+  for (int y = 0; y < pic_h; y += blockSize * 2)
+  {
+    for (int x = 0; x < pic_w; x += blockSize * 2)
+    {
+      for( int i = - boundary_ext_interleave; i < blockSize + boundary_ext_interleave; ++i)
+      {
+        for( int j = - boundary_ext_interleave; j < blockSize + boundary_ext_interleave; ++j)
+        {
+          int pos_x = std::max(0, std::min(x / 2 + j, pic_w / 2 - 1));
+          int pos_y = std::max(0, std::min(y / 2 + i, pic_h / 2 - 1));
+
+          int pos_x_luma_tl = std::max(0, std::min(x + j * 2, pic_w - 1));
+          int pos_y_luma_tl = std::max(0, std::min(y + i * 2, pic_h - 1));
+          int pos_x_luma_br = std::max(0, std::min(pos_x_luma_tl + 1, pic_w - 1));
+          int pos_y_luma_br = std::max(0, std::min(pos_y_luma_tl + 1, pic_h - 1));
+#if NN_FIXED_POINT_IMPLEMENTATION
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 0) = inBufY.at(pos_x_luma_tl, pos_y_luma_tl) << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 1) = inBufY.at(pos_x_luma_br, pos_y_luma_tl) << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 2) = inBufY.at(pos_x_luma_tl, pos_y_luma_br) << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 3) = inBufY.at(pos_x_luma_br, pos_y_luma_br) << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 4) = inBufCb.at(pos_x, pos_y) << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 5) = inBufCr.at(pos_x, pos_y) << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 6 ) = round(std::pow(2, (cs.slice->getSliceQp() -42.)/6.) * std::pow(2, sadl_quantizers_in));
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 7 ) = bsMapBufY.at(  pos_x_luma_tl, pos_y_luma_tl)   << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 8 ) = bsMapBufCb.at( pos_x,     pos_y)     << in_left_shift;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 9 ) = bsMapBufCr.at( pos_x,     pos_y)     << in_left_shift;
+#else
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 0) = inBufY.at(pos_x_luma_tl, pos_y_luma_tl) / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 1) = inBufY.at(pos_x_luma_br, pos_y_luma_tl) / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 2) = inBufY.at(pos_x_luma_tl, pos_y_luma_br) / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 3) = inBufY.at(pos_x_luma_br, pos_y_luma_br) / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 4) = inBufCb.at(pos_x, pos_y) / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 5) = inBufCr.at(pos_x, pos_y) / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 6 ) = std::pow(2, (cs.slice->getSliceQp() -42.)/6.);
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 7 ) =  bsMapBufY.at(  pos_x_luma_tl, pos_y_luma_tl)   / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 8 ) = bsMapBufCb.at( pos_x,     pos_y)     / in_maxValue;
+          map_cu(0, boundary_ext_interleave + i, boundary_ext_interleave + j, 9 ) = bsMapBufCr.at( pos_x,     pos_y)     / in_maxValue;
+#endif
+        }
+      }
+      m_Input[model_selector][0] = map_cu;
+      NNInference::infer<TypeSadl>(model, m_Input[model_selector]);
+      sadl::Tensor<TypeSadl> map_out = model.result(0);
+      for (int i = 0; i < blockSize; ++i)
+      {
+        for (int j = 0; j < blockSize; ++j)
+        {
+          int pos_x = x / 2 + j;
+          int pos_y = y / 2 + i;
+          if (pos_x >= pic_w / 2 || pos_y >= pic_h / 2)
+          {
+            continue;
+          }
+          int i_slice = i + boundary_ext_interleave;
+          int j_slice = j + boundary_ext_interleave;
+          if (m_ctuEnableFlag[COMPONENT_Y][ctuIdx])
+          {
+#if NN_FIXED_POINT_IMPLEMENTATION
+            nnFilteredBufY.at(pos_x * 2, pos_y * 2) =
+              (int(map_out(0, i_slice, j_slice, 0) + (map_cu(0, i_slice, j_slice, 0) << (out_right_shift - in_left_shift)))) * (1 << (NN_RESIDUE_ADDITIONAL_SHIFT - out_right_shift));
+            nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2) =
+              (int(map_out(0, i_slice, j_slice, 1) + (map_cu(0, i_slice, j_slice, 1) << (out_right_shift - in_left_shift)))) * (1 << (NN_RESIDUE_ADDITIONAL_SHIFT - out_right_shift));
+            nnFilteredBufY.at(pos_x * 2, pos_y * 2 + 1) =
+              (int(map_out(0, i_slice, j_slice, 2) + (map_cu(0, i_slice, j_slice, 2) << (out_right_shift - in_left_shift)))) * (1 << (NN_RESIDUE_ADDITIONAL_SHIFT - out_right_shift));
+            nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2 + 1) =
+              (int(map_out(0, i_slice, j_slice, 3) + (map_cu(0, i_slice, j_slice, 3) << (out_right_shift - in_left_shift)))) * (1 << (NN_RESIDUE_ADDITIONAL_SHIFT - out_right_shift));
+#else
+            nnFilteredBufY.at(pos_x * 2, pos_y * 2) =
+              int((map_out(0, i_slice, j_slice, 0) + map_cu(0, i_slice, j_slice, 0)) * in_maxValue * (1 << NN_RESIDUE_ADDITIONAL_SHIFT) + 0.5);
+            nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2) =
+              int((map_out(0, i_slice, j_slice, 1) + map_cu(0, i_slice, j_slice, 1)) * in_maxValue * (1 << NN_RESIDUE_ADDITIONAL_SHIFT) + 0.5);
+            nnFilteredBufY.at(pos_x * 2, pos_y * 2 + 1) =
+              int((map_out(0, i_slice, j_slice, 2) + map_cu(0, i_slice, j_slice, 2)) * in_maxValue * (1 << NN_RESIDUE_ADDITIONAL_SHIFT) + 0.5);
+            nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2 + 1) =
+              int((map_out(0, i_slice, j_slice, 3) + map_cu(0, i_slice, j_slice, 3)) * in_maxValue * (1 << NN_RESIDUE_ADDITIONAL_SHIFT) + 0.5);
+#endif
+          }
+          else
+          {
+            nnFilteredBufY.at(pos_x * 2, pos_y * 2) = reconBufY.at(pos_x * 2, pos_y * 2) << NN_RESIDUE_ADDITIONAL_SHIFT;
+            nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2)     = reconBufY.at(pos_x * 2 + 1, pos_y * 2) << NN_RESIDUE_ADDITIONAL_SHIFT;
+            nnFilteredBufY.at(pos_x * 2, pos_y * 2 + 1)     = reconBufY.at(pos_x * 2, pos_y * 2 + 1) << NN_RESIDUE_ADDITIONAL_SHIFT;
+            nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2 + 1) = reconBufY.at(pos_x * 2 + 1, pos_y * 2 + 1) << NN_RESIDUE_ADDITIONAL_SHIFT;
+          }
+          if (m_ctuEnableFlag[COMPONENT_Cb][ctuIdx])
+          {
+#if NN_FIXED_POINT_IMPLEMENTATION
+            nnFilteredBufCb.at(pos_x, pos_y) =
+              (int(map_out(0, i_slice, j_slice, 4) + (map_cu(0, i_slice, j_slice, 4) << (out_right_shift - in_left_shift)))) * (1 << (NN_RESIDUE_ADDITIONAL_SHIFT - out_right_shift));
+#else
+            nnFilteredBufCb.at(pos_x, pos_y) =
+              int((map_out(0, i_slice, j_slice, 4) + map_cu(0, i_slice, j_slice, 4)) * in_maxValue * (1 << NN_RESIDUE_ADDITIONAL_SHIFT) + 0.5);
+#endif
+          }
+          else
+          {
+            nnFilteredBufCb.at(pos_x, pos_y) = reconBufCb.at(pos_x, pos_y) << NN_RESIDUE_ADDITIONAL_SHIFT;
+          }
+          if (m_ctuEnableFlag[COMPONENT_Cr][ctuIdx])
+          {
+#if NN_FIXED_POINT_IMPLEMENTATION
+            nnFilteredBufCr.at(pos_x, pos_y) =
+              (int(map_out(0, i_slice, j_slice, 5) + (map_cu(0, i_slice, j_slice, 5) << (out_right_shift - in_left_shift)))) * (1 << (NN_RESIDUE_ADDITIONAL_SHIFT - out_right_shift));
+#else
+            nnFilteredBufCr.at(pos_x, pos_y) =
+              int((map_out(0, i_slice, j_slice, 5) + map_cu(0, i_slice, j_slice, 5)) * in_maxValue * (1 << NN_RESIDUE_ADDITIONAL_SHIFT) + 0.5);
+#endif
+          }
+          else
+          {
+            nnFilteredBufCr.at(pos_x, pos_y) = reconBufCr.at(pos_x, pos_y) << NN_RESIDUE_ADDITIONAL_SHIFT;
+          }
+        }
+      }
+
+
+      CHECK(ctuIdx >= m_numCtusInFrame, "ctuIdx should be less than no. of CTUs in frame");
+      ctuIdx++;
+    }
+  }
+}
+
+
+void NNFilterSetLC::scaleResidue(PelUnitBuf recoBuf, PelUnitBuf filteredBuf, PicHeader * ph)
+{
+  PelBuf reconBufY       = recoBuf.get(COMPONENT_Y);
+  PelBuf reconBufCb      = recoBuf.get(COMPONENT_Cb);
+  PelBuf reconBufCr      = recoBuf.get(COMPONENT_Cr);
+  PelBuf nnFilteredBufY  = filteredBuf.get(COMPONENT_Y);
+  PelBuf nnFilteredBufCb = filteredBuf.get(COMPONENT_Cb);
+  PelBuf nnFilteredBufCr = filteredBuf.get(COMPONENT_Cr);
+
+  int pic_w = reconBufY.width;
+  int pic_h = reconBufY.height;
+
+  const int scaleY  = ph->getLCNnScale(COMPONENT_Y);
+  const int scaleCb = ph->getLCNnScale(COMPONENT_Cb);
+  const int scaleCr = ph->getLCNnScale(COMPONENT_Cr);
+
+  int shiftY = LCNN_RESIDUE_SCALE_SHIFT + NN_RESIDUE_ADDITIONAL_SHIFT;
+  int shiftCb = LCNN_RESIDUE_SCALE_SHIFT + NN_RESIDUE_ADDITIONAL_SHIFT;
+  int shiftCr = LCNN_RESIDUE_SCALE_SHIFT + NN_RESIDUE_ADDITIONAL_SHIFT; 
+  
+  int offsetY = (1 << shiftY)/2 ;
+  int offsetCb = (1 << shiftCb)/2;
+  int offsetCr = (1 << shiftCr)/2;
+
+  for (int y = 0; y < pic_h; y++)
+  {
+    for (int x = 0; x < pic_w; x++)
+    {
+      nnFilteredBufY.at(x, y) = Clip3(0, 1023, reconBufY.at(x, y) + (((nnFilteredBufY.at(x, y) - (reconBufY.at(x, y) << NN_RESIDUE_ADDITIONAL_SHIFT)) * scaleY + offsetY ) >> shiftY));
+    }
+  }
+
+  for (int y = 0; y < pic_h/2; y++)
+  {
+    for (int x = 0; x < pic_w/2; x++)
+    {
+      nnFilteredBufCb.at(x, y) = Clip3(0, 1023, reconBufCb.at(x, y) + (((nnFilteredBufCb.at(x, y) - (reconBufCb.at(x, y) << NN_RESIDUE_ADDITIONAL_SHIFT)) * scaleCb + offsetCb ) >> shiftCb));
+      nnFilteredBufCr.at(x, y) = Clip3(0, 1023, reconBufCr.at(x, y) + (((nnFilteredBufCr.at(x, y) - (reconBufCr.at(x, y) << NN_RESIDUE_ADDITIONAL_SHIFT)) * scaleCr + offsetCr ) >> shiftCr));
+    }
+  }
+}
+
+bool NNFilterSetLC::loadModel( std::string model_path )
+{
+  if(m_loadFlag)
+  {
+    return true;
+  }
+  size_t len = model_path.size();
+  size_t pos = 0;
+  size_t idx = 0;
+  std::string one_path;
+  auto remained_path = model_path;
+  do
+  {
+    auto pos_delimiter = model_path.find(",", pos);
+    one_path = "";
+
+    if(pos_delimiter == std::string::npos)
+    {
+      one_path = model_path.substr(pos);
+      pos = len;
+    }
+    else
+    {
+      one_path = model_path.substr(pos, pos_delimiter - pos);
+      pos = pos_delimiter + 1;
+    }
+
+    if(one_path.size())
+    {
+      sadl::Model<TypeSadl>& model = *m_Module[idx];
+      ifstream file(one_path, ios::binary);
+      if (!model.load(file)) {
+        cerr << "[ERROR] Unable to read model " << one_path << endl;
+        exit(-1);
+      }
+
+      m_Input[idx] = model.getInputsTemplate();
+
+      if (!model.init(m_Input[idx])) {
+        cerr << "[ERROR] issue during initialization" << endl;
+        exit(-1);
+      }
+      m_ModelPaths.push_back(one_path);
+      idx++;
+    }
+  }while( pos < len);
+
+  m_loadFlag = true;
+  return true;
+}
+
+#endif
diff --git a/source/Lib/CommonLib/NNFilterSetLC.h b/source/Lib/CommonLib/NNFilterSetLC.h
new file mode 100644
index 0000000000000000000000000000000000000000..329728f3fceca621bf81a2aa6bf53f18c66bb37f
--- /dev/null
+++ b/source/Lib/CommonLib/NNFilterSetLC.h
@@ -0,0 +1,86 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2023, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     NNFilterSetLC.h
+    \brief    CNN loop filter class (header)
+*/
+
+#ifndef __NNFILTERSETLC__
+#define __NNFILTERSETLC__
+
+#include "CommonDef.h"
+
+#if NN_FILTERING_SET_LC
+
+#include "Unit.h"
+#include "Picture.h"
+#include <fstream>
+
+using namespace std;
+
+// fwd
+namespace sadl {
+template<typename T> class Model;
+template<typename T> class Tensor;
+}
+
+class NNFilterSetLC
+{
+public:
+  NNFilterSetLC();
+  virtual ~NNFilterSetLC();
+
+  void filterPicture( PelUnitBuf in, PelUnitBuf bs_map, PelUnitBuf out, int model_selector = MAX_INT, int qp = MAX_INT);
+
+  void filterPictureDecoder(CodingStructure& cs,  PelUnitBuf in, PelUnitBuf out, PelUnitBuf recon, int model_selector = MAX_INT);
+
+  void initNumCTUs(int m_numCTUsInPic);
+  void destroy();
+
+  void scaleResidue(PelUnitBuf recoBuf, PelUnitBuf filteredBuf, PicHeader *ph);
+  bool loadModel( std::string model_path );
+
+public:
+  int m_NumCoresToUse = 1;
+  bool m_loadFlag;
+  std::vector<std::string> m_ModelPaths;
+  uint8_t *m_ctuEnableFlag[MAX_NUM_COMPONENT];
+  uint16_t m_numCtusInFrame;
+  
+protected:
+  std::unique_ptr<sadl::Model<TypeSadl>> m_Module[4];
+  std::vector<sadl::Tensor<TypeSadl>> m_Input[4];
+};
+#endif
+
+#endif
diff --git a/source/Lib/CommonLib/NNInference.cpp b/source/Lib/CommonLib/NNInference.cpp
index bb606123f66c5e1e65c67d28fd9dcbdd263b845f..b72eff039db80e0802b46be4347c419e8283a992 100644
--- a/source/Lib/CommonLib/NNInference.cpp
+++ b/source/Lib/CommonLib/NNInference.cpp
@@ -52,7 +52,6 @@ void NNInference::infer(sadl::Model<int16_t> &model, std::vector<sadl::Tensor<in
         exit(-1);
     }
 }
-
 #else
 template<>
 void NNInference::infer(sadl::Model<float> &model, std::vector<sadl::Tensor<float>> &inputs)
diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp
index 3067e43e1731c9f570497c59b2891700ad97b673..5c1a94cec3e73a0949d2c5748eed4558a6447764 100644
--- a/source/Lib/CommonLib/Picture.cpp
+++ b/source/Lib/CommonLib/Picture.cpp
@@ -209,6 +209,9 @@ void Picture::create( const ChromaFormat &_chromaFormat, const Size &size, const
   const Area a      = Area( Position(), size );
   M_BUFS( 0, PIC_RECONSTRUCTION ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE );
   M_BUFS( 0, PIC_RECON_WRAP ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE );
+#if NN_FILTERING_SET_LC
+  M_BUFS( 0, PIC_DEC_LCNN_FILTERED ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE );
+#endif
 #if NNVC_USE_PARTITION_AS_CU_AVERAGE
   M_BUFS( 0, PIC_PARTITION_CU_AVERAGE ).create( _chromaFormat, a, _maxCUSize, margin, MEMORY_ALIGN_DEF_SIZE );
 #endif
@@ -396,7 +399,12 @@ const CPelUnitBuf Picture::getBsMapBuf(const UnitArea &unit) const
        PelBuf     Picture::getBsMapBuf(const CompArea &blk)                       { return getBuf(blk, PIC_BS_MAP); }
 const CPelBuf     Picture::getBsMapBuf(const CompArea &blk) const                 { return getBuf(blk, PIC_BS_MAP); }
 #endif
-
+#if NN_FILTERING_SET_LC
+       PelBuf     Picture::getLCnnlfBuf(const ComponentID compID)        { return getBuf(compID, PIC_DEC_LCNN_FILTERED); }
+const CPelBuf     Picture::getLCnnlfBuf(const ComponentID compID)  const { return getBuf(compID, PIC_DEC_LCNN_FILTERED); }
+       PelUnitBuf Picture::getLCnnlfBuf()                           { return M_BUFS(0,    PIC_DEC_LCNN_FILTERED); }
+const CPelUnitBuf Picture::getLCnnlfBuf()                     const { return M_BUFS(0,    PIC_DEC_LCNN_FILTERED); }
+#endif
 #if NNVC_USE_REC_BEFORE_DBF
        PelBuf     Picture::getRecBeforeDbfBuf(const ComponentID compID, bool /*wrap*/)       { return getBuf(compID,               PIC_REC_BEFORE_DBF); }
        PelUnitBuf Picture::getRecBeforeDbfBuf(bool /*wrap*/)                                 { return M_BUFS(scheduler.getSplitPicId(), PIC_REC_BEFORE_DBF); }
diff --git a/source/Lib/CommonLib/Picture.h b/source/Lib/CommonLib/Picture.h
index f43fc756779ebc3db2eb44eb7a53019056406956..a8ffbd6c55ce66e586482dc8ade02446e0925ccd 100644
--- a/source/Lib/CommonLib/Picture.h
+++ b/source/Lib/CommonLib/Picture.h
@@ -136,7 +136,12 @@ struct Picture : public UnitArea
   const CPelBuf     getResiBuf(const CompArea &blk) const;
          PelUnitBuf getResiBuf(const UnitArea &unit);
   const CPelUnitBuf getResiBuf(const UnitArea &unit) const;
-
+#if NN_FILTERING_SET_LC
+         PelBuf     getLCnnlfBuf(const ComponentID compID);
+  const CPelBuf     getLCnnlfBuf(const ComponentID compID)  const;
+         PelUnitBuf getLCnnlfBuf();
+  const CPelUnitBuf getLCnnlfBuf() const;
+#endif
 #if NNVC_USE_PARTITION_AS_CU_AVERAGE
          PelBuf     getCuAverageBuf(const ComponentID compID, bool wrap=false);
          PelUnitBuf getCuAverageBuf(bool wrap=false);
@@ -444,6 +449,33 @@ public:
   }
 #endif
 
+#if NN_FILTERING_SET_LC
+  std::vector<uint8_t> m_LCnnlfCtuEnableFlag[MAX_NUM_COMPONENT];
+  uint8_t* getLCnnlfCtuEnableFlag(int compIdx) { return m_LCnnlfCtuEnableFlag[compIdx].data(); }
+  std::vector<uint8_t>* getLCnnlfCtuEnableFlag() { return m_LCnnlfCtuEnableFlag; }
+  
+  void backupLCnnlfCtuEnableFlag( std::vector<uint8_t> *backup) {
+    for( auto i = 0; i < MAX_NUM_COMPONENT; ++i)
+    {
+      backup[i] = m_LCnnlfCtuEnableFlag[i];
+    }
+  }
+  void restoreLCnnLFCtuEnableFlag( std::vector<uint8_t> *backup) {
+    for( auto i = 0; i < MAX_NUM_COMPONENT; ++i)
+    {
+      m_LCnnlfCtuEnableFlag[i] = backup[i];
+    }
+  }
+  
+  void resizeLCnnlfCtuEnableFlag( int numEntries )
+  {
+    for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
+    {
+      m_LCnnlfCtuEnableFlag[compIdx].resize(numEntries);
+      std::fill( m_LCnnlfCtuEnableFlag[compIdx].begin(), m_LCnnlfCtuEnableFlag[compIdx].end(), 0 );
+    }
+  }
+#endif
 };
 
 int calcAndPrintHashStatus(const CPelUnitBuf& pic, const class SEIDecodedPictureHash* pictureHashSEI, const BitDepths &bitDepths, const MsgLevel msgl);
diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp
index 43b2a4a6fbd50b314a3cb490df77d890839eabfc..6c8d9f37993803b0bbf6554b86c9b4cbd3c57dbe 100644
--- a/source/Lib/CommonLib/Rom.cpp
+++ b/source/Lib/CommonLib/Rom.cpp
@@ -49,6 +49,14 @@
 std::string default_model_path = "./models/";
 #endif
 
+#if NN_FILTERING_SET_LC
+std::string m_lcModelPath;
+#if !NN_FIXED_POINT_IMPLEMENTATION
+std::string default_lc_model_path = "models/NnlfSetLC/LC_float_model0.sadl,models/NnlfSetLC/LC_float_model1.sadl,models/NnlfSetLC/LC_float_model2.sadl,models/NnlfSetLC/LC_float_model3.sadl";
+#else
+std::string default_lc_model_path = "models/NnlfSetLC/LC_int16_model0.sadl,models/NnlfSetLC/LC_int16_model1.sadl,models/NnlfSetLC/LC_int16_model2.sadl,models/NnlfSetLC/LC_int16_model3.sadl";
+#endif
+#endif
 // ====================================================================================================================
 // Initialize / destroy functions
 // ====================================================================================================================
diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h
index d56ab592d1c4040c35c4daaeb1b4a397409ddcd3..4cb3cfa218953df0ebff1f69f807e9244a47c7a8 100644
--- a/source/Lib/CommonLib/Rom.h
+++ b/source/Lib/CommonLib/Rom.h
@@ -52,6 +52,12 @@
 extern std::string default_model_path;
 #endif
 
+#if NN_FILTERING_SET_LC
+// Comma-separated list of LC NNLF model file paths; defaults to default_lc_model_path
+extern std::string m_lcModelPath;
+extern std::string default_lc_model_path;
+#endif
+
 // ====================================================================================================================
 // Initialize / destroy functions
 // ====================================================================================================================
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index f05c0cb4368eeafe898b8d82b7568d41895e8523..edeed538f6128e1b5c47cca2aec3158edaaa2e7b 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -1485,6 +1485,10 @@ private:
   bool              m_nnlfSet0EnabledFlag;
 #endif
 
+#if NN_FILTERING_SET_LC
+  bool              m_nnlfSetLCEnabledFlag;
+#endif
+
   bool              m_alfEnabledFlag;
   bool              m_ccalfEnabledFlag;
 #if NN_FILTERING_SET_1
@@ -1777,6 +1781,10 @@ public:
   bool                    getNnlfSet1MultiframeEnabledFlag() const                                        { return m_nnlfSet1MultiframeEnabledFlag; }
   void                    setNnlfSet1MultiframeEnabledFlag( bool b )                                      { m_nnlfSet1MultiframeEnabledFlag = b; }
 #endif
+#endif
+#if NN_FILTERING_SET_LC
+  bool                    getNnlfSetLCEnabledFlag() const                                                  { return m_nnlfSetLCEnabledFlag; }
+  void                    setNnlfSetLCEnabledFlag( bool b )                                                { m_nnlfSetLCEnabledFlag = b; }
 #endif
   void                    setJointCbCrEnabledFlag(bool bVal)                                              { m_JointCbCrEnabledFlag = bVal; }
   bool                    getJointCbCrEnabledFlag() const                                                 { return m_JointCbCrEnabledFlag; }
@@ -2431,6 +2439,12 @@ private:
   bool m_ccalfEnabledFlag[MAX_NUM_COMPONENT];
   int  m_ccalfCbApsId;
   int  m_ccalfCrApsId;
+
+#if NN_FILTERING_SET_LC
+  int lc_nn_scale[MAX_NUM_COMPONENT];
+  int lc_modelIdx;
+#endif
+
   bool                        m_deblockingFilterOverrideFlag;                           //!< deblocking filter override controls enabled
   bool                        m_deblockingFilterDisable;                                //!< deblocking filter disabled flag
   int                         m_deblockingFilterBetaOffsetDiv2;                         //!< beta offset for deblocking filter
@@ -2550,6 +2564,14 @@ public:
   void setCcAlfEnabledFlag(ComponentID compId, bool b) { m_ccalfEnabledFlag[compId] = b; }
   bool getCcAlfEnabledFlag(ComponentID compId) const { return m_ccalfEnabledFlag[compId]; }
 
+#if NN_FILTERING_SET_LC
+  void setLCNnScale(int sc, ComponentID id) { lc_nn_scale[id] = sc; }
+  int  getLCNnScale(ComponentID id) const { return lc_nn_scale[id]; }
+
+  void setLCNnModelIdx(int idx) { lc_modelIdx = idx; }
+  int  getLCNnModelIdx() const { return lc_modelIdx; }
+#endif
+
   void setCcAlfCbApsId(int i) { m_ccalfCbApsId = i; }
   int  getCcAlfCbApsId() const { return m_ccalfCbApsId; }
   void setCcAlfCrApsId(int i) { m_ccalfCrApsId = i; }
@@ -2703,6 +2725,10 @@ private:
   bool                       m_biDirPred;
   int                        m_symRefIdx[2];
 
+#if NN_FILTERING_SET_LC
+  uint8_t   m_LCnnlfSliceModeIdc[3];  
+#endif
+
   //  Data
   int                        m_iSliceQpDelta;
   int                        m_iSliceChromaQpDelta[MAX_NUM_COMPONENT+1];
@@ -2859,6 +2885,12 @@ public:
   bool                        getPendingRasInit() const                              { return m_pendingRasInit;                                      }
   void                        setPendingRasInit( bool val )                          { m_pendingRasInit = val;                                       }
 
+#if NN_FILTERING_SET_LC
+  void                        setLCnnlfSliceModeIdc( uint8_t s, int compIdx )         { m_LCnnlfSliceModeIdc[compIdx] = s;                             }
+  uint8_t                     getLCnnlfSliceModeIdc( int compIdx )                    { return m_LCnnlfSliceModeIdc[compIdx];                          }
+  uint8_t*                    getLCnnlfSliceModeIdc()                                 { return m_LCnnlfSliceModeIdc;                                   }
+#endif
+
   void                        setLmcsEnabledFlag(bool b)                              { m_lmcsEnabledFlag = b;                                       }
   bool                        getLmcsEnabledFlag()                                    { return m_lmcsEnabledFlag;                                    }
   const bool                  getLmcsEnabledFlag() const                              { return m_lmcsEnabledFlag;                                    }
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index c3cae8ce23842fce75f7c8085ee72eb1f0e28eb9..d42f174076af40e660f3ef5ac44b4f53551db4d5 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -128,6 +128,24 @@ using TypeSadl = float;
 #endif
 
 #define JVET_AC0055_NN_POST_FILTERING                     1 // JVET-AC0055: EE1-1.11: Content-adaptive post-filter
+// NN filtering set LC: low-complexity NN-based in-loop filter
+#define NN_FILTERING_SET_LC                                1
+// Options for the low-complexity (LC) NNLF
+#if NN_FILTERING_SET_LC
+#define REDUCE_MODEL_RD_I                                 1
+
+// Constants
+#define NN_LUMA_MODEL_NUMBER                              4
+#define LCNN_MODEL_IDX_BITS                               2
+#define LCNN_RESIDUE_SCALE_SHIFT                          8
+#define BOUNDARY_EXT                                      8
+
+#if NN_FIXED_POINT_IMPLEMENTATION
+#define ORG_QUANTIZERS_IN                                 10
+#define SADL_QUANTIZERS_IN                                11
+#define OUT_QUANTIZERS_IN                                 13
+#endif
+#endif
 
 //########### place macros to be removed in next cycle below this line ###############
 
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index 717d84b9e18c2a45b933b2d0c9e50fc5d6c8d167..46f10c6e572da1162c48acde8e1210b7ebea9133 100644
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -150,6 +150,42 @@ void CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i
   }
 #endif
 
+#if NN_FILTERING_SET_LC
+  if( cs.sps->getNnlfSetLCEnabledFlag() && ctuRsAddr == 0)
+  {
+    uint32_t numCTUs = cs.pcv->sizeInCtus;
+    int frame_width_in_ctus = cs.pcv->widthInCtus;
+    for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
+    {
+      uint8_t* ctbLCnnlfFlag = cs.slice->getPic()->getLCnnlfCtuEnableFlag( compIdx );
+      const uint8_t *sliceModeIdc = cs.slice->getLCnnlfSliceModeIdc();
+      for( int ctuIdx = 0; ctuIdx < numCTUs; ctuIdx++ )
+      {
+        if (sliceModeIdc[compIdx] == LCNNLF_OFF)
+        {
+          ctbLCnnlfFlag[ctuIdx] = LCNNLF_OFF;
+        }
+        else if(sliceModeIdc[compIdx] == LCNNLF_ALL_ON)
+        {
+          ctbLCnnlfFlag[ctuIdx] = LCNNLF_ALL_ON;
+        }
+        else
+        {
+          CHECK(sliceModeIdc[compIdx] != LCNNLF_CTU_ONOFF, "slice mode idc should be LCNNLF_CTU_ONOFF");
+          /* CTU on/off mode: decode one context-coded bin per CTU */
+          int ctxInc = 0;
+          const int       leftCTUAddr = (ctuIdx%frame_width_in_ctus > 0) ? ctuIdx - 1 : -1;
+          const int       aboveCTUAddr = (ctuIdx >= frame_width_in_ctus) ? ctuIdx - frame_width_in_ctus : -1;
+          if( leftCTUAddr >= 0 ) ctxInc += ((ctbLCnnlfFlag[leftCTUAddr] != LCNNLF_OFF) ? 1 : 0);
+          if( aboveCTUAddr >= 0 ) ctxInc += ((ctbLCnnlfFlag[aboveCTUAddr] != LCNNLF_OFF) ? 1 : 0);
+          RExt__DECODER_DEBUG_BIT_STATISTICS_CREATE_SET(STATS__CABAC_BITS__LCNNLF);
+          ctbLCnnlfFlag[ctuIdx] = m_BinDecoder.decodeBin( Ctx::ctbLCnnlfFlag( compIdx * MAX_NUM_COMPONENT + ctxInc) );
+        }
+      }
+    }
+  }
+#endif
+
   sao( cs, ctuRsAddr );
 #if NN_FILTERING_SET_1
   if ( cs.sps->getNnlfSet1EnabledFlag() && ctuRsAddr == 0)
diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp
index 81c8c8ab1f6689c9125dfbd08225894680a4e4f0..fec65cd6170ec92ec37fbe47a2423c5f4b2d8728 100644
--- a/source/Lib/DecoderLib/DecLib.cpp
+++ b/source/Lib/DecoderLib/DecLib.cpp
@@ -733,7 +733,30 @@ void DecLib::executeLoopFilters()
     m_pcNNFilterSet1.cnnFilter(m_pcPic);
   }
 #endif
+#if NN_FILTERING_SET_LC
+  if (cs.sps->getNnlfSetLCEnabledFlag())
+  {
+    if( !m_lcModelPath.empty() )
+    {
+      m_lcNNLF.loadModel( m_lcModelPath );
+    }
+    const int iPicWidth = cs.pps->getPicWidthInLumaSamples();
+    const int iPicHeight = cs.pps->getPicHeightInLumaSamples();
+    const unsigned int maxCUSize = cs.pps->pcv->maxCUWidth;
+    const int frameWidthInCtus = (iPicWidth % maxCUSize) ? iPicWidth / maxCUSize + 1 : iPicWidth / maxCUSize;
+    const int frameHeightInCtus = (iPicHeight % maxCUSize) ? iPicHeight / maxCUSize + 1 : iPicHeight / maxCUSize;
+    const int numCtusInFrame = frameWidthInCtus * frameHeightInCtus;
+    m_lcNNLF.initNumCTUs(numCtusInFrame);
+#if NNVC_USE_REC_BEFORE_DBF
+    m_lcNNLF.filterPictureDecoder(cs, m_pcPic->getRecBeforeDbfBuf(), m_pcPic->getLCnnlfBuf(), m_pcPic->getRecoBuf(), m_pcPic->cs->picHeader->getLCNnModelIdx());
+#else
+    m_lcNNLF.filterPictureDecoder(cs, m_pcPic->getRecoBuf(), m_pcPic->getLCnnlfBuf(), m_pcPic->getRecoBuf(), m_pcPic->cs->picHeader->getLCNnModelIdx());
+#endif
   
+    m_lcNNLF.scaleResidue(m_pcPic->getRecoBuf(), m_pcPic->getLCnnlfBuf(), m_pcPic->cs->picHeader);
+    m_pcPic->getRecoBuf().copyFrom(m_pcPic->getLCnnlfBuf());
+  }
+#endif
   if( cs.sps->getSAOEnabledFlag() )
   {
     m_cSAO.SAOProcess( cs, cs.picture->getSAO() );
diff --git a/source/Lib/DecoderLib/DecLib.h b/source/Lib/DecoderLib/DecLib.h
index f5ae58e499fbffefbf4796ffd68ad8ca7461d4f0..78085009191daf393540327a1e3d3bcdf9b4b49a 100644
--- a/source/Lib/DecoderLib/DecLib.h
+++ b/source/Lib/DecoderLib/DecLib.h
@@ -66,6 +66,11 @@
 #if JVET_AC0055_NN_POST_FILTERING
 #include "CommonLib/NNPostFilter.h"
 #endif
+
+#if NN_FILTERING_SET_LC
+#include "CommonLib/NNFilterSetLC.h"
+#endif
+
 class InputNALUnit;
 
 //! \ingroup DecoderLib
@@ -148,6 +153,10 @@ private:
 #endif
 #if JVET_AC0055_NN_POST_FILTERING
   NNPostFilter            m_nnPostFilter;
+#endif
+#if NN_FILTERING_SET_LC
+  NNFilterSetLC            m_lcNNLF;
+  // LC model path comes from the global m_lcModelPath declared in CommonLib/Rom.h
 #endif
   // decoder side RD cost computation
   RdCost                  m_cRdCost;                      ///< RD cost computation class
diff --git a/source/Lib/DecoderLib/DecSlice.cpp b/source/Lib/DecoderLib/DecSlice.cpp
index af13a30ab3e95d01797fc404fae8ee69ab425d8d..e5d4dae6a3dc4f2c334e917b8468fe1ef83ece5d 100644
--- a/source/Lib/DecoderLib/DecSlice.cpp
+++ b/source/Lib/DecoderLib/DecSlice.cpp
@@ -102,6 +102,9 @@ void DecSlice::decompressSlice( Slice* slice, InputBitstream* bitstream, int deb
 
   if (slice->getFirstCtuRsAddrInSlice() == 0)
   {
+#if NN_FILTERING_SET_LC
+    cs.picture->resizeLCnnlfCtuEnableFlag( cs.pcv->sizeInCtus );
+#endif
     cs.picture->resizeAlfCtuEnableFlag( cs.pcv->sizeInCtus );
     cs.picture->resizeAlfCtbFilterIndex(cs.pcv->sizeInCtus);
     cs.picture->resizeAlfCtuAlternative( cs.pcv->sizeInCtus );
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index 5115b4a4de7edbafda7281d8c7678a9673ef3585..197c2f8fc4cacd8300d4842c01643db5bde5f19f 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -1725,7 +1725,7 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   READ_FLAG(uiCode, "sps_nnlf_enabled_flag");                          pcSPS->setNnlfEnabledFlag ( uiCode ? true : false );
   if (pcSPS->getNnlfEnabledFlag())
   {
-    READ_FLAG(uiCode, "sps_nnlf_set");                                 pcSPS->setNnlfSet ( uiCode);
+    READ_UVLC(uiCode, "sps_nnlf_set");                                 pcSPS->setNnlfSet ( uiCode);
   }
 #if NN_FILTERING_SET_0
   pcSPS->setNnlfSet0EnabledFlag(pcSPS->getNnlfEnabledFlag() && pcSPS->getNnlfSet() == 0 ? true : false);
@@ -1733,6 +1733,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
 #if NN_FILTERING_SET_1
   pcSPS->setNnlfSet1EnabledFlag(pcSPS->getNnlfEnabledFlag() && pcSPS->getNnlfSet() == 1 ? true : false);
 #endif
+#if NN_FILTERING_SET_LC
+  pcSPS->setNnlfSetLCEnabledFlag(pcSPS->getNnlfEnabledFlag() && pcSPS->getNnlfSet() == 2 ? true : false);
+#endif
 #endif
 
 #if NN_FILTERING_SET_1
@@ -3332,7 +3335,27 @@ void HLSyntaxReader::parsePictureHeader( PicHeader* picHeader, ParameterSetManag
     picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, false);
   }
 
+#if NN_FILTERING_SET_LC 
+  if (sps->getNnlfSetLCEnabledFlag())
+  {
+    READ_SCODE(LCNN_RESIDUE_SCALE_SHIFT+1, iCode, "nnScale_Y");
+    picHeader->setLCNnScale(iCode + (1 << LCNN_RESIDUE_SCALE_SHIFT), COMPONENT_Y);
+    READ_SCODE(LCNN_RESIDUE_SCALE_SHIFT+1, iCode, "nnScale_Cb");
+    picHeader->setLCNnScale(iCode + (1 << LCNN_RESIDUE_SCALE_SHIFT), COMPONENT_Cb);
+    READ_SCODE(LCNN_RESIDUE_SCALE_SHIFT+1, iCode, "nnScale_Cr");
+    picHeader->setLCNnScale(iCode + (1 << LCNN_RESIDUE_SCALE_SHIFT), COMPONENT_Cr);
 
+    if(LCNN_MODEL_IDX_BITS > 0)
+    {
+      READ_CODE(LCNN_MODEL_IDX_BITS, uiCode, "nn_filter_idx");
+      picHeader->setLCNnModelIdx(uiCode);
+    }
+    else
+    {
+      picHeader->setLCNnModelIdx(0);
+    }
+  }
+#endif
 
   // deblocking filter controls
   if (pps->getDeblockingFilterControlPresentFlag())
@@ -4320,6 +4343,23 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
       }
     }
 
+#if NN_FILTERING_SET_LC
+    if (sps->getNnlfSetLCEnabledFlag())
+    {
+      for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
+      {
+        READ_FLAG( uiCode, "slice_cnnlf_mode_idc" );
+        //fprintf(stderr, "slice_cnn_mode_idc =%d \n", uiCode);
+        pcSlice->setLCnnlfSliceModeIdc( (uiCode == 1) ? LCNNLF_CTU_ONOFF : LCNNLF_OFF, compIdx );
+        if( pcSlice->getLCnnlfSliceModeIdc( compIdx) != LCNNLF_OFF )
+        {
+          READ_FLAG( uiCode, "slice_cnnlf_luma_slice_all_on_flag" );
+          //fprintf(stderr, "slice_cnn_luma_slice_all_on_flag =%d \n", uiCode);
+          pcSlice->setLCnnlfSliceModeIdc( (uiCode == 1) ? LCNNLF_ALL_ON : LCNNLF_CTU_ONOFF, compIdx );
+        }
+      }
+    }
+#endif
 
     if (pps->getDeblockingFilterControlPresentFlag())
     {
diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp
index 48bad45e0a03386aedf5899f03b1faccf2735032..630326eaf2a8bae70b47ca5ce53d4a5882896119 100644
--- a/source/Lib/EncoderLib/CABACWriter.cpp
+++ b/source/Lib/EncoderLib/CABACWriter.cpp
@@ -50,7 +50,7 @@
 
 //! \ingroup EncoderLib
 //! \{
-
+#if !NN_FILTERING_SET_LC
 void CABACWriter::initCtxModels( const Slice& slice )
 {
   int       qp                = slice.getSliceQp();
@@ -62,7 +62,7 @@ void CABACWriter::initCtxModels( const Slice& slice )
   }
   m_BinEncoder.reset( qp, (int)sliceType );
 }
-
+#endif
 
 
 template <class BinProbModel>
@@ -171,7 +171,19 @@ void CABACWriter::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i
     }
   }
 #endif
-
+#if NN_FILTERING_SET_LC
+  if(cs.sps->getNnlfSetLCEnabledFlag() && ctuRsAddr == 0 && !skipSao)
+  {
+    for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
+    {
+      uint32_t numCTUs = cs.pcv->sizeInCtus;
+      int frame_width_in_ctus = cs.pcv->widthInCtus;
+      uint8_t* ctbCnnFilterFlag = cs.slice->getPic()->getLCnnlfCtuEnableFlag( compIdx);  
+      uint8_t*   sliceModeIdc = cs.slice->getLCnnlfSliceModeIdc();
+      codeLCnnlfCtuEnableFlags(numCTUs, frame_width_in_ctus, ctbCnnFilterFlag, ComponentID(compIdx), sliceModeIdc) ;
+    }
+  }
+#endif
   if( !skipSao )
   {
     sao( *cs.slice, ctuRsAddr );
diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h
index dbdb6f95f2c43dc8548afd9e0ae807758a3b58c3..6ce435563250e3910fdbec8df4a1b13f2b071b43 100644
--- a/source/Lib/EncoderLib/CABACWriter.h
+++ b/source/Lib/EncoderLib/CABACWriter.h
@@ -59,7 +59,21 @@ public:
   virtual ~CABACWriter() {}
 
 public:
+#if NN_FILTERING_SET_LC
+  void initCtxModels( const Slice& slice )
+  {
+    int       qp                = slice.getSliceQp();
+    SliceType sliceType         = slice.getSliceType();
+    SliceType encCABACTableIdx  = slice.getEncCABACTableIdx();
+    if( !slice.isIntra() && (encCABACTableIdx==B_SLICE || encCABACTableIdx==P_SLICE) && slice.getPPS()->getCabacInitPresentFlag() )
+    {
+      sliceType = encCABACTableIdx;
+    }
+    m_BinEncoder.reset( qp, (int)sliceType );
+  }
+#else
   void        initCtxModels             ( const Slice&                  slice );
+#endif
   void        setEncCu(EncCu* pcEncCu) { m_EncCu = pcEncCu; }
   SliceType   getCtxInitId              ( const Slice&                  slice );
   void        initBitstream             ( OutputBitstream*              bitstream )           { m_Bitstream = bitstream; m_BinEncoder.init( m_Bitstream ); }
@@ -180,6 +194,28 @@ public:
   void        codeNnlfSet1ParamIdx         ( CodingStructure& cs, uint32_t ctuRsAddr, const int chal );
 #endif
 
+#if NN_FILTERING_SET_LC
+  // Encodes the per-CTU LC NNLF enable flags; no-op unless the slice mode is LCNNLF_CTU_ONOFF.
+  void  codeLCnnlfCtuEnableFlags(uint32_t numCTUs, int  frame_width_in_ctus, uint8_t* ctbCnnFilterFlag,  ComponentID compID, uint8_t *cnnlfSliceModeIdc)
+  {
+    if (cnnlfSliceModeIdc[compID] == LCNNLF_OFF || cnnlfSliceModeIdc[compID] == LCNNLF_ALL_ON)
+    {
+      return;
+    }
+    else
+    {
+      for( int ctuIdx = 0; ctuIdx < numCTUs; ctuIdx++ )
+      {
+        int ctxInc = 0;
+        const int       leftCTUAddr = (ctuIdx%frame_width_in_ctus > 0) ? ctuIdx - 1 : -1;
+        const int       aboveCTUAddr = (ctuIdx >= frame_width_in_ctus) ? ctuIdx - frame_width_in_ctus : -1;
+        if( leftCTUAddr >= 0 ) ctxInc += ((ctbCnnFilterFlag[leftCTUAddr] != LCNNLF_OFF) ? 1 : 0);
+        if( aboveCTUAddr >= 0 ) ctxInc += ((ctbCnnFilterFlag[aboveCTUAddr] != LCNNLF_OFF) ? 1 : 0);
+        m_BinEncoder.encodeBin( ctbCnnFilterFlag[ctuIdx], Ctx::ctbLCnnlfFlag( compID * MAX_NUM_COMPONENT + ctxInc ) );
+      } 
+    }
+  }; 
+#endif
 private:
   void        unary_max_symbol          ( unsigned symbol, unsigned ctxId0, unsigned ctxIdN, unsigned maxSymbol );
   void        unary_max_eqprob          ( unsigned symbol,                                   unsigned maxSymbol );
diff --git a/source/Lib/EncoderLib/CMakeLists.txt b/source/Lib/EncoderLib/CMakeLists.txt
index a87d2b518a53b986a81a5ace843e8c91a984bc33..d463cd590d71e92bfd6cd70dcef4a5ee2bfcbb19 100644
--- a/source/Lib/EncoderLib/CMakeLists.txt
+++ b/source/Lib/EncoderLib/CMakeLists.txt
@@ -65,15 +65,18 @@ if( MSVC )
   set_property( SOURCE EncNNFilter.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ")
   set_property( SOURCE EncNNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ")
   set_property( SOURCE EncNNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ")
+  set_property( SOURCE EncNNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "/arch:AVX2 -DNDEBUG=1 ")
 elseif( UNIX OR MINGW )
   if( NNLF_BUILD_WITH_AVX512 STREQUAL "1" )
     set_property( SOURCE EncNNFilter.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math")
     set_property( SOURCE EncNNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math")
     set_property( SOURCE EncNNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math")
+    set_property( SOURCE EncNNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx512f -mavx512bw -ffast-math")
   else()
     set_property( SOURCE EncNNFilter.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math")
     set_property( SOURCE EncNNFilterSet0.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math")
     set_property( SOURCE EncNNFilterSet1.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math")
+    set_property( SOURCE EncNNFilterSetLC.cpp APPEND PROPERTY COMPILE_FLAGS "-DNDEBUG=1 -mavx2 -ffast-math")
   endif()
 endif()
 
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index 85403fd4efaed0efd8272c5e75ea98e6dddeea52..d422fb0a9b3028e5d7444b048e023245b8cf84a4 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -826,6 +826,9 @@ protected:
 #if JVET_AC0055_NN_POST_FILTERING
   bool        m_nnpf;
 #endif
+#if NN_FILTERING_SET_LC
+  bool        m_nnlfSetLC;
+#endif
 #if JVET_O0756_CALCULATE_HDRMETRICS
   double                       m_whitePointDeltaE[hdrtoolslib::NB_REF_WHITE];
   double                       m_maxSampleValue;
@@ -2184,6 +2187,11 @@ public:
   bool         getNnlfSet1UseMultiframe()                       const { return m_nnlfSet1Multiframe; }
 #endif
 #endif
+
+#if NN_FILTERING_SET_LC
+  void         setUseNnlfSetLC(bool b)                                 { m_nnlfSetLC = b; }
+#endif
+
 #if JVET_AC0055_NN_POST_FILTERING
   void         setUseNnpf(bool b)                                      { m_nnpf = b; }
   bool         getUseNnpf()                                      const { return m_nnpf; }
diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp
index 41edbe15234e8c09a4f4effd2475f3529ad8c1e1..24f228a43635784b0a7710784397fcaad6c4e831 100644
--- a/source/Lib/EncoderLib/EncGOP.cpp
+++ b/source/Lib/EncoderLib/EncGOP.cpp
@@ -2811,6 +2811,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
     pcPic->resizeNnlfSet1ParamIdx( pcPic->cs->pcv->sizeInNnlfSet1InferSize );
 #endif
     
+#if NN_FILTERING_SET_LC
+    pcPic->resizeLCnnlfCtuEnableFlag( numberOfCtusInFrame );
+    m_pcLCnnlf.create(m_pcEncLib->getCABACEncoder(), m_pcEncLib->getCtxCache(), pcSlice, m_pcCfg->getBitDepth());
+#endif
+
     bool decPic = false;
     bool encPic = false;
     // test if we can skip the picture entirely or decode instead of encoding
@@ -3165,6 +3170,103 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
       }
 #endif
 
+#if NN_FILTERING_SET_LC
+      if ( cs.sps->getNnlfSetLCEnabledFlag() )
+      {
+        if( !m_lcModelPath.empty() )
+        {
+          m_pcLCnnlf.loadModel( m_lcModelPath );
+        }
+
+        double best_pic_cost = MAX_DOUBLE;
+        double curr_pic_cost = MAX_DOUBLE;
+
+        int    best_idx = -1;
+
+        int nn_bestScale[MAX_NUM_COMPONENT]{MAX_INT};
+        uint8_t   cnnFilterSliceModeIdc[3];  
+        std::vector<uint8_t> cnnFilterCtuEnableFlag[MAX_NUM_COMPONENT];
+        if(m_pcLCnnlf.m_cCnnTmpBufModelSelect.bufs.empty())
+        {
+          CPelUnitBuf recoBuf = pcPic->getRecoBuf();
+          ChromaFormat chromaFormat = recoBuf.chromaFormat;
+          Size sz = recoBuf.Y();
+          Area area = Area( Position(0,0), sz);
+          m_pcLCnnlf.m_cCnnTmpBufModelSelect.create(chromaFormat, area, 0, 0, MEMORY_ALIGN_DEF_SIZE);
+        }
+        int start_idx = 0;
+        int one_after_end_idx = (int) m_pcLCnnlf.m_ModelPaths.size();
+
+  #if REDUCE_MODEL_RD_I
+        if(cs.slice->getSliceType() == I_SLICE)
+          one_after_end_idx /= 2;
+  #endif
+
+        for( int modelIdx = start_idx; modelIdx < one_after_end_idx; modelIdx += 1)
+        {
+          const int iPicWidth = cs.pps->getPicWidthInLumaSamples();
+          const int iPicHeight = cs.pps->getPicHeightInLumaSamples();
+          const unsigned int maxCUSize = cs.pps->pcv->maxCUWidth;
+          const int frameWidthInCtus = (iPicWidth % maxCUSize) ? iPicWidth / maxCUSize + 1 : iPicWidth / maxCUSize;
+          const int frameHeightInCtus = (iPicHeight % maxCUSize) ? iPicHeight / maxCUSize + 1 : iPicHeight / maxCUSize;
+          const int numCtusInFrame = frameWidthInCtus * frameHeightInCtus;
+          m_pcLCnnlf.initNumCTUs(numCtusInFrame);
+
+          PelUnitBuf origBuf = (pcSlice->getSPS()->getUseLmcs() || m_pcCfg->getGopBasedTemporalFilterEnabled())
+                                ? pcPic->getTrueOrigBuf()
+                                : pcPic->getOrigBuf();
+#if NNVC_USE_REC_BEFORE_DBF 
+          m_pcLCnnlf.filterPicture(pcPic->getRecBeforeDbfBuf(), pcPic->getBsMapBuf(), pcPic->getLCnnlfBuf(), modelIdx, cs.slice->getSliceQp());
+#else
+          m_pcLCnnlf.filterPicture(pcPic->getRecoBuf(), pcPic->getBsMapBuf(), pcPic->getLCnnlfBuf(), modelIdx, cs.slice->getSliceQp());
+#endif
+          m_pcLCnnlf.m_ctuEnableFlag[COMPONENT_Y] = cs.picture->getLCnnlfCtuEnableFlag(COMPONENT_Y);
+          m_pcLCnnlf.m_ctuEnableFlag[COMPONENT_Cb] = cs.picture->getLCnnlfCtuEnableFlag(COMPONENT_Cb);
+          m_pcLCnnlf.m_ctuEnableFlag[COMPONENT_Cr] = cs.picture->getLCnnlfCtuEnableFlag(COMPONENT_Cr);
+
+          for (int idx = 0; idx < numCtusInFrame; idx++)
+          {
+            m_pcLCnnlf.m_ctuEnableFlag[COMPONENT_Y][idx] = 1;
+            m_pcLCnnlf.m_ctuEnableFlag[COMPONENT_Cb][idx] = 1;
+            m_pcLCnnlf.m_ctuEnableFlag[COMPONENT_Cr][idx] = 1;
+          }
+          // pcPic->getLCnnlfBuf().copyFrom(pcPic->getNnFilteredBackupBuf());
+          m_pcLCnnlf.deriveScale(origBuf, pcPic->getRecoBuf(), pcPic->getLCnnlfBuf(), picHeader);
+          m_pcLCnnlf.scaleResidue(pcPic->getRecoBuf(), pcPic->getLCnnlfBuf(), picHeader);
+          uint8_t cnnSliceModeIdc[3] = { LCNNLF_OFF, LCNNLF_OFF, LCNNLF_OFF };
+          curr_pic_cost = m_pcLCnnlf.filterPictureRD(cs, pcPic->getRecoBuf(), pcPic->getLCnnlfBuf(), origBuf,  pcSlice->getLambdas(), cnnSliceModeIdc);
+          for( int s = 0; s < uiNumSliceSegments; s++ )
+          {
+            pcPic->slices[s]->setLCnnlfSliceModeIdc( cnnSliceModeIdc[0], COMPONENT_Y );
+            pcPic->slices[s]->setLCnnlfSliceModeIdc( cnnSliceModeIdc[1], COMPONENT_Cb );
+            pcPic->slices[s]->setLCnnlfSliceModeIdc( cnnSliceModeIdc[2], COMPONENT_Cr );
+          }
+
+          if(curr_pic_cost < best_pic_cost)
+          {
+            best_idx = modelIdx;
+            best_pic_cost = curr_pic_cost;
+            m_pcLCnnlf.m_cCnnTmpBufModelSelect.copyFrom(pcPic->getLCnnlfBuf());
+            for( auto i = 0; i < MAX_NUM_COMPONENT; ++i)
+            {
+              nn_bestScale[i] = pcSlice->getPicHeader()->getLCNnScale((ComponentID)i);
+            }
+            memcpy(cnnFilterSliceModeIdc, pcSlice->getLCnnlfSliceModeIdc(), sizeof(cnnFilterSliceModeIdc));
+            cs.picture->backupLCnnlfCtuEnableFlag(cnnFilterCtuEnableFlag);
+          }
+        } // for( modelIdx )
+        picHeader->setLCNnModelIdx(best_idx);
+        pcPic->getLCnnlfBuf().copyFrom(m_pcLCnnlf.m_cCnnTmpBufModelSelect);
+        for( auto i = 0; i < MAX_NUM_COMPONENT; ++i)
+        {
+          pcPic->cs->picHeader->setLCNnScale(nn_bestScale[i], (ComponentID)i);
+        }
+        memcpy(pcSlice->getLCnnlfSliceModeIdc(), cnnFilterSliceModeIdc, sizeof(cnnFilterSliceModeIdc));
+        cs.picture->restoreLCnnLFCtuEnableFlag(cnnFilterCtuEnableFlag);
+        pcPic->getRecoBuf().copyFrom(pcPic->getLCnnlfBuf());
+      }
+#endif
+
       if( pcSlice->getSPS()->getSAOEnabledFlag() )
       {
         bool sliceEnabled[MAX_NUM_COMPONENT];
diff --git a/source/Lib/EncoderLib/EncGOP.h b/source/Lib/EncoderLib/EncGOP.h
index d8519fff1d1607c640c14b949db1f13b42e47805..d93c8417b1d51b631f60537071282a871a8fa623 100644
--- a/source/Lib/EncoderLib/EncGOP.h
+++ b/source/Lib/EncoderLib/EncGOP.h
@@ -88,6 +88,11 @@
 #if JVET_AC0055_NN_POST_FILTERING
 #include "EncoderLib/EncNNPostFilter.h"
 #endif
+
+#if NN_FILTERING_SET_LC
+#include "EncoderLib/EncNNFilterSetLC.h"
+#endif
+
 //! \ingroup EncoderLib
 //! \{
 
@@ -184,6 +189,10 @@ private:
   EncNNFilterSet0*          m_pcCNNLF;
 #endif
 
+#if NN_FILTERING_SET_LC
+  EncNNFilterSetLC          m_pcLCnnlf;
+#endif
+
   EncReshape*               m_pcReshaper;
   RateCtrl*                 m_pcRateCtrl;
   // indicate sequence first
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index 94c2a9af123e977dee34958837a26d09f55b93d6..643871c1dbee603f955c6080650027666c80b297 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -1601,6 +1601,10 @@ void EncLib::xInitSPS( SPS& sps )
   }
 #endif
 
+#if NN_FILTERING_SET_LC
+  sps.setNnlfSetLCEnabledFlag(m_nnlfSetLC);
+#endif
+
   if (sps.getVuiParametersPresentFlag())
   {
     VUI* pcVUI = sps.getVuiParameters();
diff --git a/source/Lib/EncoderLib/EncNNFilterSet1.cpp b/source/Lib/EncoderLib/EncNNFilterSet1.cpp
index 3bf646862170b11f79bcd40609df14a64851155c..298d368a563288d27a98606ed8e45e89802eecd6 100644
--- a/source/Lib/EncoderLib/EncNNFilterSet1.cpp
+++ b/source/Lib/EncoderLib/EncNNFilterSet1.cpp
@@ -160,7 +160,7 @@ void EncNNFilterSet1::cnnFilterPicture(Picture* pic, int numParams)
         if (chal == 0)
         {
 #if NN_FIXED_POINT_IMPLEMENTATION
-          cnnFilterLumaBlock<int16_t>(pic, extBlock, extLeft, extRight, extTop, extBottom, paramIdx, pcSlice->getSliceType() != I_SLICE); 
+          cnnFilterLumaBlock<TypeSadl>(pic, extBlock, extLeft, extRight, extTop, extBottom, paramIdx, pcSlice->getSliceType() != I_SLICE); 
 #else
           cnnFilterLumaBlock<float>(pic, extBlock, extLeft, extRight, extTop, extBottom, paramIdx, pcSlice->getSliceType() != I_SLICE);
 #endif
@@ -168,7 +168,7 @@ void EncNNFilterSet1::cnnFilterPicture(Picture* pic, int numParams)
         else
         {
 #if NN_FIXED_POINT_IMPLEMENTATION
-          cnnFilterChromaBlock<int16_t>(pic, extBlock, extLeft >> 1, extRight >> 1, extTop >> 1, extBottom >> 1, paramIdx, pcSlice->getSliceType() != I_SLICE);
+          cnnFilterChromaBlock<TypeSadl>(pic, extBlock, extLeft >> 1, extRight >> 1, extTop >> 1, extBottom >> 1, paramIdx, pcSlice->getSliceType() != I_SLICE);
 #else
           cnnFilterChromaBlock<float>(pic, extBlock, extLeft >> 1, extRight >> 1, extTop >> 1, extBottom >> 1, paramIdx, pcSlice->getSliceType() != I_SLICE);
 #endif
diff --git a/source/Lib/EncoderLib/EncNNFilterSetLC.cpp b/source/Lib/EncoderLib/EncNNFilterSetLC.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b3be05f5cb80796f1982654721070fb171860db3
--- /dev/null
+++ b/source/Lib/EncoderLib/EncNNFilterSetLC.cpp
@@ -0,0 +1,523 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2023, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     EncNNFilterSetLC.cpp
+    \brief    Encoder low complexity cnn loop filter
+*/
+
+#define HAVE_INTTYPES_H 1
+#define __STDC_FORMAT_MACROS
+#include "CABACWriter.h"
+#include <sadl/model.h>
+#include "CommonLib/NNInference.h"
+#include "CommonLib/NNFilterSetLC.h"
+#include "EncNNFilterSetLC.h"
+
+#if NN_FILTERING_SET_LC
+#include "CodingStructure.h"
+#include "Picture.h"
+
+#define CnnFilterCtx( c ) SubCtx( Ctx::ctbLCnnlfFlag, c )
+
+EncNNFilterSetLC::EncNNFilterSetLC()
+  : m_CABACEstimator(nullptr)
+{
+}
+
+
+void EncNNFilterSetLC::create(CABACEncoder* cabacEncoder, CtxCache* ctxCache, Slice* pcSlice, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE])
+{
+  m_CABACEstimator = cabacEncoder->getCABACEstimator( pcSlice->getSPS() );
+  m_CtxCache = ctxCache;
+  m_CABACEstimator->initCtxModels ( *pcSlice );
+  m_CABACEstimator->resetBits();
+  std::memcpy( m_inputBitDepth, inputBitDepth, sizeof( m_inputBitDepth ) );
+}
+
+
+#define FRAME_RD_Y_MULTIPLIER  2.0
+
+double EncNNFilterSetLC::filterPictureRD(CodingStructure &cs, PelUnitBuf in, PelUnitBuf out, PelUnitBuf original, const double *lambdas, uint8_t *cnnlfSliceModeIdc)
+{
+  int    blockSize       = 64;
+  PelBuf reconBufY       = in.get(COMPONENT_Y);
+  PelBuf reconBufCb      = in.get(COMPONENT_Cb);
+  PelBuf reconBufCr      = in.get(COMPONENT_Cr);
+  PelBuf nnFilteredBufY  = out.get(COMPONENT_Y);
+  PelBuf nnFilteredBufCb = out.get(COMPONENT_Cb);
+  PelBuf nnFilteredBufCr = out.get(COMPONENT_Cr);
+
+  PelBuf OriginalBufY  = original.get(COMPONENT_Y);
+  PelBuf OriginalBufCb = original.get(COMPONENT_Cb);
+  PelBuf OriginalBufCr = original.get(COMPONENT_Cr);
+
+  int shiftLuma = 2 * DISTORTION_PRECISION_ADJUSTMENT(m_inputBitDepth[CHANNEL_TYPE_LUMA]);
+  int shiftChroma = 2 * DISTORTION_PRECISION_ADJUSTMENT(m_inputBitDepth[CHANNEL_TYPE_CHROMA]);
+  m_lambda[COMPONENT_Y] = lambdas[COMPONENT_Y] * double(1 << shiftLuma);
+  m_lambda[COMPONENT_Cb] = lambdas[COMPONENT_Cb] * double(1 << shiftChroma);
+  m_lambda[COMPONENT_Cr] = lambdas[COMPONENT_Cr] * double(1 << shiftChroma);
+
+  int pic_w = reconBufY.width;
+  int pic_h = reconBufY.height;
+
+  int ctuIdx = 0;
+  /* Frame level SSD params */
+
+  /* Luma (Y) */
+  double recFrmSSDY    = 0;  // All Off 
+  double nnFltFrmSSDY  = 0;  // All On
+  double bestFrmSSDY   = 0;  //On/Off,  tracks the best SSD for each CTU
+
+  /* Chroma (Cb) */
+  double recFrmSSDCb   = 0;  // All Off
+  double nnFltFrmSSDCb = 0;  // All On
+  double bestFrmSSDCb   = 0;  //On/Off,  tracks the best SSD for each CTU
+
+  /* Chroma (Cr) */
+  double recFrmSSDCr   = 0; // All Off
+  double nnFltFrmSSDCr = 0; // All On
+  double bestFrmSSDCr  = 0;  //On/Off,  tracks the best SSD for each CTU
+
+  for (int y = 0; y < pic_h; y += blockSize * 2)
+  {
+    for (int x = 0; x < pic_w; x += blockSize * 2)
+    {
+      uint32_t recBLKSSDY    = 0;
+      uint32_t nnFltBLKSSDY  = 0;
+      uint32_t recBLKSSDCb   = 0;
+      uint32_t nnFltBLKSSDCb = 0;
+      uint32_t recBLKSSDCr   = 0;
+      uint32_t nnFltBLKSSDCr = 0;
+
+      for (int i = 0; i < blockSize; ++i)
+      {
+        for (int j = 0; j < blockSize; ++j)
+        {
+          int pos_x = x / 2 + j;
+          int pos_y = y / 2 + i;
+          if (pos_x >= pic_w / 2 || pos_y >= pic_h / 2)
+          {
+            continue;
+          }
+          // Luma SSD
+
+          recBLKSSDY += (reconBufY.at(pos_x * 2, pos_y * 2) - OriginalBufY.at(pos_x * 2, pos_y * 2))
+                        * (reconBufY.at(pos_x * 2, pos_y * 2)
+                           - OriginalBufY.at(pos_x * 2, pos_y * 2));   // SSD , sum of squared difference
+          nnFltBLKSSDY += (nnFilteredBufY.at(pos_x * 2, pos_y * 2) - OriginalBufY.at(pos_x * 2, pos_y * 2))
+                          * (nnFilteredBufY.at(pos_x * 2, pos_y * 2) - OriginalBufY.at(pos_x * 2, pos_y * 2));
+          recBLKSSDY += (reconBufY.at(pos_x * 2 + 1, pos_y * 2) - OriginalBufY.at(pos_x * 2 + 1, pos_y * 2))
+                        * (reconBufY.at(pos_x * 2 + 1, pos_y * 2)
+                           - OriginalBufY.at(pos_x * 2 + 1, pos_y * 2));   // SSD , sum of squared difference
+          nnFltBLKSSDY += (nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2) - OriginalBufY.at(pos_x * 2 + 1, pos_y * 2))
+                          * (nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2) - OriginalBufY.at(pos_x * 2 + 1, pos_y * 2));
+          recBLKSSDY += (reconBufY.at(pos_x * 2, pos_y * 2 + 1) - OriginalBufY.at(pos_x * 2, pos_y * 2 + 1))
+                        * (reconBufY.at(pos_x * 2, pos_y * 2 + 1)
+                           - OriginalBufY.at(pos_x * 2, pos_y * 2 + 1));   // SSD , sum of squared difference
+          nnFltBLKSSDY += (nnFilteredBufY.at(pos_x * 2, pos_y * 2 + 1) - OriginalBufY.at(pos_x * 2, pos_y * 2 + 1))
+                          * (nnFilteredBufY.at(pos_x * 2, pos_y * 2 + 1) - OriginalBufY.at(pos_x * 2, pos_y * 2 + 1));
+          recBLKSSDY += (reconBufY.at(pos_x * 2 + 1, pos_y * 2 + 1) - OriginalBufY.at(pos_x * 2 + 1, pos_y * 2 + 1))
+                        * (reconBufY.at(pos_x * 2 + 1, pos_y * 2 + 1)
+                           - OriginalBufY.at(pos_x * 2 + 1, pos_y * 2 + 1));   // SSD , sum of squared difference
+          nnFltBLKSSDY +=
+            (nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2 + 1) - OriginalBufY.at(pos_x * 2 + 1, pos_y * 2 + 1))
+            * (nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2 + 1) - OriginalBufY.at(pos_x * 2 + 1, pos_y * 2 + 1));
+
+          // Chroma SSD
+          recBLKSSDCb +=
+            (reconBufCb.at(pos_x, pos_y) - OriginalBufCb.at(pos_x, pos_y))
+            * (reconBufCb.at(pos_x, pos_y) - OriginalBufCb.at(pos_x, pos_y));   // SSD , sum of squared difference
+          nnFltBLKSSDCb += (nnFilteredBufCb.at(pos_x, pos_y) - OriginalBufCb.at(pos_x, pos_y))
+                           * (nnFilteredBufCb.at(pos_x, pos_y) - OriginalBufCb.at(pos_x, pos_y));
+
+          recBLKSSDCr +=
+            (reconBufCr.at(pos_x, pos_y) - OriginalBufCr.at(pos_x, pos_y))
+            * (reconBufCr.at(pos_x, pos_y) - OriginalBufCr.at(pos_x, pos_y));   // SSD , sum of squared difference
+          nnFltBLKSSDCr += (nnFilteredBufCr.at(pos_x, pos_y) - OriginalBufCr.at(pos_x, pos_y))
+                           * (nnFilteredBufCr.at(pos_x, pos_y) - OriginalBufCr.at(pos_x, pos_y));
+        }
+      }
+      /* Accumulate per-block SSDs so we can compare three RD options:
+      1) RD cost with the filter all off
+      2) RD cost with the filter all on
+      3) RD cost with a per-CTU on/off decision */
+      recFrmSSDY  += recBLKSSDY;
+      recFrmSSDCb += recBLKSSDCb;
+      recFrmSSDCr += recBLKSSDCr;
+
+      nnFltFrmSSDY += nnFltBLKSSDY;
+      nnFltFrmSSDCb += nnFltBLKSSDCb;
+      nnFltFrmSSDCr += nnFltBLKSSDCr;
+
+      if (recBLKSSDY <= nnFltBLKSSDY)
+      {
+        bestFrmSSDY += recBLKSSDY;
+        m_ctuEnableFlag[COMPONENT_Y][ctuIdx] = 0;
+
+      }
+      else
+      {
+        bestFrmSSDY += nnFltBLKSSDY;
+        m_ctuEnableFlag[COMPONENT_Y][ctuIdx] = 1;
+      }
+
+      if (recBLKSSDCb <= nnFltBLKSSDCb)
+      {
+        bestFrmSSDCb += recBLKSSDCb;
+        m_ctuEnableFlag[COMPONENT_Cb][ctuIdx] = 0;
+      }
+      else
+      {
+        bestFrmSSDCb += nnFltBLKSSDCb;
+        m_ctuEnableFlag[COMPONENT_Cb][ctuIdx] = 1;
+      }
+      if (recBLKSSDCr <= nnFltBLKSSDCr)
+      {
+        bestFrmSSDCr += recBLKSSDCr;
+        m_ctuEnableFlag[COMPONENT_Cr][ctuIdx] = 0;
+      }
+      else
+      {
+        bestFrmSSDCr += nnFltBLKSSDCr;
+        m_ctuEnableFlag[COMPONENT_Cr][ctuIdx] = 1;
+      }
+      CHECK(ctuIdx >= m_numCtusInFrame, "ctuIdx should be less than no. of CTUs in frame");
+      ctuIdx++;
+    }
+  }
+   /* slice level decision */
+  /* calculate RD for all off case, All off case consumes 1 bit  at frame level*/
+
+
+  /* Luma case */
+  double rate_all_off_Y = 1;
+  double costAllOff_Y = recFrmSSDY + m_lambda[COMPONENT_Y] * rate_all_off_Y;
+  double rate_all_on_Y = 2; // All on needs 2 bits for signalling at frame level 
+  double costAllOn_Y = nnFltFrmSSDY + m_lambda[COMPONENT_Y] * rate_all_on_Y;
+  double bestPicCost = 0.0;
+
+  TempCtx       ctxStart( m_CtxCache, CnnFilterCtx( m_CABACEstimator->getCtx() ) );
+  m_CABACEstimator->getCtx() = CnnFilterCtx( ctxStart );
+  m_CABACEstimator->resetBits();
+  uint32_t numCTUs = cs.pcv->sizeInCtus;
+  int frame_width_in_ctus = cs.pcv->widthInCtus;
+  uint8_t* ctbCnnFilterFlag = cs.slice->getPic()->getLCnnlfCtuEnableFlag( COMPONENT_Y);  
+  cnnlfSliceModeIdc[COMPONENT_Y] = LCNNLF_CTU_ONOFF; /* set this to get rate for transmitting CTU bits */
+  m_CABACEstimator->codeLCnnlfCtuEnableFlags(numCTUs, frame_width_in_ctus, ctbCnnFilterFlag, ComponentID(COMPONENT_Y), cnnlfSliceModeIdc);
+
+
+  double cost_on_off_Y = bestFrmSSDY + m_lambda[COMPONENT_Y] * (rate_all_on_Y + (FracBitsScale * (double)m_CABACEstimator->getEstFracBits()));
+
+  if (costAllOff_Y <= costAllOn_Y && costAllOff_Y <= cost_on_off_Y)
+  {
+    cnnlfSliceModeIdc[COMPONENT_Y] = LCNNLF_OFF;
+    for( int ctuIdx = 0; ctuIdx < numCTUs; ctuIdx++ )
+    {
+      m_ctuEnableFlag[COMPONENT_Y][ctuIdx] = 0;
+    }    
+    bestPicCost += costAllOff_Y * (FRAME_RD_Y_MULTIPLIER);
+  }
+  else if(costAllOn_Y <= costAllOff_Y && costAllOn_Y <= cost_on_off_Y)
+  {
+    cnnlfSliceModeIdc[COMPONENT_Y] = LCNNLF_ALL_ON;
+    for( int ctuIdx = 0; ctuIdx < numCTUs; ctuIdx++ )
+    {
+      m_ctuEnableFlag[COMPONENT_Y][ctuIdx] = 1;
+    } 
+    bestPicCost += costAllOn_Y * (FRAME_RD_Y_MULTIPLIER);
+  }
+  else
+  {
+    cnnlfSliceModeIdc[COMPONENT_Y] = LCNNLF_CTU_ONOFF;
+    /* CTU flags shall remain the same as was set before */
+    bestPicCost += cost_on_off_Y * (FRAME_RD_Y_MULTIPLIER);
+  }
+
+  /* Cb case */
+  double rate_all_off_Cb = 1;
+  double costAllOff_Cb = recFrmSSDCb + m_lambda[COMPONENT_Cb] * rate_all_off_Cb;
+  double rate_all_on_Cb = 2; // All on needs 2 bits for signalling at frame level 
+  double costAllOn_Cb = nnFltFrmSSDCb + m_lambda[COMPONENT_Cb] * rate_all_on_Cb;
+
+  m_CABACEstimator->getCtx() = CnnFilterCtx( ctxStart );
+  m_CABACEstimator->resetBits();
+
+  ctbCnnFilterFlag = cs.slice->getPic()->getLCnnlfCtuEnableFlag( COMPONENT_Cb);  
+  cnnlfSliceModeIdc[COMPONENT_Cb] = LCNNLF_CTU_ONOFF;
+  m_CABACEstimator->codeLCnnlfCtuEnableFlags(numCTUs, frame_width_in_ctus, ctbCnnFilterFlag, ComponentID(COMPONENT_Cb), cnnlfSliceModeIdc);
+
+  double cost_on_off_Cb = bestFrmSSDCb + m_lambda[COMPONENT_Cb] * (rate_all_on_Cb + (FracBitsScale * (double)m_CABACEstimator->getEstFracBits()));
+
+  if (costAllOff_Cb <= costAllOn_Cb && costAllOff_Cb <= cost_on_off_Cb)
+  {
+    cnnlfSliceModeIdc[COMPONENT_Cb] = LCNNLF_OFF;
+    for( int ctuIdx = 0; ctuIdx < numCTUs; ctuIdx++ )
+    {
+      m_ctuEnableFlag[COMPONENT_Cb][ctuIdx] = 0;
+    } 
+    bestPicCost += costAllOff_Cb;
+
+  }
+  else if(costAllOn_Cb <= costAllOff_Cb && costAllOn_Cb <= cost_on_off_Cb)
+  {
+    cnnlfSliceModeIdc[COMPONENT_Cb] = LCNNLF_ALL_ON;
+    for( int ctuIdx = 0; ctuIdx < numCTUs; ctuIdx++ )
+    {
+      m_ctuEnableFlag[COMPONENT_Cb][ctuIdx] = 1;
+    } 
+    bestPicCost += costAllOn_Cb;
+  }
+  else
+  {
+    cnnlfSliceModeIdc[COMPONENT_Cb] = LCNNLF_CTU_ONOFF;
+    bestPicCost += cost_on_off_Cb;
+  } 
+
+
+   /* Cr case */
+  double rate_all_off_Cr = 1;
+  double costAllOff_Cr = recFrmSSDCr + m_lambda[COMPONENT_Cr] * rate_all_off_Cr;
+  double rate_all_on_Cr = 2; // All on needs 2 bits for signalling at frame level 
+  double costAllOn_Cr = nnFltFrmSSDCr + m_lambda[COMPONENT_Cr] * rate_all_on_Cr;
+
+  m_CABACEstimator->getCtx() = CnnFilterCtx( ctxStart );
+  m_CABACEstimator->resetBits();
+  ctbCnnFilterFlag = cs.slice->getPic()->getLCnnlfCtuEnableFlag( COMPONENT_Cr);  
+  cnnlfSliceModeIdc[COMPONENT_Cr] = LCNNLF_CTU_ONOFF; /* set this to get rate for transmitting CTU bits */
+  m_CABACEstimator->codeLCnnlfCtuEnableFlags(numCTUs, frame_width_in_ctus, ctbCnnFilterFlag, ComponentID(COMPONENT_Cr), cnnlfSliceModeIdc);
+  double cost_on_off_Cr = bestFrmSSDCr + m_lambda[COMPONENT_Cr] * (rate_all_on_Cr + (FracBitsScale * (double)m_CABACEstimator->getEstFracBits()));
+
+  if (costAllOff_Cr <= costAllOn_Cr && costAllOff_Cr <= cost_on_off_Cr)
+  {
+    cnnlfSliceModeIdc[COMPONENT_Cr] = LCNNLF_OFF;
+    for( int ctuIdx = 0; ctuIdx < numCTUs; ctuIdx++ )
+    {
+      m_ctuEnableFlag[COMPONENT_Cr][ctuIdx] = 0;
+    } 
+    bestPicCost += costAllOff_Cr;
+    /* reset buf later on */
+  }
+  else if(costAllOn_Cr <= costAllOff_Cr && costAllOn_Cr <= cost_on_off_Cr)
+  {
+    cnnlfSliceModeIdc[COMPONENT_Cr] = LCNNLF_ALL_ON;
+    for( int ctuIdx = 0; ctuIdx < numCTUs; ctuIdx++ )
+    {
+      m_ctuEnableFlag[COMPONENT_Cr][ctuIdx] = 1;
+    } 
+    bestPicCost += costAllOn_Cr;
+  }
+  else
+  {
+    cnnlfSliceModeIdc[COMPONENT_Cr] = LCNNLF_CTU_ONOFF;
+    bestPicCost += cost_on_off_Cr;
+  }
+
+  /* reset the buffers over here */ 
+  ctuIdx = 0;
+  for( int y = 0; y < pic_h; y += blockSize * 2)
+  {
+    for( int x = 0; x < pic_w; x+=blockSize*2)
+    {
+      for (int i = 0; i < blockSize; ++i)
+      {
+        for (int j = 0; j < blockSize; ++j)
+        {
+          int pos_x = x / 2 + j;
+          int pos_y = y / 2 + i;
+          if (pos_x >= pic_w / 2 || pos_y >= pic_h / 2)
+          {
+            continue;
+          }
+
+          if (m_ctuEnableFlag[COMPONENT_Y][ctuIdx] == 0)
+          {
+            nnFilteredBufY.at(pos_x * 2, pos_y * 2)         = reconBufY.at(pos_x * 2, pos_y * 2);
+            nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2)     = reconBufY.at(pos_x * 2 + 1, pos_y * 2);
+            nnFilteredBufY.at(pos_x * 2, pos_y * 2 + 1)     = reconBufY.at(pos_x * 2, pos_y * 2 + 1);
+            nnFilteredBufY.at(pos_x * 2 + 1, pos_y * 2 + 1) = reconBufY.at(pos_x * 2 + 1, pos_y * 2 + 1);
+          }
+          if (m_ctuEnableFlag[COMPONENT_Cb][ctuIdx] == 0)
+          {
+            nnFilteredBufCb.at(pos_x, pos_y) = reconBufCb.at(pos_x, pos_y); 
+          }
+          if (m_ctuEnableFlag[COMPONENT_Cr][ctuIdx] == 0)
+          {
+            nnFilteredBufCr.at(pos_x, pos_y) = reconBufCr.at(pos_x, pos_y); 
+          }
+        }
+      }
+       ctuIdx++;
+    }   
+  }
+  return bestPicCost;
+
+}
+
+
+#define NN_SCALE_STABLIZING_FACTOR (0.1 * (1<< NN_RESIDUE_ADDITIONAL_SHIFT))
+#define NN_RESIDUE_SCALE_DEVIATION_UP_BOUND 1.25
+#define NN_RESIDUE_SCALE_DEVIATION_BOT_BOUND 0.0625
+
+void EncNNFilterSetLC::deriveScale(PelUnitBuf origBuf, PelUnitBuf recoBuf, PelUnitBuf filteredBuf, PicHeader *ph)
+{
+  PelBuf origBufY       = origBuf.get(COMPONENT_Y);
+  PelBuf origBufCb      = origBuf.get(COMPONENT_Cb);
+  PelBuf origBufCr      = origBuf.get(COMPONENT_Cr);
+  PelBuf reconBufY       = recoBuf.get(COMPONENT_Y);
+  PelBuf reconBufCb      = recoBuf.get(COMPONENT_Cb);
+  PelBuf reconBufCr      = recoBuf.get(COMPONENT_Cr);
+  PelBuf nnFilteredBufY  = filteredBuf.get(COMPONENT_Y);
+  PelBuf nnFilteredBufCb = filteredBuf.get(COMPONENT_Cb);
+  PelBuf nnFilteredBufCr = filteredBuf.get(COMPONENT_Cr);
+
+  int pic_w = reconBufY.width;
+  int pic_h = reconBufY.height;
+  int pic_wh = pic_w*pic_h;
+
+  int scaleY = 0, scaleCb = 0, scaleCr = 0;
+  int shiftY = LCNN_RESIDUE_SCALE_SHIFT;
+  int shiftCb = LCNN_RESIDUE_SCALE_SHIFT;
+  int shiftCr = LCNN_RESIDUE_SCALE_SHIFT; 
+
+  int scaleUpBoundY  = int(NN_RESIDUE_SCALE_DEVIATION_UP_BOUND * (1 << shiftY));
+  int scaleLowBoundY = int(NN_RESIDUE_SCALE_DEVIATION_BOT_BOUND * (1 << shiftY));
+
+  int scaleUpBoundC  = int(NN_RESIDUE_SCALE_DEVIATION_UP_BOUND * (1 << shiftCb));
+  int scaleLowBoundC = int(NN_RESIDUE_SCALE_DEVIATION_BOT_BOUND * (1 << shiftCb));
+
+
+  double selfMulti[MAX_NUM_COMPONENT] = {0., 0., 0.}; 
+  double crossMulti[MAX_NUM_COMPONENT] = {0., 0., 0.}; 
+  double sumOriResi[MAX_NUM_COMPONENT] = {0., 0., 0.};
+  double sumNnResi[MAX_NUM_COMPONENT] = {0., 0., 0.};
+
+  int blockSize = 64;
+  int ctuStride = (pic_w + 2 * blockSize - 1) / (2 * blockSize);
+
+  for (int y = 0; y < pic_h; y++)
+  {
+    for (int x = 0; x < pic_w; x++)
+    {
+      int ctuIdx = (y / (2 * blockSize)) * ctuStride + x / (2 * blockSize);
+      if (m_ctuEnableFlag[COMPONENT_Y][ctuIdx] == 1)
+      {
+        int oriResi = (origBufY.at(x, y) - reconBufY.at(x, y)) << NN_RESIDUE_ADDITIONAL_SHIFT;
+        int nnResi  = nnFilteredBufY.at(x, y) - (reconBufY.at(x, y) << NN_RESIDUE_ADDITIONAL_SHIFT);
+
+        selfMulti[COMPONENT_Y] += nnResi * nnResi;
+        crossMulti[COMPONENT_Y] += nnResi * oriResi;
+        sumOriResi[COMPONENT_Y] += oriResi;
+        sumNnResi[COMPONENT_Y] += nnResi;
+      }
+    }
+  }
+
+  scaleY = int(((pic_wh * crossMulti[COMPONENT_Y] - sumOriResi[COMPONENT_Y] * sumNnResi[COMPONENT_Y] + pic_wh * pic_wh * NN_SCALE_STABLIZING_FACTOR)
+                / (pic_wh * selfMulti[COMPONENT_Y] - sumNnResi[COMPONENT_Y] * sumNnResi[COMPONENT_Y] + pic_wh * pic_wh * NN_SCALE_STABLIZING_FACTOR))
+                 * (1 << shiftY) + 0.5);
+
+  if (scaleY > scaleUpBoundY)
+  {
+    scaleY = scaleUpBoundY;
+  }
+  if (scaleY < scaleLowBoundY)
+  {
+    scaleY = scaleLowBoundY;
+  }
+
+  for (int y = 0; y < pic_h/2; y++)
+  {
+    for (int x = 0; x < pic_w/2; x++)
+    {
+      int ctuIdx = (y / blockSize) * ctuStride + x / blockSize;
+      if (m_ctuEnableFlag[COMPONENT_Cb][ctuIdx] == 1)
+      {
+      int oriResiCb = (origBufCb.at(x, y) - reconBufCb.at(x, y)) << NN_RESIDUE_ADDITIONAL_SHIFT;
+      int nnResiCb = nnFilteredBufCb.at(x, y) - (reconBufCb.at(x, y) << NN_RESIDUE_ADDITIONAL_SHIFT);
+
+      selfMulti[COMPONENT_Cb] += nnResiCb*nnResiCb;
+      crossMulti[COMPONENT_Cb] += nnResiCb*oriResiCb;
+      sumOriResi[COMPONENT_Cb] += oriResiCb;
+      sumNnResi[COMPONENT_Cb] += nnResiCb;
+      }
+
+      if (m_ctuEnableFlag[COMPONENT_Cr][ctuIdx] == 1)
+      {
+      int oriResiCr = (origBufCr.at(x, y) - reconBufCr.at(x, y)) << NN_RESIDUE_ADDITIONAL_SHIFT;
+      int nnResiCr = nnFilteredBufCr.at(x, y) - (reconBufCr.at(x, y) << NN_RESIDUE_ADDITIONAL_SHIFT);
+
+
+      selfMulti[COMPONENT_Cr] += nnResiCr*nnResiCr;
+      crossMulti[COMPONENT_Cr] += nnResiCr*oriResiCr;
+      sumOriResi[COMPONENT_Cr] += oriResiCr;
+      sumNnResi[COMPONENT_Cr] += nnResiCr;
+      }
+    }
+  }
+
+  scaleCb = int(((pic_wh / 4 * crossMulti[COMPONENT_Cb] - sumOriResi[COMPONENT_Cb] * sumNnResi[COMPONENT_Cb] + pic_wh / 4 * pic_wh / 4 * NN_SCALE_STABLIZING_FACTOR)
+                 / (pic_wh / 4 * selfMulti[COMPONENT_Cb] - sumNnResi[COMPONENT_Cb] * sumNnResi[COMPONENT_Cb] + pic_wh / 4 * pic_wh / 4 * NN_SCALE_STABLIZING_FACTOR))
+                  * (1 << shiftCb)  + 0.5);
+
+  if (scaleCb > scaleUpBoundC)
+  {
+    scaleCb = scaleUpBoundC;
+  }
+  if (scaleCb < scaleLowBoundC)
+  {
+    scaleCb = scaleLowBoundC;
+  }
+
+
+  scaleCr  = int(((pic_wh / 4 * crossMulti[COMPONENT_Cr] - sumOriResi[COMPONENT_Cr] * sumNnResi[COMPONENT_Cr] + pic_wh / 4 * pic_wh / 4 * NN_SCALE_STABLIZING_FACTOR)
+                 / (pic_wh / 4 * selfMulti[COMPONENT_Cr] - sumNnResi[COMPONENT_Cr] * sumNnResi[COMPONENT_Cr] + pic_wh / 4 * pic_wh / 4 * NN_SCALE_STABLIZING_FACTOR))
+                  * (1 << shiftCr) + 0.5);
+
+  if (scaleCr > scaleUpBoundC)
+  {
+    scaleCr = scaleUpBoundC;
+  }
+  if (scaleCr < scaleLowBoundC)
+  {
+    scaleCr = scaleLowBoundC;
+  }
+
+  ph->setLCNnScale(scaleY, COMPONENT_Y);
+
+  ph->setLCNnScale(scaleCb, COMPONENT_Cb);
+
+  ph->setLCNnScale(scaleCr, COMPONENT_Cr);
+}
+
+
+#endif
\ No newline at end of file
diff --git a/source/Lib/EncoderLib/EncNNFilterSetLC.h b/source/Lib/EncoderLib/EncNNFilterSetLC.h
new file mode 100644
index 0000000000000000000000000000000000000000..fb953747811bd134aac01b6ec71e3cd6694a501e
--- /dev/null
+++ b/source/Lib/EncoderLib/EncNNFilterSetLC.h
@@ -0,0 +1,72 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2023, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file     EncNNFilterSetLC.h
+    \brief    Encoder low complexity cnn loop filter class (header)
+*/
+
+#ifndef __ENCNNFILTERSETLC__
+#define __ENCNNFILTERSETLC__
+
+#include "CommonDef.h"
+
+#if NN_FILTERING_SET_LC
+
+#include "CommonLib/NNFilterSetLC.h"
+#include "CABACWriter.h"
+
+
+class EncNNFilterSetLC : public NNFilterSetLC
+{
+public:
+  EncNNFilterSetLC();
+  virtual ~EncNNFilterSetLC() {}
+
+  
+  double filterPictureRD(CodingStructure &cs, PelUnitBuf in, PelUnitBuf out, PelUnitBuf original, const double *lambdas, uint8_t *cnnlfSliceModeIdc);
+  void create(CABACEncoder* cabacEncoder, CtxCache* ctxCache, Slice* pcSlice, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE]);
+  void deriveScale(PelUnitBuf origBuf, PelUnitBuf recoBuf, PelUnitBuf filteredBuf, PicHeader *ph);
+
+public:
+ 
+  CABACWriter*           m_CABACEstimator;
+  CtxCache*              m_CtxCache;
+  const double           FracBitsScale = 1.0 / double(1 << SCALE_BITS);
+  double  m_lambda[MAX_NUM_COMPONENT];
+  PelStorage m_cCnnTmpBufModelSelect;
+  int     m_inputBitDepth[MAX_NUM_CHANNEL_TYPE];
+
+};
+#endif
+
+#endif
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index e59ac4bfe2a1ca1b5e3cafd17344238087685c64..28c3869c9038e950cb05e71844c75e87222b56ce 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -1009,7 +1009,7 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
   WRITE_FLAG(pcSPS->getNnlfEnabledFlag(), "sps_nnlf_enabled_flag");
   if (pcSPS->getNnlfEnabledFlag())
   {
-    WRITE_FLAG(pcSPS->getNnlfSet(), "sps_nnlf_set");
+    WRITE_UVLC(pcSPS->getNnlfSet(), "sps_nnlf_set");
   }
 #endif
 
@@ -2103,6 +2103,16 @@ WRITE_FLAG(picHeader->getGdrOrIrapPicFlag(), "ph_gdr_or_irap_pic_flag");
     picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, false);
   }
 
+#if NN_FILTERING_SET_LC
+  if(sps->getNnlfSetLCEnabledFlag())
+  {
+    WRITE_SCODE(picHeader->getLCNnScale(COMPONENT_Y) - (1 << LCNN_RESIDUE_SCALE_SHIFT), LCNN_RESIDUE_SCALE_SHIFT+1, "nnScale_Y");
+    WRITE_SCODE(picHeader->getLCNnScale(COMPONENT_Cb) - (1 << LCNN_RESIDUE_SCALE_SHIFT), LCNN_RESIDUE_SCALE_SHIFT+1, "nnScale_Cb");
+    WRITE_SCODE(picHeader->getLCNnScale(COMPONENT_Cr) - (1 << LCNN_RESIDUE_SCALE_SHIFT), LCNN_RESIDUE_SCALE_SHIFT+1, "nnScale_Cr");
+    if(LCNN_MODEL_IDX_BITS > 0)
+      WRITE_CODE(picHeader->getLCNnModelIdx(),LCNN_MODEL_IDX_BITS, "nn_filter_idx");
+  }
+#endif
 
 
   // deblocking filter controls
@@ -2588,6 +2598,20 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice, PicHeader *picHeader )
       }
     }
 
+#if NN_FILTERING_SET_LC
+    if (pcSlice->getSPS()->getNnlfSetLCEnabledFlag())
+    {
+      for( int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++ )
+      {
+        WRITE_FLAG( (pcSlice->getLCnnlfSliceModeIdc(compIdx) == LCNNLF_OFF) ? 0 : 1, "slice_cnnlf_mode_idc" );
+
+        if( pcSlice->getLCnnlfSliceModeIdc( compIdx ) != LCNNLF_OFF )
+        {        
+          WRITE_FLAG( (pcSlice->getLCnnlfSliceModeIdc( compIdx ) == LCNNLF_ALL_ON) ? 1 : 0, "slice_cnnlf_luma_slice_all_on_flag" );
+        }
+      }
+    }
+#endif
 
     if (pcSlice->getPPS()->getDeblockingFilterControlPresentFlag())
     {
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/BVIDVC_streamsplit.csv b/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/BVIDVC_streamsplit.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b93643865eae7ea826e129d17e4bdb250645bc1a
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/BVIDVC_streamsplit.csv
@@ -0,0 +1,621 @@
+TestVectors
+class_a/train/ABangkokMarketVidevo_3840x2176_25fps_10bit_420.yuv
+class_a/train/ABricksBushesStaticBVITexture_3840x2176_120fps_10bit_420.yuv
+class_a/train/ABuildingRoofS4IRIS_3840x2176_24fps_10bit_420.yuv
+class_a/train/ACityScapesS1IRIS_3840x2176_24fps_10bit_420.yuv
+class_a/train/ACityScapesS2IRIS_3840x2176_24fps_10bit_420.yuv
+class_a/train/AColourfulKoreanLanternsVidevo_3840x2176_50fps_10bit_420.yuv
+class_a/train/AHongKongIslandVidevo_3840x2176_25fps_10bit_420.yuv
+class_a/train/AHongKongMarket1Videvo_3840x2176_25fps_10bit_420.yuv
+class_a/train/AHorseStaringS1Videvo_3840x2176_50fps_10bit_420.yuv
+class_a/train/AJoggersS2BVIHFR_3840x2176_120fps_10bit_420.yuv
+class_a/train/ALakeYonseiUniversity_3840x2176_30fps_10bit_420.yuv
+class_a/train/AMyeongDongVidevo_3840x2176_25fps_10bit_420.yuv
+class_a/train/ASeasideWalkIRIS_3840x2176_24fps_10bit_420.yuv
+class_a/train/AStreetDancerS3IRIS_3840x2176_24fps_10bit_420.yuv
+class_a/train/ATaiChiHongKongS1Videvo_3840x2176_25fps_10bit_420.yuv
+class_a/train/ATaksinBridgeVidevo_3840x2176_23fps_10bit_420.yuv
+class_a/train/ATrafficonTasksinBridgeVidevo_3840x2176_25fps_10bit_420.yuv
+class_a/train/ATreeWillsBVITexture_3840x2176_120fps_10bit_420.yuv
+class_a/validation/ABricksLeavesBVITexture_3840x2176_120fps_10bit_420.yuv
+class_a/validation/AColourfulRugsMoroccoVidevo_3840x2176_50fps_10bit_420.yuv
+class_b/train1/BAdvertisingMassagesBangkokVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BAmericanFootballS2Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train1/BAmericanFootballS4Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train1/BAnimalsS11Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train1/BAnimalsS1Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train1/BBangkokMarketVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BBasketballGoalScoredS2Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BBasketballS1YonseiUniversity_1920x1088_30fps_10bit_420.yuv
+class_b/train1/BBasketballS2YonseiUniversity_1920x1088_30fps_10bit_420.yuv
+class_b/train1/BBasketballS3YonseiUniversity_1920x1088_30fps_10bit_420.yuv
+class_b/train1/BBoatsChaoPhrayaRiverVidevo_1920x1088_23fps_10bit_420.yuv
+class_b/train1/BBobbleheadBVIHFR_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BBookcaseBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BBoxingPracticeHarmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train1/BBricksBushesStaticBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BBricksLeavesBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BBricksTiltingBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BBubblesPitcherS1BVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BBuildingRoofS1IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BBuildingRoofS2IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BBuildingRoofS3IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BBuildingRoofS4IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BBuntingHangingAcrossHongKongVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BBusyHongKongStreetVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BCalmingWaterBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BCarpetPanAverageBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BCatchBVIHFR_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BCeramicsandSpicesMoroccoVidevo_1920x1088_50fps_10bit_420.yuv
+class_b/train1/BCharactersYonseiUniversity_1920x1088_30fps_10bit_420.yuv
+class_b/train1/BChristmasPresentsIRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BCityScapesS1IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BCityScapesS2IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BCityScapesS3IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BCityStreetS1IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BCityStreetS3IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BCityStreetS4IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BCityStreetS5IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BCityStreetS6IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BCityStreetS7IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BCloseUpBasketballSceneVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BCloudsStaticBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BColourfulDecorationWatPhoVidevo_1920x1088_50fps_10bit_420.yuv
+class_b/train1/BColourfulKoreanLanternsVidevo_1920x1088_50fps_10bit_420.yuv
+class_b/train1/BColourfulPaperLanternsVidevo_1920x1088_50fps_10bit_420.yuv
+class_b/train1/BColourfulRugsMoroccoVidevo_1920x1088_50fps_10bit_420.yuv
+class_b/train1/BCostaRicaS3Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train1/BCrosswalkHarmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train1/BCrosswalkHongKong2S1Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BCrosswalkHongKong2S2Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BCrosswalkHongKongVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BCrowdRunMCLV_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BCyclistS1BVIHFR_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BCyclistVeniceBeachBoardwalkVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BDollsScene1YonseiUniversity_1920x1088_30fps_10bit_420.yuv
+class_b/train1/BDollsScene2YonseiUniversity_1920x1088_30fps_10bit_420.yuv
+class_b/train1/BDowntownHongKongVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BDrivingPOVHarmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train1/BDropsOnWaterBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BElFuenteMaskLIVENetFlix_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BEnteringHongKongStallS1Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BFerrisWheelTurningVidevo_1920x1088_50fps_10bit_420.yuv
+class_b/train1/BFireS18Mitch_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BFireS21Mitch_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BFireS71Mitch_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BFirewoodS2IRIS_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BFitnessIRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BFjordsS1Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train1/BFlowerChapelS1IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BFlowerChapelS2IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BFlyingCountrysideDareful_1920x1088_29fps_10bit_420.yuv
+class_b/train1/BFlyingThroughLAStreetVidevo_1920x1088_23fps_10bit_420.yuv
+class_b/train1/BFungusZoomBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BGrassBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BHamsterBVIHFR_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BHarleyDavidsonIRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train1/BHongKongIslandVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BHongKongMarket1Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BHongKongMarket3S1Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BHongKongMarket3S2Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BHongKongMarket4S1Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BHongKongMarket4S2Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train1/BHongKongS1Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train1/BHongKongS2Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train1/BHongKongS3Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train1/BHorseDrawnCarriagesVidevo_1920x1088_50fps_10bit_420.yuv
+class_b/train1/BHorseStaringS1Videvo_1920x1088_50fps_10bit_420.yuv
+class_b/train1/BHorseStaringS2Videvo_1920x1088_50fps_10bit_420.yuv
+class_b/train1/BJockeyHarmonics_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BJoggersS1BVIHFR_1920x1088_120fps_10bit_420.yuv
+class_b/train1/BJoggersS2BVIHFR_1920x1088_120fps_10bit_420.yuv
+class_b/train2/BKartingIRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BKoraDrumsVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BLampLeavesBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train2/BLeaves1BVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train2/BLeaves3BVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train2/BLowLevelShotAlongHongKongVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BLungshanTempleS1Videvo_1920x1088_50fps_10bit_420.yuv
+class_b/train2/BLungshanTempleS2Videvo_1920x1088_50fps_10bit_420.yuv
+class_b/train2/BManMoTempleVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BManStandinginProduceTruckVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BManWalkingThroughBangkokVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BMaplesS1YonseiUniversity_1920x1088_30fps_10bit_420.yuv
+class_b/train2/BMaplesS2YonseiUniversity_1920x1088_30fps_10bit_420.yuv
+class_b/train2/BMirabellParkS1IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BMirabellParkS2IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BMobileHarmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BMoroccanCeramicsShopVidevo_1920x1088_50fps_10bit_420.yuv
+class_b/train2/BMoroccanSlippersVidevo_1920x1088_50fps_10bit_420.yuv
+class_b/train2/BMuralPaintingVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BMyeongDongVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BNewYorkStreetDareful_1920x1088_30fps_10bit_420.yuv
+class_b/train2/BPaintingTiltingBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train2/BParkViolinMCLJCV_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BPedestriansSeoulatDawnVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BPeopleWalkingS1IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BPillowsTransBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train2/BPlasmaFreeBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train2/BPresentsChristmasTreeDareful_1920x1088_29fps_10bit_420.yuv
+class_b/train2/BReadySetGoS2TampereUniversity_1920x1088_120fps_10bit_420.yuv
+class_b/train2/BResidentialBuildingSJTU_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BRollerCoaster2Netflix_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BRunnersSJTU_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BRuralSetupIRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BRuralSetupS2IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BScarfSJTU_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BSeasideWalkIRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BSeekingMCLV_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BSeoulCanalatDawnVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BShoppingCentreVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BSignboardBoatLIVENetFlix_1920x1088_30fps_10bit_420.yuv
+class_b/train2/BSkyscraperBangkokVidevo_1920x1088_23fps_10bit_420.yuv
+class_b/train2/BSmokeClearBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train2/BSquareAndTimelapseHarmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BSquareS1IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BSquareS2IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BStreetArtVidevo_1920x1088_30fps_10bit_420.yuv
+class_b/train2/BStreetDancerS2IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BStreetDancerS3IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BStreetDancerS4IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BStreetDancerS5IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BStreetsOfIndiaS1Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BStreetsOfIndiaS2Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BStreetsOfIndiaS3Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BTaiChiHongKongS1Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BTaiChiHongKongS2Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BTaipeiCityRooftops8Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BTaipeiCityRooftopsS1Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BTaipeiCityRooftopsS2Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BTaksinBridgeVidevo_1920x1088_23fps_10bit_420.yuv
+class_b/train2/BTallBuildingsSJTU_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BTennisMCLV_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BToddlerFountain2Netflix_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BTouristsSatOutsideVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BToyCalendarHarmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BTrackingDownHongKongSideVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BTrackingPastRestaurantVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BTrackingPastStallHongKongVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BTraditionalIndonesianKecakVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BTrafficandBuildingSJTU_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BTrafficFlowSJTU_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BTrafficonTasksinBridgeVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BTreeWillsBVITexture_1920x1088_120fps_10bit_420.yuv
+class_b/train2/BTruckIRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BTunnelFlagS1Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BUnloadingVegetablesVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BVegetableMarketS1LIVENetFlix_1920x1088_30fps_10bit_420.yuv
+class_b/train2/BVegetableMarketS2LIVENetFlix_1920x1088_30fps_10bit_420.yuv
+class_b/train2/BVegetableMarketS3LIVENetFlix_1920x1088_30fps_10bit_420.yuv
+class_b/train2/BVegetableMarketS4LIVENetFlix_1920x1088_30fps_10bit_420.yuv
+class_b/train2/BVeniceS1Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BVeniceS2Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BVeniceSceneIRIS_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BWalkingDownKhaoStreetVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BWalkingThroughFootbridgeVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/train2/BWaterS65Mitch_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BWaterS81Mitch_1920x1088_24fps_10bit_420.yuv
+class_b/train2/BWatPhoTempleVidevo_1920x1088_50fps_10bit_420.yuv
+class_b/train2/BWoodSJTU_1920x1088_60fps_10bit_420.yuv
+class_b/train2/BWovenVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/validation/BAmericanFootballS3Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/validation/BBasketballGoalScoredS1Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/validation/BChristmasRoomDareful_1920x1088_29fps_10bit_420.yuv
+class_b/validation/BChurchInsideMCLJCV_1920x1088_30fps_10bit_420.yuv
+class_b/validation/BConstructionS2YonseiUniversity_1920x1088_30fps_10bit_420.yuv
+class_b/validation/BEnteringHongKongStallS2Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/validation/BFirewoodS1IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/validation/BFlagShootTUMSVT_1920x1088_50fps_10bit_420.yuv
+class_b/validation/BFlyingMountainsDareful_1920x1088_29fps_10bit_420.yuv
+class_b/validation/BGrazTowerIRIS_1920x1088_24fps_10bit_420.yuv
+class_b/validation/BHongKongMarket2Videvo_1920x1088_25fps_10bit_420.yuv
+class_b/validation/BLakeYonseiUniversity_1920x1088_30fps_10bit_420.yuv
+class_b/validation/BLaundryHangingOverHongKongVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/validation/BMyanmarS4Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/validation/BMyanmarS6Harmonics_1920x1088_60fps_10bit_420.yuv
+class_b/validation/BOrangeBuntingoverHongKongVidevo_1920x1088_25fps_10bit_420.yuv
+class_b/validation/BPersonRunningOutsideVidevo_1920x1088_50fps_10bit_420.yuv
+class_b/validation/BSmokeS45Mitch_1920x1088_24fps_10bit_420.yuv
+class_b/validation/BSparklerBVIHFR_1920x1088_120fps_10bit_420.yuv
+class_b/validation/BStreetDancerS1IRIS_1920x1088_24fps_10bit_420.yuv
+class_b/validation/BWalkingDownNorthRodeoVidevo_1920x1088_25fps_10bit_420.yuv
+class_c/train/CAdvertisingMassagesBangkokVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CAmericanFootballS2Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CAmericanFootballS3Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CAnimalsS11Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CAnimalsS1Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CBangkokMarketVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CBasketballGoalScoredS1Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CBasketballGoalScoredS2Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CBasketballS2YonseiUniversity_960x544_30fps_10bit_420.yuv
+class_c/train/CBasketballS3YonseiUniversity_960x544_30fps_10bit_420.yuv
+class_c/train/CBoatsChaoPhrayaRiverVidevo_960x544_23fps_10bit_420.yuv
+class_c/train/CBobbleheadBVIHFR_960x544_120fps_10bit_420.yuv
+class_c/train/CBookcaseBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CBoxingPracticeHarmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CBricksBushesStaticBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CBricksLeavesBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CBricksTiltingBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CBubblesPitcherS1BVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CBuildingRoofS1IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CBuildingRoofS2IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CBuildingRoofS3IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CBuntingHangingAcrossHongKongVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CBusyHongKongStreetVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CCalmingWaterBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CCarpetPanAverageBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CCatchBVIHFR_960x544_120fps_10bit_420.yuv
+class_c/train/CCeramicsandSpicesMoroccoVidevo_960x544_50fps_10bit_420.yuv
+class_c/train/CChristmasPresentsIRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CChristmasRoomDareful_960x544_29fps_10bit_420.yuv
+class_c/train/CChurchInsideMCLJCV_960x544_30fps_10bit_420.yuv
+class_c/train/CCityScapesS2IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CCityScapesS3IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CCityStreetS1IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CCityStreetS3IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CCityStreetS4IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CCityStreetS5IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CCityStreetS6IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CCityStreetS7IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CCloseUpBasketballSceneVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CCloudsStaticBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CColourfulDecorationWatPhoVidevo_960x544_50fps_10bit_420.yuv
+class_c/train/CColourfulKoreanLanternsVidevo_960x544_50fps_10bit_420.yuv
+class_c/train/CColourfulPaperLanternsVidevo_960x544_50fps_10bit_420.yuv
+class_c/train/CColourfulRugsMoroccoVidevo_960x544_50fps_10bit_420.yuv
+class_c/train/CConstructionS2YonseiUniversity_960x544_30fps_10bit_420.yuv
+class_c/train/CCostaRicaS3Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CCrosswalkHarmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CCrosswalkHongKong2S2Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CCrosswalkHongKongVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CCrowdRunMCLV_960x544_25fps_10bit_420.yuv
+class_c/train/CCyclistS1BVIHFR_960x544_120fps_10bit_420.yuv
+class_c/train/CCyclistVeniceBeachBoardwalkVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CDollsScene1YonseiUniversity_960x544_30fps_10bit_420.yuv
+class_c/train/CDollsScene2YonseiUniversity_960x544_30fps_10bit_420.yuv
+class_c/train/CDowntownHongKongVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CDrivingPOVHarmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CDropsOnWaterBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CElFuenteMaskLIVENetFlix_960x544_24fps_10bit_420.yuv
+class_c/train/CEnteringHongKongStallS1Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CEnteringHongKongStallS2Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CFerrisWheelTurningVidevo_960x544_50fps_10bit_420.yuv
+class_c/train/CFireS18Mitch_960x544_24fps_10bit_420.yuv
+class_c/train/CFireS21Mitch_960x544_24fps_10bit_420.yuv
+class_c/train/CFirewoodS1IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CFirewoodS2IRIS_960x544_25fps_10bit_420.yuv
+class_c/train/CFjordsS1Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CFlagShootTUMSVT_960x544_50fps_10bit_420.yuv
+class_c/train/CFlowerChapelS1IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CFlowerChapelS2IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CFlyingCountrysideDareful_960x544_29fps_10bit_420.yuv
+class_c/train/CFlyingMountainsDareful_960x544_29fps_10bit_420.yuv
+class_c/train/CFlyingThroughLAStreetVidevo_960x544_23fps_10bit_420.yuv
+class_c/train/CFungusZoomBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CGrassBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CGrazTowerIRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CHamsterBVIHFR_960x544_120fps_10bit_420.yuv
+class_c/train/CHarleyDavidsonIRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CHongKongIslandVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CHongKongMarket1Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CHongKongMarket2Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CHongKongMarket3S1Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CHongKongMarket3S2Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CHongKongMarket4S2Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CHongKongS1Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CHongKongS2Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CHongKongS3Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CHorseDrawnCarriagesVidevo_960x544_50fps_10bit_420.yuv
+class_c/train/CHorseStaringS1Videvo_960x544_50fps_10bit_420.yuv
+class_c/train/CJockeyHarmonics_960x544_120fps_10bit_420.yuv
+class_c/train/CJoggersS1BVIHFR_960x544_120fps_10bit_420.yuv
+class_c/train/CJoggersS2BVIHFR_960x544_120fps_10bit_420.yuv
+class_c/train/CKoraDrumsVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CLakeYonseiUniversity_960x544_30fps_10bit_420.yuv
+class_c/train/CLampLeavesBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CLeaves1BVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CLowLevelShotAlongHongKongVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CLungshanTempleS1Videvo_960x544_50fps_10bit_420.yuv
+class_c/train/CLungshanTempleS2Videvo_960x544_50fps_10bit_420.yuv
+class_c/train/CManMoTempleVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CManStandinginProduceTruckVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CManWalkingThroughBangkokVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CMaplesS2YonseiUniversity_960x544_30fps_10bit_420.yuv
+class_c/train/CMirabellParkS1IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CMirabellParkS2IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CMobileHarmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CMoroccanCeramicsShopVidevo_960x544_50fps_10bit_420.yuv
+class_c/train/CMoroccanSlippersVidevo_960x544_50fps_10bit_420.yuv
+class_c/train/CMuralPaintingVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CMyanmarS4Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CMyanmarS6Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CMyeongDongVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CNewYorkStreetDareful_960x544_30fps_10bit_420.yuv
+class_c/train/COrangeBuntingoverHongKongVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CPedestriansSeoulatDawnVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CPeopleWalkingS1IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CPersonRunningOutsideVidevo_960x544_50fps_10bit_420.yuv
+class_c/train/CPillowsTransBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CPlasmaFreeBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CReadySetGoS2TampereUniversity_960x544_120fps_10bit_420.yuv
+class_c/train/CResidentialBuildingSJTU_960x544_60fps_10bit_420.yuv
+class_c/train/CRollerCoaster2Netflix_960x544_60fps_10bit_420.yuv
+class_c/train/CRunnersSJTU_960x544_60fps_10bit_420.yuv
+class_c/train/CRuralSetupIRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CRuralSetupS2IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CScarfSJTU_960x544_60fps_10bit_420.yuv
+class_c/train/CSeasideWalkIRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CSeekingMCLV_960x544_25fps_10bit_420.yuv
+class_c/train/CShoppingCentreVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CSignboardBoatLIVENetFlix_960x544_30fps_10bit_420.yuv
+class_c/train/CSkyscraperBangkokVidevo_960x544_23fps_10bit_420.yuv
+class_c/train/CSmokeClearBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CSmokeS45Mitch_960x544_24fps_10bit_420.yuv
+class_c/train/CSparklerBVIHFR_960x544_120fps_10bit_420.yuv
+class_c/train/CSquareAndTimelapseHarmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CSquareS1IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CSquareS2IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CStreetArtVidevo_960x544_30fps_10bit_420.yuv
+class_c/train/CStreetDancerS1IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CStreetDancerS2IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CStreetDancerS3IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CStreetDancerS4IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CStreetDancerS5IRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CStreetsOfIndiaS1Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CStreetsOfIndiaS2Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CStreetsOfIndiaS3Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CTaiChiHongKongS1Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CTaiChiHongKongS2Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CTaipeiCityRooftops8Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CTaipeiCityRooftopsS1Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CTaipeiCityRooftopsS2Videvo_960x544_25fps_10bit_420.yuv
+class_c/train/CTaksinBridgeVidevo_960x544_23fps_10bit_420.yuv
+class_c/train/CTallBuildingsSJTU_960x544_60fps_10bit_420.yuv
+class_c/train/CTennisMCLV_960x544_24fps_10bit_420.yuv
+class_c/train/CTouristsSatOutsideVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CTrackingPastRestaurantVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CTrackingPastStallHongKongVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CTraditionalIndonesianKecakVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CTrafficandBuildingSJTU_960x544_60fps_10bit_420.yuv
+class_c/train/CTrafficFlowSJTU_960x544_60fps_10bit_420.yuv
+class_c/train/CTreeWillsBVITexture_960x544_120fps_10bit_420.yuv
+class_c/train/CTruckIRIS_960x544_24fps_10bit_420.yuv
+class_c/train/CTunnelFlagS1Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CUnloadingVegetablesVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CVegetableMarketS1LIVENetFlix_960x544_30fps_10bit_420.yuv
+class_c/train/CVegetableMarketS3LIVENetFlix_960x544_30fps_10bit_420.yuv
+class_c/train/CVegetableMarketS4LIVENetFlix_960x544_30fps_10bit_420.yuv
+class_c/train/CVeniceS1Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CVeniceS2Harmonics_960x544_60fps_10bit_420.yuv
+class_c/train/CWalkingDownKhaoStreetVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CWalkingDownNorthRodeoVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CWalkingThroughFootbridgeVidevo_960x544_25fps_10bit_420.yuv
+class_c/train/CWaterS65Mitch_960x544_24fps_10bit_420.yuv
+class_c/train/CWaterS81Mitch_960x544_24fps_10bit_420.yuv
+class_c/train/CWoodSJTU_960x544_60fps_10bit_420.yuv
+class_c/train/CWovenVidevo_960x544_25fps_10bit_420.yuv
+class_c/validation/CAmericanFootballS4Harmonics_960x544_60fps_10bit_420.yuv
+class_c/validation/CBasketballS1YonseiUniversity_960x544_30fps_10bit_420.yuv
+class_c/validation/CBuildingRoofS4IRIS_960x544_24fps_10bit_420.yuv
+class_c/validation/CCharactersYonseiUniversity_960x544_30fps_10bit_420.yuv
+class_c/validation/CCityScapesS1IRIS_960x544_24fps_10bit_420.yuv
+class_c/validation/CCrosswalkHongKong2S1Videvo_960x544_25fps_10bit_420.yuv
+class_c/validation/CFireS71Mitch_960x544_24fps_10bit_420.yuv
+class_c/validation/CFitnessIRIS_960x544_24fps_10bit_420.yuv
+class_c/validation/CHongKongMarket4S1Videvo_960x544_25fps_10bit_420.yuv
+class_c/validation/CHorseStaringS2Videvo_960x544_50fps_10bit_420.yuv
+class_c/validation/CKartingIRIS_960x544_24fps_10bit_420.yuv
+class_c/validation/CLaundryHangingOverHongKongVidevo_960x544_25fps_10bit_420.yuv
+class_c/validation/CLeaves3BVITexture_960x544_120fps_10bit_420.yuv
+class_c/validation/CMaplesS1YonseiUniversity_960x544_30fps_10bit_420.yuv
+class_c/validation/CPaintingTiltingBVITexture_960x544_120fps_10bit_420.yuv
+class_c/validation/CParkViolinMCLJCV_960x544_25fps_10bit_420.yuv
+class_c/validation/CPresentsChristmasTreeDareful_960x544_29fps_10bit_420.yuv
+class_c/validation/CSeoulCanalatDawnVidevo_960x544_25fps_10bit_420.yuv
+class_c/validation/CToddlerFountain2Netflix_960x544_60fps_10bit_420.yuv
+class_c/validation/CToyCalendarHarmonics_960x544_60fps_10bit_420.yuv
+class_c/validation/CTrackingDownHongKongSideVidevo_960x544_25fps_10bit_420.yuv
+class_c/validation/CTrafficonTasksinBridgeVidevo_960x544_25fps_10bit_420.yuv
+class_c/validation/CVegetableMarketS2LIVENetFlix_960x544_30fps_10bit_420.yuv
+class_c/validation/CVeniceSceneIRIS_960x544_24fps_10bit_420.yuv
+class_c/validation/CWatPhoTempleVidevo_960x544_50fps_10bit_420.yuv
+class_d/train/DAdvertisingMassagesBangkokVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DAmericanFootballS3Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DAmericanFootballS4Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DAnimalsS11Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DAnimalsS1Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DBangkokMarketVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DBasketballGoalScoredS1Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DBasketballGoalScoredS2Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DBasketballS1YonseiUniversity_480x272_30fps_10bit_420.yuv
+class_d/train/DBasketballS2YonseiUniversity_480x272_30fps_10bit_420.yuv
+class_d/train/DBasketballS3YonseiUniversity_480x272_30fps_10bit_420.yuv
+class_d/train/DBoatsChaoPhrayaRiverVidevo_480x272_23fps_10bit_420.yuv
+class_d/train/DBobbleheadBVIHFR_480x272_120fps_10bit_420.yuv
+class_d/train/DBookcaseBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DBoxingPracticeHarmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DBricksBushesStaticBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DBricksLeavesBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DBricksTiltingBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DBubblesPitcherS1BVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DBuildingRoofS1IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DBuildingRoofS2IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DBuildingRoofS4IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DBuntingHangingAcrossHongKongVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DBusyHongKongStreetVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DCalmingWaterBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DCarpetPanAverageBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DCatchBVIHFR_480x272_120fps_10bit_420.yuv
+class_d/train/DCeramicsandSpicesMoroccoVidevo_480x272_50fps_10bit_420.yuv
+class_d/train/DCharactersYonseiUniversity_480x272_30fps_10bit_420.yuv
+class_d/train/DChristmasPresentsIRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DChristmasRoomDareful_480x272_29fps_10bit_420.yuv
+class_d/train/DChurchInsideMCLJCV_480x272_30fps_10bit_420.yuv
+class_d/train/DCityScapesS1IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DCityScapesS2IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DCityScapesS3IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DCityStreetS1IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DCityStreetS3IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DCityStreetS4IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DCityStreetS5IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DCityStreetS6IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DCityStreetS7IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DCloseUpBasketballSceneVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DCloudsStaticBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DColourfulDecorationWatPhoVidevo_480x272_50fps_10bit_420.yuv
+class_d/train/DColourfulKoreanLanternsVidevo_480x272_50fps_10bit_420.yuv
+class_d/train/DColourfulPaperLanternsVidevo_480x272_50fps_10bit_420.yuv
+class_d/train/DColourfulRugsMoroccoVidevo_480x272_50fps_10bit_420.yuv
+class_d/train/DConstructionS2YonseiUniversity_480x272_30fps_10bit_420.yuv
+class_d/train/DCostaRicaS3Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DCrosswalkHarmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DCrosswalkHongKongVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DCrowdRunMCLV_480x272_25fps_10bit_420.yuv
+class_d/train/DCyclistS1BVIHFR_480x272_120fps_10bit_420.yuv
+class_d/train/DCyclistVeniceBeachBoardwalkVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DDollsScene1YonseiUniversity_480x272_30fps_10bit_420.yuv
+class_d/train/DDollsScene2YonseiUniversity_480x272_30fps_10bit_420.yuv
+class_d/train/DDowntownHongKongVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DDropsOnWaterBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DElFuenteMaskLIVENetFlix_480x272_24fps_10bit_420.yuv
+class_d/train/DEnteringHongKongStallS1Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DEnteringHongKongStallS2Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DFerrisWheelTurningVidevo_480x272_50fps_10bit_420.yuv
+class_d/train/DFireS18Mitch_480x272_24fps_10bit_420.yuv
+class_d/train/DFireS21Mitch_480x272_24fps_10bit_420.yuv
+class_d/train/DFireS71Mitch_480x272_24fps_10bit_420.yuv
+class_d/train/DFirewoodS1IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DFirewoodS2IRIS_480x272_25fps_10bit_420.yuv
+class_d/train/DFitnessIRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DFjordsS1Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DFlagShootTUMSVT_480x272_50fps_10bit_420.yuv
+class_d/train/DFlowerChapelS1IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DFlowerChapelS2IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DFlyingCountrysideDareful_480x272_29fps_10bit_420.yuv
+class_d/train/DFlyingMountainsDareful_480x272_29fps_10bit_420.yuv
+class_d/train/DFlyingThroughLAStreetVidevo_480x272_23fps_10bit_420.yuv
+class_d/train/DFungusZoomBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DGrassBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DGrazTowerIRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DHamsterBVIHFR_480x272_120fps_10bit_420.yuv
+class_d/train/DHarleyDavidsonIRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DHongKongIslandVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DHongKongMarket1Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DHongKongMarket2Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DHongKongMarket3S1Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DHongKongMarket3S2Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DHongKongMarket4S1Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DHongKongMarket4S2Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DHongKongS2Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DHongKongS3Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DHorseDrawnCarriagesVidevo_480x272_50fps_10bit_420.yuv
+class_d/train/DHorseStaringS1Videvo_480x272_50fps_10bit_420.yuv
+class_d/train/DHorseStaringS2Videvo_480x272_50fps_10bit_420.yuv
+class_d/train/DJockeyHarmonics_480x272_120fps_10bit_420.yuv
+class_d/train/DJoggersS1BVIHFR_480x272_120fps_10bit_420.yuv
+class_d/train/DJoggersS2BVIHFR_480x272_120fps_10bit_420.yuv
+class_d/train/DKartingIRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DKoraDrumsVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DLakeYonseiUniversity_480x272_30fps_10bit_420.yuv
+class_d/train/DLampLeavesBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DLaundryHangingOverHongKongVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DLeaves1BVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DLeaves3BVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DLowLevelShotAlongHongKongVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DLungshanTempleS1Videvo_480x272_50fps_10bit_420.yuv
+class_d/train/DLungshanTempleS2Videvo_480x272_50fps_10bit_420.yuv
+class_d/train/DManMoTempleVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DManStandinginProduceTruckVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DManWalkingThroughBangkokVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DMaplesS2YonseiUniversity_480x272_30fps_10bit_420.yuv
+class_d/train/DMirabellParkS1IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DMobileHarmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DMoroccanCeramicsShopVidevo_480x272_50fps_10bit_420.yuv
+class_d/train/DMoroccanSlippersVidevo_480x272_50fps_10bit_420.yuv
+class_d/train/DMuralPaintingVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DMyanmarS4Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DMyanmarS6Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DMyeongDongVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DNewYorkStreetDareful_480x272_30fps_10bit_420.yuv
+class_d/train/DOrangeBuntingoverHongKongVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DPaintingTiltingBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DParkViolinMCLJCV_480x272_25fps_10bit_420.yuv
+class_d/train/DPedestriansSeoulatDawnVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DPeopleWalkingS1IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DPersonRunningOutsideVidevo_480x272_50fps_10bit_420.yuv
+class_d/train/DPillowsTransBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DPlasmaFreeBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DPresentsChristmasTreeDareful_480x272_29fps_10bit_420.yuv
+class_d/train/DReadySetGoS2TampereUniversity_480x272_120fps_10bit_420.yuv
+class_d/train/DResidentialBuildingSJTU_480x272_60fps_10bit_420.yuv
+class_d/train/DRollerCoaster2Netflix_480x272_60fps_10bit_420.yuv
+class_d/train/DRunnersSJTU_480x272_60fps_10bit_420.yuv
+class_d/train/DRuralSetupIRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DSeasideWalkIRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DSeekingMCLV_480x272_25fps_10bit_420.yuv
+class_d/train/DSeoulCanalatDawnVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DShoppingCentreVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DSignboardBoatLIVENetFlix_480x272_30fps_10bit_420.yuv
+class_d/train/DSkyscraperBangkokVidevo_480x272_23fps_10bit_420.yuv
+class_d/train/DSmokeClearBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DSmokeS45Mitch_480x272_24fps_10bit_420.yuv
+class_d/train/DSparklerBVIHFR_480x272_120fps_10bit_420.yuv
+class_d/train/DSquareAndTimelapseHarmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DSquareS1IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DSquareS2IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DStreetArtVidevo_480x272_30fps_10bit_420.yuv
+class_d/train/DStreetDancerS1IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DStreetDancerS2IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DStreetDancerS3IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DStreetDancerS4IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DStreetDancerS5IRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DStreetsOfIndiaS1Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DStreetsOfIndiaS2Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DStreetsOfIndiaS3Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DTaiChiHongKongS1Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DTaiChiHongKongS2Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DTaipeiCityRooftops8Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DTaipeiCityRooftopsS1Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DTaipeiCityRooftopsS2Videvo_480x272_25fps_10bit_420.yuv
+class_d/train/DTaksinBridgeVidevo_480x272_23fps_10bit_420.yuv
+class_d/train/DTallBuildingsSJTU_480x272_60fps_10bit_420.yuv
+class_d/train/DTennisMCLV_480x272_24fps_10bit_420.yuv
+class_d/train/DToddlerFountain2Netflix_480x272_60fps_10bit_420.yuv
+class_d/train/DToyCalendarHarmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DTrackingPastRestaurantVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DTraditionalIndonesianKecakVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DTrafficandBuildingSJTU_480x272_60fps_10bit_420.yuv
+class_d/train/DTrafficFlowSJTU_480x272_60fps_10bit_420.yuv
+class_d/train/DTrafficonTasksinBridgeVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DTreeWillsBVITexture_480x272_120fps_10bit_420.yuv
+class_d/train/DTruckIRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DTunnelFlagS1Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DUnloadingVegetablesVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DVegetableMarketS1LIVENetFlix_480x272_30fps_10bit_420.yuv
+class_d/train/DVegetableMarketS2LIVENetFlix_480x272_30fps_10bit_420.yuv
+class_d/train/DVegetableMarketS4LIVENetFlix_480x272_30fps_10bit_420.yuv
+class_d/train/DVeniceS1Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DVeniceS2Harmonics_480x272_60fps_10bit_420.yuv
+class_d/train/DVeniceSceneIRIS_480x272_24fps_10bit_420.yuv
+class_d/train/DWalkingDownKhaoStreetVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DWalkingThroughFootbridgeVidevo_480x272_25fps_10bit_420.yuv
+class_d/train/DWaterS81Mitch_480x272_24fps_10bit_420.yuv
+class_d/train/DWatPhoTempleVidevo_480x272_50fps_10bit_420.yuv
+class_d/train/DWoodSJTU_480x272_60fps_10bit_420.yuv
+class_d/train/DWovenVidevo_480x272_25fps_10bit_420.yuv
+class_d/validation/DAmericanFootballS2Harmonics_480x272_60fps_10bit_420.yuv
+class_d/validation/DBuildingRoofS3IRIS_480x272_24fps_10bit_420.yuv
+class_d/validation/DCrosswalkHongKong2S1Videvo_480x272_25fps_10bit_420.yuv
+class_d/validation/DCrosswalkHongKong2S2Videvo_480x272_25fps_10bit_420.yuv
+class_d/validation/DDrivingPOVHarmonics_480x272_60fps_10bit_420.yuv
+class_d/validation/DHongKongS1Harmonics_480x272_60fps_10bit_420.yuv
+class_d/validation/DMaplesS1YonseiUniversity_480x272_30fps_10bit_420.yuv
+class_d/validation/DMirabellParkS2IRIS_480x272_24fps_10bit_420.yuv
+class_d/validation/DRuralSetupS2IRIS_480x272_24fps_10bit_420.yuv
+class_d/validation/DScarfSJTU_480x272_60fps_10bit_420.yuv
+class_d/validation/DTouristsSatOutsideVidevo_480x272_25fps_10bit_420.yuv
+class_d/validation/DTrackingDownHongKongSideVidevo_480x272_25fps_10bit_420.yuv
+class_d/validation/DTrackingPastStallHongKongVidevo_480x272_25fps_10bit_420.yuv
+class_d/validation/DVegetableMarketS3LIVENetFlix_480x272_30fps_10bit_420.yuv
+class_d/validation/DWalkingDownNorthRodeoVidevo_480x272_25fps_10bit_420.yuv
+class_d/validation/DWaterS65Mitch_480x272_24fps_10bit_420.yuv
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/bvidvc_mp4_2_yuv.py b/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/bvidvc_mp4_2_yuv.py
new file mode 100644
index 0000000000000000000000000000000000000000..529d2e581f272d131a6c729294f389e78656f7c7
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/bvidvc_mp4_2_yuv.py
@@ -0,0 +1,35 @@
+import os
+import argparse
+import subprocess
+
def parse_arguments():
    """Parse the command-line options and return them as a plain dict."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--input_dir',
        type=str,
        required=True,
        help="Path to the folder where png files are present")
    arg_parser.add_argument(
        '--out_dir',
        type=str,
        required=True,
        help="Path to the directory where outputs to be stored")
    return vars(arg_parser.parse_args())
+
+
if __name__ == "__main__":

    args = parse_arguments()

    in_dir  = args['input_dir']
    out_dir = args['out_dir']

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Convert every .mp4 in in_dir to a 10-bit 4:2:0 planar yuv with the
    # same basename; other file types are skipped silently.
    for inp in os.listdir(in_dir):
        input_path = os.path.join(in_dir, inp)

        if os.path.splitext(inp)[1] == ".mp4":
            yuv_file = os.path.join(out_dir, os.path.splitext(os.path.basename(inp))[0] + ".yuv")
            decompr_cmd = " ".join(["ffmpeg -v error", "-i", input_path, "-pix_fmt", "yuv420p10le", "-y",
                                    yuv_file])

            # Bug fix: the exit status was previously assigned and ignored,
            # so failed conversions went completely unnoticed.
            status = os.system(decompr_cmd)
            if status:
                print("Error while converting " + input_path + " to yuv")
\ No newline at end of file
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/div2k_gt_datagen.py b/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/div2k_gt_datagen.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d884cad8d8588c7d40d6a9198c6301a8f06398a
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/div2k_gt_datagen.py
@@ -0,0 +1,209 @@
+import os
+import argparse
+import subprocess
+import numpy as np
+import math
+
def str2bool(v):
    """Interpret v as a boolean flag value.

    bool instances pass through unchanged; common textual spellings are
    matched case-insensitively.  Anything else raises ArgumentTypeError.
    """
    if isinstance(v, bool):
        return v
    lowered = v.lower()
    if lowered in ('yes', 'y', 'true', 't', '1'):
        return True
    if lowered in ('no', 'n', 'false', 'f', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
+
def parse_arguments():
    """Parse options for DIV2K ground-truth CTU generation.

    Returns a dict with keys: input_dir, out_dir, ctu_size, pad_size,
    del_inter_res and bit_depth.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_dir', type=str, 
                        help="Path to the folder where png files are present", 
                        required=True)
    parser.add_argument('--out_dir', type=str,
                        help="Path to the directory where outputs to be stored",
                        required=True)
    parser.add_argument('--ctu_size', type=int,
                        help="CTU size that is used for training NNLF, default=128",
                        default=128, required=False)
    parser.add_argument('--pad_size', type=int,
                        help="Padding size across one side of the CTU, default=0",
                        default=0, required=False)
    parser.add_argument('--del_inter_res', type=str2bool,
                        help="Do you want to delete the intermediate yuv, default=True",
                        default=True, required=False)
    parser.add_argument('--bit_depth', type=int,
                        help="Bit depth of the input files, Default=8",
                        default=8, required=False)
    return vars(parser.parse_args())
+
class VideoCaptureYUV:
    """Frame-wise reader for planar YUV 4:2:0 files.

    size is (height, width) of the luma plane; bitdepth > 8 selects
    16-bit (two bytes per sample) reads.
    """

    def __init__(self, filename, size, bitdepth):
        self.height, self.width = size
        self.bitdepth = bitdepth
        self.f = open(filename, 'rb')
        # One 4:2:0 frame: w*h luma samples + 2 * (w*h/4) chroma samples.
        self.frame_len = int(self.width * self.height * 3 / 2)
        self.data_type = np.uint8
        if self.bitdepth > 8:
            self.frame_len *= 2
            self.data_type = np.uint16
        self.luma_shape = (self.height, self.width)
        self.chroma_shape = (int(self.height/2), int(self.width/2))
        # Sample (not byte) offsets of the Cb and Cr planes within a frame.
        self.cb_offset = self.height * self.width
        self.cr_offset = self.height * self.width + int(self.height * self.width / 4)

    def read_raw(self):
        """Read the next frame.

        Returns (True, y, u, v) with 2-D numpy planes on success, or
        (False, None, None, None) at end of file / on a malformed frame.
        """
        try:
            raw = self.f.read(self.frame_len)
            # Bug fix: a short or empty read (the normal end of file) used
            # to fall through to reshape and surface as a printed exception.
            if len(raw) < self.frame_len:
                return False, None, None, None
            yuv = np.frombuffer(raw, dtype=self.data_type)

            y = yuv[0 : self.cb_offset].reshape(self.luma_shape)
            u = yuv[self.cb_offset : self.cr_offset].reshape(self.chroma_shape)
            v = yuv[self.cr_offset : ].reshape(self.chroma_shape)
        except Exception as e:
            print(str(e))
            return False, None, None, None
        return True, y, u, v

    def close(self):
        """Release the underlying file handle (previously never closed)."""
        self.f.close()
+
#Padding size and block size of luma is 2 times of chroma
def pad_n_ext2mul_blksz(y, u, v, ctu_sz, pad_sz):
    """Edge-pad YUV planes so the luma plane tiles exactly into CTUs.

    ctu_sz and pad_sz are chroma-plane quantities (luma uses twice each).
    Each plane gets a replicated-edge border (luma: 2*pad_sz, chroma:
    pad_sz) and is then extended at the bottom/right until the luma plane
    measures a multiple of 2*ctu_sz plus the border on both sides.
    Returns (y_pad, u_pad, v_pad).
    """
    # Target padded sizes: CTU-aligned luma size plus the border twice.
    pad_lh = math.ceil(y.shape[0] / (2 * ctu_sz)) * 2 * ctu_sz + 4 * pad_sz
    pad_lw = math.ceil(y.shape[1] / (2 * ctu_sz)) * 2 * ctu_sz + 4 * pad_sz
    pad_ch = pad_lh // 2
    pad_cw = pad_lw // 2

    # One edge-replicating np.pad per plane.  Equivalent to the previous
    # row/column-at-a-time np.append loops (which re-copied the whole array
    # on every appended row/column, i.e. quadratic work): appending copies
    # of the last row/column is exactly what mode='edge' produces.
    y_pad = np.pad(y, ((2 * pad_sz, pad_lh - y.shape[0] - 2 * pad_sz),
                       (2 * pad_sz, pad_lw - y.shape[1] - 2 * pad_sz)), mode='edge')
    u_pad = np.pad(u, ((pad_sz, pad_ch - u.shape[0] - pad_sz),
                       (pad_sz, pad_cw - u.shape[1] - pad_sz)), mode='edge')
    v_pad = np.pad(v, ((pad_sz, pad_ch - v.shape[0] - pad_sz),
                       (pad_sz, pad_cw - v.shape[1] - pad_sz)), mode='edge')

    return y_pad, u_pad, v_pad
+
def pad_n_dump_ctus(args, inter_yuv_file):
    """Split every frame of inter_yuv_file into padded CTU-sized YUV blocks.

    Each block is written to args['out_dir'] as
    <DIV2K-basename>_<frame>_<lumaX>-<lumaY>.yuv, holding the padded luma
    block followed by the padded U and V blocks.
    """
    # Output basename: "DIV2K" prefix + first two '_' fields (name, WxH).
    inter_yuv_basename = "_".join(os.path.splitext("DIV2K" + os.path.basename(inter_yuv_file))[0].split("_")[:2])
    # CTU size and padding size taken here are for chroma (luma uses 2x).
    ctu_sz = int(args['ctu_size'] / 2)
    pad_sz = int(args['pad_size'] / 2)
    bitdepth = args['bit_depth']

    output_dir = args['out_dir']
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Resolution is encoded in the basename's second '_' field as "WxH".
    [width, height] = list(map(int, inter_yuv_basename.split("_")[1].split("x")))
    img_size = (height, width)

    cap_orig = VideoCaptureYUV(inter_yuv_file, img_size, bitdepth)
    frm_cnt = 0
    while 1:
        ret, y, u, v = cap_orig.read_raw()
        if not ret:
            print("Reached end of sequence, num of frames =", frm_cnt)
            break

        y_pad, u_pad, v_pad = pad_n_ext2mul_blksz(y, u, v, ctu_sz, pad_sz)

        # Padded luma width and height.
        pad_lh = y_pad.shape[0]
        pad_lw = y_pad.shape[1]

        # Walk the padded luma plane one (2*ctu_sz)-sized CTU at a time.
        # 'by'/'bx' are the luma coordinates of the CTU origin (renamed from
        # 'y'/'x', which shadowed the frame arrays read above).
        for by in range(pad_sz * 2, pad_lh - pad_sz * 2, 2 * ctu_sz):
            for bx in range(pad_sz * 2, pad_lw - pad_sz * 2, 2 * ctu_sz):

                # Luma offsets: CTU plus a pad_sz*2 border on each side.
                y_y_startoffset = by - pad_sz * 2
                y_y_endoffset   = by + 2 * (ctu_sz + pad_sz)
                y_x_startoffset = bx - pad_sz * 2
                y_x_endoffset   = bx + 2 * (ctu_sz + pad_sz)

                out_file_name = "_".join([inter_yuv_basename,
                                          str(frm_cnt),
                                          str(y_x_startoffset) + "-" + str(y_y_startoffset)]) + ".yuv"
                out_file_path = os.path.join(output_dir, out_file_name)
                # Bug fix: the skip test previously used a name with the x/y
                # fields swapped and without the output directory prefix, so
                # it never matched the files actually written below.
                if os.path.exists(out_file_path):
                    continue

                y_orig = y_pad[y_y_startoffset:y_y_endoffset, y_x_startoffset:y_x_endoffset].flatten()

                # Chroma offsets: half the luma coordinates.
                uv_y_startoffset = int(by / 2) - pad_sz
                uv_y_endoffset   = int(by / 2) + ctu_sz + pad_sz
                uv_x_startoffset = int(bx / 2) - pad_sz
                uv_x_endoffset   = int(bx / 2) + ctu_sz + pad_sz
                u_orig = u_pad[uv_y_startoffset:uv_y_endoffset, uv_x_startoffset:uv_x_endoffset].flatten()
                v_orig = v_pad[uv_y_startoffset:uv_y_endoffset, uv_x_startoffset:uv_x_endoffset].flatten()

                yuv_orig = np.concatenate((y_orig, u_orig, v_orig))

                with open(out_file_path, "wb") as binary_file:
                    binary_file.write(yuv_orig.tobytes())

        frm_cnt += 1

    # Release the reader's file handle (previously leaked).
    cap_orig.f.close()
    return
+
def get_resolution(file):
    """Return (width, height) of the first video stream, via ffprobe."""

    def probe(probe_cmd):
        # ffprobe's csv output looks like "stream,<value>"; take field 1.
        return int(subprocess.getoutput(probe_cmd).split(',')[1])

    ffmpeg_wt_cmd = " ".join(["ffprobe -v error -select_streams v:0",
                              "-show_entries stream=width",
                              "-print_format csv",
                              "-i", file])
    ffmpeg_ht_cmd = " ".join(["ffprobe -v error -select_streams v:0",
                              "-show_entries stream=height",
                              "-print_format csv",
                              "-i", file])

    return probe(ffmpeg_wt_cmd), probe(ffmpeg_ht_cmd)
+
if __name__=="__main__":

    args = parse_arguments()

    in_dir  = args['input_dir']
    out_dir = args['out_dir']

    # Bug fix: for .png inputs ffmpeg writes the intermediate yuv into
    # out_dir *before* pad_n_dump_ctus runs, so the directory must exist
    # up front (ffmpeg cannot create it).
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for inp in os.listdir(in_dir):
        input_path = os.path.join(in_dir, inp)
        interm_yuv = None

        if os.path.splitext(inp)[1] == ".yuv":
            interm_yuv = input_path
            pad_n_dump_ctus(args, interm_yuv)

        elif os.path.splitext(inp)[1] == ".png":
            width, height = get_resolution(input_path)
            outfile_prefix = "-".join(os.path.splitext(inp)[0].split("_"))  # converting '_' to '-'
            outfile = "_".join([outfile_prefix, str(width)+"x"+str(height), "8bit", "420"])
            interm_yuv = os.path.join(out_dir, outfile+".yuv")

            # '-y' keeps ffmpeg from stalling on an interactive overwrite
            # prompt on re-runs (matches the other conversion scripts).
            ffmpeg_cmd = " ".join(["ffmpeg", "-v error", "-y", "-i", input_path,
                                   "-pix_fmt", "yuv420p", interm_yuv])
            status = os.system(ffmpeg_cmd)
            if status:
                print("Error while converting "+ input_path+" to yuv")
                # Bug fix: previously we went on to split a yuv that was
                # never produced; skip this input instead.
                continue

            pad_n_dump_ctus(args, interm_yuv)

            if args['del_inter_res']:
                os.remove(interm_yuv)

        else:
            print("Error: Unsupported input type")
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/div2k_png_2_yuv.py b/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/div2k_png_2_yuv.py
new file mode 100644
index 0000000000000000000000000000000000000000..d575edab21f7df418fab105f7cc60b067c756883
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/div2k_png_2_yuv.py
@@ -0,0 +1,55 @@
+import os
+import argparse
+import subprocess
+
def parse_arguments():
    """Collect the required input/output directory options into a dict."""
    parser = argparse.ArgumentParser()
    options = {
        '--input_dir': "Path to the folder where png files are present",
        '--out_dir': "Path to the directory where outputs to be stored",
    }
    for flag, help_text in options.items():
        parser.add_argument(flag, type=str, help=help_text, required=True)
    return vars(parser.parse_args())
+
def get_resolution(file):
    """Return (width, height) of the first video stream of *file*.

    Shells out to ffprobe twice, once per dimension; each invocation
    prints CSV of the form "stream,<value>" and field 1 is parsed as int.
    NOTE(review): no error handling — if ffprobe is absent or fails, the
    int() conversion will raise on the error text.
    """
    ffmpeg_wt_cmd = " ".join(["ffprobe -v error -select_streams v:0", 
                              "-show_entries stream=width", 
                              "-print_format csv",
                              "-i", file])
    ffmpeg_ht_cmd = " ".join(["ffprobe -v error -select_streams v:0", 
                              "-show_entries stream=height", 
                              "-print_format csv",
                              "-i", file])

    width  = int(subprocess.getoutput(ffmpeg_wt_cmd).split(',')[1])
    height = int(subprocess.getoutput(ffmpeg_ht_cmd).split(',')[1])

    return width,height
+
if __name__=="__main__":

    args = parse_arguments()

    in_dir  = args['input_dir']
    out_dir = args['out_dir']

    # Bug fix: ffmpeg cannot create a missing output directory, so out_dir
    # must exist before the first conversion (the mp4 script already does
    # this; this one did not).
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for inp in os.listdir(in_dir):
        input_path = os.path.join(in_dir, inp)

        if os.path.splitext(inp)[1] == ".png":
            width, height = get_resolution(input_path)
            outfile_prefix = "-".join(os.path.splitext(inp)[0].split("_"))  # converting '_' to '-'
            outfile = "_".join([outfile_prefix, str(width)+"x"+str(height), "8bit", "420"])
            interm_yuv = os.path.join(out_dir, outfile+".yuv")

            # '-y' avoids an interactive overwrite prompt on re-runs.
            ffmpeg_cmd = " ".join(["ffmpeg", "-v error", "-y", "-i", input_path,
                                   "-pix_fmt", "yuv420p", interm_yuv])
            status = os.system(ffmpeg_cmd)
            if status:
                print("Error while converting "+ input_path+" to yuv")

        else:
            print("Error: Unsupported input type")
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/gt_bvidvc_datagen.py b/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/gt_bvidvc_datagen.py
new file mode 100644
index 0000000000000000000000000000000000000000..112109ba0d3913f63dd409d0963995ef4d05363c
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/gt_bvidvc_datagen.py
@@ -0,0 +1,186 @@
+import os
+import numpy as np
+import math
+import argparse
+from sys import stderr
+
def str2bool(v):
    """Convert a CLI-supplied truth value to bool; bool passes through."""
    if isinstance(v, bool):
        return v
    truth_table = {'yes': True, 'y': True, 'true': True, 't': True, '1': True,
                   'no': False, 'n': False, 'false': False, 'f': False, '0': False}
    try:
        return truth_table[v.lower()]
    except KeyError:
        raise argparse.ArgumentTypeError('Boolean value expected.')
+    
def parse_arguments():
    """Parse options for CTU ground-truth extraction from one stream/yuv.

    Returns a dict with keys: dump_ctus, stream, ctu_size, pad_size,
    out_dir, del_recon_bs and bit_depth.

    NOTE(review): --stream and --out_dir are declared required=False but
    the __main__ path dereferences both unconditionally — confirm whether
    they should be required=True.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dump_ctus', type=str2bool,
                        help="Whether to dump ctus, default=True", 
                        default = True, required=False)
    parser.add_argument('--stream', type=str,
                        help="Path to the input stream/yuv", required=False)
    parser.add_argument('--ctu_size', type=int,
                        help="CTU size that is used for training NNLF, default=128",
                        default=128, required=False)
    parser.add_argument('--pad_size', type=int,
                        help="Padding size across one side of the CTU, default=0",
                        default=0, required=False)
    parser.add_argument('--out_dir', type=str,
                        help="Path to the directory where outputs should be stored",
                        required=False)
    parser.add_argument('--del_recon_bs', type=str2bool,
                        help="Do you want to delete the intermediate yuvs, default=True",
                        default=True, required=False)
    parser.add_argument('--bit_depth', type=int,
                        help="Bit depth of the input, Default=10",
                        default=10, required=False)
    return vars(parser.parse_args())
+
class VideoCaptureYUV:
    """Frame-wise reader for planar YUV 4:2:0 files.

    size is (height, width) of the luma plane; bitdepth > 8 switches to
    16-bit (two bytes per sample) reads.

    NOTE(review): the file handle opened here is never closed by this
    class — callers leak it unless they close self.f themselves.
    """
    def __init__(self, filename, size, bitdepth):
        self.height, self.width = size
        self.bitdepth = bitdepth
        self.f = open(filename, 'rb')
        # One 4:2:0 frame: w*h luma samples + 2 * (w*h/4) chroma samples.
        self.frame_len = int(self.width * self.height * 3 / 2)
        self.data_type = np.uint8
        if self.bitdepth > 8:
            self.frame_len *= 2
            self.data_type = np.uint16
        self.luma_shape = (self.height, self.width)
        self.chroma_shape = (int(self.height/2), int(self.width/2))
        # Sample (not byte) offsets of the Cb and Cr planes within a frame.
        self.cb_offset = self.height * self.width
        self.cr_offset = self.height * self.width + int(self.height * self.width / 4)
    def read_raw(self):
        """Read the next frame; (True, y, u, v) or (False, None, None, None).

        End of file is reached via the reshape failing on a short read,
        which is caught (and its message printed) below.
        """
        try:                
            raw = self.f.read(self.frame_len)
            yuv = np.frombuffer(raw, dtype=self.data_type)
            
            
            y = yuv[0 : self.cb_offset].reshape(self.luma_shape)
            u = yuv[self.cb_offset : self.cr_offset].reshape(self.chroma_shape)
            v = yuv[self.cr_offset : ].reshape(self.chroma_shape)
        except Exception as e:
            print(str(e))
            return False, None, None, None
        return True,y,u,v
+
#Padding size and block size of luma is 2 times of chroma
def pad_n_ext2mul_blksz(y, u, v, ctu_sz, pad_sz):
    """Edge-pad YUV planes so the luma plane tiles exactly into CTUs.

    ctu_sz and pad_sz are chroma-plane quantities (luma uses twice each).
    Each plane gets a replicated-edge border (luma: 2*pad_sz, chroma:
    pad_sz) and is then extended at the bottom/right until the luma plane
    measures a multiple of 2*ctu_sz plus the border on both sides.
    Returns (y_pad, u_pad, v_pad).
    """
    # Target padded sizes: CTU-aligned luma size plus the border twice.
    pad_lh = math.ceil(y.shape[0] / (2 * ctu_sz)) * 2 * ctu_sz + 4 * pad_sz
    pad_lw = math.ceil(y.shape[1] / (2 * ctu_sz)) * 2 * ctu_sz + 4 * pad_sz
    pad_ch = pad_lh // 2
    pad_cw = pad_lw // 2

    # One edge-replicating np.pad per plane.  Equivalent to the previous
    # row/column-at-a-time np.append loops (which re-copied the whole array
    # on every appended row/column, i.e. quadratic work): appending copies
    # of the last row/column is exactly what mode='edge' produces.
    y_pad = np.pad(y, ((2 * pad_sz, pad_lh - y.shape[0] - 2 * pad_sz),
                       (2 * pad_sz, pad_lw - y.shape[1] - 2 * pad_sz)), mode='edge')
    u_pad = np.pad(u, ((pad_sz, pad_ch - u.shape[0] - pad_sz),
                       (pad_sz, pad_cw - u.shape[1] - pad_sz)), mode='edge')
    v_pad = np.pad(v, ((pad_sz, pad_ch - v.shape[0] - pad_sz),
                       (pad_sz, pad_cw - v.shape[1] - pad_sz)), mode='edge')

    return y_pad, u_pad, v_pad
+
+
def pad_n_dump_ctus(args, inter_yuv_file):
    """Split every frame of inter_yuv_file into padded CTU-sized YUV blocks.

    Each block is written to args['out_dir'] as
    <basename>_<frame>_<lumaX>-<lumaY>.yuv, holding the padded luma block
    followed by the padded U and V blocks.
    """
    # Output basename: first two '_' fields of the input name (name, WxH).
    inter_yuv_basename = "_".join(os.path.splitext(os.path.basename(inter_yuv_file))[0].split("_")[:2])
    # CTU size and padding size taken here are for chroma (luma uses 2x).
    ctu_sz = int(args['ctu_size'] / 2)
    pad_sz = int(args['pad_size'] / 2)
    bitdepth = args['bit_depth']

    output_dir = args['out_dir']
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Resolution is encoded in the basename's second '_' field as "WxH".
    [width, height] = list(map(int, inter_yuv_basename.split("_")[1].split("x")))
    img_size = (height, width)

    cap_orig = VideoCaptureYUV(inter_yuv_file, img_size, bitdepth)
    frm_cnt = 0
    while 1:
        ret, y, u, v = cap_orig.read_raw()
        if not ret:
            print("Reached end of sequence, num of frames =", frm_cnt)
            break

        y_pad, u_pad, v_pad = pad_n_ext2mul_blksz(y, u, v, ctu_sz, pad_sz)

        # Padded luma width and height.
        pad_lh = y_pad.shape[0]
        pad_lw = y_pad.shape[1]

        # Walk the padded luma plane one (2*ctu_sz)-sized CTU at a time.
        # 'by'/'bx' are the luma coordinates of the CTU origin (renamed from
        # 'y'/'x', which shadowed the frame arrays read above).
        for by in range(pad_sz * 2, pad_lh - pad_sz * 2, 2 * ctu_sz):
            for bx in range(pad_sz * 2, pad_lw - pad_sz * 2, 2 * ctu_sz):

                # Luma offsets: CTU plus a pad_sz*2 border on each side.
                y_y_startoffset = by - pad_sz * 2
                y_y_endoffset   = by + 2 * (ctu_sz + pad_sz)
                y_x_startoffset = bx - pad_sz * 2
                y_x_endoffset   = bx + 2 * (ctu_sz + pad_sz)

                out_file_name = "_".join([inter_yuv_basename,
                                          str(frm_cnt),
                                          str(y_x_startoffset) + "-" + str(y_y_startoffset)]) + ".yuv"
                out_file_path = os.path.join(output_dir, out_file_name)
                # Bug fix: the skip test previously used a name with the x/y
                # fields swapped and without the output directory prefix, so
                # it never matched the files actually written below.
                if os.path.exists(out_file_path):
                    continue

                y_orig = y_pad[y_y_startoffset:y_y_endoffset, y_x_startoffset:y_x_endoffset].flatten()

                # Chroma offsets: half the luma coordinates.
                uv_y_startoffset = int(by / 2) - pad_sz
                uv_y_endoffset   = int(by / 2) + ctu_sz + pad_sz
                uv_x_startoffset = int(bx / 2) - pad_sz
                uv_x_endoffset   = int(bx / 2) + ctu_sz + pad_sz
                u_orig = u_pad[uv_y_startoffset:uv_y_endoffset, uv_x_startoffset:uv_x_endoffset].flatten()
                v_orig = v_pad[uv_y_startoffset:uv_y_endoffset, uv_x_startoffset:uv_x_endoffset].flatten()

                yuv_orig = np.concatenate((y_orig, u_orig, v_orig))

                with open(out_file_path, "wb") as binary_file:
                    binary_file.write(yuv_orig.tobytes())

        frm_cnt += 1

    # Release the reader's file handle (previously leaked).
    cap_orig.f.close()
    return
+
if __name__=="__main__":
    args = parse_arguments()

    # Bug fix: --stream and --out_dir are declared optional by
    # parse_arguments but are dereferenced unconditionally below; fail
    # with a clear message instead of a TypeError on None.
    if not args['stream'] or not args['out_dir']:
        print("Both --stream and --out_dir must be given", file=stderr)
        exit(-1)

    # Bug fix: ffmpeg writes the decoded yuv into out_dir before
    # pad_n_dump_ctus (which used to create it) ever runs.
    if not os.path.exists(args['out_dir']):
        os.makedirs(args['out_dir'])

    yuv_file = None
    delete_inter_yuv = False
    if os.path.splitext(args['stream'])[1] != ".yuv":
        # Compressed input: decode to a 10-bit 4:2:0 yuv in out_dir first.
        yuv_file = os.path.join(args['out_dir'], os.path.splitext(os.path.basename(args['stream']))[0] + ".yuv")
        decompr_cmd = " ".join(["ffmpeg -v error", "-i", args['stream'], "-pix_fmt", "yuv420p10le", "-y",
                                yuv_file])
        status = os.system(decompr_cmd)
        if status:
            print("Failed for cmd "+ decompr_cmd, file=stderr)
            exit(-1)
        delete_inter_yuv = args['del_recon_bs']
    else:
        yuv_file = args['stream']

    if args['dump_ctus']:
        pad_n_dump_ctus(args, yuv_file)

    if delete_inter_yuv:
        os.remove(yuv_file)
    print("Done")
\ No newline at end of file
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/gt_bvidvc_parallel_runs.py b/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/gt_bvidvc_parallel_runs.py
new file mode 100644
index 0000000000000000000000000000000000000000..d59b62470b58c904ebd5613776b8f75b7a249430
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/1_generate_raw_data/gt_bvidvc_parallel_runs.py
@@ -0,0 +1,44 @@
+import os
+import argparse
+from multiprocessing import Pool
+from sys import stderr
+
+def parse_arguments():  # CLI: --stream_dir, --out_dir, optional --num_cores; returns args as a dict
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--stream_dir', type=str,
+                        help="Path to the input streams/yuvs directory", required=True)
+    parser.add_argument('--out_dir', type=str,
+                        help="Path to the directory where outputs should be stored",
+                        required=True)
+    parser.add_argument('--num_cores', type=int,
+                        help="Number of gt_data_gen.py runs to be triggered, default=1",
+                        default=1, required=False)
+    return vars(parser.parse_args())  # vars() converts the Namespace to a plain dict
+
+def run_cmd(inp_cmd):  # pool worker: run one shell command; log failures, never raise
+    status = os.system(inp_cmd)
+    # print(inp_cmd)
+    if status:  # non-zero exit status from the shell
+        print("Failed for cmd "+ inp_cmd, file=stderr)
+    return
+
+def run_multiple_cmds(list_inp_cmd, num_cores):  # run the commands concurrently on num_cores workers
+    pool = Pool(processes=num_cores)
+    pool.map_async(run_cmd, list_inp_cmd)  # async dispatch; join() below waits for completion
+    pool.close()
+    pool.join() 
+    return
+
+if __name__=="__main__":  # build one gt_bvidvc_datagen.py command per stream file, then run in parallel
+
+    args = parse_arguments()
+    
+    stream_dir = args['stream_dir']
+    cmd_list = []
+    for stream_file in os.listdir(stream_dir):
+        cmd = " ".join(["python gt_bvidvc_datagen.py",   # NOTE(review): assumes the script is in the cwd
+                        "--stream", os.path.join(stream_dir, stream_file),
+                        "--out_dir", args['out_dir']])
+
+        cmd_list.append(cmd)
+    run_multiple_cmds(cmd_list, args['num_cores'])
\ No newline at end of file
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/2_generate_compression_data/encode_streams.py b/training/training_scripts/Nn_Filtering_Set_LC/2_generate_compression_data/encode_streams.py
new file mode 100644
index 0000000000000000000000000000000000000000..83d161b6339fe58e82d7183352a5996c61a4e9c0
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/2_generate_compression_data/encode_streams.py
@@ -0,0 +1,91 @@
+import os
+import argparse
+import re
+
+def parse_arguments():  # CLI for batch VTM encode-command generation; returns args as a dict
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--input_dir', type=str, 
+                        help="Path to the folder where yuv files are present", 
+                        required=True)
+    parser.add_argument('--output_dir', type=str,
+                        help="Path to the directory where outputs to be stored",
+                        required=True)
+    parser.add_argument('--enc_exe', type=str,
+                        help="Path to the encoder executable",
+                        required=False, default="./EncoderApp")
+    parser.add_argument('--enc_type', type=str,
+                        help="RA: Random Access, AI: All Intra",
+                        required=True)
+    parser.add_argument('--model_paths', type=str,
+                        help="Model files paths,separated by comma needed if it is RA. Eg: model0,model1,model2,model4",
+                        required=False, default=None)
+    return vars(parser.parse_args())
+
+if __name__=="__main__":
+
+    qp_list = [20,25,30,35,40,45]
+
+    args = parse_arguments()
+    
+    in_dir    = args['input_dir']
+    out_dir   = args['output_dir']
+    enc_exe   = args['enc_exe']
+    enc_type  = args['enc_type']
+
+    cfg_file = None
+    if enc_type == "AI":
+        cfg_file = "encoder_intra_vtm.cfg"
+        num_frames = 1
+        inp_file_parser = re.compile(r'(\w+)_(\d+x\d+)_(\d+)bit_(\d+).yuv')
+    elif enc_type == "RA":
+        cfg_file = "encoder_randomaccess_vtm.cfg"
+        num_frames = 64
+        inp_file_parser = re.compile(r'(\w+)_(\d+x\d+)_(\d+)fps_(\d+)bit_(\d+).yuv')
+        if args['model_paths'] == None:
+            print("Error model files needs to be present for BVIDVC encoding for RA")
+            exit(-1)
+    else:
+        print("Error: Unknown encoding type")
+        exit(-1)
+    
+    for inp in os.listdir(in_dir):
+        input_path = os.path.join(in_dir, inp)
+        match_obj = inp_file_parser.match(inp)
+        if not match_obj:
+            # print("Skipping input "+inp+" due to unsupported naming convention")
+            continue
+        inp_basename = os.path.splitext(inp)[0]
+        inp_bitdepth = int(inp_basename.split("_")[-2][:-3])
+        chroma_fmt = int(inp_basename.split("_")[-1])
+        if enc_type == "RA":
+            fps = int(inp_basename.split("_")[2][:-3])  #ignoring last 3 elements i.e. fps
+        else:
+            fps = 30
+        frame_skip = 0
+        width, height = inp_basename.split("_")[1].split("x")
+
+        enc_cmd_part1 = " ".join([enc_exe, 
+                                "-c", cfg_file,
+                                "-i", input_path,
+                                "--InputBitDepth="+str(inp_bitdepth),
+                                "--InputChromaFormat="+str(chroma_fmt),
+                                "--FrameRate="+str(fps),
+                                "--FrameSkip="+str(frame_skip),
+                                "--SourceWidth="+str(width),
+                                "--SourceHeight="+str(height),
+                                "--ConformanceWindowMode=1",    #To handle non-multiple of 8
+                                "--FramesToBeEncoded="+str(num_frames)])
+
+        if enc_type == "RA":
+            intra_period = 64
+            enc_cmd_part1 = " ".join([enc_cmd_part1, "-ip", str(intra_period), "--NnlfOption=3",
+                                      "--LCModelPath="+args['model_paths']])
+   
+        for qp in qp_list:
+            enc_out_file = "_".join([enc_type, str(qp)+"qp", inp_basename]) + ".266"
+            enc_out_path = os.path.join(out_dir, enc_out_file)
+
+            enc_cmd = " ".join([enc_cmd_part1,
+                                "-q", str(qp),
+                                "-b", enc_out_path])
+            print(enc_cmd)
\ No newline at end of file
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/2_generate_compression_data/training_data_gen.py b/training/training_scripts/Nn_Filtering_Set_LC/2_generate_compression_data/training_data_gen.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f40d89545c18365c937a303e4ba69d8c2e17ea2
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/2_generate_compression_data/training_data_gen.py
@@ -0,0 +1,271 @@
+import os
+import numpy as np
+import math
+import argparse
+from sys import stderr
+import random
+import itertools
+
+def str2bool(v):  # argparse-friendly bool parser; accepts yes/no, y/n, true/false, t/f, 1/0 (case-insensitive)
+    if isinstance(v, bool):
+       return v
+    if v.lower() in ('yes', 'y', 'true', 't', '1'):
+        return True
+    elif v.lower() in ('no', 'n', 'false', 'f', '0'):
+        return False
+    else:
+        raise argparse.ArgumentTypeError('Boolean value expected.')
+
+def parse_arguments():  # CLI for CTU training-data extraction from a decoded stream; returns args as a dict
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dec_exe', type=str,
+                        help="Path to the decoder executable", 
+                        default=None, required=False)
+    parser.add_argument('--dump_ctus', type=str2bool,
+                        help="Whether to dump ctus, default=True", 
+                        required=True)
+    parser.add_argument('--stream', type=str,
+                        help="Path to the input stream", required=False)
+    parser.add_argument('--DumpBasename', type=str, 
+                        help="Path to the reconstructed yuv file before deblocking", 
+                        required=False)
+    parser.add_argument('--out_dir', type=str,
+                        help="Path to the directory where outputs should be stored",
+                        required=False)
+    parser.add_argument('--decoder_log', type=str,
+                        help="Path to the decoder output log file",
+                        default=None, required=False)
+    parser.add_argument('--del_recon_bs', type=str2bool,
+                        help="DO you want to delete the decoded and bs info yuvs",
+                        default=True, required=False)
+    parser.add_argument('--bit_depth', type=int,
+                        help="Bit depth of the recon and bs yuv files, default=10",
+                        default=10, required=False)
+    parser.add_argument('--ctu_size', type=int,
+                        help="CTU size that is used for training NNLF, default=128",
+                        default=128, required=False)
+    parser.add_argument('--pad_size', type=int,
+                        help="Padding size across one side of the CTU, default=8",
+                        default=8, required=False)
+    parser.add_argument('--ModelPath', type=str,
+                        help="NNLF Models to be used by decoder when the encoder_cfg is RA",
+                        default="", required=False)
+    parser.add_argument('--encode_cfg', type=str,
+                        help="RA or AI, default=AI", default="AI", required=False)
+    parser.add_argument('--subsample_factor', type=int,
+                        help="Subsample factor in a frame, default=1, "+ \
+                            "totalCTUs/subsample_factor number of CTUs will be picked randomly from frame", default=1, 
+                        required=False)
+    args = parser.parse_args()
+    if (not args.ModelPath) and (args.encode_cfg == 'RA'):  # RA decoding needs the NN filter models
+        parser.error('--ModelPath should be provided when --encode_cfg=RA')
+
+    return vars(args)
+
+class VideoCaptureYUV:  # minimal sequential reader for planar YUV 4:2:0 files
+    def __init__(self, filename, size, bitdepth):  # size is (height, width)
+        self.height, self.width = size
+        self.bitdepth = bitdepth
+        self.f = open(filename, 'rb')  # kept open across read_raw() calls; position advances per frame
+        self.frame_len = int(self.width * self.height * 3 / 2)  # samples per 4:2:0 frame
+        self.data_type = np.uint8
+        if self.bitdepth > 8:  # >8-bit samples occupy 2 bytes each
+            self.frame_len *= 2
+            self.data_type = np.uint16
+        self.luma_shape = (self.height, self.width)
+        self.chroma_shape = (int(self.height/2), int(self.width/2))
+        self.cb_offset = self.height * self.width  # sample offset of Cb plane within a frame
+        self.cr_offset = self.height * self.width + int(self.height * self.width / 4)  # sample offset of Cr plane
+    def read_raw(self):  # read the next frame; returns (ok, y, u, v) with planes as 2-D arrays
+        try:                
+            raw = self.f.read(self.frame_len)
+            yuv = np.frombuffer(raw, dtype=self.data_type)
+            
+            
+            y = yuv[0 : self.cb_offset].reshape(self.luma_shape)
+            u = yuv[self.cb_offset : self.cr_offset].reshape(self.chroma_shape)
+            v = yuv[self.cr_offset : ].reshape(self.chroma_shape)
+        except Exception as e:  # a short/empty read fails reshape -> treated as end of sequence
+            print(str(e))
+            return False, None, None, None
+        return True,y,u,v
+
+#Padding size and block size of luma is 2 times of chroma
+def pad_n_ext2mul_blksz(y,u,v,ctu_sz,pad_sz):  # edge-pad planes and extend them to a multiple of the CTU size
+    pad_lh = math.ceil(y.shape[0]/(2*ctu_sz)) * 2 * ctu_sz + 4 * pad_sz  # target padded luma height
+    pad_lw = math.ceil(y.shape[1]/(2*ctu_sz)) * 2 * ctu_sz + 4 * pad_sz  # target padded luma width
+    pad_ch = int(pad_lh/2)
+    pad_cw = int(pad_lw/2)
+    y_pad = np.pad(y, pad_width=pad_sz*2, mode='edge')  # replicate border samples on all sides
+    u_pad = np.pad(u, pad_width=pad_sz, mode='edge')
+    v_pad = np.pad(v, pad_width=pad_sz, mode='edge')
+    
+    for i in range(y_pad.shape[0], pad_lh):  # replicate the last row until the target height is reached
+        y_pad = np.append(y_pad, y_pad[-1, :].reshape(1, y_pad.shape[1]), 0)
+    for i in range(y_pad.shape[1], pad_lw):  # replicate the last column until the target width is reached
+        y_pad = np.append(y_pad, y_pad[:, -1].reshape(y_pad.shape[0],1), 1)
+    
+    for i in range(u_pad.shape[0], pad_ch):  # same row extension for both chroma planes
+        u_pad = np.append(u_pad, u_pad[-1, :].reshape(1, u_pad.shape[1]), 0)
+        v_pad = np.append(v_pad, v_pad[-1, :].reshape(1, v_pad.shape[1]), 0)
+    for i in range(u_pad.shape[1], pad_cw):  # same column extension for both chroma planes
+        u_pad = np.append(u_pad, u_pad[:, -1].reshape(u_pad.shape[0],1), 1)
+        v_pad = np.append(v_pad, v_pad[:, -1].reshape(v_pad.shape[0],1), 1)
+
+    return y_pad, u_pad, v_pad
+
+def get_offsets(wd,ht,blkSize,subsamplingfactor):  # random subset of (y, x) block origins covering the frame
+    blocksX = math.ceil(wd/blkSize)
+    blocksY = math.ceil(ht/blkSize)
+    reqd_blocks = int(blocksX * blocksY / subsamplingfactor)  # keep 1/subsamplingfactor of all blocks
+    lst_x = list(range(0,wd, blkSize))
+    lst_y = list(range(0,ht, blkSize))
+
+    offset_list = [element for element in itertools.product(lst_y, lst_x)]  # every block origin in the grid
+    subs_off_list = random.sample(offset_list, reqd_blocks) 
+    
+    return subs_off_list
+
+def parse_dec_logs(dec_log_file):  # parse decoder console log into {poc: {poc, qp, slicetype}}
+    f = open(dec_log_file, 'r')  # NOTE(review): handle is never closed; consider a with-block
+    Lines = f.readlines()
+
+    frame_info_dict =  {}
+
+    for line in Lines:
+        if line.startswith('POC'):  # per-picture summary lines start with "POC"
+            words = line.split()
+            index_poc = words.index('POC')
+            index_qp = words.index('QP')
+            
+            slicetype = None
+            if "I-SLICE" in line:
+                slicetype = "I"
+            elif "B-SLICE" in line:
+                slicetype = "B"
+            else:  # P-slices are not expected in this workflow
+                print("Error: Unrecognized frame/slice type")
+                exit(-1)
+
+            poc_num = int(words[index_poc+1])  # value follows the "POC" token
+            frame_info_dict[poc_num] = { 'poc': poc_num,
+                                         'qp': int(words[index_qp+1]),
+                                         'slicetype': slicetype}
+
+    return frame_info_dict
+
+
+def pad_n_dump_ctus(args):  # slice recon + boundary-strength YUVs into padded CTU .npy files for training
+
+    recon_basename = os.path.splitext(os.path.basename(args['DumpBasename']+"_rec_before_dbf.yuv"))[0]
+    recon_basename = "_".join(recon_basename.split("_")[:-3])  # drop the trailing dump-suffix fields
+    #CTU size and padding size taken here are for Chroma
+    ctu_sz = int(args['ctu_size']/2)
+    pad_sz = int(args['pad_size']/2)
+    bitdepth = args['bit_depth']
+
+    output_dir = args['out_dir']
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+   
+    [width, height] = list(map(int, recon_basename.split("_")[1].split("x")))  # assumes name_WxH_... naming
+    img_size = (height, width)
+    sub_factor = args['subsample_factor']
+    print("Subfactor", sub_factor)
+    cap_recon = VideoCaptureYUV(args['DumpBasename']+"_rec_before_dbf.yuv", img_size, bitdepth)  # recon before deblocking
+    cap_bs = VideoCaptureYUV(args['DumpBasename']+"_bs.yuv", img_size, bitdepth)  # boundary-strength planes
+    dec_log = args['decoder_log']
+    frame_info_dict = parse_dec_logs(dec_log)
+
+    BS_Lut = {0:0,512:1,1023:2}  # raw BS sample value -> 2-bit class index
+
+        
+
+    frm_cnt = 0
+    while 1:  # iterate frames until either reader hits end of file
+        ret, y, u, v = cap_recon.read_raw()
+        
+        if not ret:
+            print("Reached end of sequence, num of frames =", frm_cnt)
+            break
+        ret, bsy, bsu, bsv = cap_bs.read_raw()
+        if not ret:
+            print("Reached end of sequence, num of frames =", frm_cnt)
+            break
+
+        y_pad, u_pad, v_pad = pad_n_ext2mul_blksz(y, u, v, ctu_sz, pad_sz)
+        bs_yp, bs_up, bs_vp = pad_n_ext2mul_blksz(bsy, bsu, bsv, ctu_sz, pad_sz)
+        offset_list = get_offsets(width, height, 2 * ctu_sz, sub_factor)  # random CTU origins (luma units)
+
+        # JVET-CTC for AI will only encode frames with interval 8
+        if args['encode_cfg'] == 'AI':
+            poc = frame_info_dict[frm_cnt]['poc'] * 8
+        else:
+            poc = frame_info_dict[frm_cnt]['poc']
+            #Skip I frame for RA 
+            if frame_info_dict[frm_cnt]['slicetype'] == "I":
+                frm_cnt += 1
+                continue
+
+
+        for offset in offset_list:  # crop one padded CTU per offset and dump it
+            y_y_startoffset = offset[0]
+            y_y_endoffset   = offset[0] + 2 * (ctu_sz + 2 * pad_sz)
+            y_x_startoffset = offset[1]
+            y_x_endoffset   = offset[1] + 2 * (ctu_sz + 2 * pad_sz)
+            uv_y_startoffset = int(offset[0]/2)
+            uv_y_endoffset   = int(offset[0]/2) + ctu_sz + 2 * pad_sz
+            uv_x_startoffset = int(offset[1]/2)
+            uv_x_endoffset   = int(offset[1]/2) + ctu_sz + 2 * pad_sz
+
+            map_cu = [np.uint16((y_pad[y_y_startoffset:y_y_endoffset:2,   y_x_startoffset:y_x_endoffset:2] << 6)+np.vectorize(BS_Lut.get)(bs_yp[y_y_startoffset:y_y_endoffset:2, y_x_startoffset:y_x_endoffset:2])),   # plane 0: even/even luma phase, sample<<6 | BS class
+                        y_pad[y_y_startoffset:y_y_endoffset:2, y_x_startoffset+1:y_x_endoffset:2], 
+                        y_pad[y_y_startoffset+1:y_y_endoffset:2,   y_x_startoffset:y_x_endoffset:2], 
+                        y_pad[y_y_startoffset+1:y_y_endoffset:2, y_x_startoffset+1:y_x_endoffset:2]]
+            map_cu.extend([np.uint16((u_pad[uv_y_startoffset:uv_y_endoffset, uv_x_startoffset:uv_x_endoffset] << 6)+ np.vectorize(BS_Lut.get)(bs_up[uv_y_startoffset:uv_y_endoffset, uv_x_startoffset:uv_x_endoffset])),np.uint16((v_pad[uv_y_startoffset:uv_y_endoffset, uv_x_startoffset:uv_x_endoffset] << 6)+ np.vectorize(BS_Lut.get)(bs_vp[uv_y_startoffset:uv_y_endoffset, uv_x_startoffset:uv_x_endoffset]))])  # planes 4,5: chroma packed the same way
+            
+            out_file_name = "_".join([recon_basename, 
+                                        str(frame_info_dict[frm_cnt]['qp']),
+                                        str(poc),
+                                        str(y_x_startoffset)+ "-" + str(y_y_startoffset)]) + ".npy"
+
+            np.save(os.path.join(output_dir, out_file_name), map_cu)
+        frm_cnt += 1
+    return
+
+def generate_dec_cmd(decexe, input, extraoptions=[], output=None, console_log=""):
+    dec_cmd_list = [decexe, "-b", input]
+    for extraoption in extraoptions:
+        dec_cmd_list.append(extraoption)
+    if output:
+        dec_cmd_list.extend(["-o", output])
+    if console_log:
+        dec_cmd_list.extend([">", console_log, "2>&1"])
+    return " ".join(dec_cmd_list)
+
+if __name__=="__main__":  # optionally decode the stream (dumping recon/BS yuvs), then extract CTUs
+    args = parse_arguments()
+    if args['dec_exe']:
+        if args['encode_cfg'] == 'RA':  # RA decoding runs the NN loop filter, so model path is passed
+            dec_cmd = generate_dec_cmd(args['dec_exe'], args['stream'], 
+                                output = "/dev/null",
+                                extraoptions=["--DumpBasename="+args['DumpBasename'], "--LCModelPath="+args['ModelPath']],
+                                console_log=args['decoder_log'])
+        else:
+            dec_cmd = generate_dec_cmd(args['dec_exe'], args['stream'], 
+                                output = "/dev/null", 
+                                extraoptions=["--DumpBasename="+args['DumpBasename']],
+                                console_log=args['decoder_log'])
+        status = os.system(dec_cmd)
+        if status:  # non-zero shell status means the decoder failed
+            print("Failed for cmd "+ dec_cmd, file=stderr)
+            exit(-1)
+    
+    if args['dump_ctus']:
+        pad_n_dump_ctus(args)
+    
+
+    if args['del_recon_bs']:
+        os.system("rm " + args['DumpBasename']+"_*")  # NOTE(review): POSIX-only cleanup via shell glob
+        # os.remove(args['DumpBasename']+"_rec_before_dbf.yuv")
+    print("Done")
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/2_generate_compression_data/training_data_gen_parallel.py b/training/training_scripts/Nn_Filtering_Set_LC/2_generate_compression_data/training_data_gen_parallel.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d88582da7eea822bfe848531f08dbb8fd37aaec
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/2_generate_compression_data/training_data_gen_parallel.py
@@ -0,0 +1,89 @@
+import os
+import argparse
+from multiprocessing import Pool
+from sys import stderr
+import sys
+
+def parse_arguments():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--dec_exe', type=str,
+                        help="Path to the decoder executable", required=False,
+                        default="./DecoderApp")
+    parser.add_argument('--stream_dir', type=str,
+                        help="Path to the input streams directory", required=True)
+    parser.add_argument('--consol_dir', type=str, 
+                        help="Path to dump the console logs (Can't be same as out_dir)", 
+                        required=True)
+    parser.add_argument('--out_dir', type=str,
+                        help="Path to the output directory",
+                        required=True)
+    parser.add_argument('--num_cores', type=int,
+                        help="Number of training_data_gen.py runs to be triggered, default=1",
+                        default=1, required=False)
+    parser.add_argument('--subsample_factor', type=int,
+                        help="Subsample factor in a frame, default=1, "+ \
+                            "totalCTUs/subsample_factor number of CTUs will be picked randomly from frame", default=1, 
+                        required=False)
+    parser.add_argument('--encode_cfg',type=str,help="Type of streams in --streams_dir AI or RA, default is AI",default='AI',required=False)
+    parser.add_argument('--ModelPath',type=str,
+                        help="NNLF Models used by decoder when --encoder_cfg is RA",default=None,required=False)
+
+    args = parser.parse_args();
+
+    if (not args.ModelPath) and (args.encode_cfg == 'RA'):
+        parser.error('--ModelPath should be provided when --encode_cfg=RA')
+    
+    return vars(args)
+
+def run_cmd(inp_cmd):  # pool worker: run one shell command; log failures, never raise
+    status = os.system(inp_cmd)
+    if status:  # non-zero exit status from the shell
+        print("Failed for cmd "+ inp_cmd, file=stderr)
+    return
+
+def run_multiple_cmds(list_inp_cmd, num_cores):  # run the commands concurrently on num_cores workers
+    pool = Pool(processes=num_cores)
+    pool.map_async(run_cmd, list_inp_cmd)  # async dispatch; join() below waits for completion
+    pool.close()
+    pool.join() 
+    return
+
+if __name__=="__main__":  # build one training_data_gen.py command per bitstream, then run in parallel
+
+    args = parse_arguments()
+    
+    stream_dir = args['stream_dir']
+    cmd_list = []
+    for stream_file in os.listdir(stream_dir):
+        if os.path.splitext(stream_file)[1] not in [".266", ".bit"]:  # only VVC bitstreams
+            continue
+        stream_file_split = stream_file.split("_")
+        qp = stream_file_split[1][:-2]  # strip the "qp" suffix, e.g. "20qp" -> "20"
+        inter_file_prefix = "_".join([stream_file_split[2], stream_file_split[3], str(qp)])  # assumes TYPE_QPqp_name_... naming from encode_streams.py
+        interm_dir = args['consol_dir']
+        if not os.path.exists(interm_dir):
+            os.makedirs(interm_dir)
+        
+        if not os.path.exists(args['out_dir']):
+            os.makedirs(args['out_dir'])
+
+        dump_basename = os.path.join(interm_dir, inter_file_prefix)
+        console_file = os.path.join(interm_dir, inter_file_prefix + "_console.txt")
+        dec_log_file = os.path.join(interm_dir, inter_file_prefix + "_dec_log.txt")
+        dump_ctus = 1
+        
+        enc_cmd_list = ["python",  os.path.join(os.path.dirname(sys.argv[0]), "training_data_gen.py"),   # script lives next to this one
+                    "--dec_exe", args['dec_exe'],
+                    "--dump_ctus", str(dump_ctus),
+                    "--stream", os.path.join(stream_dir, stream_file),
+                    "--DumpBasename", dump_basename,
+                    "--out_dir", args['out_dir'],
+                    "--subsample_factor", str(args['subsample_factor']),
+                    "--decoder_log",dec_log_file,
+                    "--encode_cfg",args['encode_cfg']]
+        if args['ModelPath']:  # only forwarded when supplied (required for RA)
+            enc_cmd_list.extend(["--ModelPath",args['ModelPath']])
+        cmd = " ".join(enc_cmd_list)
+
+        cmd_list.append(cmd + " > " + console_file)  # each worker's stdout goes to its own console log
+    run_multiple_cmds(cmd_list, args['num_cores'])
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/3_training/AI_LC-CPDF_model0_cfg.json b/training/training_scripts/Nn_Filtering_Set_LC/3_training/AI_LC-CPDF_model0_cfg.json
new file mode 100644
index 0000000000000000000000000000000000000000..985d093e8f382e36307c952b82a4062ccd37198d
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/3_training/AI_LC-CPDF_model0_cfg.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a04d81203a9bb034e6bd5160c31dccd431f6a401b81f58d3307a921818dd43e7
+size 1172
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/3_training/AI_LC-CPDF_model1_cfg.json b/training/training_scripts/Nn_Filtering_Set_LC/3_training/AI_LC-CPDF_model1_cfg.json
new file mode 100644
index 0000000000000000000000000000000000000000..f3c69fcd0ce0c308a12e93c5b49015fb07a9fefc
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/3_training/AI_LC-CPDF_model1_cfg.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f3394e1fb2a7bfd3414e44091e1f950d125bef588c0a096cf5916c454f261df
+size 1172
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/3_training/AI_LC-baseline_cfg.json b/training/training_scripts/Nn_Filtering_Set_LC/3_training/AI_LC-baseline_cfg.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d4f2b40e3be9d9229e9abff0993b8bf2c2e0da6
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/3_training/AI_LC-baseline_cfg.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af1f2f8239c978a455c1b440498055e151dc1279e686a571de04562e1b36ab74
+size 863
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/3_training/NNLF_data_loader.py b/training/training_scripts/Nn_Filtering_Set_LC/3_training/NNLF_data_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..3204f89605be3d6144b96130877340fdedce7dba
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/3_training/NNLF_data_loader.py
@@ -0,0 +1,130 @@
+import math
+import os
+import random
+import numpy as np
+from tensorflow.keras.utils import Sequence
+
+class X0140_data_loader(Sequence):
+
+    def __init__(self, x_set, y_set, batch_size=16, luma_patch_size=128, is_train_data=True, augment_data=False):
+        self.x, self.y = x_set, y_set
+        self.batch_size = batch_size
+        self.patch_size = luma_patch_size
+        self.is_train_data = is_train_data
+        self.augment_data = augment_data
+        # Boundary Strength mapping
+        self.BS_Lut = np.array([0,512,1023])
+
+    def __len__(self):
+        return math.ceil(len(self.x) / self.batch_size)
+
+    def parse_recon_file(self, filename, flip=-1, rot=0):
+        input = np.load(filename)
+        
+        # Extract BS Info and Recon values
+        input = np.append(input, [(input[0,:,:] & 0x3F),(input[4,:,:] & 0x3F),(input[5,:,:] & 0x3F)], axis=0)
+        input[0,:,:] = (input[0,:,:] >> 6)
+        input[4,:,:] = (input[4,:,:] >> 6)
+        input[5,:,:] = (input[5,:,:] >> 6)
+
+        # Map BS to actual values used in VTM-NNVC.
+        input[6,:,:] = self.BS_Lut[input[6,:,:]]
+        input[7,:,:] = self.BS_Lut[input[7,:,:]]
+        input[8,:,:] = self.BS_Lut[input[8,:,:]]
+
+        input = np.float32(input)
+        input = input / 1023.0
+        test_basename = os.path.splitext(os.path.basename(filename))[0]
+        qp = np.float32(test_basename.split("_")[3])
+        qpin = np.power(2, (qp-42.)/6.)
+        qpPlane = np.full_like(input[0,:,:], qpin)
+        input = np.insert(input, 6, qpPlane, axis=0)
+
+        input = np.moveaxis(input, 0, 2)
+
+        if flip==-1 and rot==0:
+            return input
+
+        # Compose channel Y to single plane and rotate, flip
+        Yrec = np.empty((2*input.shape[0], 2*input.shape[1]))
+        Yrec[::2,::2] = input[:,:,0]
+        Yrec[::2, 1::2] = input[:,:,1]
+        Yrec[1::2, ::2] = input[:,:,2]
+        Yrec[1::2, 1::2] = input[:,:,3]
+
+        if(-1 != flip):
+            Yrec = np.flip(Yrec, axis = flip)
+            input[:,:,4:] = np.flip(input[:,:,4:], axis = flip)
+
+        input[:,:,4:] = np.rot90(input[:,:,4:], k = rot)
+        Yrec = np.rot90(Yrec, k = rot)
+
+        input[:,:,0] = Yrec[::2,::2]
+        input[:,:,1] = Yrec[::2, 1::2]
+        input[:,:,2] = Yrec[1::2, ::2]
+        input[:,:,3] = Yrec[1::2, 1::2]
+
+
+        return input
+
+    def parse_orig_yuv(self,yuv_file, flip=-1, rot=0):
+
+        datatype = np.uint16
+        shift = 0
+        offset_multiplier = 2
+
+        if "DIV2K" in yuv_file:
+            datatype = np.uint8
+            shift = 2
+            offset_multiplier = 1
+
+        num_luma_pixels_in_patch = self.patch_size*self.patch_size
+        Yarr = np.fromfile(yuv_file, dtype=datatype, count=num_luma_pixels_in_patch)
+        Yarr = Yarr.reshape(self.patch_size, self.patch_size)
+        Uarr = np.fromfile(yuv_file, dtype=datatype, count=(num_luma_pixels_in_patch)>>2, offset=offset_multiplier*num_luma_pixels_in_patch)
+        Uarr = Uarr.reshape(self.patch_size>>1, self.patch_size>>1)
+        Varr = np.fromfile(yuv_file, dtype=datatype, count=(num_luma_pixels_in_patch)>>2, offset=(offset_multiplier*((num_luma_pixels_in_patch*5)>>2)))
+        Varr = Varr.reshape(self.patch_size>>1, self.patch_size>>1)
+
+        # Flip and rotate
+        if(-1 != flip):
+            Yarr = np.flip(Yarr, axis = flip)
+            Uarr = np.flip(Uarr, axis = flip)
+            Varr = np.flip(Varr, axis = flip)
+
+        Yarr = np.rot90(Yarr, k = rot)
+        Uarr = np.rot90(Uarr, k = rot)
+        Varr = np.rot90(Varr, k = rot)
+
+        Yarr_1 = Yarr[::2,::2]
+        Yarr_2 = Yarr[::2, 1::2]
+        Yarr_3 = Yarr[1::2, ::2]
+        Yarr_4 = Yarr[1::2, 1::2]
+
+        orig_yuv = np.stack((Yarr_1,Yarr_2,Yarr_3,Yarr_4,Uarr,Varr))
+        orig_yuv = np.float32(np.uint16(orig_yuv)<<shift)/1023.
+
+        orig_yuv = np.moveaxis(orig_yuv, 0, 2)
+
+        return orig_yuv
+
+    def __getitem__(self, idx):
+        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
+        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
+
+        # data augmentation
+        if self.is_train_data and self.augment_data:
+        # 0 = 0 deg, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg
+            rot_choice = np.random.choice([0,1,2,3], self.batch_size)
+        #1 = Horizontal Flip  -1 = no flip, #vertical flip  is redundant with rotation combinations
+            flip_choice = np.random.choice([1,-1], self.batch_size)
+            data = np.array([self.parse_recon_file(file_name, f, r) for file_name,f,r in zip(batch_x,flip_choice,rot_choice)]), np.array([self.parse_orig_yuv(file_name_gt, f, r) for file_name_gt,f,r in zip(batch_y,flip_choice,rot_choice)])
+        else:
+            data = np.array([self.parse_recon_file(file_name) for file_name in batch_x]), np.array([self.parse_orig_yuv(file_name_gt) for file_name_gt in batch_y])
+
+        return data
+
+    def __on_epoch_end__(self):
+        c = list(zip(self.x, self.y))
+        random.shuffle(c)
+        self.x, self.y = zip(*c)
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/3_training/NNLF_models.py b/training/training_scripts/Nn_Filtering_Set_LC/3_training/NNLF_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ab23dd1511e14c9ebb3b0d03d23cc3ae74e344f
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/3_training/NNLF_models.py
@@ -0,0 +1,1024 @@
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Conv2D
+from tensorflow.keras.layers import DepthwiseConv2D
+from tensorflow.keras.layers import Add
+from tensorflow.keras.layers import Conv1D
+from tensorflow.keras.layers import LeakyReLU
+from tensorflow.keras.layers import Input
+from tensorly.decomposition import parafac
+import numpy as np
+
+
+class modelParamsError(Exception):
+    """Raised when the model-parameter dictionary is missing entries or is inconsistent."""
+    pass
+
+def weighted_MSE_1211(y_true, y_pred):
+    # Weighted MSE over the 6-plane output: channels 0-3 are the four luma
+    # sub-planes (weighted 12x), channel 4 is Cb and channel 5 is Cr.
+    mse_fn = tf.keras.losses.MeanSquaredError()
+    luma_term = mse_fn(y_pred[:, :, :, 0:4], y_true[:, :, :, 0:4])
+    cb_term = mse_fn(y_pred[:, :, :, 4], y_true[:, :, :, 4])
+    cr_term = mse_fn(y_pred[:, :, :, 5], y_true[:, :, :, 5])
+    return 12 * luma_term + cb_term + cr_term
+
+def weighted_MSE_411(y_true, y_pred):
+    # Weighted MSE over the 6-plane output: channels 0-3 are the four luma
+    # sub-planes (weighted 4x), channel 4 is Cb and channel 5 is Cr.
+    mse_fn = tf.keras.losses.MeanSquaredError()
+    luma_term = mse_fn(y_pred[:, :, :, 0:4], y_true[:, :, :, 0:4])
+    cb_term = mse_fn(y_pred[:, :, :, 4], y_true[:, :, :, 4])
+    cr_term = mse_fn(y_pred[:, :, :, 5], y_true[:, :, :, 5])
+    return 4 * luma_term + cb_term + cr_term
+
+def weighted_MSE_loss_default(y_true, y_pred):
+    # Default training loss: same 12:1:1 luma/Cb/Cr weighting as
+    # weighted_MSE_1211; used when compiling the models below.
+    mse_fn = tf.keras.losses.MeanSquaredError()
+    luma_term = mse_fn(y_pred[:, :, :, 0:4], y_true[:, :, :, 0:4])
+    cb_term = mse_fn(y_pred[:, :, :, 4], y_true[:, :, :, 4])
+    cr_term = mse_fn(y_pred[:, :, :, 5], y_true[:, :, :, 5])
+    return 12 * luma_term + cb_term + cr_term
+
+class InpVal_model():
+    """Pass-through model: crops the (72,72,10) input to its central (64,64,6) region."""
+    def __init__(self):
+        self.input_shape = (72,72,10)
+
+    def create_model(self):
+        net_in = Input(shape=self.input_shape, name="input")
+        cropped = net_in[:, 4:68, 4:68, 0:6]  # drop the 4-pixel border and the auxiliary input planes
+        model = Model(inputs=net_in, outputs=cropped)
+        model.compile(optimizer=keras.optimizers.Adam(), loss=weighted_MSE_loss_default, experimental_run_tf_function=False)
+        return model
+
+class X0140_model():
+    def __init__(self, model_params, training=False, training_params=None):
+        """Configure the x140 filter network (widths M/K, depth N, optional CP/DSC layer maps and reference model)."""
+        # compile the model if training is True
+        self.training = training
+        # Number of large activation channels
+        self.M = model_params['M']
+        # Number of channels out of hidden layer
+        self.K = model_params['K']
+        # Number of hidden layers
+        self.N = model_params['N']
+
+        #input shape to network
+        self.input_shape = (72,72,10)
+
+        # Output number of channels, will differ based on whether
+        # Luma only Model
+        # Chroma only Model
+        # Combined Model
+        if (model_params['luma_output'] == True) and (model_params['chroma_output'] == True):
+            self.num_output_channels = 6
+        elif (model_params['luma_output'] == True) and (model_params['chroma_output'] == False):
+            self.num_output_channels = 4
+        elif (model_params['luma_output'] == False) and (model_params['chroma_output'] == True):
+            self.num_output_channels = 2
+        else:
+            raise modelParamsError("At least one of luma_output / chroma_output must be True")
+
+        if training == True:
+            if 'learning_rate' in training_params:
+                self.lR = training_params['learning_rate']
+            else:
+                self.lR = None
+
+        # Model file from which weights need to be initialized
+        # Valid only for CP and baseline network
+        if 'ref_model' in model_params:
+            self.ref_model_file = model_params['ref_model']
+            if 'ref_model_type' in model_params:
+                self.ref_model_type =  model_params['ref_model_type']
+            else:
+                raise modelParamsError("ref_model_type is required when ref_model is given")
+
+        else:
+            self.ref_model_file = ""
+
+        # Parameter for the LeakyReLU activation layers in the network
+        self.alpha = 0.2
+        self.CP_layer_cfg = {}
+        self.DSC_layer_cfg = {}
+
+        if 'CP_layers' in model_params:
+            self.CP_layer_cfg = model_params['CP_layers']
+            self.CP_layer_cfg = {int(k):int(v) for k,v in self.CP_layer_cfg.items()}
+
+        if 'DSC_layers' in model_params:
+            # Normalise JSON string keys to int so `idx in self.DSC_layer_cfg` matches int layer indices.
+            self.DSC_layer_cfg = {int(k): v for k, v in model_params['DSC_layers'].items()}
+
+        if set(self.CP_layer_cfg) & set(self.DSC_layer_cfg):
+            # A hidden-layer index must not be configured as both CP and DSC.
+            raise modelParamsError("Hidden layers cannot be both CP and DSC")
+
+    def hidden_layer(self,idx, input, weights=None):
+        """Plain hidden block: 1x1 expand to M -> LeakyReLU -> 1x1 reduce to K -> 3x3 conv; optionally seeded from `weights`."""
+        layer1 = Conv2D(filters=self.M, kernel_size=1, name="hidden_"+str(idx)+ "_1")
+        x = layer1(input)
+        if weights is not None:
+            layer1.set_weights(weights[0])
+
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_"+str(idx))(x)
+        layer2 = Conv2D(filters=self.K, kernel_size=1,name="hidden_"+str(idx)+ "_2")
+        x = layer2(x)
+
+        if weights is not None:
+            layer2.set_weights(weights[1])
+
+        layer3 = Conv2D(filters=self.K, kernel_size=3, padding='same', name="hidden_"+str(idx)+ "_3")
+        out = layer3(x)
+
+        if weights is not None:
+            layer3.set_weights(weights[2])
+
+        return out
+
+    def hidden_layer_CP(self, idx, input, last_layer, wts=None, wts_prev=None):
+        """CP-factorised hidden block (rank R = CP_layer_cfg[idx]); fuses/propagates decomposed ref weights via wts/wts_prev."""
+        wts_for_next = None
+
+        R = self.CP_layer_cfg[idx]
+
+        layer1 = Conv2D(filters=self.M, kernel_size=1,name="hidden_"+str(idx)+ "_1")
+        x = layer1(input)
+
+        if wts is not None:
+            if wts_prev is None:
+                layer1.set_weights(wts[0])
+            else:
+                # Fuse the 1x1 weights carried over from the previous CP block into layer1.
+                b_prev = wts_prev[1].reshape(-1,1)
+                prev_wt_matrix = np.moveaxis(wts_prev[0][0,0,:,:],0,1)
+                cur_wt_matrix = np.moveaxis(wts[0][0][0,0,:,:],0,1)
+                b_cur = wts[0][1].reshape(-1,1)
+
+                wts_prod = np.matmul(cur_wt_matrix, prev_wt_matrix)
+                bias_fuse = np.add(np.matmul(cur_wt_matrix, b_prev), b_cur)
+
+                wt_matrix = np.moveaxis(wts_prod, 0, 1)
+                layer1.set_weights([wt_matrix[np.newaxis, np.newaxis, :, :], bias_fuse.flatten()])
+
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_"+str(idx))(x)
+
+        layer2 = Conv2D(filters=R, kernel_size=1, name="hidden_"+str(idx)+ "_2")
+        x = layer2(x)
+
+        if wts is not None:
+            cp_factors = parafac(np.moveaxis(wts[2][0],2,0), R,n_iter_max=10000, linesearch=True)
+            # Fold the first CP factor into the preceding 1x1 conv (weights and bias).
+            cur_wt_matrix = np.moveaxis(cp_factors[1][0], 0, 1)
+            prev_wt_matrix = np.moveaxis(wts[1][0][0,0,:,:], 0, 1)
+
+            b_prev = wts[1][1].reshape(-1, 1)
+            b_cur = np.matmul(cur_wt_matrix, b_prev)
+
+            wts_prod = np.matmul(cur_wt_matrix, prev_wt_matrix)
+            wt_matrix = np.moveaxis(wts_prod,0,1)
+            layer2.set_weights([wt_matrix[np.newaxis, np.newaxis, :,:],b_cur.flatten()])
+
+        # Grouped convolution is not supported currently on TensorFlow CPU.
+        # Separable convolutions are realized using grouped convolution to make training faster,
+        # as separable convolution layers are sub-optimally implemented on CUDA GPUs.
+        #
+        if tf.test.is_gpu_available():
+            layer3b = Conv2D(filters=R, kernel_size=(3,1), use_bias=False, groups=R, padding='same', name="hidden_"+str(idx)+ "_3b")
+            x = layer3b(x)
+            layer3c = Conv2D(filters=R, kernel_size=(1,3), use_bias=False,groups=R,padding='same', name="hidden_"+str(idx)+ "_3c")
+            x = layer3c(x)
+
+            if wts is not None:
+                layer3b.set_weights([cp_factors[1][1][:,np.newaxis,np.newaxis,:]])
+                layer3c.set_weights([cp_factors[1][2][np.newaxis,:,np.newaxis,:]])
+        else:
+            layer3b = DepthwiseConv2D(kernel_size=(3,1), use_bias=False, padding='same', name="hidden_"+str(idx)+ "_3b")
+            x = layer3b(x)
+            layer3c = DepthwiseConv2D(kernel_size=(1,3), use_bias=False,padding='same', name="hidden_"+str(idx)+ "_3c")
+            x = layer3c(x)
+
+            if wts is not None:
+                layer3b.set_weights([cp_factors[1][1][:,np.newaxis,:,np.newaxis]])
+                layer3c.set_weights([cp_factors[1][2][np.newaxis,:,:,np.newaxis]])
+
+        if last_layer:
+            layer3d = Conv2D(filters=self.K, kernel_size=1, name="hidden_"+str(idx)+ "_3d")
+            x = layer3d(x)
+
+            if wts is not None:
+                layer3d.set_weights([np.moveaxis(cp_factors[1][3],0,1)[np.newaxis, np.newaxis, :,:], wts[2][1]])
+
+        else:
+            # Not the last layer: the closing 1x1 factor is fused into the next block instead.
+            if wts is not None:
+                wts_for_next = [np.moveaxis(cp_factors[1][3],0,1)[np.newaxis, np.newaxis, :,:], wts[2][1]]
+        return x,wts_for_next
+
+    def hidden_layer_DSC(self,idx, input, weights=None):
+        """Depthwise-separable hidden block: 1x1 expand -> LeakyReLU -> 1x1 reduce -> depthwise 3x3 -> pointwise 1x1."""
+        layer1 = Conv2D(filters=self.M, kernel_size=1, name="hidden_"+str(idx)+ "_1")
+        x = layer1(input)
+        if weights is not None:
+            layer1.set_weights(weights[0])
+
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_"+str(idx))(x)
+        layer2 = Conv2D(filters=self.K, kernel_size=1,name="hidden_"+str(idx)+ "_2")
+        x = layer2(x)
+
+        if weights is not None:
+            layer2.set_weights(weights[1])
+
+        if tf.test.is_gpu_available():
+            layer3_a = Conv2D(filters=self.K, kernel_size=(3,3), groups=self.K, padding='same', name="hidden_"+str(idx)+ "_3a")
+        else:
+            layer3_a = DepthwiseConv2D(kernel_size=(3,3), padding='same', name="hidden_"+str(idx)+ "_3a")
+        # NOTE(review): the 3x3/1x1 pair below is never seeded from `weights` — confirm intended.
+        x = layer3_a(x)
+
+        layer3_b = Conv2D(filters=self.K,kernel_size=1, padding='same', name="hidden_"+str(idx)+ "_3b")
+        out = layer3_b(x)
+
+        return out
+
+    def create_x140_model_from_ref(self):
+        """Build the x140 model, initialising weights from a baseline reference model (CP layers get decomposed/fused weights)."""
+        ref_model = keras.models.load_model(self.ref_model_file, compile=False)
+        ref_layer_name = []
+        ref_model_wts = []
+        for layer in ref_model.layers:
+            if len(layer.get_weights()) > 0:
+                ref_layer_name.append(layer.name)
+                ref_model_wts.append(layer.get_weights())
+        w_prev = None  # 1x1 weights carried from a preceding CP layer (None until one is built)
+        last_wt_idx = 0
+        input = Input(shape=self.input_shape, name="input_1")
+
+        conv1 = Conv2D(filters=self.M, kernel_size=3,padding='same',name="conv1")
+        x = conv1(input)
+
+        conv1.set_weights(ref_model.get_layer(ref_layer_name[last_wt_idx]).get_weights())
+        last_wt_idx = last_wt_idx+1
+
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_pre")(x)
+
+        # the first hidden layer has a slightly different structure
+        hidden_0_1 = Conv2D(filters=self.M, kernel_size=1,name="hidden_0_1")
+        x = hidden_0_1(x)
+        hidden_0_1.set_weights(ref_model.get_layer(ref_layer_name[last_wt_idx]).get_weights())
+        last_wt_idx = last_wt_idx+1
+
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_0")(x)
+        if 0 in self.CP_layer_cfg:
+            R = self.CP_layer_cfg[0]
+            hidden_0_2 = Conv2D(filters=R, kernel_size=1,name="hidden_0_2")
+            x = hidden_0_2(x)
+            wts_prev = np.moveaxis(ref_model_wts[last_wt_idx][0][0,0,:,:], 0, 1)
+            bias_prev = ref_model_wts[last_wt_idx][1]
+            last_wt_idx = last_wt_idx + 1
+
+            cp_factors = parafac(np.moveaxis(ref_model_wts[last_wt_idx][0],2,0), rank=R, n_iter_max=10000, linesearch=True)
+            # Fuse the first CP factor into the preceding 1x1 conv.
+            cur_wt_matrix = np.moveaxis(cp_factors[1][0], 0, 1)
+
+            wts_cur = np.moveaxis(np.matmul(cur_wt_matrix, wts_prev), 0, 1)[np.newaxis,np.newaxis,:,:]
+            bias_cur = np.matmul(cur_wt_matrix, bias_prev.reshape(-1,1)).flatten()
+
+            hidden_0_2.set_weights([wts_cur,bias_cur])
+        else:
+            hidden_0_2 = Conv2D(filters=self.K, kernel_size=1,name="hidden_0_2")
+            x = hidden_0_2(x)
+
+            hidden_0_2.set_weights(ref_model.get_layer(ref_layer_name[last_wt_idx]).get_weights())
+            last_wt_idx = last_wt_idx+1
+
+        if 0 in self.CP_layer_cfg:
+            R = self.CP_layer_cfg[0]
+            cp_factors = parafac(np.moveaxis(ref_model.get_layer(ref_layer_name[last_wt_idx]).get_weights()[0],2,0), rank=R, n_iter_max=10000, linesearch=True)
+
+            if tf.test.is_gpu_available():
+                layer3b = Conv2D(filters=R,kernel_size=(3,1), use_bias=False, groups=R, padding='same', name="hidden_0"+"_3b")
+                x = layer3b(x)
+                layer3b.set_weights([cp_factors[1][1][:,np.newaxis,np.newaxis,:]])
+
+                layer3c = Conv2D(filters=R,kernel_size=(1,3), use_bias=False, groups=R, padding='same', name="hidden_0"+"_3c")
+                x = layer3c(x)
+                layer3c.set_weights([cp_factors[1][2][np.newaxis,:,np.newaxis,:]])
+
+            else:
+                layer3b = DepthwiseConv2D(kernel_size=(3,1), use_bias=False, padding='same', name="hidden_0"+"_3b")
+                x = layer3b(x)
+                layer3b.set_weights([cp_factors[1][1][:,np.newaxis,:,np.newaxis]])
+
+                layer3c = DepthwiseConv2D(kernel_size=(1,3), use_bias=False,padding='same', name="hidden_0"+"_3c")
+                x = layer3c(x)
+                layer3c.set_weights([cp_factors[1][2][np.newaxis,:,:,np.newaxis]])
+
+            w_prev = [np.moveaxis(cp_factors[1][3],0,1)[np.newaxis, np.newaxis, :,:], ref_model.get_layer(ref_layer_name[last_wt_idx]).get_weights()[1]]
+        elif 0 in self.DSC_layer_cfg:
+            # NOTE(review): reference weights are not mapped onto the DSC layers here — they start untrained; confirm intended.
+            if tf.test.is_gpu_available():
+                layer3a = Conv2D(filters=self.K, kernel_size=(3,3), groups=self.K, padding='same', name="hidden_0"+ "_3a")
+            else:
+                layer3a = DepthwiseConv2D(kernel_size=(3,3), padding='same', name="hidden_0"+ "_3a")
+
+            x = layer3a(x)
+
+            layer3b = Conv2D(filters=self.K,kernel_size=1, padding='same', name="hidden_0"+ "_3b")
+            x = layer3b(x)
+
+        else:
+
+            hidden_0_3 = Conv2D(filters=self.K, kernel_size=3, padding='same', name="hidden_0_3")
+            x = hidden_0_3(x)
+            hidden_0_3.set_weights(ref_model.get_layer(ref_layer_name[last_wt_idx]).get_weights())
+
+        last_wt_idx = last_wt_idx+1
+
+        for idx in range(1, self.N):
+            wts = []
+            wts.append(ref_model.get_layer(ref_layer_name[last_wt_idx]).get_weights())
+            wts.append(ref_model.get_layer(ref_layer_name[last_wt_idx + 1]).get_weights())
+            wts.append(ref_model.get_layer(ref_layer_name[last_wt_idx + 2]).get_weights())
+
+            if idx in self.CP_layer_cfg:
+                last_layer = False
+                if idx == (self.N - 1):
+                    last_layer = True
+                x, w_prev = self.hidden_layer_CP(idx, x, last_layer, wts=wts, wts_prev=w_prev)
+            elif idx in self.DSC_layer_cfg:
+                x = self.hidden_layer_DSC(idx, x, wts)
+            else:
+                x = self.hidden_layer(idx, x, wts)
+
+            last_wt_idx = last_wt_idx+3
+
+        output_layer = Conv2D(filters=self.num_output_channels, kernel_size=3, padding='same', name="output_layer")
+        x = output_layer(x)
+        output_layer.set_weights(ref_model.get_layer(ref_layer_name[last_wt_idx]).get_weights())
+        last_wt_idx = last_wt_idx + 1
+
+        res_sliced = x[:,4:68, 4:68, 0:6]
+        input_sliced =  input[:,4:68, 4:68, 0:6]
+
+        output = Add(name='add')([res_sliced,input_sliced])
+
+        model = Model(inputs=input, outputs=output)
+
+        if self.training:
+            if self.lR:
+                opt = keras.optimizers.Adam(learning_rate=self.lR)
+            else:
+                opt = keras.optimizers.Adam()
+
+            model.compile(optimizer=opt,loss=weighted_MSE_loss_default,experimental_run_tf_function=False)
+
+        return model
+
+    def create_x140_model(self):
+        """Build the x140 model with freshly initialised weights (no reference model)."""
+        input = Input(shape=self.input_shape, name="input_1")
+        conv1 = Conv2D(filters=self.M, kernel_size=3,padding='same',name="conv1")
+        x = conv1(input)
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_pre")(x)
+
+        # the first hidden layer has a slightly different structure
+        hidden_0_1 = Conv2D(filters=self.M, kernel_size=1,name="hidden_0_1")
+        x = hidden_0_1(x)
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_0")(x)
+
+        if 0 in self.CP_layer_cfg:
+            R = self.CP_layer_cfg[0]
+            hidden_0_2 = Conv2D(filters=R, kernel_size=1,name="hidden_0_2")
+        else:
+            hidden_0_2 = Conv2D(filters=self.K, kernel_size=1,name="hidden_0_2")
+        x = hidden_0_2(x)
+
+        if 0 in self.CP_layer_cfg:
+
+            if tf.test.is_gpu_available():
+                layer3b = Conv2D(filters=R, kernel_size=(3,1), groups=R, use_bias=False, padding='same', name="hidden_0"+"_3b")
+                layer3c = Conv2D(filters=R, kernel_size=(1,3), groups=R, use_bias=False,padding='same', name="hidden_0"+"_3c")
+            else:
+                layer3b = DepthwiseConv2D(kernel_size=(3,1), use_bias=False, padding='same', name="hidden_0"+"_3b")
+                layer3c = DepthwiseConv2D(kernel_size=(1,3), use_bias=False,padding='same', name="hidden_0"+"_3c")
+
+            x = layer3b(x)
+            x = layer3c(x)
+
+        elif 0 in self.DSC_layer_cfg:
+            # Grouped convolution is not supported currently on TensorFlow CPU.
+            # Separable convolutions are realized using grouped convolution to make training faster,
+            # as separable convolution layers are sub-optimally implemented on CUDA GPUs.
+            if tf.test.is_gpu_available():
+                layer3a = Conv2D(filters=self.K, kernel_size=(3,3), groups=self.K, padding='same', name="hidden_0"+ "_3a")
+            else:
+                layer3a = DepthwiseConv2D(kernel_size=(3,3), padding='same', name="hidden_0" + "_3a")
+
+            x = layer3a(x)
+
+            layer3b = Conv2D(filters=self.K,kernel_size=1, padding='same', name="hidden_0"+ "_3b")
+            x = layer3b(x)
+
+        else:
+            hidden_0_3 = Conv2D(filters=self.K, kernel_size=3, padding='same', name="hidden_0_3")
+            x = hidden_0_3(x)
+
+        w_prev = None
+        for idx in range(1, self.N):
+            if idx in self.CP_layer_cfg:
+                last_layer = False
+                if idx == (self.N - 1):
+                    last_layer = True
+                x, w_prev = self.hidden_layer_CP(idx, x, last_layer, wts=None, wts_prev=w_prev)
+            elif idx in self.DSC_layer_cfg:
+                x = self.hidden_layer_DSC(idx,x)
+            else:
+                x = self.hidden_layer(idx, x)
+
+        output_layer = Conv2D(filters=self.num_output_channels, kernel_size=3, padding='same', name="output_layer")
+        x = output_layer(x)
+
+        res_sliced = x[:,4:68, 4:68, 0:6]
+        input_sliced =  input[:,4:68, 4:68, 0:6]
+
+        output = Add(name='add')([res_sliced,input_sliced])
+
+        model = Model(inputs=input, outputs=output)
+
+        if self.training:
+            if self.lR:
+                opt = keras.optimizers.Adam(learning_rate=self.lR)
+            else:
+                opt = keras.optimizers.Adam()
+            model.compile(optimizer=opt,loss=weighted_MSE_loss_default,experimental_run_tf_function=False)
+
+        return model
+
+
+    # Return a ready (optionally compiled) Keras model, honouring the ref_model settings.
+    def get_model(self):
+        if self.ref_model_file:
+            if self.ref_model_type == 'baseline':
+                target_model = self.create_x140_model_from_ref()
+                return(target_model)
+            else:
+                target_model = keras.models.load_model(self.ref_model_file, compile=False)
+                if self.training:
+                    if self.lR:
+                        opt = keras.optimizers.Adam(learning_rate=self.lR)
+                    else:
+                        opt = keras.optimizers.Adam()
+                    target_model.compile(optimizer=opt,loss=weighted_MSE_loss_default,experimental_run_tf_function=False)
+                return target_model
+        else:
+            return(self.create_x140_model())
+
+class LC_NNLF_model():
+    def __init__(self, model_params, training=False, training_params=None):
+        """Configure the luma/chroma split NNLF network (widths M_*/K_*, depths N_Y/N_C, model_type baseline|cp|dsc)."""
+        # compile the model if training is True
+        self.training = training
+
+        self.K = model_params['K']
+        self.M_Y = model_params['M_Y']
+        self.K_Y = model_params['K_Y']
+        self.M_C = model_params['M_C']
+
+        # K_C is configured explicitly; historically it was derived as K - K_Y
+        # (with a check that K > K_Y), so configs should keep K == K_Y + K_C.
+        # NOTE(review): nothing validates that split here - confirm configs.
+
+        self.K_C = model_params['K_C']
+
+        self.N_Y = model_params['N_Y']
+        self.N_C = model_params['N_C']
+
+        self.model_type = model_params['model_type']
+        self.M = model_params['M']
+
+        if self.model_type == 'cp':
+            self.R = model_params['R']
+
+        #input shape to network
+        self.input_shape = (72,72,10)
+        self.output_shape = (64,64,6)
+
+        if training == True:
+            if 'learning_rate' in training_params:
+                self.lR = training_params['learning_rate']
+            else:
+                self.lR = None
+        # Parameter for the LeakyReLU activation layers in the network
+        self.alpha = 0.2
+
+        # Model file from which weights need to be initialized
+        # Valid only for CP and baseline network
+        if 'ref_model' in model_params:
+            self.ref_model_file = model_params['ref_model']
+
+            if 'ref_model_type' in model_params:
+                self.ref_model_type = model_params['ref_model_type']
+            else:
+                raise modelParamsError("Ref model type must be mentioned, when ref model file is given")
+        else:
+            self.ref_model_file = ""
+            self.ref_model_type = ""
+
+
+
+        self.CP_layer_cfg_y = {}
+        self.CP_layer_cfg_c = {}
+
+        self.DSC_layer_cfg_y = {}
+        self.DSC_layer_cfg_c = {}
+
+        if self.model_type == 'cp':
+            if 'CP_layers_y' in model_params:
+                self.CP_layer_cfg_y = model_params['CP_layers_y']
+                self.CP_layer_cfg_y = {int(k):int(v) for k,v in self.CP_layer_cfg_y.items()}
+
+            if 'CP_layers_c' in model_params:
+                self.CP_layer_cfg_c = model_params['CP_layers_c']
+                self.CP_layer_cfg_c = {int(k):int(v) for k,v in self.CP_layer_cfg_c.items()}
+
+        if self.model_type == 'dsc':
+            if 'DSC_layers_y' in model_params:
+                # int keys, for consistency with the CP maps and the `idx in cfg` lookups
+                self.DSC_layer_cfg_y = {int(k): v for k, v in model_params['DSC_layers_y'].items()}
+            if 'DSC_layers_c' in model_params:
+                self.DSC_layer_cfg_c = {int(k): v for k, v in model_params['DSC_layers_c'].items()}
+
+
+    def hidden_layer(self,idx, input, is_luma, weights=None):
+        """Branch-specific hidden block: 1x1 expand to M -> LeakyReLU -> 1x1 reduce to K -> 3x3 conv; optionally seeded from `weights`."""
+        if is_luma == True:
+            M = self.M_Y
+            K = self.K_Y
+            layer_suffix = str(idx) + "_y"
+        else:
+            M = self.M_C
+            K = self.K_C
+            layer_suffix = str(idx) + "_c"
+        # 1x1 expansion to M channels
+        layer1 = Conv2D(filters=M, kernel_size=1, name="hidden_"+layer_suffix+ "_1")
+        x = layer1(input)
+
+        if weights is not None:
+            layer1.set_weights(weights[0])
+
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_"+layer_suffix)(x)
+        layer2 = Conv2D(filters=K, kernel_size=1,name="hidden_"+layer_suffix+ "_2")
+        x = layer2(x)
+
+        if weights is not None:
+            layer2.set_weights(weights[1])
+
+        layer3 = Conv2D(filters=K, kernel_size=3, padding='same', name="hidden_"+layer_suffix+ "_3")
+        out = layer3(x)
+
+        if weights is not None:
+            layer3.set_weights(weights[2])
+
+        return out
+
+
+    def hidden_layer_CP(self, idx, input, is_luma, last_layer, wts=None, wts_prev=None):
+        """CP-factorised hidden block (rank R from the per-branch CP map); fuses/propagates decomposed ref weights via wts/wts_prev."""
+        wts_for_next = None
+
+        if is_luma == True:
+            M = self.M_Y
+            K = self.K_Y
+            layer_suffix = str(idx) + "_y"
+            R = self.CP_layer_cfg_y[idx]
+        else:
+            M = self.M_C
+            K = self.K_C
+            layer_suffix = str(idx) + "_c"
+            R = self.CP_layer_cfg_c[idx]
+
+        layer1 = Conv2D(filters=M, kernel_size=1,name="hidden_"+layer_suffix+ "_1")
+        x = layer1(input)
+        # Fuse the 1x1 weights carried over from the previous CP block into layer1.
+        if wts is not None:
+            if wts_prev is None:
+                layer1.set_weights(wts[0])
+            else:
+                b_prev = wts_prev[1].reshape(-1,1)
+                prev_wt_matrix = np.moveaxis(wts_prev[0][0,0,:,:],0,1)
+                cur_wt_matrix = np.moveaxis(wts[0][0][0,0,:,:],0,1)
+                b_cur = wts[0][1].reshape(-1, 1)
+
+                wts_prod = np.matmul(cur_wt_matrix,prev_wt_matrix)
+                bias_fused = np.add(np.matmul(cur_wt_matrix,b_prev),b_cur)
+
+                wt_matrix = np.moveaxis(wts_prod,0,1)
+                layer1.set_weights([wt_matrix[np.newaxis, np.newaxis, :,:],bias_fused.flatten()])
+
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_"+layer_suffix)(x)
+        layer2 = Conv2D(filters=R, kernel_size=1, name="hidden_"+layer_suffix+ "_2")
+        x = layer2(x)
+
+        if wts is not None:
+            cp_factors = parafac(np.moveaxis(wts[2][0],2,0), R,n_iter_max=10000, linesearch=True)
+            # Fold the first CP factor into the preceding 1x1 conv (weights and bias).
+            cur_wt_matrix = np.moveaxis(cp_factors[1][0], 0, 1)
+            prev_wt_matrix = np.moveaxis(wts[1][0][0,0,:,:], 0, 1)
+
+            b_prev = wts[1][1].reshape(-1, 1)
+            b_cur = np.matmul(cur_wt_matrix, b_prev)
+
+            wts_prod = np.matmul(cur_wt_matrix, prev_wt_matrix)
+            wt_matrix = np.moveaxis(wts_prod,0,1)
+            layer2.set_weights([wt_matrix[np.newaxis, np.newaxis, :,:],b_cur.flatten()])
+
+        # Grouped convolution is not supported currently on TensorFlow CPU.
+        # Separable convolutions are realized using grouped convolution to make training faster,
+        # as separable convolution layers are sub-optimally implemented on CUDA GPUs.
+        #
+        if tf.test.is_gpu_available():
+            layer3b = Conv2D(filters=R, kernel_size=(3,1), use_bias=False, groups=R, padding='same', name="hidden_"+layer_suffix+ "_3b")
+            x = layer3b(x)
+
+            layer3c = Conv2D(filters=R, kernel_size=(1,3), use_bias=False,groups=R,padding='same', name="hidden_"+layer_suffix+ "_3c")
+            x = layer3c(x)
+
+            if wts is not None:
+                layer3b.set_weights([cp_factors[1][1][:,np.newaxis,np.newaxis,:]])
+                layer3c.set_weights([cp_factors[1][2][np.newaxis,:,np.newaxis,:]])
+
+        else:
+            layer3b = DepthwiseConv2D(kernel_size=(3,1), use_bias=False, padding='same', name="hidden_"+layer_suffix+ "_3b")
+            x = layer3b(x)
+
+            layer3c = DepthwiseConv2D(kernel_size=(1,3), use_bias=False,padding='same', name="hidden_"+layer_suffix+ "_3c")
+            x = layer3c(x)
+
+            if wts is not None:
+                layer3b.set_weights([cp_factors[1][1][:,np.newaxis,:,np.newaxis]])
+                layer3c.set_weights([cp_factors[1][2][np.newaxis,:,:,np.newaxis]])
+
+
+        if last_layer:
+            layer3d = Conv2D(filters=K, kernel_size=1,name="hidden_"+layer_suffix+ "_3d")
+            x = layer3d(x)
+
+            if wts is not None:
+                layer3d.set_weights([np.moveaxis(cp_factors[1][3],0,1)[np.newaxis, np.newaxis, :,:], wts[2][1]])
+
+        else:
+            # Not the last layer: the closing 1x1 factor is fused into the next block instead.
+            if wts is not None:
+                wts_for_next = [np.moveaxis(cp_factors[1][3],0,1)[np.newaxis, np.newaxis, :,:], wts[2][1]]
+
+
+
+        return x,wts_for_next
+
+
+    def hidden_layer_DSC(self, idx, input, is_luma=True, weights=None):
+        """Depthwise-separable hidden block; branch-specific widths/names so luma and chroma callers get distinct layers."""
+        if is_luma == True:
+            M, K, layer_suffix = self.M_Y, self.K_Y, str(idx) + "_y"
+        else:
+            M, K, layer_suffix = self.M_C, self.K_C, str(idx) + "_c"
+        layer1 = Conv2D(filters=M, kernel_size=1, name="hidden_"+layer_suffix+ "_1")
+        x = layer1(input)
+        if weights is not None:
+            layer1.set_weights(weights[0])
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_"+layer_suffix)(x)
+        layer2 = Conv2D(filters=K, kernel_size=1,name="hidden_"+layer_suffix+ "_2")
+        x = layer2(x)
+        if weights is not None:
+            layer2.set_weights(weights[1])
+        # Grouped conv on GPU (faster to train); depthwise on CPU, where grouped conv is unsupported.
+        if tf.test.is_gpu_available():
+            layer3_a = Conv2D(filters=K, kernel_size=(3,3), groups=K, padding='same', name="hidden_"+layer_suffix+ "_3a")
+        else:
+            layer3_a = DepthwiseConv2D(kernel_size=(3,3), padding='same', name="hidden_"+layer_suffix+ "_3a")
+        x = layer3_a(x)
+        layer3_b = Conv2D(filters=K,kernel_size=1, padding='same', name="hidden_"+layer_suffix+ "_3b")
+        out = layer3_b(x)
+
+        return out
+
+    def create_LC_NNLF_model(self):
+        """Build the split luma/chroma NNLF model: shared stem, then separate Y and C branches with residual adds."""
+        input = Input(shape=self.input_shape, name="input_1")
+
+        conv1 = Conv2D(filters=self.M, kernel_size=3,padding='same',name="conv1")
+        x = conv1(input)
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_pre")(x)
+
+        # the first hidden layer has a slightly different structure
+        hidden_0_1 = Conv2D(filters=self.M, kernel_size=1,name="hidden_0_1")
+        x = hidden_0_1(x)
+
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_0")(x)
+
+        if self.model_type == 'cp':
+            hidden_0_2 = Conv2D(filters=self.R, kernel_size=1,name="hidden_0_2")
+        elif self.model_type == 'baseline':
+            hidden_0_2 = Conv2D(filters=self.K, kernel_size=1,name="hidden_0_2")
+
+        x = hidden_0_2(x)
+
+        if self.model_type == 'cp':
+            if tf.test.is_gpu_available():
+                layer3b = Conv2D(filters=self.R, kernel_size=(3,1), groups=self.R, use_bias=False, padding='same', name="hidden_0"+"_3b")
+                x = layer3b(x)
+
+                layer3c = Conv2D(filters=self.R, kernel_size=(1,3), groups=self.R, use_bias=False,padding='same', name="hidden_0"+"_3c")
+                x = layer3c(x)
+
+            else:
+                layer3b = DepthwiseConv2D(kernel_size=(3,1), use_bias=False, padding='same', name="hidden_0"+"_3b")
+                x = layer3b(x)
+
+                layer3c = DepthwiseConv2D(kernel_size=(1,3), use_bias=False,padding='same', name="hidden_0"+"_3c")
+                x = layer3c(x)
+
+
+            layer3d = Conv2D(filters=self.K, kernel_size=1, name="hidden_0"+"_3d")
+            x = layer3d(x)
+
+        elif self.model_type == 'dsc':
+            # Grouped convolution is not supported currently on TensorFlow CPU.
+            # Separable convolutions are realized using grouped convolution to make training faster,
+            # as separable convolution layers are sub-optimally implemented on CUDA GPUs.
+            if tf.test.is_gpu_available():
+                layer3a = Conv2D(filters=self.K, kernel_size=(3,3), groups=self.K, padding='same', name="hidden_0"+ "_3a")
+            else:
+                layer3a = DepthwiseConv2D(kernel_size=(3,3), padding='same', name="hidden_0" + "_3a")
+
+            x = layer3a(x)
+
+            layer3b = Conv2D(filters=self.K,kernel_size=1, padding='same', name="hidden_0"+ "_3b")
+            x = layer3b(x)
+
+        else:
+            hidden_0_3 = Conv2D(filters=self.K, kernel_size=3, padding='same', name="hidden_0_3")
+            x = hidden_0_3(x)
+
+        x_Y = x[:,:,:,:self.K_Y]  # luma branch takes the first K_Y channels (was hard-coded 16)
+        x_C = x[:,:,:,self.K_Y:]  # chroma branch takes the remaining K_C channels
+
+        w_Y = None
+        for idx in range(1, self.N_Y):
+            if idx in self.CP_layer_cfg_y:
+                last_layer = False
+                if idx == (self.N_Y-1):
+                    last_layer = True
+                x_Y,w_Y = self.hidden_layer_CP(idx, x_Y,True, last_layer, wts=None, wts_prev=w_Y)
+            elif idx in self.DSC_layer_cfg_y:
+                x_Y = self.hidden_layer_DSC(idx, x_Y,True)
+            else:
+                x_Y = self.hidden_layer(idx, x_Y,True)
+
+        w_C = None
+        for idx in range(1, self.N_C):
+            if idx in self.CP_layer_cfg_c:
+                last_layer = False
+                if idx == (self.N_C-1):
+                    last_layer = True
+                x_C,w_C = self.hidden_layer_CP(idx, x_C,False,last_layer, wts=None, wts_prev=w_C)
+            elif idx in self.DSC_layer_cfg_c:  # was DSC_layer_cfg_y (copy-paste bug): chroma checked the luma map
+                x_C = self.hidden_layer_DSC(idx, x_C,False)
+            else:
+                x_C = self.hidden_layer(idx, x_C,False)
+
+
+        output_layer_Y = Conv2D(filters=4, kernel_size=3, padding='same', name="output_layer_Y")
+        x_Y = output_layer_Y(x_Y)
+
+        output_layer_C = Conv2D(filters=2, kernel_size=3, padding='same', name="output_layer_C")
+        x_C = output_layer_C(x_C)
+
+        # Residual connections: crop the 4-pixel border, add predictions to the input planes.
+        res_sliced_y = x_Y[:,4:68, 4:68, 0:4]
+        input_sliced_y =  input[:,4:68, 4:68, 0:4]
+
+        res_sliced_c = x_C[:,4:68, 4:68, 0:2]
+        input_sliced_c =  input[:,4:68, 4:68, 4:6]
+
+        output_Y = Add(name='add_1')([res_sliced_y,input_sliced_y])
+        output_C = Add(name='add_2')([res_sliced_c,input_sliced_c])
+
+        output = tf.keras.layers.Concatenate()([output_Y, output_C])
+
+        model = Model(inputs=input, outputs=output)
+
+        if self.training:
+            if self.lR:
+                opt = keras.optimizers.Adam(learning_rate=self.lR)
+            else:
+                opt = keras.optimizers.Adam()
+            model.compile(optimizer=opt,loss=weighted_MSE_loss_default,experimental_run_tf_function=False)
+
+        return model
+
+
+    def create_LC_NNLF_model_from_ref(self):
+
+        ref_model = keras.models.load_model(self.ref_model_file, compile=False)
+        ref_model_wts_common = []
+        ref_model_wts_luma = []
+        ref_model_wts_chroma = []
+
+        for layer in ref_model.layers:
+            if len(layer.get_weights()) > 0:
+                if ('_y_' in layer.name) or layer.name.endswith('_Y'):
+                    ref_model_wts_luma.append(layer.get_weights())
+                elif ('_c_' in layer.name) or layer.name.endswith('_C'):
+                    ref_model_wts_chroma.append(layer.get_weights())
+                else:
+                    ref_model_wts_common.append(layer.get_weights())
+
+        last_wt_idx = 0
+        last_wt_idx_y = 0
+        last_wt_idx_c = 0
+
+        input = Input(shape=self.input_shape, name="input_1")
+
+        conv1 = Conv2D(filters=self.M, kernel_size=3,padding='same',name="conv1")
+        x = conv1(input)
+
+        conv1.set_weights(ref_model_wts_common[last_wt_idx])
+        last_wt_idx = last_wt_idx+1
+
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_pre")(x)
+
+        #first hidden layer has slightly different strcture
+        hidden_0_1 = Conv2D(filters=self.M, kernel_size=1,name="hidden_0_1")
+        x = hidden_0_1(x)
+        
+        hidden_0_1.set_weights(ref_model_wts_common[last_wt_idx])
+        last_wt_idx = last_wt_idx+1
+
+        x = LeakyReLU(alpha=self.alpha, name="hidden_activation_0")(x)
+
+        if self.model_type == 'baseline':
+            hidden_0_2 = Conv2D(filters=self.K, kernel_size=1,name="hidden_0_2")
+            x = hidden_0_2(x)
+            
+            hidden_0_2.set_weights(ref_model_wts_common[last_wt_idx])
+            last_wt_idx = last_wt_idx+1
+
+        elif self.model_type == 'cp':
+            hidden_0_2 = Conv2D(filters=self.R, kernel_size=1,name="hidden_0_2")
+            x = hidden_0_2(x)
+
+            wts_prev = np.moveaxis(ref_model_wts_common[last_wt_idx][0][0,0,:,:], 0, 1)
+            bias_prev = ref_model_wts_common[last_wt_idx][1]
+            last_wt_idx = last_wt_idx + 1
+            
+            cp_factors = parafac(np.moveaxis(ref_model_wts_common[last_wt_idx][0],2,0), rank=self.R, n_iter_max=10000, linesearch=True)
+
+            cur_wt_matrix = np.moveaxis(cp_factors[1][0], 0, 1)
+
+            wts_cur = np.moveaxis(np.matmul(cur_wt_matrix, wts_prev), 0, 1)[np.newaxis,np.newaxis,:,:]
+            bias_cur = np.matmul(cur_wt_matrix, bias_prev.reshape(-1,1)).flatten()
+
+            hidden_0_2.set_weights([wts_cur,bias_cur])
+
+            #
+            if tf.test.is_gpu_available():
+                layer3b = Conv2D(filters=self.R,kernel_size=(3,1), use_bias=False, groups=self.R, padding='same', name="hidden_0"+"_3b")
+                x = layer3b(x)
+                layer3b.set_weights([cp_factors[1][1][:,np.newaxis,np.newaxis,:]])
+
+                layer3c = Conv2D(filters=self.R,kernel_size=(1,3), use_bias=False, groups=self.R, padding='same', name="hidden_0"+"_3c")
+                x = layer3c(x)
+                layer3c.set_weights([cp_factors[1][2][np.newaxis,:,np.newaxis,:]])
+
+            else:
+                layer3b = DepthwiseConv2D(kernel_size=(3,1), use_bias=False, padding='same', name="hidden_0"+"_3b")
+                x = layer3b(x)
+                layer3b.set_weights([cp_factors[1][1][:,np.newaxis,:,np.newaxis]])
+
+                layer3c = DepthwiseConv2D(kernel_size=(1,3), use_bias=False,padding='same', name="hidden_0"+"_3c")
+                x = layer3c(x)
+                layer3c.set_weights([cp_factors[1][2][np.newaxis,:,:,np.newaxis]])
+
+
+            layer3d = Conv2D(filters=self.K, kernel_size=1, name="hidden_0"+"_3d")
+            x = layer3d(x)
+            layer3d.set_weights([np.moveaxis(cp_factors[1][3],0,1)[np.newaxis, np.newaxis, :,:], ref_model_wts_common[last_wt_idx][1]])
+
+        elif 0 in self.DSC_layer_cfg:
+
+            if tf.test.is_gpu_available():
+                layer3a = Conv2D(filters=self.K, kernel_size=(3,3), groups=self.K, padding='same', name="hidden_0"+ "_3a")
+            else:
+                layer3a = DepthwiseConv2D(kernel_size=(3,3), padding='same', name="hidden_0"+ "_3a")
+
+            x = layer3a(x)
+
+            layer3b = Conv2D(filters=self.K,kernel_size=1, padding='same', name="hidden_0"+ "_3b")
+            x = layer3b(x)
+
+        else:
+
+            hidden_0_3 = Conv2D(filters=self.K, kernel_size=3, padding='same', name="hidden_0_3")
+            x = hidden_0_3(x)
+            hidden_0_3.set_weights(ref_model.get_layer(ref_model_wts_common[last_wt_idx]).get_weights())
+
+        last_wt_idx = last_wt_idx+1
+
+        x_Y = x[:,:,:,:16]
+        x_C = x[:,:,:,16:]
+        
+        w_Y = None
+        w_C = None
+        for idx in range(1, self.N_Y):
+            if idx in self.CP_layer_cfg_y:
+                last_layer = False
+                if idx == (self.N_Y-1):
+                    last_layer = True
+
+                wts = []
+                wts.append(ref_model_wts_luma[last_wt_idx_y])
+                wts.append(ref_model_wts_luma[last_wt_idx_y + 1])
+                wts.append(ref_model_wts_luma[last_wt_idx_y + 2])
+                
+                x_Y, w_Y = self.hidden_layer_CP(idx, x_Y,True, last_layer, wts=wts, wts_prev=w_Y)
+
+            elif idx in self.DSC_layer_cfg_y:
+                x_Y = self.hidden_layer_DSC(idx, x_Y,True)
+            else:
+                x_Y = self.hidden_layer(idx, x_Y,True)
+        
+            last_wt_idx_y = last_wt_idx_y+3
+
+        for idx in range(1, self.N_C):
+            if idx in self.CP_layer_cfg_c:
+                last_layer = False
+                if idx == (self.N_C-1):
+                    last_layer = True
+
+                wts = []
+                wts.append(ref_model_wts_chroma[last_wt_idx_c])
+                wts.append(ref_model_wts_chroma[last_wt_idx_c + 1])
+                wts.append(ref_model_wts_chroma[last_wt_idx_c + 2])
+
+                x_C, w_C = self.hidden_layer_CP(idx, x_C,False,last_layer, wts=wts, wts_prev=w_C)
+
+            elif idx in self.DSC_layer_cfg_y:
+                x_C = self.hidden_layer_DSC(idx, x_C,False)
+            else:
+                x_C = self.hidden_layer(idx, x_C,False)
+
+            last_wt_idx_c = last_wt_idx_c + 3
+        
+        output_layer_Y = Conv2D(filters=4, kernel_size=3, padding='same', name="output_layer_Y")
+        x_Y = output_layer_Y(x_Y)
+        output_layer_Y.set_weights(ref_model_wts_luma[last_wt_idx_y])
+        last_wt_idx_y = last_wt_idx_y + 1
+
+        output_layer_C = Conv2D(filters=2, kernel_size=3, padding='same', name="output_layer_C")
+        x_C = output_layer_C(x_C)
+        output_layer_C.set_weights(ref_model_wts_chroma[last_wt_idx_c])
+        last_wt_idx_c = last_wt_idx_c + 1
+
+        res_sliced_y = x_Y[:,4:68, 4:68, 0:4]
+        input_sliced_y =  input[:,4:68, 4:68, 0:4]
+
+        res_sliced_c = x_C[:,4:68, 4:68, 0:2]
+        input_sliced_c =  input[:,4:68, 4:68, 4:6]
+
+        output_Y = Add(name='add_1')([res_sliced_y,input_sliced_y])
+        output_C = Add(name='add_2')([res_sliced_c,input_sliced_c])
+
+        output = tf.keras.layers.Concatenate()([output_Y, output_C])
+
+        model = Model(inputs=input, outputs=output)
+
+        if self.training:
+            if self.lR:
+                opt = keras.optimizers.Adam(learning_rate=self.lR)
+            else:
+                opt = keras.optimizers.Adam()
+
+            model.compile(optimizer=opt,loss=weighted_MSE_loss_default,experimental_run_tf_function=False)
+
+        return model
+
+
+    def get_model(self):
+
+        if self.ref_model_file:
+            if self.model_type == self.ref_model_type:
+                target_model = keras.models.load_model(self.ref_model_file, compile=False)
+                if self.training:
+                    if self.lR:
+                        opt = keras.optimizers.Adam(learning_rate=self.lR)
+                    else:
+                        opt = keras.optimizers.Adam()
+                    target_model.compile(optimizer=opt,loss=weighted_MSE_loss_default,experimental_run_tf_function=False)
+                return target_model
+            else:
+                return(self.create_LC_NNLF_model_from_ref())
+        else:
+            return(self.create_LC_NNLF_model())
+
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/3_training/RA_LC-CPDF_model2_cfg_stage1.json b/training/training_scripts/Nn_Filtering_Set_LC/3_training/RA_LC-CPDF_model2_cfg_stage1.json
new file mode 100644
index 0000000000000000000000000000000000000000..65c95c1bd15fe37fc0e21dc192d47f799ba85d2e
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/3_training/RA_LC-CPDF_model2_cfg_stage1.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ba31a633735f9453af0972cbc5d4a9e7ba925c1e6c35777c86abf8245cfe960
+size 1931
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/3_training/RA_LC-CPDF_model2_cfg_stage2.json b/training/training_scripts/Nn_Filtering_Set_LC/3_training/RA_LC-CPDF_model2_cfg_stage2.json
new file mode 100644
index 0000000000000000000000000000000000000000..fced6f4d6576538c3469a84ce9582321140eae2b
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/3_training/RA_LC-CPDF_model2_cfg_stage2.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:861d1e34fcc181e3f30ebafb12fb06d04e1a7d78309172033725111928646636
+size 1938
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/3_training/RA_LC-CPDF_model3_cfg_stage1.json b/training/training_scripts/Nn_Filtering_Set_LC/3_training/RA_LC-CPDF_model3_cfg_stage1.json
new file mode 100644
index 0000000000000000000000000000000000000000..4ba9f9fc39f3753a2b152b2802d52da5a67dbbe6
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/3_training/RA_LC-CPDF_model3_cfg_stage1.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60011bf124c123a3e5fd3cf79002fed19cd58c18a8939ad2ec033a0c1c58a68e
+size 1931
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/3_training/RA_LC-CPDF_model3_cfg_stage2.json b/training/training_scripts/Nn_Filtering_Set_LC/3_training/RA_LC-CPDF_model3_cfg_stage2.json
new file mode 100644
index 0000000000000000000000000000000000000000..4e828640550ad03558d2e6badded888da9478c8f
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/3_training/RA_LC-CPDF_model3_cfg_stage2.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd6f624d646717e089b42d9e515698a36a9b574e2a93dded245166e6c20b53c5
+size 1938
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/3_training/train_NNLF.py b/training/training_scripts/Nn_Filtering_Set_LC/3_training/train_NNLF.py
new file mode 100644
index 0000000000000000000000000000000000000000..1622fdd9a1dfa9e01002e168ae18aafcf11a2c8f
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/3_training/train_NNLF.py
@@ -0,0 +1,223 @@
+import numpy as np
+import sys
+import glob
+import os
+import json
+import shutil
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.python.platform import gfile
+from tensorflow.python.framework.ops import disable_eager_execution
+import tensorflow.python.util.deprecation as deprecation
+from NNLF_models import X0140_model,LC_NNLF_model,InpVal_model
+from NNLF_data_loader import X0140_data_loader
+import keras.backend as K
+from NNLF_models import weighted_MSE_1211, weighted_MSE_411
+
def ra_lr_scheduler_st1(epoch, lr):
    """RA stage-1 LR schedule: scale by 0.5 at epoch 20, 0.2 at 30 and
    0.1 at 35; every other epoch keeps the current rate."""
    decay_at = {20: 0.5, 30: 0.2, 35: 0.1}
    return lr * decay_at.get(epoch, 1.0)
+
def ra_lr_scheduler_st2(epoch, lr):
    """RA stage-2 LR schedule: scale by 0.5 at epoch 10, 0.2 at 20 and
    0.1 at 25; every other epoch keeps the current rate."""
    decay_at = {10: 0.5, 20: 0.2, 25: 0.1}
    return lr * decay_at.get(epoch, 1.0)
+
def ai_lr_scheduler(epoch, lr):
    """AI LR schedule: from epoch 300 onwards, multiply the rate by 0.1
    every 50 epochs; otherwise keep the current rate."""
    decay_now = (epoch >= 300) and (epoch % 50 == 0)
    return lr * 0.1 if decay_now else lr
+
def train_NNLF():
    """Train an NNLF model as described by the JSON config passed as argv[1].

    The config provides 'model_parameters', 'training_data' (directory lists
    for degraded inputs and ground truth), 'training_parameters' and
    'out_artifacts'. Supports resuming from a checkpoint and an optional
    multi-phase 'train_iter' schedule that changes learning rate and loss
    function per phase.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    deprecation._PRINT_DEPRECATION_WARNINGS = False

    cfg = json.load(open(sys.argv[1]))

    model_params = cfg['model_parameters']
    training_data_info = cfg['training_data']
    training_params = cfg['training_parameters']
    out_artifacts = cfg['out_artifacts']

    if not os.path.exists(out_artifacts['out_dir']):
        os.makedirs(out_artifacts['out_dir'])

    train_dir_list = training_data_info['train_dir_list']
    valid_dir_list = training_data_info['valid_dir_list']
    gt_train_dir_list = training_data_info['gt_train_dir_list']
    gt_valid_dir_list = training_data_info['gt_valid_dir_list']

    chkpt_dir = os.path.join(out_artifacts['out_dir'], "checkpoints")
    tensorboard_logs = os.path.join(out_artifacts['out_dir'], "tensorboardlogs")
    best_checkpoint_filepath = os.path.join(out_artifacts['out_dir'], "model")

    train_data_file_list = []
    train_data_gt_file_list = []

    qp_list = model_params['qp_list']

    # Ground-truth files carry a "DIV2K" prefix only for the AI configuration.
    gt_prefix = ""
    if training_params['encode_cfg'] == "AI":
        gt_prefix = "DIV2K"

    # Pair each training file with its ground truth. Filename fields are
    # '_'-separated; field 2 is the QP (filtered against qp_list) and fields
    # 2-3 are dropped to form the ground-truth name — presumably codec
    # parameters; TODO confirm against the dataset naming convention.
    for train_dir in train_dir_list:
        for train_file in os.listdir(train_dir):
            train_basename = os.path.splitext(train_file)[0]
            train_base_split = train_basename.split("_")

            if int(train_base_split[2]) not in qp_list:
                continue

            gt_file = gt_prefix + "_".join(train_base_split[:2] + train_base_split[4:]) + ".yuv"
            # First directory containing the ground-truth file wins.
            for gt_train_dir in gt_train_dir_list:
                gt_file_full_path = os.path.join(gt_train_dir, gt_file)
                if os.path.exists(gt_file_full_path):
                    train_data_gt_file_list.append(gt_file_full_path)
                    break
            train_data_file_list.append(os.path.join(train_dir, train_file))

    # A missing ground-truth file leaves the lists different lengths; abort.
    if(len(train_data_file_list) != len(train_data_gt_file_list)):
        print("Train data and ground truth data are not of same size")
        return

    test_data_file_list = []
    test_data_gt_file_list = []

    # Same pairing for the validation set.
    for valid_dir in valid_dir_list:
        for valid_file in os.listdir(valid_dir):
            valid_basename = os.path.splitext(valid_file)[0]
            valid_base_split = valid_basename.split("_")
            if int(valid_base_split[2]) not in qp_list:
                continue
            gt_file = gt_prefix + "_".join(valid_base_split[:2] + valid_base_split[4:]) + ".yuv"

            for gt_valid_dir in gt_valid_dir_list:
                gt_file_full_path = os.path.join(gt_valid_dir, gt_file)
                if os.path.exists(gt_file_full_path):
                    test_data_gt_file_list.append(gt_file_full_path)
                    break
            test_data_file_list.append(os.path.join(valid_dir, valid_file))

    if(len(test_data_file_list) != len(test_data_gt_file_list)):
        print("Test data and ground truth data are not of same size")
        return

    # Build and train inside an explicit TF graph (TF1-style execution).
    graph = tf.Graph()
    with graph.as_default():
        # Prepare dataset
        training_generator = X0140_data_loader(train_data_file_list, train_data_gt_file_list, batch_size=training_params['batch_size'], is_train_data = True, augment_data=training_params['augment_data'])
        validation_generator = X0140_data_loader(test_data_file_list, test_data_gt_file_list, batch_size=training_params['batch_size'], is_train_data = False, augment_data=training_params['augment_data'])

        # Per-epoch checkpoints encode epoch and val_loss in the filename;
        # the resume logic below parses them back out.
        checkpoint_filepattern = "weights-{epoch:03d}-{val_loss:.8f}.ckpt"
        checkpoint_filepath = os.path.sep.join([chkpt_dir,checkpoint_filepattern])

        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath,monitor='val_loss',mode='min',save_weights_only=True)
        tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard_logs, write_graph=False, profile_batch=1)

        start_epoch = 0
        init_val_loss = None
        # Select the model architecture from the config.
        if model_params['model_arch'] == 'X0140':
            model_obj = X0140_model(model_params, True, training_params)
            model = model_obj.get_model()
        elif model_params['model_arch'] == 'LC_NNLF':
            model_obj = LC_NNLF_model(model_params, True, training_params)
            model = model_obj.get_model()
        else:
            model = None

        if model == None:
            print('Unable to create ' + str(model_params['model_arch']) + ' model')
            return
        else:
            # NOTE(review): model.summary() prints itself and returns None,
            # so this also prints 'None'.
            print(model.summary())

        if training_params['resume_chkpt']:
            checkpoint_path = training_params['resume_chkpt']
            model.load_weights(checkpoint_path).expect_partial()
            #resume from checkpoint sets LR from checkpoint
            if training_params['learning_rate']:
                K.set_value(model.optimizer.learning_rate, training_params['learning_rate'])
            # Recover epoch and best val_loss from the checkpoint file name.
            checkpoint_file_name = os.path.splitext(os.path.basename(checkpoint_path))[0]
            start_epoch = int(checkpoint_file_name.split('-')[1])
            init_val_loss = float(checkpoint_file_name.split('-')[2])

        # Best-model checkpoint, seeded with the resumed val_loss (if any) so
        # resuming does not overwrite an already-better saved model.
        best_model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=best_checkpoint_filepath,monitor='val_loss',mode='min',save_best_only=True, initial_value_threshold=init_val_loss)

        # AI schedule is the default; RA stages override it.
        lr_callback = tf.keras.callbacks.LearningRateScheduler(ai_lr_scheduler)
        if training_params['encode_cfg'] == "RA_stage1":
            lr_callback = tf.keras.callbacks.LearningRateScheduler(ra_lr_scheduler_st1)
        elif training_params['encode_cfg'] == "RA_stage2":
            lr_callback = tf.keras.callbacks.LearningRateScheduler(ra_lr_scheduler_st2)

        # Copy the config file to the output directory for reference
        shutil.copy(sys.argv[1], os.path.join(out_artifacts['out_dir'], "config_file_start_epoch_"+str(start_epoch)+".json"))

        ###Printing initial validation loss, just between input to NNLF and ground truth
        empty_model_obj = InpVal_model()
        empty_model = empty_model_obj.create_model()
        init_hist = empty_model.evaluate(validation_generator,
                                         workers=training_params['num_workers'],
                                         use_multiprocessing=True,
                                         verbose=1)
        no_nnlf_loss_file = open(os.path.join(out_artifacts['out_dir'], "valLoss_without_NNLF.txt"), "w")
        print("Validation directory list: ", valid_dir_list, file=no_nnlf_loss_file)
        print("The loss without NNLF is :", init_hist, file=no_nnlf_loss_file)
        no_nnlf_loss_file.close()

        # Multi-phase schedule: each 'train_iter' entry supplies an end epoch,
        # learning rate and loss function; phases already completed (when
        # resuming) are skipped.
        if "train_iter" in cfg.keys():
            for i,train_iter in enumerate(cfg['train_iter']):
                if start_epoch > train_iter['end_epoch']:
                    continue
                opt = keras.optimizers.Adam(learning_rate=train_iter['learning_rate'])
                loss_function = None
                if train_iter['loss'] == "weighted_MSE_411":
                    loss_function = weighted_MSE_411
                elif train_iter['loss'] == "weighted_MSE_1211":
                    loss_function = weighted_MSE_1211
                else:
                    print("ERROR: The loss function passed in config file is invalid")
                    exit(-1)
                model.compile(optimizer=opt, loss=loss_function, experimental_run_tf_function=False)

                # Loss change makes old val_loss values incomparable: reset
                # the best-checkpoint threshold when switching 411 -> 1211.
                if (i > 0) and ((cfg['train_iter'][i-1]['loss'] == "weighted_MSE_411") and (train_iter['loss'] == "weighted_MSE_1211")):
                    init_val_loss = None
                    best_model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=best_checkpoint_filepath, monitor='val_loss', mode='min', save_best_only=True, initial_value_threshold=init_val_loss)

                history = model.fit(training_generator, validation_data=validation_generator, use_multiprocessing=True,
                                    workers=training_params['num_workers'], initial_epoch=start_epoch,epochs=train_iter['end_epoch'],
                                    callbacks=[model_checkpoint_callback, best_model_checkpoint_callback, tb_callback, lr_callback])
                start_epoch = train_iter['end_epoch']
        else:
            # Single-phase training straight to num_epochs.
            history = model.fit(training_generator, validation_data=validation_generator, use_multiprocessing=True,
                                workers=training_params['num_workers'], initial_epoch=start_epoch,epochs=training_params['num_epochs'],
                                callbacks=[model_checkpoint_callback, best_model_checkpoint_callback, tb_callback, lr_callback])
+
def gpu_mem_growth():
    """Enable memory growth on every visible GPU so TensorFlow does not
    reserve all device memory up front."""
    gpus = tf.config.list_physical_devices('GPU')
    if not gpus:
        return
    try:
        # Currently, memory growth needs to be the same across GPUs
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        print(len(gpus), "Physical GPUs,", len(tf.config.list_logical_devices('GPU')), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
    return
+
+if __name__ == "__main__":
+    gpu_mem_growth()
+    train_NNLF()
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/norm_auto.py b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/norm_auto.py
new file mode 100644
index 0000000000000000000000000000000000000000..f079dda9be42b369c0612abde7f364719212a30f
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/norm_auto.py
@@ -0,0 +1,529 @@
+from sys import argv
+from keras import models
+import os
+
def print_model(model):
    """Print the max absolute value of each weight tensor for every layer
    after the input layer that holds one or two weight arrays."""
    for layer in model.layers[1:]:
        weights = layer.get_weights()
        # One array: kernel only; two: kernel + bias. Others are skipped.
        if len(weights) in (1, 2):
            for tensor in weights:
                print(abs(tensor).max())

    return
+
def max_weight(model):
    """Return (layer_name, value) for the layer (after the input layer)
    whose kernel holds the largest absolute weight; layers named
    'output_layer' are excluded from the kernel+bias case."""
    best_name = ''
    best_val = 0.0
    for layer in model.layers[1:]:
        wts = layer.get_weights()
        eligible = (len(wts) == 1) or (len(wts) == 2 and layer.name != 'output_layer')
        if eligible:
            peak = abs(wts[0]).max()
            if peak > best_val:
                best_val = peak
                best_name = layer.name

    return best_name, best_val
+
def non_split_max_weight(model):
    """Return (layer_name, value) of the largest absolute kernel weight in
    the shared (non-split) part of the model, i.e. scanning stops right
    after the first 'hidden_1_y_1' or 'hidden_1_c_1' layer is processed."""
    best_name = ''
    best_val = 0.0
    for layer in model.layers[1:]:
        wts = layer.get_weights()
        if len(wts) in (1, 2):
            peak = abs(wts[0]).max()
            if peak > best_val:
                best_val = peak
                best_name = layer.name

        # The split branches begin here; this layer was still included.
        if layer.name in ('hidden_1_y_1', 'hidden_1_c_1'):
            break

    return best_name, best_val
+
def max_split_weight(model, y_flag, c_flag):
    """Return (layer_name, value) of the largest absolute kernel weight in
    the split (luma/chroma) branches still being processed.

    y_flag / c_flag mark branches already completed: both done returns
    ("Completed", 0.0); luma done scans only '_c_' layers; chroma done scans
    only '_y_' layers; neither done scans both branches.
    """
    if y_flag is True and c_flag is True:
        return "Completed", 0.0
    elif y_flag is True and c_flag is False:
        select = lambda name: '_c_' in name
    elif y_flag is False and c_flag is True:
        select = lambda name: '_y_' in name
    else:
        select = lambda name: ('_y_' in name) or ('_c_' in name)

    best_name = ''
    best_val = 0.0
    for layer in model.layers[1:]:
        if not select(layer.name):
            continue
        wts = layer.get_weights()
        if len(wts) in (1, 2):
            peak = abs(wts[0]).max()
            if peak > best_val:
                best_val = peak
                best_name = layer.name

    return best_name, best_val
+
def max_bias(model):
    """Return (layer_name, value) for the layer with the largest absolute
    bias, skipping the first and last convolutions ('conv1', 'output_layer')
    and any layer without a kernel+bias pair."""
    best_name = ''
    best_val = 0.0
    for layer in model.layers[1:]:
        wts = layer.get_weights()
        if len(wts) == 2 and layer.name not in ('conv1', 'output_layer'):
            peak = abs(wts[1]).max()
            if peak > best_val:
                best_val = peak
                best_name = layer.name

    return best_name, best_val
+
def mul_backward(model, prev_model, start_layer, end_layer, mul_factor, geo_mean):
    """Divide `start_layer`'s kernel by `mul_factor` and re-absorb that factor
    into the layers *before* it (walking the model in reverse), capping each
    kernel so its max absolute weight does not exceed `geo_mean`.

    prev_model holds a pristine copy of the weights; on failure (factor not
    fully absorbed before reaching `end_layer`) the model is restored from it.

    Returns:
        (1, True) on success; (residual_factor, False) on failure.
    """
    start = False
    # 'x0140' marks a non-split model: absorb into any layer. For split
    # models, only layers of the same branch ('_y_'/'_c_') as start_layer
    # are rescaled.
    layer_type = 'x0140'
    if ('_y_' in start_layer):
        layer_type = '_y_'
    elif ('_c_' in start_layer):
        layer_type = '_c_'

    # Reversed layer order: iterate from the output towards the input.
    for idx, layer in enumerate(model.layers[::-1]):
        if layer.name != start_layer and start == False:
            continue
        elif layer.name == end_layer:
            break
        else:
            start = True
            wts = layer.get_weights()
        if layer.name == start_layer:
            # The factor removed here must be re-introduced upstream.
            wts[0] /= mul_factor
        elif layer.name != start_layer and (layer_type == 'x0140' or layer_type in layer.name):
            # Two arrays: kernel + bias; one array: kernel only (bias-free).
            if len(wts) == 2:
                # Headroom of this kernel relative to the geo-mean cap.
                f_w = float(geo_mean/abs(wts[0]).max())
                if f_w >= mul_factor:
                    # Whole remaining factor fits into this layer; done.
                    wts[0] *= mul_factor
                    wts[1] *= mul_factor
                    mul_factor = 1
                    layer.set_weights(wts)
                    break
                elif f_w < 1:
                    # Kernel already at/above the cap: scale only the bias
                    # so downstream activations stay consistent.
                    wts[1] *= mul_factor
                else:
                    # Absorb as much as the cap allows; carry the rest.
                    wts[0] *= f_w
                    wts[1] *= mul_factor
                    mul_factor /= f_w
            elif len(wts) == 1:
                f = float(geo_mean/abs(wts[0]).max())
                if f >= mul_factor:
                    wts[0] *= mul_factor
                    mul_factor = 1
                    layer.set_weights(wts)
                    break
                elif f < 1:
                    # No headroom and no bias to adjust: skip this layer.
                    continue
                else:
                    wts[0] *= f
                    mul_factor /= f

        layer.set_weights(wts)

        if mul_factor == 1:
            break
    # Factor could not be fully absorbed before end_layer: roll back.
    if mul_factor != 1 and layer.name == end_layer:
        model.set_weights(prev_model.get_weights())
        return mul_factor, False

    return 1, True
+
def mul_forward(model, prev_model, start_layer, end_layer, div_factor, geo_mean):
    """Divide `start_layer`'s kernel (and bias) by `div_factor` and re-absorb
    that factor into the layers *after* it (walking the model forward),
    capping each kernel so its max absolute weight does not exceed `geo_mean`.

    prev_model holds a pristine copy of the weights; on failure (factor not
    fully absorbed before reaching `end_layer`) the model is restored from it.

    Returns:
        (1, True) on success; (residual_factor, False) on failure.
    """
    start = False
    # 'x0140' marks a non-split model: absorb into any layer. For split
    # models, only layers of the same branch ('_y_'/'_c_') as start_layer
    # are rescaled.
    layer_type = 'x0140'
    if ('_y_' in start_layer):
        layer_type = '_y_'
    elif ('_c_' in start_layer):
        layer_type = '_c_'

    # Forward order, skipping the input layer.
    for idx, layer in enumerate(model.layers[1:]):
        if layer.name != start_layer and start == False:
            continue
        elif layer.name == end_layer:
            break
        else:
            start = True
            wts = layer.get_weights()
        if layer.name == start_layer:
            # Unlike mul_backward, the bias is divided too so the layer's
            # output (kernel*x + bias) scales uniformly by 1/div_factor.
            wts[0] /= div_factor
            if len(wts) == 2:
                wts[1] /= div_factor
        elif layer.name != start_layer and (layer_type == 'x0140' or layer_type in layer.name):
            # Two arrays: kernel + bias; one array: kernel only (bias-free).
            if len(wts) == 2:
                # Headroom of this kernel relative to the geo-mean cap.
                f_w = float(geo_mean/abs(wts[0]).max())
                if f_w >= div_factor:
                    # Whole remaining factor fits into this layer; done.
                    wts[0] *= div_factor
                    div_factor = 1
                    layer.set_weights(wts)
                    break
                elif f_w < 1:
                    # Kernel already at/above the cap: only the bias is
                    # rescaled to keep activations consistent.
                    wts[1] /= div_factor
                else:
                    # Absorb as much as the cap allows; carry the rest, and
                    # divide the bias by the residual factor.
                    wts[0] *= f_w
                    div_factor /= f_w
                    wts[1] /= div_factor
            elif len(wts) == 1:
                f = float(geo_mean/abs(wts[0]).max())
                if f >= div_factor:
                    wts[0] *= div_factor
                    div_factor = 1
                    layer.set_weights(wts)
                    break
                elif f < 1:
                    # No headroom and no bias to adjust: skip this layer.
                    continue
                else:
                    wts[0] *= f
                    div_factor /= f

        layer.set_weights(wts)

        if div_factor == 1:
            break
    # Factor could not be fully absorbed before end_layer: roll back.
    if div_factor != 1 and layer.name == end_layer:
        model.set_weights(prev_model.get_weights())
        return div_factor, False

    return 1, True
+
+if __name__ == '__main__':
+
+    os.environ["CUDA_VISIBLE_DEVICES"]="-1"
+    model = models.load_model(argv[1], compile=False)
+    prev_model = models.load_model(argv[1], compile=False)
+    out_model_path = argv[2]
+
+    model_type = ''
+    
+    for idx, layer in enumerate(model.layers[1:]):
+        if ('_y_' in layer.name) or layer.name.endswith('_Y') or layer.name.endswith('_y') or ('_c_' in layer.name) or layer.name.endswith('_C') or layer.name.endswith('_c'):
+            model_type = 'LC_split'
+            break
+        else:
+            model_type = 'x0140'
+
+    if model_type == 'LC_split':
+        # Per-branch accumulators for the geometric mean of each layer's
+        # max |weight|: shared trunk, luma (_y_) and chroma (_c_) branches.
+        layer_prod          = 1.0
+        layer_prod_y        = 1.0
+        layer_prod_c        = 1.0
+        layer_count         = 0
+        layer_count_y       = 0
+        layer_count_c       = 0
+        # Biases must fit the int16 headroom left by the Q11 input quantizer.
+        bias_limit          = 2**(15-11)
+        # Set once the corresponding branch has converged.
+        y_flag              = False
+        c_flag              = False
+        limit_1x1           = 1.0
+        limit_3x1_1x3       = 1.0
+        div_factor          = 1.0
+
+        # print_model(model)
+        
+        # First pass: accumulate the product of per-layer max |weight| for
+        # each branch so the geometric means can be derived below.
+        for idx, layer in enumerate(model.layers[1:]):
+            wts = layer.get_weights()
+            layer_y = 0
+            layer_c = 0
+
+            if ('_y_' in layer.name):
+                layer_y = 1
+            elif ('_c_' in layer.name):
+                layer_c = 1
+
+            if wts != [] and layer_y == 1:
+                layer_prod_y *= abs(wts[0]).max()
+                layer_count_y += 1
+            elif wts != [] and layer_c == 1:
+                layer_prod_c *= abs(wts[0]).max()
+                layer_count_c += 1
+            # Output layers (names ending in Y/C) are excluded from the
+            # non-split statistic.
+            elif wts != [] and layer_y == 0 and layer_c == 0 and not (layer.name.endswith("Y") or layer.name.endswith("C")):
+                layer_prod *= abs(wts[0]).max()
+                layer_count += 1
+                
+        # Geometric means serve as the per-branch normalization targets.
+        geo_mean_y = layer_prod_y**(1/layer_count_y)
+        geo_mean_c = layer_prod_c**(1/layer_count_c)
+        geo_mean = layer_prod**(1/layer_count)
+
+        print("Non-split Geo mean :", geo_mean)
+        print("Luma Geo Mean : ", geo_mean_y)
+        print("Chroma Geo Mean : ", geo_mean_c)
+
+        # Normalize the shared (non-split) trunk: repeatedly locate the layer
+        # with the largest |weight| and push its excess factor forward (toward
+        # the outputs) or backward (toward the input) through the network.
+        start_layer, max_value = non_split_max_weight(model)
+        prev_max_value = 0
+
+        while True:
+            # Snapshot so mul_forward/mul_backward can restore on failure.
+            prev_model.set_weights(model.get_weights())
+            factor = float(max_value/geo_mean)
+
+            if factor <= 1:
+                break
+
+            first_layer = ''
+            last_layer = 'hidden_0_3d'
+
+            mul_fac, result = mul_forward(model, prev_model, start_layer, last_layer, factor, geo_mean)
+            if result == False:
+                div_fac, result = mul_backward(model, prev_model, start_layer, first_layer, factor, geo_mean)
+                if result == False:
+                    # Neither direction absorbed the full factor; retry with
+                    # the residual in the direction that absorbed more.
+                    if mul_fac > div_fac:
+                        factor /= mul_fac
+                        _, result = mul_backward(model, prev_model, start_layer, first_layer, factor, geo_mean)
+                    elif mul_fac < div_fac:
+                        factor /= div_fac
+                        _, result = mul_forward(model, prev_model, start_layer, last_layer, factor, geo_mean)
+
+                    if result == False:
+                        print_model(model)
+                        print("Error cannot reduce", start_layer, max_value)
+                        break
+
+            start_layer, max_value = non_split_max_weight(model)
+
+            # No further progress on the max weight -> trunk is converged.
+            if prev_max_value == max_value:
+                print("Non-split Normalization Complete:")
+                break
+
+            prev_max_value = max_value
+            
+        # weight normalization for the split branches: pick the overall max
+        # weight among the not-yet-converged y/c layers and rescale it toward
+        # the matching branch geometric mean, same rollback scheme as above.
+        start_layer, max_value = max_split_weight(model, y_flag, c_flag)
+        prev_max_value = 0
+
+        while True:
+            if ('_y_' in start_layer):
+
+                prev_model.set_weights(model.get_weights())
+                factor = float(max_value/geo_mean_y)
+
+                # NOTE(review): this break exits the whole loop even if the
+                # chroma branch may still need work -- confirm intended.
+                if factor <= 1:
+                    break
+
+                # NOTE(review): the forward range for a luma layer ends at a
+                # chroma-named layer ('hidden_10_c_3d'); verify these
+                # boundary names against the actual graph.
+                first_layer = 'hidden_0_3d'
+                last_layer = 'hidden_10_c_3d'
+
+                mul_fac, result = mul_forward(model, prev_model, start_layer, last_layer, factor, geo_mean_y)
+                if result == False:
+                    div_fac, result = mul_backward(model, prev_model, start_layer, first_layer, factor, geo_mean_y)
+                    if result == False:
+                        # Retry with the residual factor in the direction
+                        # that absorbed the larger share.
+                        if mul_fac > div_fac:
+                            factor /= mul_fac
+                            _, result = mul_backward(model, prev_model, start_layer, first_layer, factor, geo_mean_y)
+                        elif mul_fac < div_fac:
+                            factor /= div_fac
+                            _, result = mul_forward(model, prev_model, start_layer, last_layer, factor, geo_mean_y)
+
+                        if result == False:
+                            print_model(model)
+                            print("Error cannot reduce", start_layer, max_value)
+                            break
+
+                start_layer, max_value = max_split_weight(model, y_flag, c_flag)
+
+                # Stagnation on the luma branch -> mark it converged so
+                # max_split_weight skips it from now on.
+                if prev_max_value == max_value:
+                    print("Y Normalization Complete:")
+                    y_flag = True
+
+                prev_max_value = max_value
+
+            elif ('_c_' in start_layer):
+
+                prev_model.set_weights(model.get_weights())
+                factor = float(max_value/geo_mean_c)
+
+                if factor <= 1:
+                    break
+
+                # NOTE(review): the range for a chroma layer spans luma-named
+                # layers ('hidden_1_y_1' .. 'output_layer_Y'); verify.
+                first_layer = 'hidden_1_y_1'
+                last_layer = 'output_layer_Y'
+
+                mul_fac, result = mul_forward(model, prev_model, start_layer, last_layer, factor, geo_mean_c)
+                if result == False:
+                    div_fac, result = mul_backward(model, prev_model, start_layer, first_layer, factor, geo_mean_c)
+                    if result == False:
+                        if mul_fac > div_fac:
+                            factor /= mul_fac
+                            _, result = mul_backward(model, prev_model, start_layer, first_layer, factor, geo_mean_c)
+                        elif mul_fac < div_fac:
+                            factor /= div_fac
+                            _, result = mul_forward(model, prev_model, start_layer, last_layer, factor, geo_mean_c)
+
+                        if result == False:
+                            print_model(model)
+                            print("Error cannot reduce", start_layer, max_value)
+                            break
+
+                start_layer, max_value = max_split_weight(model, y_flag, c_flag)
+
+                if prev_max_value == max_value:
+                    print("C Normalization Complete")
+                    c_flag = True
+
+                prev_max_value = max_value
+
+            elif (start_layer == 'Completed'):
+                # print_model(model)
+                break
+
+        # bias normalization
+        start_layer, max_value = max_bias(model)
+        prev_max_value = 0
+
+        while max_value >= bias_limit:
+            
+            prev_model.set_weights(model.get_weights())
+
+            factor = float(max_value/bias_limit)
+            if factor <= 1:
+                print("\nBias Normalization Complete:")
+                break
+            first_layer = 'conv1'
+            last_layer = 'output_layer'
+
+            result = mul_forward(model, prev_model, start_layer, last_layer, factor, bias_limit)
+            if result == False:
+                result = mul_backward(model, prev_model, start_layer, first_layer, factor, bias_limit)
+                if result == False:
+                    print_model(model)
+                    print("Error cannot reduce", start_layer, max_value)
+                    break
+
+            start_layer, max_value = max_bias(model)    
+
+            if max_value == bias_limit:
+                print("\nBias Normalization Complete:")
+                # print_model(model)  
+                break
+
+            prev_max_value = max_value
+
+    elif model_type == 'x0140':
+        layer_prod          = 1.0
+        layer_count         = 0
+        # Biases must fit the int16 headroom left by the Q11 input quantizer.
+        bias_limit          = 2**(15-11)
+
+        # print_model(model)
+
+        # First pass: product of per-layer max |weight| over every weighted
+        # layer except the output layer.
+        for idx, layer in enumerate(model.layers[1:]):
+            wts = layer.get_weights()
+            if layer.name == 'output_layer':
+                continue
+
+            if wts != []:
+                layer_prod *= abs(wts[0]).max()
+                layer_count += 1
+
+        # Geometric mean is the normalization target for all layers.
+        geo_mean = layer_prod**(1/layer_count)
+
+        print("Geo Mean : ", geo_mean)
+
+        # weight normalization: repeatedly pull the largest |weight| toward
+        # the geometric mean, pushing the excess factor forward or backward
+        # with rollback (prev_model) on failure.
+        start_layer, max_value = max_weight(model)
+        prev_max_value = 0
+
+        while max_value >= geo_mean:
+            
+            # Snapshot so mul_forward/mul_backward can restore on failure.
+            prev_model.set_weights(model.get_weights())
+            factor = float(max_value/geo_mean)
+            if factor <= 1:
+                break
+            first_layer = ''
+            last_layer = 'output_layer'
+
+            mul_fac, result = mul_forward(model, prev_model, start_layer, last_layer, factor, geo_mean)
+            if result == False:
+                div_fac, result = mul_backward(model, prev_model, start_layer, first_layer, factor, geo_mean)
+                if result == False:
+                    # Retry with the residual factor in the direction that
+                    # absorbed the larger share.
+                    if mul_fac > div_fac:
+                        factor /= mul_fac
+                        _, result = mul_backward(model, prev_model, start_layer, first_layer, factor, geo_mean)
+                    elif mul_fac < div_fac:
+                        factor /= div_fac
+                        _, result = mul_forward(model, prev_model, start_layer, last_layer, factor, geo_mean)
+
+                    if result == False:
+                        print_model(model)
+                        print("Error cannot reduce", start_layer, max_value)
+                        break
+
+            start_layer, max_value = max_weight(model)
+
+            # Stagnation -> converged.
+            if prev_max_value == max_value:
+                print("Weight Normalization Complete:")
+                # print_model(model)  
+                break
+
+            prev_max_value = max_value
+
+        # bias normalization
+        start_layer, max_value = max_bias(model)
+        prev_max_value = 0
+
+        while max_value >= bias_limit:
+
+            prev_model.set_weights(model.get_weights())
+
+            factor = float(max_value/bias_limit)
+            if factor <= 1:
+                print("Bias Normalization Complete:")
+                break
+            first_layer = 'conv1'
+            last_layer = 'output_layer'
+
+            result = mul_forward(model, prev_model, start_layer, last_layer, factor, bias_limit)
+            if result == False:
+                result = mul_backward(model, prev_model, start_layer, first_layer, factor, bias_limit)
+                if result == False:
+                    print_model(model)
+                    print("Error cannot reduce", start_layer, max_value)
+                    break
+
+            start_layer, max_value = max_bias(model)
+
+            if max_value == bias_limit:
+                print("Bias Normalization Complete:")
+                # print_model(model)
+                break
+
+            prev_max_value = max_value
+
+    # Persist the normalized model (same architecture, rescaled parameters).
+    model.save(out_model_path)
\ No newline at end of file
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/q_factor_gen.py b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/q_factor_gen.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a390074271980832efb4c0bf8a2c1339a7a7c83
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/q_factor_gen.py
@@ -0,0 +1,106 @@
+from sys import argv
+
+# Assuming int16 is the required sadl format
+
+# Markers scanned for in the SADL debug log: one line per layer id, and the
+# end-of-model sentinel that stops parsing.
+word = "[INFO] id: "
+stop_word = "[INFO] == end model loading =="
+# The same log is opened twice: one handle for topology detection, one for
+# the main parsing pass.
+input_1 = open(argv[1], "r")
+input_2 = open(argv[1], "r")
+output = open(argv[2], "w")
+
+# Trailing token of the log file name; currently unused below.
+model_num = str(argv[1].split('/')[-1]).split('_')[-1]
+
+q = 11 #input
+q_bias = 11
+# Nonzero until the first Conv2D's dynamic quantizer has been emitted.
+groups = 1
+
+# Accumulates the space-separated "<id> <q>" pairs written at the end.
+string = ""
+i = 0
+model_type = ''
+
+temp = input_1.readlines()
+for idx, line in enumerate(temp):
+    if ('_y_' in line) or line.endswith('_Y') or ('_c_' in line) or line.endswith('_C'):
+        model_type = 'LC_split'
+        break
+    else:
+        model_type = 'x0140' 
+
+# Main pass: walk the SADL load log line by line and emit a quantizer value
+# per layer id. The "string" buffer is edited in place (slicing off the last
+# 2-3 chars) when a later line revises the previous layer's quantizer.
+content = input_2.readlines()
+for idx, line in enumerate(content):
+    # i counts processed lines; not used after the loop.
+    i += 1
+    if line.find(word) != -1:
+        # Line shape assumed: "[INFO] id: <id> ... <op-name>\n" -- id is the
+        # 3rd token, op name the 6th. TODO confirm against the dump format.
+        line_split = line.split(" ")
+        id = line_split[2]
+        name = line_split[5]
+        if name == "Placeholder\n":
+            # Network input: fixed input quantizer q.
+            string = string + id + " " + str(q) + " "
+        elif name == "Conv2D\n":
+            q_dynamic = 0
+            if groups >= int(1):
+                # First Conv2D: derive a dynamic quantizer from the max data
+                # value printed two lines earlier in the log.
+                data_line = content[idx - 2]
+                tensor_line = content[idx - 3]
+                data = float(data_line.split(" ")[-1])
+                # Smallest q_dynamic with 2^q_dynamic > |data|.
+                for q_dynamic in range(0, 15):
+                    if(2**q_dynamic > abs(data)):
+                        break
+                q_upd = 15 - q_dynamic
+                groups = 0
+                # Rewrite the previously emitted quantizer in place; the slice
+                # length depends on whether it printed with 1 or 2 digits.
+                if(int(q) > 9):
+                    string = string[0:-3] + str(q_upd) + " " + id + " 0 "
+                else:
+                    string = string[0:-2] + str(q_upd) + " " + id + " 0 "
+            else:
+                string = string + id + " 0 "
+        elif name == "LeakyRelu\n" or name == "Concat\n" or name == "Add\n":
+            # No quantizer entry is emitted for these ops (no-op assignment).
+            id = id
+        elif name == "BiasAdd\n":
+            # The bias quantizer is chosen from the layer named on the NEXT
+            # log line; once set, q_bias stays sticky for later BiasAdds
+            # until another match overrides it.
+            # Split model
+            if (model_type == 'LC_split') and content[idx + 1].find('hidden_1_c_1') != -1:
+                q_bias = 11
+            elif (model_type == 'LC_split') and content[idx + 1].find('hidden_2_c_1') != -1:
+                q_bias = 14
+            elif (model_type == 'LC_split') and content[idx + 1].find('hidden_10_c_1') != -1:
+                q_bias = 12
+            elif (model_type == 'LC_split') and content[idx + 1].find('output_layer_C') != -1:
+                q_bias = 13
+            elif (model_type == 'LC_split') and content[idx + 1].find('hidden_1_y_1') != -1:
+                q_bias = 11
+            elif (model_type == 'LC_split') and content[idx + 1].find('hidden_3_y_1') != -1:
+                q_bias = 12
+            elif (model_type == 'LC_split') and content[idx + 1].find('output_layer_Y') != -1:
+                q_bias = 13
+
+            # X0140 model
+            elif (model_type == 'x0140') and content[idx + 1].find('hidden_2_1') != -1:
+                q_bias = 12
+            elif (model_type == 'x0140') and content[idx + 1].find('hidden_10_1') != -1:
+                q_bias = 13
+                
+            # Vestigial guard; always replaces the prior entry's quantizer.
+            if True:
+                q_upd = q_bias
+
+                if(int(q) > 9):
+                    string = string[0:-3] + str(q_upd) + " "
+                else:
+                    string = string[0:-2] + str(q_upd) + " "
+
+        else:
+            # Const tensors: quantizer 0 when feeding a Concat, else q.
+            # NOTE(review): content[idx + 6] can raise IndexError near the
+            # end of the log -- confirm the dump always has 6 more lines.
+            if (name == "Const\n") and content[idx + 6].find("Concat") != -1:
+                q_upd = 0
+            else:
+                q_upd = q
+            string = string + id + " " + str(q_upd) + " "
+            if (name != "Const\n"):
+                print("Entered here for wrong name", name)
+    elif(line.find(stop_word)!=-1):
+        break
+    else:
+        # Track grouped convolutions announced 6 lines ahead of the id line.
+        grp_line = content[idx + 6]
+        if(grp_line.find("groups:")!= -1):
+            groups = int(grp_line.split(" ")[-1][:-1])
+
+output.write(string)
+input_1.close()
+input_2.close()
+output.close()
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/layer_const.h b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/layer_const.h
new file mode 100644
index 0000000000000000000000000000000000000000..586308c182a8e35e3929823840d8ce1785892765
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/layer_const.h
@@ -0,0 +1,154 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2023, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#pragma once
+#include <cmath>
+
+#include "layer.h"
+
+namespace sadl
+{
+namespace layers
+{
+// Const layer: holds a constant tensor (weights, biases, parameters) read
+// from the model stream at load time; apply() performs no computation since
+// m_out is already populated by loadInternal().
+template<typename T> class Const : public Layer<T>
+{
+public:
+  using Layer<T>::Layer;
+  using Layer<T>::m_out;   // to avoid this->
+  using Layer<T>::m_initDone;
+
+  virtual bool apply(std::vector<Tensor<T> *> &in) override;
+  virtual bool init(const std::vector<Tensor<T> *> &in) override;
+
+protected:
+  virtual bool              loadInternal(std::istream &file, Version v) override;
+  // Read the payload stored as type U from the stream, converting to T.
+  template<typename U> void readTensor(std::istream &file, Tensor<T> &out);
+  DUMP_MODEL_EXT;
+};
+
+// A Const layer carries its tensor from load time; there is nothing to
+// compute when the graph executes, so apply() always succeeds.
+template<typename T> bool Const<T>::apply(std::vector<Tensor<T> *> &in)
+{
+  (void) in;
+  assert(in.empty());
+  return true;
+}
+
+template<typename T> bool Const<T>::init(const std::vector<Tensor<T> *> &in)
+{
+  if (in.size() != 0)
+    return false;
+  m_initDone = true;
+  return true;
+}
+
+// Fill `out` with out.size() elements stored on disk as type U. When U is
+// already the tensor's element type the bytes go straight into the tensor
+// storage; otherwise they are staged in a temporary buffer and converted
+// element by element.
+template<typename T> template<typename U> void Const<T>::readTensor(std::istream &file, Tensor<T> &out)
+{
+  if (!std::is_same<T, U>::value)
+  {
+    std::vector<U> raw(out.size());
+    file.read((char *) raw.data(), sizeof(U) * raw.size());
+    const int n = (int) raw.size();
+    for (int i = 0; i < n; ++i)
+      out[i] = static_cast<T>(raw[i]);
+  }
+  else
+  {
+    file.read((char *) out.data(), sizeof(T) * out.size());
+  }
+}
+
+// Deserialize the constant tensor. Wire format: nb_dims (int32), each dim
+// (int32), internal type tag (int32), then for integer types the quantizer,
+// then the raw payload. Returns false on any malformed field.
+template<typename T> bool Const<T>::loadInternal(std::istream &file, Version v)
+{
+  int32_t x = 0;
+  file.read((char *) &x, sizeof(x));
+  if (x <= 0 || x > Dimensions::MaxDim)
+  {
+    std::cerr << "[ERROR] invalid nb of dimensions: " << x << std::endl;
+    return false;
+  }
+  Dimensions d;
+  d.resize(x);
+  for (int k = 0; k < d.size(); ++k)
+  {
+    file.read((char *) &x, sizeof(x));
+    d[k] = x;
+  }
+
+  if (d.nbElements() >= Tensor<T>::kMaxSize)
+  {
+    std::cerr << "[ERROR] tensor too large? " << d.nbElements() << std::endl;
+    return false;
+  }
+  m_out.resize(d);
+  SADL_DBG(std::cout << "  - tensor: " << m_out.dims() << std::endl);
+
+  file.read((char *) &x, sizeof(x));
+
+  // cannot check internal type because tensor also used by reshape etc.
+  switch (x)
+  {
+  case TensorInternalType::Int32:
+    file.read((char *) &m_out.quantizer, sizeof(m_out.quantizer));
+    readTensor<int32_t>(file, m_out);
+    break;
+  case TensorInternalType::Float:
+    readTensor<float>(file, m_out);
+    break;
+  case TensorInternalType::Int16:
+    file.read((char *) &m_out.quantizer, sizeof(m_out.quantizer));
+    readTensor<int16_t>(file, m_out);
+    break;
+  default:
+    std::cerr << "[ERROR] unknown internal type " << x << std::endl;
+    return false;
+  }
+
+  // Debug-only scan for the entry with the largest magnitude; hoisted inside
+  // SADL_DBG so release builds skip the O(n) pass entirely (the original ran
+  // it unconditionally even though only the debug print consumed it).
+  SADL_DBG(
+    if (m_out.size() > 0)
+    {
+      float max_tensor = m_out[0];
+      for (int k = 1; k < m_out.size(); ++k)
+        if (fabs(max_tensor) < fabs(m_out[k]))
+          max_tensor = m_out[k];
+      std::cout << "  - data: " << max_tensor << std::endl;
+    }
+  );
+  SADL_DBG(std::cout << "  - quantizer: " << m_out.quantizer << std::endl);
+  // SADL_DBG(std::cout<<m_out<<std::endl;)
+  return true;
+}
+
+}   // namespace layers
+}   // namespace sadl
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/layer_conv2d_1x1.h b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/layer_conv2d_1x1.h
new file mode 100644
index 0000000000000000000000000000000000000000..db3de6f1ede392a46a04dcea7305d66feac1e4ed
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/layer_conv2d_1x1.h
@@ -0,0 +1,444 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2023, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#pragma once
+#include <cmath>
+#include "layer.h"
+#if __AVX2__
+#include <immintrin.h>
+#endif
+
+namespace sadl
+{
+namespace layers
+{
+// ////////////////////////////////////////////////////////////////////////////////////////////////////////
+// 1x1
+// ////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Dispatch a strided 1x1 convolution to the kernel specialized for the input
+// depth: SIMD variants for multiples of 8/16/32 when AVX2 is available, the
+// depth-templated scalar kernel otherwise, and the fully generic fallback
+// for unlisted depths.
+template<typename T> template<int s_h, int s_w> void Conv2D<T>::conv2d_1x1_s_dispatch(const Tensor<T> &A, const Tensor<T> &kernel)
+{
+#if __AVX2__
+#define CONV_MOD8 simd8_conv2d_1x1_s_d
+#define CONV_MOD16 simd16_conv2d_1x1_s_d
+#define CONV_MOD32 simd32_conv2d_1x1_s_d
+#else
+#define CONV_MOD8 conv2d_1x1_s_d
+#define CONV_MOD16 conv2d_1x1_s_d
+#define CONV_MOD32 conv2d_1x1_s_d
+#endif
+  const int in_D{ A.dims()[3] };
+  switch (in_D)
+  {
+  case 1:
+    conv2d_1x1_s_d<1, s_h, s_w>(A, kernel);
+    break;
+  case 2:
+    conv2d_1x1_s_d<2, s_h, s_w>(A, kernel);
+    break;
+  case 4:
+    conv2d_1x1_s_d<4, s_h, s_w>(A, kernel);
+    break;
+  case 8:
+    CONV_MOD8<8, s_h, s_w>(A, kernel);
+    break;
+  case 16:
+    CONV_MOD16<16, s_h, s_w>(A, kernel);
+    break;
+  case 24:
+    CONV_MOD8<24, s_h, s_w>(A, kernel);
+    break;
+  case 32:
+    CONV_MOD32<32, s_h, s_w>(A, kernel);
+    break;
+  case 48:
+    CONV_MOD16<48, s_h, s_w>(A, kernel);
+    break;
+  case 64:
+    CONV_MOD32<64, s_h, s_w>(A, kernel);
+    break;
+  case 72:
+    // better to do 64 and then 8
+    CONV_MOD8<72, s_h, s_w>(A, kernel);
+    break;
+  case 96:
+    CONV_MOD32<96, s_h, s_w>(A, kernel);
+    break;
+  case 128:
+    CONV_MOD32<128, s_h, s_w>(A, kernel);
+    break;
+  case 160:
+    CONV_MOD32<160, s_h, s_w>(A, kernel);
+    break;
+  case 384:
+    CONV_MOD32<384, s_h, s_w>(A, kernel);
+    break;
+  case 480:
+    CONV_MOD32<480, s_h, s_w>(A, kernel);
+    break;
+  default:
+    // Depth not specialized: generic scalar path.
+    conv2d_1x1_s<s_h, s_w>(A, kernel);
+    break;
+  }
+#undef CONV_MOD8
+#undef CONV_MOD16
+#undef CONV_MOD32
+}
+
+// Fully generic strided 1x1 convolution (runtime input depth). Accumulates
+// in the wide ComputationType, then quantizes by kernel.quantizer + m_q and
+// saturates to T.
+template<typename T> template<int s_h, int s_w> void Conv2D<T>::conv2d_1x1_s(const Tensor<T> &A, const Tensor<T> &kernel)
+{
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  const int     in_D{ A.dims()[3] };
+  const int     nb_filters{ kernel.dims()[2] };
+  constexpr int half_size_h{ 0 };
+  constexpr int half_size_w{ 0 };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+
+#if DEBUG_SIMD && __AVX2__
+  std::cout << "\n[WARN] generic version conv1x1 inD=" << in_D << " outD=" << nb_filters << " s=[" << s_w << ' ' << s_h << "] " << in_H << 'x' << in_W << " "
+            << in_D * kernel.dims()[0] * kernel.dims()[1] * nb_filters * (in_H / s_h) * (in_W / s_w) / 1000 << " kMAC" << std::endl;
+#endif
+  constexpr int im_nb = 0;
+  const int     shift = kernel.quantizer + m_q;
+  for (int im_i = start_h; im_i < in_H; im_i += s_h)
+  {
+    for (int im_j = start_w; im_j < in_W; im_j += s_w)
+    {
+      for (int filter_nb = 0; filter_nb < nb_filters; ++filter_nb)
+      {
+        typename ComputationType<T>::type x = 0;
+        for (int filter_d = 0; filter_d < in_D; ++filter_d)
+        {
+          x += (typename ComputationType<T>::type) A(im_nb, im_i, im_j, filter_d) * kernel(0, 0, filter_nb, filter_d);
+          COUNTERS_MAC(kernel(0, 0, filter_nb, filter_d));
+        }
+        ComputationType<T>::quantize(x, shift);
+        COUNTERS(x);
+        SATURATE(x);
+        // Bug fix: the output row index must be divided by the vertical
+        // stride s_h and the column index by s_w (they were swapped here),
+        // matching conv2d_1x1_s_d and the SIMD variants.
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter_nb) = static_cast<T>(x);
+      }
+    }
+  }
+}
+
+// Scalar strided 1x1 convolution with compile-time input depth in_D, letting
+// the compiler unroll/vectorize the inner channel loop. Accumulates in the
+// wide ComputationType, then quantizes and saturates to T.
+template<typename T> template<int in_D, int s_h, int s_w> void Conv2D<T>::conv2d_1x1_s_d(const Tensor<T> &A, const Tensor<T> &kernel)
+{
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  const int     nb_filters{ kernel.dims()[2] };
+  constexpr int half_size_h{ 0 };
+  constexpr int half_size_w{ 0 };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+
+#if DEBUG_SIMD && __AVX2__
+  std::cout << "\n[WARN] generic version conv 1x1 inD=" << in_D << " outD=" << nb_filters << " s=[" << s_w << ' ' << s_h << "]  " << in_H << 'x' << in_W << " "
+            << in_D * kernel.dims()[0] * kernel.dims()[1] * nb_filters * (in_H / s_h) * (in_W / s_w) / 1000 << " kMAC" << std::endl;
+#endif
+
+  constexpr int im_nb = 0;
+  // Right shift applied to each accumulated dot product.
+  const int     shift = kernel.quantizer + m_q;
+  for (int im_i = start_h; im_i < in_H; im_i += s_h)
+  {
+    for (int im_j = start_w; im_j < in_W; im_j += s_w)
+    {
+      for (int filter_nb = 0; filter_nb < nb_filters; ++filter_nb)
+      {
+        typename ComputationType<T>::type x = 0;
+        for (int filter_d = 0; filter_d < in_D; ++filter_d)
+        {
+          x += (typename ComputationType<T>::type) A(im_nb, im_i, im_j, filter_d) * kernel(0, 0, filter_nb, filter_d);
+          COUNTERS_MAC(kernel(0, 0, filter_nb, filter_d));
+        }
+        ComputationType<T>::quantize(x, shift);
+        COUNTERS(x);
+        SATURATE(x);
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter_nb) = static_cast<T>(x);
+      }
+    }
+  }
+}
+
+#if __AVX2__
+// ////////////////////////////////////////////////////////////////////////////////////////////////////////
+// 1x1
+// ////////////////////////////////////////////////////////////////////////////////////////////////////////
+// AVX2 float kernel: processes 8 input channels per iteration with aligned
+// 256-bit loads (_mm256_load_ps requires 32-byte-aligned tensor storage) and
+// an FMA accumulate when available; the 8 lanes are reduced with sum8_float.
+template<> template<int in_D, int s_h, int s_w> inline void Conv2D<float>::simd8_conv2d_1x1_s_d(const Tensor<float> &A, const Tensor<float> &kernel)
+{
+  static_assert(in_D % 8 == 0, "Should be used with mod8 filters.");
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  const int     nb_filters{ kernel.dims()[2] };
+  constexpr int half_size_h{ 0 };
+  constexpr int half_size_w{ 0 };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+#if DEBUG_SIMD && __AVX512F__
+  if (in_D >= 16)
+  {
+    std::cout << "\n[WARN] suboptimal SIMD8 version conv 1x1 inD=" << in_D << " outD=" << nb_filters << " s=[" << s_w << ' ' << s_h << "]  " << in_H << 'x'
+              << in_W << " " << in_D * kernel.dims()[0] * kernel.dims()[1] * nb_filters * (in_H / s_h) * (in_W / s_w) / 1000 << " kMAC" << std::endl;
+  }
+#endif
+  constexpr int im_nb = 0;
+  for (int im_i = start_h; im_i < in_H; im_i += s_h)
+  {
+    for (int im_j = start_w; im_j < in_W; im_j += s_w)
+    {
+      for (int filter = 0; filter < nb_filters; ++filter)
+      {
+        __m256 s = _mm256_setzero_ps();
+        for (int filter_d = 0; filter_d < in_D; filter_d += 8)
+        {
+          const float *kptr = kernel.addr(0, 0, filter, filter_d);
+          const float *aptr = A.addr(im_nb, im_i, im_j, filter_d);
+          const __m256 k0   = _mm256_load_ps(kptr);
+#if __FMA__
+          s = _mm256_fmadd_ps(k0, _mm256_load_ps(aptr), s);
+#else
+          const __m256 m0 = _mm256_mul_ps(k0, _mm256_load_ps(aptr));
+          s               = _mm256_add_ps(s, m0);
+          // s + m0; // s = _mm256_hadd_ps(s, m0);
+#endif
+        }
+        // Horizontal reduction of the 8 partial sums; no quantization for
+        // the float specialization.
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter) = sum8_float(s);
+      }
+    }
+  }
+}
+
+#if __AVX512F__
+// AVX-512 float kernel: same structure as the SIMD8 variant but 16 input
+// channels per iteration with aligned 512-bit loads, reduced by sum16_float.
+template<> template<int in_D, int s_h, int s_w> inline void Conv2D<float>::simd16_conv2d_1x1_s_d(const Tensor<float> &A, const Tensor<float> &kernel)
+{
+  static_assert(in_D % 16 == 0, "Should be used with mod16 filters.");
+  constexpr int im_nb = 0;
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  const int     nb_filters{ kernel.dims()[2] };
+  constexpr int half_size_h{ 0 };
+  constexpr int half_size_w{ 0 };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+  for (int im_i = start_h; im_i < in_H; im_i += s_h)
+  {
+    for (int im_j = start_w; im_j < in_W; im_j += s_w)
+    {
+      for (int filter = 0; filter < nb_filters; ++filter)
+      {
+        __m512 s = _mm512_setzero_ps();
+        for (int filter_d = 0; filter_d < in_D; filter_d += 16)
+        {
+          const float *kptr = kernel.addr(0, 0, filter, filter_d);
+          const float *aptr = A.addr(im_nb, im_i, im_j, filter_d);
+          const __m512 k0   = _mm512_load_ps(kptr);
+#if __FMA__
+          s = _mm512_fmadd_ps(k0, _mm512_load_ps(aptr), s);
+#else
+          const __m512 m0 = _mm512_mul_ps(k0, _mm512_load_ps(aptr));
+          s               = _mm512_add_ps(s, m0);
+#endif
+        }
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter) = sum16_float(s);
+      }
+    }
+  }
+}
+#endif
+
+// int16
+// 1x1 strided convolution, int16 input/kernel, 128-bit path (8 lanes).
+// Requires in_D % 8 == 0. _mm_madd_epi16 accumulates pairs of 16-bit products
+// into 32-bit lanes; sum32_int16 reduces them, then the result is shifted back
+// by kernel.quantizer + m_q (optionally with half-up rounding) and saturated.
+// NOTE(review): _mm_load_si128 is an aligned load -- assumes Tensor data is
+// 16-byte aligned; confirm against the Tensor allocator.
+template<> template<int in_D, int s_h, int s_w> void Conv2D<int16_t>::simd8_conv2d_1x1_s_d(const Tensor<int16_t> &A, const Tensor<int16_t> &kernel)
+{   // should be sse42
+#if DEBUG_COUNTERS || SATURATE_RESULT
+  using T = int16_t;
+#endif
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  const int     nb_filters{ kernel.dims()[2] };
+  constexpr int half_size_h{ 0 };
+  constexpr int half_size_w{ 0 };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+  // Fix: the message claimed "mod16" while the check is in_D % 8.
+  static_assert(in_D % 8 == 0, "Should be used with mod8 filters.");
+#if DEBUG_SIMD && __AVX2__
+  if (in_D >= 8)
+  {
+    // Fix: this is the 1x1 kernel; the warning previously said "conv 3x3".
+    std::cout << "\n[WARN] suboptimal SIMD8 version conv 1x1 inD=" << in_D << " outD=" << nb_filters << " s=[" << s_w << ' ' << s_h << "]  " << in_H << 'x'
+              << in_W << " " << in_D * kernel.dims()[0] * kernel.dims()[1] * nb_filters * (in_H / s_h) * (in_W / s_w) / 1000 << " kMAC" << std::endl;
+  }
+#endif
+  constexpr int im_nb = 0;
+  const int     shift = kernel.quantizer + m_q;
+  for (int im_i = start_h; im_i < in_H; im_i += s_h)
+  {
+    for (int im_j = start_w; im_j < in_W; im_j += s_w)
+    {
+      for (int filter = 0; filter < nb_filters; ++filter)
+      {
+        __m128i s = _mm_setzero_si128();
+        for (int filter_d = 0; filter_d < in_D; filter_d += 8)
+        {
+          const __m128i *kptr = (const __m128i *) kernel.addr(0, 0, filter, filter_d);
+          const __m128i  k0   = _mm_load_si128(kptr);   // or loadu ?
+          const __m128i *aptr = (const __m128i *) A.addr(im_nb, im_i, im_j, filter_d);
+          const __m128i  v0   = _mm_load_si128(aptr);
+
+          const __m128i mad0 = _mm_madd_epi16(k0, v0);   // res in si32
+          s                  = _mm_add_epi32(s, mad0);
+        }
+#if ENABLE_HALF_ROUND
+        typename ComputationType<T>::type z = shift ? ((sum32_int16(s) + (1 << (shift - 1))) >> shift) : sum32_int16(s);
+#else
+        typename ComputationType<T>::type z = (sum32_int16(s) >> shift);
+#endif
+        SATURATE(z);
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter) = static_cast<int16_t>(z);
+      }
+    }
+  }
+}
+
+// 1x1 strided convolution, int16, 256-bit AVX2 path (16 lanes).
+// Requires in_D % 16 == 0. _mm256_madd_epi16 accumulates pairs of 16-bit
+// products into 32-bit lanes; sum32_int16 reduces them, then the result is
+// shifted back by kernel.quantizer + m_q (optionally half-up rounded) and
+// saturated before the int16 store.
+template<> template<int in_D, int s_h, int s_w> void Conv2D<int16_t>::simd16_conv2d_1x1_s_d(const Tensor<int16_t> &A, const Tensor<int16_t> &kernel)
+{
+#if DEBUG_COUNTERS || SATURATE_RESULT
+  using T = int16_t;
+#endif
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  const int     nb_filters{ kernel.dims()[2] };
+  constexpr int half_size_h{ 0 };
+  constexpr int half_size_w{ 0 };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+  static_assert(in_D % 16 == 0, "Should be used with mod16 filters.");
+#if DEBUG_SIMD && __AVX512BW__
+  if (in_D >= 32)
+  {
+    // Fix: this is the 1x1 kernel; the warning previously said "conv 3x3".
+    std::cout << "\n[WARN] suboptimal SIMD16 version conv 1x1 inD=" << in_D << " outD=" << nb_filters << " s=[" << s_w << ' ' << s_h << "]  " << in_H << 'x'
+              << in_W << " " << in_D * kernel.dims()[0] * kernel.dims()[1] * nb_filters * (in_H / s_h) * (in_W / s_w) / 1000 << " kMAC" << std::endl;
+  }
+#endif
+  constexpr int im_nb = 0;
+  const int     shift = kernel.quantizer + m_q;
+  for (int im_i = start_h; im_i < in_H; im_i += s_h)
+  {
+    for (int im_j = start_w; im_j < in_W; im_j += s_w)
+    {
+      for (int filter = 0; filter < nb_filters; ++filter)
+      {
+        __m256i s = _mm256_setzero_si256();
+        for (int filter_d = 0; filter_d < in_D; filter_d += 16)
+        {
+          const __m256i *kptr = (const __m256i *) kernel.addr(0, 0, filter, filter_d);
+          const __m256i  k0   = _mm256_load_si256(kptr);   // or loadu ?
+          const __m256i *aptr = (const __m256i *) A.addr(im_nb, im_i, im_j, filter_d);
+          const __m256i  v0   = _mm256_load_si256(aptr);
+
+          const __m256i mad0 = _mm256_madd_epi16(k0, v0);   // res in si32
+          s                  = _mm256_add_epi32(s, mad0);
+        }
+#if ENABLE_HALF_ROUND
+        typename ComputationType<T>::type z = shift ? ((sum32_int16(s) + (1 << (shift - 1))) >> shift) : sum32_int16(s);
+#else
+        typename ComputationType<T>::type z = (sum32_int16(s) >> shift);
+#endif
+        SATURATE(z);
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter) = static_cast<int16_t>(z);
+      }
+    }
+  }
+}
+
+#if __AVX512BW__
+// 1x1 strided convolution, int16, AVX-512BW path (32 lanes).
+// Requires in_D % 32 == 0. _mm512_madd_epi16 yields 16x32-bit partial sums,
+// reduced with _mm512_reduce_add_epi32 before the quantization shift.
+template<> template<int in_D, int s_h, int s_w> void Conv2D<int16_t>::simd32_conv2d_1x1_s_d(const Tensor<int16_t> &A, const Tensor<int16_t> &kernel)
+{
+  static_assert(in_D % 32 == 0, "Should be used with mod32 filters.");
+  using T = int16_t;
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  const int     nb_filters{ kernel.dims()[2] };
+  constexpr int half_size_h{ 0 };
+  constexpr int half_size_w{ 0 };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+  constexpr int im_nb = 0;
+  const int     shift = kernel.quantizer + m_q;
+  for (int im_i = start_h; im_i < in_H; im_i += s_h)
+  {
+    for (int im_j = start_w; im_j < in_W; im_j += s_w)
+    {
+      for (int filter = 0; filter < nb_filters; ++filter)
+      {
+        __m512i s = _mm512_setzero_si512();
+        for (int filter_d = 0; filter_d < in_D; filter_d += 32)
+        {
+          const __m512i *kptr = (const __m512i *) kernel.addr(0, 0, filter, filter_d);
+          const __m512i  k0   = _mm512_load_si512(kptr);
+          const __m512i *aptr = (const __m512i *) A.addr(im_nb, im_i, im_j, filter_d);
+          const __m512i  v0   = _mm512_load_si512(aptr);
+
+          const __m512i mad0 = _mm512_madd_epi16(k0, v0);   // res in si32
+          s                  = _mm512_add_epi32(s, mad0);
+        }
+#if ENABLE_HALF_ROUND
+        typename ComputationType<int32_t>::type z = shift ? ((_mm512_reduce_add_epi32(s) + (1 << (shift - 1))) >> shift) : _mm512_reduce_add_epi32(s);
+#else
+        typename ComputationType<int32_t>::type z = (_mm512_reduce_add_epi32(s) >> shift);
+#endif
+        COUNTERS(z);
+        SATURATE(z);
+        // Fix: cast explicitly like the SIMD8/SIMD16 variants -- m_out stores
+        // int16_t while z is a wider accumulator (implicit narrowing before).
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter) = static_cast<int16_t>(z);
+      }
+    }
+  }
+}
+#endif
+#endif
+
+}   // namespace layers
+}   // namespace sadl
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/layer_conv2d_3x3.h b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/layer_conv2d_3x3.h
new file mode 100644
index 0000000000000000000000000000000000000000..36d5c040af14e98470418b9c9f7aacfcc3c3b4d3
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/layer_conv2d_3x3.h
@@ -0,0 +1,787 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2023, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#pragma once
+#include <cmath>
+#include "layer.h"
+#if __AVX2__
+#include <immintrin.h>
+#endif
+
+namespace sadl
+{
+namespace layers
+{
+// ////////////////////////////////////////////////////////////////////////////////////////////////////////
+// 3x3
+// ////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Border ("peel") pass of the strided 3x3 convolution: computes the output
+// samples on the image edges (corners, left/right columns, top/bottom rows)
+// where the 3x3 window would read outside the input. The interior is handled
+// by conv2d_3x3_s_core / the SIMD variants. Kernel taps falling outside the
+// image are simply skipped (zero padding behavior).
+template<typename T> template<int s_h, int s_w> void Conv2D<T>::conv2d_3x3_s_peel(const Tensor<T> &A, const Tensor<T> &kernel)
+{
+  constexpr int im_nb      = 0;
+  const int     shift      = kernel.quantizer + m_q;
+  constexpr int ihalf_size = 1;   // 3x3 kernel radius
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  const int     nb_filters{ kernel.dims()[2] };
+  constexpr int half_size_h{ ihalf_size };
+  constexpr int half_size_w{ ihalf_size };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+  const int     in_D{ A.dims()[3] };
+  for (int filter_nb = 0; filter_nb < nb_filters; ++filter_nb)
+  {
+    // corners
+    {
+      int  im_i;
+      int  im_j;
+      // Accumulate one output sample at (im_i, im_j) using only the kernel
+      // taps in [i0,i1]x[j0,j1]; captures im_i/im_j by reference so callers
+      // position the window before invoking.
+      auto loop_with_cond = [&, filter_nb, shift](int i0, int i1, int j0, int j1)
+      {
+        typename ComputationType<T>::type x = 0;
+        for (int filter_i = i0; filter_i <= i1; ++filter_i)
+        {
+          for (int filter_j = j0; filter_j <= j1; ++filter_j)
+          {
+            for (int filter_d = 0; filter_d < in_D; ++filter_d)
+            {
+              int ii = im_i + filter_i;
+              int jj = im_j + filter_j;
+              int ki = ihalf_size + filter_i;
+              int kj = ihalf_size + filter_j;
+              x += (typename ComputationType<T>::type) A(im_nb, ii, jj, filter_d) * kernel(ki, kj, filter_nb, filter_d);
+              COUNTERS_MAC(kernel(ki, kj, filter_nb, filter_d));
+            }
+          }
+        }
+        ComputationType<T>::quantize(x, shift);
+        COUNTERS(x);
+        SATURATE(x);
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter_nb) = static_cast<T>(x);
+      };
+
+      im_j = start_w;
+      if (im_j < in_W)
+      {   // left side
+        im_i = start_h;
+        if (im_i < in_H)
+        {   // top left corner
+          loop_with_cond(-start_h, ihalf_size, -start_w, ihalf_size);
+        }
+        // Last stride-aligned row position inside the image.
+        im_i = ((in_H - ihalf_size - start_h) / s_h) * s_h + start_h;
+        if (im_i > 0 && im_i < in_H && im_i != start_h)
+        {   // bottom left corner
+          const int end_i = (im_i + 1 < in_H) ? 1 : 0;
+          loop_with_cond(-ihalf_size, end_i, -start_w, ihalf_size);
+        }
+      }
+
+      im_j            = ((in_W - ihalf_size - start_w) / s_w) * s_w + start_w;
+      const int end_j = (im_j + 1 < in_W) ? 1 : 0;
+      if (im_j > 0 && im_j < in_W && im_j != start_w)
+      {   // right side
+        im_i = start_h;
+        if (im_i < in_H)
+        {   // top right corner
+          loop_with_cond(-start_h, ihalf_size, -ihalf_size, end_j);
+        }
+
+        im_i = ((in_H - ihalf_size - start_h) / s_h) * s_h + start_h;
+        if (im_i > 0 && im_i < in_H && im_i != start_h)
+        {   // bottom right corner
+          const int end_i = (im_i + 1 < in_H) ? 1 : 0;
+          loop_with_cond(-ihalf_size, end_i, -ihalf_size, end_j);
+        }
+      }
+    }
+
+    // vertical borders
+    {
+      for (int im_i = start_h + s_h; im_i < in_H - ihalf_size; im_i += s_h)
+      {
+        int im_j = start_w;   // can be only 0 or 1
+        if (im_j < in_W)
+        {   // left side
+          typename ComputationType<T>::type x = 0;
+          for (int filter_i = -ihalf_size; filter_i <= ihalf_size; ++filter_i)
+          {
+            // -start_w clips the taps that would fall left of column 0.
+            for (int filter_j = -start_w; filter_j <= ihalf_size; ++filter_j)
+            {
+              for (int filter_d = 0; filter_d < in_D; ++filter_d)
+              {
+                int ii = im_i + filter_i;
+                int jj = im_j + filter_j;
+                int ki = ihalf_size + filter_i;
+                int kj = ihalf_size + filter_j;
+                x += (typename ComputationType<T>::type) A(im_nb, ii, jj, filter_d) * kernel(ki, kj, filter_nb, filter_d);
+                COUNTERS_MAC(kernel(ki, kj, filter_nb, filter_d));
+              }
+            }
+          }
+          ComputationType<T>::quantize(x, shift);
+          COUNTERS(x);
+          SATURATE(x);
+          m_out(im_nb, im_i / s_h, im_j / s_w, filter_nb) = static_cast<T>(x);
+        }
+
+        im_j = ((in_W - ihalf_size - start_w) / s_w) * s_w + start_w;
+        if (im_j > 0 && im_j < in_W && im_j != start_w)
+        {   // right side
+          typename ComputationType<T>::type x          = 0;
+          const int                         end_filter = (im_j + 1) < in_W ? 1 : 0;
+          for (int filter_i = -ihalf_size; filter_i <= ihalf_size; ++filter_i)
+          {
+            for (int filter_j = -ihalf_size; filter_j <= end_filter; ++filter_j)
+            {
+              for (int filter_d = 0; filter_d < in_D; ++filter_d)
+              {
+                int ii = im_i + filter_i;
+                int jj = im_j + filter_j;
+                int ki = ihalf_size + filter_i;
+                int kj = ihalf_size + filter_j;
+                x += (typename ComputationType<T>::type) A(im_nb, ii, jj, filter_d) * kernel(ki, kj, filter_nb, filter_d);
+                COUNTERS_MAC(kernel(ki, kj, filter_nb, filter_d));
+              }
+            }
+          }
+          ComputationType<T>::quantize(x, shift);
+          COUNTERS(x);
+          SATURATE(x);
+          m_out(im_nb, im_i / s_h, im_j / s_w, filter_nb) = static_cast<T>(x);
+        }
+      }
+    }
+    {
+      // horizontal borders
+      for (int im_j = s_w + start_w; im_j < in_W - ihalf_size; im_j += s_w)
+      {
+        int im_i = start_h;   // 0 or 1 -> adapt filter start
+        if (im_i < in_H)
+        {   // top line
+          typename ComputationType<T>::type x = 0;
+          for (int filter_i = -start_h; filter_i <= ihalf_size; ++filter_i)
+          {
+            for (int filter_j = -ihalf_size; filter_j <= ihalf_size; ++filter_j)
+            {
+              for (int filter_d = 0; filter_d < in_D; ++filter_d)
+              {
+                int ii = im_i + filter_i;
+                int jj = im_j + filter_j;
+                int ki = ihalf_size + filter_i;
+                int kj = ihalf_size + filter_j;
+                x += (typename ComputationType<T>::type) A(im_nb, ii, jj, filter_d) * kernel(ki, kj, filter_nb, filter_d);
+                COUNTERS_MAC(kernel(ki, kj, filter_nb, filter_d));
+              }
+            }
+          }
+
+          ComputationType<T>::quantize(x, shift);
+          COUNTERS(x);
+          SATURATE(x);
+          m_out(im_nb, im_i / s_h, im_j / s_w, filter_nb) = static_cast<T>(x);
+        }
+        im_i = ((in_H - ihalf_size - start_h) / s_h) * s_h + start_h;
+        if (im_i > 0 && im_i < in_H && im_i != start_h)
+        {   // bottom line
+          typename ComputationType<T>::type x          = 0;
+          const int                         end_filter = (im_i + 1) < in_H ? 1 : 0;
+          for (int filter_i = -ihalf_size; filter_i <= end_filter; ++filter_i)
+          {
+            for (int filter_j = -ihalf_size; filter_j <= ihalf_size; ++filter_j)
+            {
+              for (int filter_d = 0; filter_d < in_D; ++filter_d)
+              {
+                int ii = im_i + filter_i;
+                int jj = im_j + filter_j;
+                int ki = ihalf_size + filter_i;
+                int kj = ihalf_size + filter_j;
+                x += (typename ComputationType<T>::type) A(im_nb, ii, jj, filter_d) * kernel(ki, kj, filter_nb, filter_d);
+                COUNTERS_MAC(kernel(ki, kj, filter_nb, filter_d));
+              }
+            }
+          }
+          ComputationType<T>::quantize(x, shift);
+          COUNTERS(x);
+          SATURATE(x);
+          m_out(im_nb, im_i / s_h, im_j / s_w, filter_nb) = static_cast<T>(x);
+        }
+      }
+    }
+  }   // filter_nb
+}
+
+template<typename T> template<int s_h, int s_w> void Conv2D<T>::conv2d_3x3_s_core(const Tensor<T> &A, const Tensor<T> &kernel)
+{
+  // Generic (non-SIMD, runtime-depth) 3x3 convolution over the interior of
+  // the image; border samples are produced separately by conv2d_3x3_s_peel.
+  constexpr int im_nb     = 0;   // batch of one
+  constexpr int half_size = 1;   // 3x3 kernel -> radius 1
+  const int     nb_filters{ kernel.dims()[2] };
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  const int     in_D{ A.dims()[3] };
+  const int     top     = m_pads[0];
+  const int     left    = m_pads[1];
+  const int     start_h = half_size - top;
+  const int     start_w = half_size - left;
+  const int     shift   = kernel.quantizer + m_q;
+#if DEBUG_SIMD && __AVX2__
+  std::cout << "\n[WARN] generic version conv 3x3 inD=" << in_D << " outD=" << nb_filters << " s=[" << s_w << ' ' << s_h << "]  " << in_H << 'x' << in_W << " "
+            << in_D * kernel.dims()[0] * kernel.dims()[1] * nb_filters * (in_H / s_h) * (in_W / s_w) / 1000 << " kMAC" << std::endl;
+#endif
+  // First interior position must not read before row/column 0.
+  assert(start_h + s_h - half_size >= 0);
+  assert(start_w + s_w - half_size >= 0);
+  for (int row = start_h + s_h; row < in_H - half_size; row += s_h)
+  {
+    for (int col = start_w + s_w; col < in_W - half_size; col += s_w)
+    {
+      for (int f = 0; f < nb_filters; ++f)
+      {
+        typename ComputationType<T>::type acc = 0;
+        for (int di = -half_size; di <= half_size; ++di)
+        {
+          for (int dj = -half_size; dj <= half_size; ++dj)
+          {
+            for (int d = 0; d < in_D; ++d)
+            {
+              acc += (typename ComputationType<T>::type) A(im_nb, row + di, col + dj, d) * kernel(half_size + di, half_size + dj, f, d);
+              COUNTERS_MAC(kernel(half_size + di, half_size + dj, f, d));
+            }
+          }
+        }
+        ComputationType<T>::quantize(acc, shift);
+        COUNTERS(acc);
+        SATURATE(acc);
+        m_out(im_nb, row / s_h, col / s_w, f) = static_cast<T>(acc);
+      }
+    }
+  }
+}
+
+template<typename T> template<int s_h, int s_w> void Conv2D<T>::conv2d_3x3_s_core_dispatch(const Tensor<T> &A, const Tensor<T> &kernel)
+{
+  // Route to a depth-specialized 3x3 kernel. With AVX2 the SIMD variants are
+  // selected (8/16/32-lane depth groups); otherwise all known depths fall
+  // back to the templated scalar core. Unknown depths use the fully generic
+  // runtime-depth loop.
+#if __AVX2__
+#define DISPATCH_MOD8 simd8_conv2d_3x3_s_d
+#define DISPATCH_MOD16 simd16_conv2d_3x3_s_d
+#define DISPATCH_MOD32 simd32_conv2d_3x3_s_d
+#else
+#define DISPATCH_MOD8 conv2d_3x3_s_d_core
+#define DISPATCH_MOD16 conv2d_3x3_s_d_core
+#define DISPATCH_MOD32 conv2d_3x3_s_d_core
+#endif
+  const int depth = A.dims()[3];
+  switch (depth)
+  {
+  case 1: conv2d_3x3_s_d_core<1, s_h, s_w>(A, kernel); break;
+  case 2: conv2d_3x3_s_d_core<2, s_h, s_w>(A, kernel); break;
+  case 4: conv2d_3x3_s_d_core<4, s_h, s_w>(A, kernel); break;
+  case 8: DISPATCH_MOD8<8, s_h, s_w>(A, kernel); break;
+  case 16: DISPATCH_MOD16<16, s_h, s_w>(A, kernel); break;
+  case 24: DISPATCH_MOD8<24, s_h, s_w>(A, kernel); break;
+  case 32: DISPATCH_MOD32<32, s_h, s_w>(A, kernel); break;
+  case 48: DISPATCH_MOD16<48, s_h, s_w>(A, kernel); break;
+  case 64: DISPATCH_MOD32<64, s_h, s_w>(A, kernel); break;
+  case 72: DISPATCH_MOD8<72, s_h, s_w>(A, kernel); break;   // could be split as 64 then 8
+  case 96: DISPATCH_MOD32<96, s_h, s_w>(A, kernel); break;
+  case 128: DISPATCH_MOD32<128, s_h, s_w>(A, kernel); break;
+  default: conv2d_3x3_s_core<s_h, s_w>(A, kernel); break;
+  }
+#undef DISPATCH_MOD8
+#undef DISPATCH_MOD16
+#undef DISPATCH_MOD32
+}
+
+template<typename T> template<int in_D, int s_h, int s_w> void Conv2D<T>::conv2d_3x3_s_d_core(const Tensor<T> &A, const Tensor<T> &kernel)
+{
+  // Scalar 3x3 convolution specialized on the compile-time input depth in_D.
+  // Interior samples only; borders are produced by conv2d_3x3_s_peel.
+  constexpr int im_nb     = 0;   // batch of one
+  constexpr int half_size = 1;   // 3x3 kernel radius
+  const int     shift     = kernel.quantizer + m_q;
+  const int     nb_filters{ kernel.dims()[2] };
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  const int     top     = m_pads[0];
+  const int     left    = m_pads[1];
+  const int     start_h = half_size - top;
+  const int     start_w = half_size - left;
+#if DEBUG_SIMD && __AVX2__
+  std::cout << "\n[WARN] generic version conv 3x3 inD=" << in_D << " outD=" << nb_filters << " s=[" << s_w << ' ' << s_h << "]  " << in_H << 'x' << in_W << " "
+            << in_D * kernel.dims()[0] * kernel.dims()[1] * nb_filters * (in_H / s_h) * (in_W / s_w) / 1000 << " kMAC" << std::endl;
+#endif
+  for (int row = start_h + s_h; row < in_H - half_size; row += s_h)
+  {
+    for (int col = start_w + s_w; col < in_W - half_size; col += s_w)
+    {
+      for (int f = 0; f < nb_filters; ++f)
+      {
+        typename ComputationType<T>::type acc = 0;
+        // Note: the depth loop is outermost here (reverse of conv2d_3x3_s_core).
+        for (int d = 0; d < in_D; ++d)
+        {
+          for (int di = -half_size; di <= half_size; ++di)
+          {
+            for (int dj = -half_size; dj <= half_size; ++dj)
+            {
+              acc += (typename ComputationType<T>::type) A(im_nb, row + di, col + dj, d) * kernel(half_size + di, half_size + dj, f, d);
+              COUNTERS_MAC(kernel(half_size + di, half_size + dj, f, d));
+            }
+          }
+        }
+        ComputationType<T>::quantize(acc, shift);
+        COUNTERS(acc);
+        SATURATE(acc);
+        m_out(im_nb, row / s_h, col / s_w, f) = static_cast<T>(acc);
+      }
+    }
+  }
+}
+
+// ////////////////////////////////////////////////////////////////////////////////////////////////////////
+// 3x3
+// ////////////////////////////////////////////////////////////////////////////////////////////////////////
+///
+#if __AVX2__
+// 3x3 strided convolution, float, AVX2 path (8 lanes per depth step).
+// in_D must be a multiple of 8; interior samples only (borders are done by
+// conv2d_3x3_s_peel). NOTE(review): _mm256_load_ps is an aligned load --
+// assumes 32-byte aligned Tensor data; confirm against the allocator.
+template<> template<int in_D, int s_h, int s_w> void Conv2D<float>::simd8_conv2d_3x3_s_d(const Tensor<float> &A, const Tensor<float> &kernel)
+{
+  static_assert(in_D % 8 == 0, "Should be used with mod8 filters.");
+  constexpr int im_nb     = 0;
+  constexpr int half_size = 1;
+  const int     nb_filters{ kernel.dims()[2] };
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  constexpr int ihalf_size = 1;
+  constexpr int half_size_h{ ihalf_size };
+  constexpr int half_size_w{ ihalf_size };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+#if DEBUG_SIMD && __AVX512F__
+  if (in_D >= 16)
+  {
+    std::cout << "\n[WARN] suboptimal SIMD8 version conv 3x3 inD=" << in_D << " outD=" << nb_filters << " s=[" << s_w << ' ' << s_h << "]  " << in_H << 'x'
+              << in_W << " " << in_D * kernel.dims()[0] * kernel.dims()[1] * nb_filters * (in_H / s_h) * (in_W / s_w) / 1000 << " kMAC" << std::endl;
+  }
+#endif
+  for (int im_i = start_h + s_h; im_i < in_H - half_size_h; im_i += s_h)
+  {
+    for (int im_j = start_w + s_w; im_j < in_W - half_size_w; im_j += s_w)
+    {
+      for (int filter = 0; filter < nb_filters; ++filter)
+      {
+        __m256 s = _mm256_setzero_ps();
+        for (int filter_i = -half_size; filter_i <= half_size; ++filter_i)
+        {   // fixed
+          for (int filter_j = -half_size; filter_j <= half_size; ++filter_j)
+          {   // fixed
+            const int ii = im_i + filter_i;
+            const int jj = im_j + filter_j;
+            const int ki = half_size + filter_i;
+            const int kj = half_size + filter_j;
+
+            for (int filter_d = 0; filter_d < in_D; filter_d += 8)
+            {
+              const float *kptr = kernel.addr(ki, kj, filter, filter_d);
+              const __m256 k0   = _mm256_load_ps(kptr);
+              const float *aptr = A.addr(im_nb, ii, jj, filter_d);
+#if __FMA__
+              s = _mm256_fmadd_ps(k0, _mm256_load_ps(aptr), s);
+#else
+              const __m256 m0 = _mm256_mul_ps(k0, _mm256_load_ps(aptr));
+              s               = _mm256_add_ps(s, m0);
+              ;   // s + m0; // s = _mm256_hadd_ps(s, m0);
+#endif
+            }
+          }
+        }
+        // Horizontal reduction of the 8 partial sums into the output sample.
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter) = sum8_float(s);
+      }
+    }
+  }
+}
+
+#if __AVX512F__
+// 3x3 strided convolution, float, AVX-512 path (16 lanes per depth step).
+// in_D must be a multiple of 16; interior samples only. NOTE(review):
+// _mm512_load_ps is an aligned load -- assumes 64-byte aligned Tensor data;
+// confirm against the allocator.
+template<> template<int in_D, int s_h, int s_w> inline void Conv2D<float>::simd16_conv2d_3x3_s_d(const Tensor<float> &A, const Tensor<float> &kernel)
+{
+  static_assert(in_D % 16 == 0, "Should be used with mod16 filters.");
+  constexpr int im_nb     = 0;
+  constexpr int half_size = 1;
+  const int     nb_filters{ kernel.dims()[2] };
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  constexpr int ihalf_size = 1;
+  constexpr int half_size_h{ ihalf_size };
+  constexpr int half_size_w{ ihalf_size };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+  for (int im_i = start_h + s_h; im_i < in_H - half_size_h; im_i += s_h)
+  {
+    for (int im_j = start_w + s_w; im_j < in_W - half_size_w; im_j += s_w)
+    {
+      for (int filter = 0; filter < nb_filters; ++filter)
+      {
+        __m512 s = _mm512_setzero_ps();
+        for (int filter_i = -half_size; filter_i <= half_size; ++filter_i)
+        {   // fixed
+          for (int filter_j = -half_size; filter_j <= half_size; ++filter_j)
+          {   // fixed
+            const int ii = im_i + filter_i;
+            const int jj = im_j + filter_j;
+            const int ki = half_size + filter_i;
+            const int kj = half_size + filter_j;
+
+            for (int filter_d = 0; filter_d < in_D; filter_d += 16)
+            {
+              const float *kptr = kernel.addr(ki, kj, filter, filter_d);
+              const __m512 k0   = _mm512_load_ps(kptr);
+              const float *aptr = A.addr(im_nb, ii, jj, filter_d);
+#if __FMA__
+              s = _mm512_fmadd_ps(k0, _mm512_load_ps(aptr), s);
+#else
+              const __m512 m0 = _mm512_mul_ps(k0, _mm512_load_ps(aptr));
+              s               = _mm512_add_ps(s, m0);
+#endif
+            }
+          }
+        }
+        // Horizontal reduction of the 16 partial sums into the output sample.
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter) = sum16_float(s);
+      }
+    }
+  }
+}
+#endif
+
+// 3x3 strided convolution, int32, AVX2 path. Products are widened to 64 bits:
+// _mm256_mul_epi32 multiplies the even 32-bit lanes into 64-bit results, and
+// the 0b11110101 shuffle moves the odd lanes into even positions for a second
+// multiply, so all 8 lanes contribute to the 64-bit accumulator.
+template<> template<int in_D, int s_h, int s_w> void Conv2D<int32_t>::simd8_conv2d_3x3_s_d(const Tensor<int32_t> &A, const Tensor<int32_t> &kernel)
+{
+#if DEBUG_COUNTERS || SATURATE_RESULT
+  using T = int32_t;
+#endif
+  static_assert(in_D % 8 == 0, "Should be used with mod8 filters.");
+  constexpr int im_nb     = 0;
+  constexpr int half_size = 1;
+  const int     shift     = kernel.quantizer + m_q;
+  const int     nb_filters{ kernel.dims()[2] };
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  constexpr int ihalf_size = 1;
+  constexpr int half_size_h{ ihalf_size };
+  constexpr int half_size_w{ ihalf_size };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+#if DEBUG_SIMD && __AVX512F__
+  if (in_D >= 16)
+  {
+    std::cout << "\n[WARN] suboptimal SIMD8 version conv 3x3 inD=" << in_D << " outD=" << nb_filters << " s=[" << s_w << ' ' << s_h << "]  " << in_H << 'x'
+              << in_W << " " << in_D * kernel.dims()[0] * kernel.dims()[1] * nb_filters * (in_H / s_h) * (in_W / s_w) / 1000 << " kMAC" << std::endl;
+  }
+#endif
+
+  for (int im_i = start_h + s_h; im_i < in_H - half_size_h; im_i += s_h)
+  {
+    for (int im_j = start_w + s_w; im_j < in_W - half_size_w; im_j += s_w)
+    {
+      for (int filter = 0; filter < nb_filters; ++filter)
+      {
+        __m256i s = _mm256_setzero_si256();
+        for (int filter_i = -half_size; filter_i <= half_size; ++filter_i)
+        {   // fixed
+          for (int filter_j = -half_size; filter_j <= half_size; ++filter_j)
+          {   // fixed
+            for (int filter_d = 0; filter_d < in_D; filter_d += 8)
+            {
+              const int      ii   = im_i + filter_i;
+              const int      jj   = im_j + filter_j;
+              const int      ki   = half_size + filter_i;
+              const int      kj   = half_size + filter_j;
+              const __m256i *kptr = (const __m256i *) kernel.addr(ki, kj, filter, filter_d);
+              const __m256i  k0   = _mm256_load_si256(kptr);
+              const __m256i *aptr = (const __m256i *) A.addr(im_nb, ii, jj, filter_d);
+              const __m256i  v0   = _mm256_load_si256(aptr);
+              const __m256i  m0   = _mm256_mul_epi32(k0, v0);
+
+              // Bring odd 32-bit lanes into even positions for the 2nd multiply.
+              const __m256i k1 = _mm256_shuffle_epi32(k0, 0b11110101);
+              const __m256i v1 = _mm256_shuffle_epi32(v0, 0b11110101);
+
+              s = _mm256_add_epi64(s, m0);
+
+              const __m256i m1 = _mm256_mul_epi32(k1, v1);
+              s                = _mm256_add_epi64(s, m1);
+            }
+          }
+        }
+#if ENABLE_HALF_ROUND
+        typename ComputationType<T>::type z = shift ? ((sum64_int32(s) + (1 << (shift - 1))) >> shift) : sum64_int32(s);
+#else
+        typename ComputationType<T>::type z = (sum64_int32(s) >> shift);
+#endif
+        SATURATE(z);
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter) = static_cast<int32_t>(z);
+      }
+    }
+  }
+}
+
+// actually SSE42
+// 3x3 strided convolution, int16, 128-bit path (8 lanes per depth step).
+// _mm_madd_epi16 folds pairs of 16-bit products into 32-bit lanes; the final
+// sum is reduced with sum32_int16, shifted back by kernel.quantizer + m_q
+// (optionally half-up rounded) and saturated before the int16 store.
+template<> template<int in_D, int s_h, int s_w> void Conv2D<int16_t>::simd8_conv2d_3x3_s_d(const Tensor<int16_t> &A, const Tensor<int16_t> &kernel)
+{
+#if DEBUG_COUNTERS || SATURATE_RESULT
+  using T = int16_t;
+#endif
+  static_assert(in_D % 8 == 0, "Should be used with mod8 filters.");
+  constexpr int im_nb     = 0;
+  constexpr int half_size = 1;
+  const int     shift     = kernel.quantizer + m_q;
+  const int     nb_filters{ kernel.dims()[2] };
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  constexpr int ihalf_size = 1;
+  constexpr int half_size_h{ ihalf_size };
+  constexpr int half_size_w{ ihalf_size };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+#if DEBUG_SIMD
+  if (in_D >= 8)
+  {
+    std::cout << "\n[WARN] suboptimal SIMD8 version conv 3x3 inD=" << in_D << " outD=" << nb_filters << " s=[" << s_w << ' ' << s_h << "]  " << in_H << 'x'
+              << in_W << " " << in_D * kernel.dims()[0] * kernel.dims()[1] * nb_filters * (in_H / s_h) * (in_W / s_w) / 1000 << " kMAC" << std::endl;
+  }
+#endif
+  for (int im_i = start_h + s_h; im_i < in_H - half_size_h; im_i += s_h)
+  {
+    for (int im_j = start_w + s_w; im_j < in_W - half_size_w; im_j += s_w)
+    {
+      for (int filter = 0; filter < nb_filters; ++filter)
+      {
+        __m128i s = _mm_setzero_si128();
+        for (int filter_i = -half_size; filter_i <= half_size; ++filter_i)
+        {   // fixed
+          for (int filter_j = -half_size; filter_j <= half_size; ++filter_j)
+          {   // fixed
+            for (int filter_d = 0; filter_d < in_D; filter_d += 8)
+            {
+              const int      ii   = im_i + filter_i;
+              const int      jj   = im_j + filter_j;
+              const int      ki   = half_size + filter_i;
+              const int      kj   = half_size + filter_j;
+              const __m128i *kptr = (const __m128i *) kernel.addr(ki, kj, filter, filter_d);
+              const __m128i  k0   = _mm_load_si128(kptr);   // or loadu ?
+              const __m128i *aptr = (const __m128i *) A.addr(im_nb, ii, jj, filter_d);
+              const __m128i  v0   = _mm_load_si128(aptr);
+
+              const __m128i mad0 = _mm_madd_epi16(k0, v0);   // res in si32
+              s                  = _mm_add_epi32(s, mad0);
+            }
+          }
+        }
+#if ENABLE_HALF_ROUND
+        typename ComputationType<T>::type z = shift ? ((sum32_int16(s) + (1 << (shift - 1))) >> shift) : sum32_int16(s);
+#else
+        typename ComputationType<T>::type z = (sum32_int16(s) >> shift);
+#endif
+        SATURATE(z);
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter) = static_cast<int16_t>(z);
+      }
+    }
+  }
+}
+
+template<> template<int in_D, int s_h, int s_w> void Conv2D<int16_t>::simd16_conv2d_3x3_s_d(const Tensor<int16_t> &A, const Tensor<int16_t> &kernel)
+{
+#if DEBUG_COUNTERS || SATURATE_RESULT
+  using T = int16_t;
+#endif
+  static_assert(in_D % 16 == 0, "Should be used with mod16 filters.");
+  constexpr int im_nb     = 0;
+  constexpr int half_size = 1;
+  const int     shift     = kernel.quantizer + m_q;
+  const int     nb_filters{ kernel.dims()[2] };
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  constexpr int ihalf_size = 1;
+  constexpr int half_size_h{ ihalf_size };
+  constexpr int half_size_w{ ihalf_size };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+#if DEBUG_SIMD && __AVX512BW__
+  if (in_D >= 32)
+  {
+    std::cout << "\n[WARN] suboptimal SIMD16 version conv 3x3 inD=" << in_D << " outD=" << nb_filters << " s=[" << s_w << ' ' << s_h << "]  " << in_H << 'x'
+              << in_W << " " << in_D * kernel.dims()[0] * kernel.dims()[1] * nb_filters * (in_H / s_h) * (in_W / s_w) / 1000 << " kMAC" << std::endl;
+  }
+#endif
+  for (int im_i = start_h + s_h; im_i < in_H - half_size_h; im_i += s_h)
+  {
+    for (int im_j = start_w + s_w; im_j < in_W - half_size_w; im_j += s_w)
+    {
+      for (int filter = 0; filter < nb_filters; ++filter)
+      {
+        __m256i s = _mm256_setzero_si256();
+        for (int filter_i = -half_size; filter_i <= half_size; ++filter_i)
+        {   // fixed
+          for (int filter_j = -half_size; filter_j <= half_size; ++filter_j)
+          {   // fixed
+            for (int filter_d = 0; filter_d < in_D; filter_d += 16)
+            {
+              const int      ii   = im_i + filter_i;
+              const int      jj   = im_j + filter_j;
+              const int      ki   = half_size + filter_i;
+              const int      kj   = half_size + filter_j;
+              const __m256i *kptr = (const __m256i *) kernel.addr(ki, kj, filter, filter_d);
+              const __m256i  k0   = _mm256_load_si256(kptr);   // or loadu ?
+              const __m256i *aptr = (const __m256i *) A.addr(im_nb, ii, jj, filter_d);
+              const __m256i  v0   = _mm256_load_si256(aptr);
+
+              const __m256i mad0 = _mm256_madd_epi16(k0, v0);   // res in si32
+              s                  = _mm256_add_epi32(s, mad0);
+            }
+          }
+        }
+#if ENABLE_HALF_ROUND
+        typename ComputationType<T>::type z = shift ? ((sum32_int16(s) + (1 << (shift - 1))) >> shift) : sum32_int16(s);
+#else
+        typename ComputationType<T>::type z = (sum32_int16(s) >> shift);
+#endif
+        SATURATE(z);
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter) = static_cast<int16_t>(z);
+      }
+    }
+  }
+}
+
+#if __AVX512BW__
+template<> template<int in_D, int s_h, int s_w> void Conv2D<int16_t>::simd32_conv2d_3x3_s_d(const Tensor<int16_t> &A, const Tensor<int16_t> &kernel)
+{
+  static_assert(in_D % 32 == 0, "Should be used with mod32 filters.");
+  using T                 = int16_t;
+  constexpr int im_nb     = 0;
+  constexpr int half_size = 1;
+  const int     shift     = kernel.quantizer + m_q;
+  const int     nb_filters{ kernel.dims()[2] };
+  const int     in_H{ A.dims()[1] };
+  const int     in_W{ A.dims()[2] };
+  constexpr int ihalf_size = 1;
+  constexpr int half_size_h{ ihalf_size };
+  constexpr int half_size_w{ ihalf_size };
+  const int     top{ m_pads[0] };
+  const int     left{ m_pads[1] };
+  const int     start_h{ half_size_h - top };
+  const int     start_w{ half_size_w - left };
+  for (int im_i = start_h + s_h; im_i < in_H - half_size_h; im_i += s_h)
+  {
+    for (int im_j = start_w + s_w; im_j < in_W - half_size_w; im_j += s_w)
+    {
+      for (int filter = 0; filter < nb_filters; ++filter)
+      {
+        __m512i s = _mm512_setzero_si512();
+        for (int filter_i = -half_size; filter_i <= half_size; ++filter_i)
+        {   // fixed
+          for (int filter_j = -half_size; filter_j <= half_size; ++filter_j)
+          {   // fixed
+            for (int filter_d = 0; filter_d < in_D; filter_d += 32)
+            {
+              const int      ii   = im_i + filter_i;
+              const int      jj   = im_j + filter_j;
+              const int      ki   = half_size + filter_i;
+              const int      kj   = half_size + filter_j;
+              const __m512i *kptr = (const __m512i *) kernel.addr(ki, kj, filter, filter_d);
+              const __m512i  k0   = _mm512_load_si512(kptr);
+              const __m512i *aptr = (const __m512i *) A.addr(im_nb, ii, jj, filter_d);
+              const __m512i  v0   = _mm512_load_si512(aptr);
+
+              const __m512i mad0 = _mm512_madd_epi16(k0, v0);   // res in si32
+              s                  = _mm512_add_epi32(s, mad0);
+            }
+          }
+        }
+#if ENABLE_HALF_ROUND
+        typename ComputationType<T>::type z = shift ? ((_mm512_reduce_add_epi32(s) + (1 << (shift - 1))) >> shift) : _mm512_reduce_add_epi32(s);
+#else
+        typename ComputationType<T>::type z = (_mm512_reduce_add_epi32(s) >> shift);
+#endif
+        COUNTERS(z);
+        SATURATE(z);
+        m_out(im_nb, im_i / s_h, im_j / s_w, filter) = z;
+      }
+    }
+  }
+}
+#endif
+#endif   // avx2
+
+}   // namespace layers
+}   // namespace sadl
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/options.h b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/options.h
new file mode 100644
index 0000000000000000000000000000000000000000..bb094182853450ae585f7da694585112d43ef600
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/options.h
@@ -0,0 +1,99 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2023, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#pragma once
+
+// build options
+// behavior
+#ifndef SATURATE_RESULT
+#define SATURATE_RESULT 1   // avoid overflow in int NN
+#endif
+
+#ifndef ENABLE_HALF_ROUND
+#define ENABLE_HALF_ROUND 1   // half rounding during right shifts for integer operations
+#endif
+#if SPARSE_SUPPORT
+// Sparse matmul threshold
+static constexpr float kSparsifyThreshold     = 0.8f;
+static constexpr float kSparsifySizeThreshold = 1000.0f;
+#endif
+
+// optimization
+// nothing/-msse42: no simd
+// -mavx2:  avx2
+// -mavx2 -mfma: avx2 + fuse multiply/add
+// -mavx512bw -mavx512f: avx512
+// #define NDEBUG        1 // remove sanity tests
+
+// debug
+// #define DEBUG_VALUES        1 // show values
+// #define DEBUG_MODEL         1 // show pb with model
+// #define DEBUG_COUNTERS      1 // print overflow, MAC etc.
+// #define DEBUG_PRINT         1 // print model info
+// #define DEBUG_SIMD          1 // tell about non simd version
+// #define DEBUG_KEEP_OUTPUT   1 // keep a copy of the output tensor
+#if SATURATE_RESULT
+#define SATURATE(X)                                                                                                                                            \
+  if (!std::is_same<T, float>::value)                                                                                                                          \
+  X = (X > ComputationType<T>::max) ? ComputationType<T>::max : (X < -ComputationType<T>::max ? -ComputationType<T>::max : X)
+#else
+#define SATURATE(X)
+#endif
+
+#if DEBUG_COUNTERS
+template<typename T> T my_abs(T x) { return x < T{} ? -x : x; }
+#define COUNTERS(X)                                                                                                                                            \
+  ++this->cpt_op;                                                                                                                                              \
+  if (my_abs(X) > ComputationType<T>::max)                                                                                                                     \
+  ++this->cpt_overflow
+#define COUNTERS_MAC(X)                                                                                                                                        \
+  ++this->cpt_mac;                                                                                                                                             \
+  if (X != 0)                                                                                                                                                  \
+  ++this->cpt_mac_nz
+#else
+#define COUNTERS(X) (void) X
+#define COUNTERS_MAC(X) (void) X
+#endif
+
+#ifndef DUMP_MODEL_EXT
+#define DUMP_MODEL_EXT
+#endif
+namespace sadl
+{
+enum class Version
+{
+  unknown = -1,
+  sadl01  = 1,
+  sadl02  = 2,
+  sadl03  = 3,
+};
+}
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/tensor.h b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/tensor.h
new file mode 100644
index 0000000000000000000000000000000000000000..7a3ceb698b721362d4fa02777692cc11f98dd102
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/sadl_conversion_utilities/sadl_mod_src_files/tensor.h
@@ -0,0 +1,649 @@
+/* The copyright in this software is being made available under the BSD
+ * License, included below. This software may be subject to other third party
+ * and contributor rights, including patent rights, and no such rights are
+ * granted under this license.
+ *
+ * Copyright (c) 2010-2023, ITU/ISO/IEC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software without
+ *    specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#pragma once
+#include <algorithm>
+#include <cstdlib>
+#if _WIN32 || __USE_ISOC11
+#include <malloc.h>
+#else
+#include <malloc/malloc.h>
+#endif
+#include <numeric>
+#include <vector>
+#include <limits>
+#include "options.h"
+
+#include "dimensions.h"
+
+namespace sadl
+{
+// tensor between layers: depth height width (or width height?)
+template<typename T, std::size_t Alignment> struct aligned_allocator
+{
+  using pointer         = T *;
+  using const_pointer   = const T *;
+  using reference       = T &;
+  using const_reference = const T &;
+  using value_type      = T;
+  using size_type       = std::size_t;
+  using difference_type = std::ptrdiff_t;
+
+  pointer       address(reference r) const { return &r; }
+  const_pointer address(const_reference s) const { return &s; }
+  size_type     max_size() const { return (static_cast<std::size_t>(0) - static_cast<std::size_t>(1)) / sizeof(T); }
+  template<typename U> struct rebind
+  {
+    typedef aligned_allocator<U, Alignment> other;
+  };
+
+  bool operator!=(const aligned_allocator &other) const { return !(*this == other); }
+  void construct(pointer p, const_reference t) const
+  {
+    void *const pv = static_cast<void *>(p);
+    new (pv) T(t);
+  }
+  void destroy(T *const p) const { p->~T(); }
+  bool operator==(const aligned_allocator & /*other*/) const { return true; }
+
+  aligned_allocator()                          = default;
+  aligned_allocator(const aligned_allocator &) = default;
+  ~aligned_allocator()                         = default;
+  aligned_allocator &operator=(const aligned_allocator &) = delete;
+
+  template<typename U> aligned_allocator(const aligned_allocator<U, Alignment> &) {}
+
+  pointer allocate(const std::size_t n) const
+  {
+    if (n == 0)
+      return nullptr;
+    size_t s = ((n * sizeof(T) + Alignment - 1) / Alignment) * Alignment;
+
+#if _WIN32
+#if __MINGW32__
+    void *const pv = __mingw_aligned_malloc(s, Alignment);
+#else
+    void *const pv = _aligned_malloc(s, Alignment);
+#endif
+#else
+#if __USE_ISOC11
+    void *const pv = aligned_alloc(Alignment, s);
+#else
+    void *pv = nullptr;
+    if (posix_memalign(&pv, Alignment, s))
+    {
+      throw std::bad_alloc();
+    }
+#endif
+#endif
+
+    if (!pv)
+      throw std::bad_alloc();
+    return static_cast<T *>(pv);
+  }
+
+#ifdef _WIN32
+  void deallocate(T *const p, const std::size_t n) const { _aligned_free(p); }
+#else
+  void deallocate(T *const p, const std::size_t /*n*/) const { free(p); }
+#endif
+
+  template<typename U> pointer allocate(const std::size_t n, const U * /* const hint */) const { return allocate(n); }
+};
+
+template<typename T> struct ComputationType
+{
+};
+
+// forward declarations needed for the friend declarations below
+template<typename T> class Tensor;
+template<typename T> void swap(Tensor<T> &t0, Tensor<T> &t1);
+template<typename T> void swapData(Tensor<T> &t0, Tensor<T> &t1);
+#if SPARSE_SUPPORT
+template<typename T> void sparsify(Tensor<T> &weights);
+#endif
+
+template<typename T> class Tensor
+{
+public:
+  using value_type     = T;
+  using Data           = std::vector<value_type, aligned_allocator<value_type, 64>>;
+  using iterator       = typename Data::iterator;
+  using const_iterator = typename Data::const_iterator;
+#if SPARSE_SUPPORT
+  using index = uint16_t;
+#endif
+
+  static bool skip_border;   // to replace by inline global C++17
+
+  Tensor() = default;
+  explicit Tensor(Dimensions d);
+
+  void resize(Dimensions d);
+
+  // linear access
+  value_type &operator[](int i);
+  value_type  operator[](int i) const;
+
+  // tensor access
+  value_type &operator()(int i);
+  value_type  operator()(int i) const;
+
+  value_type &operator()(int i, int j);
+  value_type  operator()(int i, int j) const;
+
+  value_type &operator()(int i, int j, int k);
+  value_type  operator()(int i, int j, int k) const;
+
+  value_type &      operator()(int i, int j, int k, int l);
+  value_type        operator()(int i, int j, int k, int l) const;
+  const value_type *addr(int i, int j, int k, int l) const;
+
+  bool in(int i) const;
+  bool in(int i, int j) const;
+  bool in(int i, int j, int k) const;
+  bool in(int i, int j, int k, int l) const;
+  void fill(value_type value);
+
+  const Dimensions &dims() const;
+  int64_t           size() const;
+
+  const value_type *data() const { return m_data.data(); }
+  value_type *      data() { return m_data.data(); }
+
+  iterator begin()
+  {
+#if SPARSE_SUPPORT
+    assert(!isSparse());
+#endif
+    return m_data.begin();
+  }
+  const_iterator begin() const { return m_data.begin(); }
+  iterator       end()
+  {
+#if SPARSE_SUPPORT
+    assert(!isSparse());
+#endif
+    return m_data.end();
+  }
+  const_iterator end() const { return m_data.end(); }
+
+  int                      quantizer   = 0;   // for int
+  int                      border_skip = 0;
+  static constexpr int64_t kMaxSize    = 32LL * 1024 * 1024 * 1024;
+
+  Data &getData() { return m_data; }
+#if SPARSE_SUPPORT
+  const std::vector<value_type> &getDataSparse() const { return m_data_sparse; }
+  const std::vector<index> &     getIndices() const { return m_indices; }
+  const std::vector<uint16_t> &  getNbNonzerosCol() const { return m_nb_nonzeros_col; }
+  bool                           isSparse() const { return !m_data_sparse.empty(); }
+#endif
+private:
+  Dimensions m_dims;
+  Data       m_data;
+#if SPARSE_SUPPORT
+  std::vector<value_type> m_data_sparse;
+  std::vector<index>      m_indices;
+  std::vector<uint16_t>   m_nb_nonzeros_col;
+  friend void             sparsify<>(Tensor<T> &weights);
+#endif
+
+  friend void swap<>(Tensor<T> &t0, Tensor<T> &t1);
+  friend void swapData<>(Tensor<T> &t0, Tensor<T> &t1);
+#if DEBUG_PRINT
+public:
+  static bool m_verbose;
+#endif
+};
+
+#if SPARSE_SUPPORT
+// Sparse Matmul
+template<typename T> bool isFullMatrixSparse(const Tensor<T> &weights, float sparsity_threshold, float sparsity_size_threshold)
+{
+  int N = weights.dims()[0], M = weights.dims()[1];
+
+  if (N * M < sparsity_size_threshold)
+    return false;
+
+  float cnt_zeros = 0;
+
+  for (int j = 0; j < M; ++j)
+  {
+    for (int i = 0; i < N; ++i)
+    {
+      if (weights[N * j + i] == 0)
+        cnt_zeros++;
+    }
+  }
+#if DEBUG_PRINT
+  std::cout << weights << ' ' << cnt_zeros << ' ' << M << ' ' << N << std::endl;
+#endif
+  auto sparsity_level = cnt_zeros / (float) (N * M);
+  return (sparsity_level >= sparsity_threshold);
+}
+
+template<typename T> void sparsify(Tensor<T> &weights)
+{
+  weights.m_data_sparse.clear();
+  weights.m_nb_nonzeros_col.clear();
+
+  uint16_t N = weights.dims()[0], M = weights.dims()[1];
+  assert(N < (1 << 16) && M < (1 << 16));
+
+  for (uint16_t j = 0; j < M; ++j)
+  {
+    auto cnt_non_zeros = 0;
+
+    for (uint16_t i = 0; i < N; ++i)
+    {
+      auto val = weights.m_data[N * j + i];
+      if (val != 0)
+      {
+        weights.m_data_sparse.push_back(val);
+        weights.m_indices.push_back(i);
+        cnt_non_zeros++;
+      }
+    }
+
+#if (__SSE4_2__ || __AVX2__)
+#if __AVX2__
+    int pad = 16;
+#else
+    int pad = 8;
+#endif
+    if (std::is_same<T, int16_t>::value)
+    {
+      int tmp = cnt_non_zeros;
+      while (tmp % pad != 0)
+      {
+        weights.m_data_sparse.push_back(0);
+        weights.m_indices.push_back(0);
+        tmp++;
+      }
+    }
+#endif
+
+    weights.m_nb_nonzeros_col.push_back(cnt_non_zeros);
+  }
+}
+#endif
+
+// specializations of ComputationType for the supported scalar types
+template<> struct ComputationType<float>
+{
+  using type                = float;
+  static constexpr type max = std::numeric_limits<float>::max();
+  static void           quantize(type, int) {}     // nothing to do
+  static void           shift_left(type, int) {}   // nothing to do
+};
+
+template<> struct ComputationType<int32_t>
+{
+  using type                = int64_t;
+  static constexpr type max = std::numeric_limits<int32_t>::max();
+#if ENABLE_HALF_ROUND
+  static void           quantize(type &z, int q) { z = q ? ((z+ (1 << (q-1))) >> q) : z; }
+  static void           quantize(int32_t &z, int q) { z = q ? (int32_t)(((int64_t)z + (1 << (q-1))) >> q) : z; }
+#else
+  static void           quantize(type &z, int q) { z >>= q; }
+  static void           quantize(int32_t &z, int q) { z >>= q; }
+#endif
+  static void           shift_left(type &z, int q) { z <<= q; }
+  static void           shift_left(int32_t &z, int q) { z <<= q; }
+};
+
+template<> struct ComputationType<int16_t>
+{
+  using type                = int32_t;
+  static constexpr type max = std::numeric_limits<int16_t>::max();
+#if ENABLE_HALF_ROUND
+  static void           quantize(type &z, int q) { z = q ? ((z+(1<<(q-1)))>>q) : z; }
+  static void           quantize(int16_t &z, int q) { z = q ? (int16_t)(((int32_t)z + (1 << (q-1))) >> q) : z; }
+#else
+  static void           quantize(type &z, int q) { z >>= q; }
+  static void           quantize(int16_t &z, int q) { z >>= q; }
+#endif
+  static void           shift_left(type &z, int q) { z <<= q; }
+  static void           shift_left(int16_t &z, int q) { z <<= q; }
+};
+
+// implementation of Tensor member functions and friends
+template<typename T> bool Tensor<T>::skip_border = false;
+
+template<typename T> void swap(Tensor<T> &t0, Tensor<T> &t1)
+{
+  std::swap(t0.m_dims, t1.m_dims);
+  std::swap(t0.m_data, t1.m_data);
+  std::swap(t0.quantizer, t1.quantizer);
+  std::swap(t0.border_skip, t1.border_skip);
+#if SPARSE_SUPPORT
+  std::swap(t0.m_data_sparse, t1.m_data_sparse);
+#endif
+}
+
+template<typename T> void swapData(Tensor<T> &t0, Tensor<T> &t1)
+{
+  assert(t0.size() == t1.size());
+  std::swap(t0.m_data, t1.m_data);
+  std::swap(t0.quantizer, t1.quantizer);
+  std::swap(t0.border_skip, t1.border_skip);
+#if SPARSE_SUPPORT
+  std::swap(t0.m_data_sparse, t1.m_data_sparse);
+#endif
+}
+
+template<typename T> Tensor<T>::Tensor(Dimensions d)
+{
+#if SPARSE_SUPPORT
+  assert(!isSparse());
+#endif
+  resize(d);
+}
+
+template<typename T> const Dimensions &Tensor<T>::dims() const { return m_dims; }
+
+template<typename T> int64_t Tensor<T>::size() const { return m_data.size(); }
+
+template<typename T> void Tensor<T>::resize(Dimensions d)
+{
+#if SPARSE_SUPPORT
+  m_data_sparse.clear();
+#endif
+  m_dims     = d;
+  int64_t m = m_dims.nbElements();
+  assert(m < kMaxSize);
+  m_data.resize(m);
+}
+
+// TODO: variadic template to define all accessors
+template<typename T> T &Tensor<T>::operator[](int i)
+{
+#if SPARSE_SUPPORT
+  assert(!isSparse());
+#endif
+  return m_data[i];
+}
+
+template<typename T> T &Tensor<T>::operator()(int i)
+{
+#if SPARSE_SUPPORT
+  assert(!isSparse());
+#endif
+  assert(m_dims.size() == 1);
+  assert(i < m_dims[0] && i >= 0);
+
+  return m_data[i];
+}
+
+template<typename T> bool Tensor<T>::in(int i) const { return m_dims.size() == 1 && i < m_dims[0] && i >= 0; }
+
+template<typename T> T Tensor<T>::operator[](int i) const { return m_data[i]; }
+
+template<typename T> T Tensor<T>::operator()(int i) const
+{
+  assert(m_dims.size() == 1);
+  assert(i < m_dims[0] && i >= 0);
+
+  return m_data[i];
+}
+
+template<typename T> T &Tensor<T>::operator()(int i, int j)
+{
+#if SPARSE_SUPPORT
+  assert(!isSparse());
+#endif
+  assert(m_dims.size() == 2);
+  assert(i < m_dims[0] && i >= 0);
+  assert(j < m_dims[1] && j >= 0);
+
+  return m_data[(int64_t) m_dims[1] * i + j];
+}
+
+template<typename T> T Tensor<T>::operator()(int i, int j) const
+{
+  assert(m_dims.size() == 2);
+  assert(i < m_dims[0] && i >= 0);
+  assert(j < m_dims[1] && j >= 0);
+
+  return m_data[(int64_t) m_dims[1] * i + j];
+}
+
+template<typename T> bool Tensor<T>::in(int i, int j) const { return m_dims.size() == 2 && i < m_dims[0] && i >= 0 && j < m_dims[1] && j >= 0; }
+
+template<typename T> T &Tensor<T>::operator()(int i, int j, int k)
+{
+#if SPARSE_SUPPORT
+  assert(!isSparse());
+#endif
+  assert(m_dims.size() == 3);
+  assert(i < m_dims[0] && i >= 0);
+  assert(j < m_dims[1] && j >= 0);
+  assert(k < m_dims[2] && k >= 0);
+
+  return m_data[(int64_t) m_dims[2] * (m_dims[1] * i + j) + k];
+}
+
+template<typename T> T Tensor<T>::operator()(int i, int j, int k) const
+{
+  assert(m_dims.size() == 3);
+  assert(i < m_dims[0] && i >= 0);
+  assert(j < m_dims[1] && j >= 0);
+  assert(k < m_dims[2] && k >= 0);
+
+  return m_data[(int64_t) m_dims[2] * (m_dims[1] * i + j) + k];
+}
+
+template<typename T> bool Tensor<T>::in(int i, int j, int k) const
+{
+  return m_dims.size() == 3 && i < m_dims[0] && i >= 0 && j < m_dims[1] && j >= 0 && k < m_dims[2] && k >= 0;
+}
+
+template<typename T> T &Tensor<T>::operator()(int i, int j, int k, int l)
+{
+#if SPARSE_SUPPORT
+  assert(!isSparse());
+#endif
+  assert(m_dims.size() == 4);
+  assert(i < m_dims[0] && i >= 0);
+  assert(j < m_dims[1] && j >= 0);
+  assert(k < m_dims[2] && k >= 0);
+  assert(l < m_dims[3] && l >= 0);
+
+  return m_data[(int64_t) m_dims[3] * (m_dims[2] * (m_dims[1] * i + j) + k) + l];
+}
+
+template<typename T> bool Tensor<T>::in(int i, int j, int k, int l) const
+{
+  return m_dims.size() == 4 && i < m_dims[0] && i >= 0 && j < m_dims[1] && j >= 0 && k < m_dims[2] && k >= 0 && l < m_dims[3] && l >= 0;
+}
+
+template<typename T> const T *Tensor<T>::addr(int i, int j, int k, int l) const
+{
+  assert(m_dims.size() == 4);
+  assert(i < m_dims[0] && i >= 0);
+  assert(j < m_dims[1] && j >= 0);
+  assert(k < m_dims[2] && k >= 0);
+  assert(l < m_dims[3] && l >= 0);
+  return &m_data[(int64_t) m_dims[3] * (m_dims[2] * (m_dims[1] * i + j) + k) + l];
+}
+
+template<typename T> T Tensor<T>::operator()(int i, int j, int k, int l) const
+{
+  assert(m_dims.size() == 4);
+  assert(i < m_dims[0] && i >= 0);
+  assert(j < m_dims[1] && j >= 0);
+  assert(k < m_dims[2] && k >= 0);
+  assert(l < m_dims[3] && l >= 0);
+  return m_data[(int64_t) m_dims[3] * (m_dims[2] * (m_dims[1] * i + j) + k) + l];
+}
+
+template<typename T> void Tensor<T>::fill(value_type value)
+{
+#if SPARSE_SUPPORT
+  m_data_sparse.clear();
+#endif
+  std::fill(m_data.begin(), m_data.end(), value);
+}
+
+}   // namespace sadl
+
+#include <iostream>
+#include <sstream>
+
+#if DEBUG_PRINT
+template<typename T> bool sadl::Tensor<T>::m_verbose = true;
+
+#define SADL_DBG(X)                                                                                                                                            \
+  if (sadl::Tensor<T>::m_verbose)                                                                                                                              \
+  {                                                                                                                                                            \
+    X;                                                                                                                                                         \
+  }
+#else
+#define SADL_DBG(X)
+#endif
+
+namespace sadl
+{
+template<typename T> std::ostream &operator<<(std::ostream &out, const Tensor<T> &t)
+{
+  // adhoc
+  if (t.dims().size() == 4u)
+  {
+    out << "[";
+    if (t.dims()[0] > 1)
+      out << '\n';
+    for (int k = 0; k < t.dims()[0]; ++k)
+    {
+      out << " [";
+      if (t.dims()[1] > 1)
+        out << '\n';
+      for (int d = 0; d < t.dims()[1]; ++d)
+      {
+        out << "  [";
+        if (t.dims()[2] > 1)
+          out << '\n';
+        for (int i = 0; i < t.dims()[2]; ++i)
+        {
+          out << "   [";
+          for (int j = 0; j < t.dims()[3]; ++j)
+            out << t(k, d, i, j) << ' ';
+          out << "   ]";
+          if (t.dims()[2] > 1)
+            out << '\n';
+        }
+        out << "  ]";
+        if (t.dims()[1] > 1)
+          out << '\n';
+      }
+      out << " ]";
+      if (t.dims()[0] > 1)
+        out << '\n';
+    }
+    out << "]";
+  }
+  else if (t.dims().size() == 3u)
+  {
+    out << "[";
+    for (int d = 0; d < t.dims()[0]; ++d)
+    {
+      out << " [";
+      if (t.dims()[0] > 1)
+        out << '\n';
+      for (int i = 0; i < t.dims()[1]; ++i)
+      {
+        out << "  [";
+        if (t.dims()[1] > 1)
+          out << '\n';
+        for (int j = 0; j < t.dims()[2]; ++j)
+          out << t(d, i, j) << '\t';
+        out << "  ]";
+        if (t.dims()[1] > 1)
+          out << '\n';
+      }
+      out << " ]";
+      if (t.dims()[0] > 1)
+        out << '\n';
+    }
+    out << "]";
+  }
+  else if (t.dims().size() == 2u)
+  {
+    out << "[";
+    for (int i = 0; i < t.dims()[0]; ++i)
+    {
+      out << " [";
+      if (t.dims()[0] > 1)
+        out << '\n';
+      for (int j = 0; j < t.dims()[1]; ++j)
+        out << t(i, j) << ' ';
+      out << " ]";
+      if (t.dims()[0] > 1)
+        out << '\n';
+    }
+    out << "]\n";
+  }
+  else if (t.dims().size() == 1u)
+  {
+    out << "[";
+    for (int j = 0; j < t.dims()[0]; ++j)
+      out << t(j) << ' ';
+    out << "]";
+  }
+  else
+  {
+    out << "TODO\n";
+  }
+#if SPARSE_SUPPORT
+  if (t.isSparse())
+  {
+    uint32_t offset_data = 0;
+    int      i           = 0;
+    out << "data_sparse = [\n";
+    for (const auto &nb_nonzero: t.getNbNonzerosCol())
+    {
+      for (auto k = 0; k < nb_nonzero; ++k, ++offset_data)
+      {
+        uint16_t j = t.getIndices()[offset_data];
+        out << i << ',' << j << ": " << t.getDataSparse()[offset_data] << '\n';
+      }
+      i++;
+    }
+    out << "]\n";
+  }
+#endif
+  out << " shape=" << t.dims() << " type=";
+
+  return out;
+}
+
+}   // namespace sadl
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/tf2_onnx.py b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/tf2_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3de9cf76930c4b707078765b035466612c2de44
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/4_conversions/tf2_onnx.py
@@ -0,0 +1,85 @@
+"""Convert a Keras NN-filter model to ONNX.
+
+Usage: python tf2_onnx.py <input_keras_model> <output_model.onnx>
+
+Layers whose name marks them as luma ('_y_' infix or '_Y'/'_y' suffix) or
+chroma ('_c_' infix or '_C'/'_c' suffix) are rebuilt on two separate
+branches fed by a channel split of the input (first 16 channels to luma,
+the rest to chroma); the two branches are concatenated at the output.
+"""
+import os
+import sys
+
+import numpy as np
+import onnx
+import tensorflow as tf
+import tf2onnx
+from tensorflow import keras
+from tensorflow.keras.layers import Input
+from tensorflow.keras.models import Model
+
+# Geometry used for the ONNX input spec and the sanity forward pass.
+# NOTE(review): 10 channels here vs. the 16-channel luma split below --
+# for split models the channel count actually comes from the loaded model.
+input_shape = (1, 72, 72, 10)
+s = (72, 72, 10)
+
+# The conversion does not need a GPU; force CPU to avoid device issues.
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+
+tf.config.run_functions_eagerly(True)
+model = keras.models.load_model(sys.argv[1], compile=False)
+
+# Rebuild the network layer by layer on a fresh Input so the resulting
+# graph has a single, well-defined input tensor.
+input_layer = Input(shape=model.layers[0].input_shape[0][1:], name=model.layers[0].name)
+x = input_layer
+output = None
+count = 0          # number of 'output_layer' layers seen so far
+is_split_model = False
+x_Y = None         # luma branch tensor (first 16 input channels)
+x_C = None         # chroma branch tensor (remaining input channels)
+
+for layer in model.layers[1:]:
+    layer_type = 'common'
+    if ('_y_' in layer.name) or layer.name.endswith('_Y') or layer.name.endswith('_y'):
+        layer_type = 'luma'
+        if x_Y is None:
+            x_Y = x[:, :, :, :16]
+    elif ('_c_' in layer.name) or layer.name.endswith('_C') or layer.name.endswith('_c'):
+        layer_type = 'chroma'
+        if x_C is None:
+            x_C = x[:, :, :, 16:]
+    elif layer.name in ('tf_op_layer_strided_slice', 'tf_op_layer_strided_slice_1'):
+        # The original model's explicit slice layers are superseded by the
+        # channel slicing done above, so skip them.
+        continue
+
+    if layer_type == 'luma':
+        x_Y = layer(x_Y)
+        is_split_model = True
+    elif layer_type == 'chroma':
+        x_C = layer(x_C)
+        is_split_model = True
+    else:
+        x = layer(x)
+
+    if 'output_layer' in layer.name:
+        count += 1
+        if not is_split_model:
+            output = x
+            break
+        if count == 2:
+            # Both branches finished: merge luma and chroma outputs.
+            output = tf.keras.layers.Concatenate()([x_Y, x_C])
+            break
+
+tf_model = Model(inputs=input_layer, outputs=output)
+
+model_onnx, _ = tf2onnx.convert.from_keras(tf_model, [tf.TensorSpec(input_shape, name="input_1")], opset=13)
+
+# Sanity check: run one deterministic forward pass so an invalid rebuilt
+# model fails here rather than after the ONNX file has been written.
+x = np.linspace(0, 1, np.prod(s)).reshape(input_shape).astype(np.float32)
+results_tf = tf_model(x).numpy()
+onnx.save(model_onnx, sys.argv[2])
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/README.pdf b/training/training_scripts/Nn_Filtering_Set_LC/README.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..ea5faac84bde5be69e96a81abff14eace6822503
Binary files /dev/null and b/training/training_scripts/Nn_Filtering_Set_LC/README.pdf differ
diff --git a/training/training_scripts/Nn_Filtering_Set_LC/requirements.txt b/training/training_scripts/Nn_Filtering_Set_LC/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5a76dbd21b062b9710a588882b0380aea1437a64
--- /dev/null
+++ b/training/training_scripts/Nn_Filtering_Set_LC/requirements.txt
@@ -0,0 +1,6 @@
+numpy>=1.18.2
+tensorflow==2.8.0
+tensorly==0.7.0
+pandas>=1.0.3
+onnx
+tf2onnx