From 13c29e612bd42b05a91afdae16e2e07666ca8833 Mon Sep 17 00:00:00 2001
From: Jonathan Taquet <jonathan.taquet@crf.canon.fr>
Date: Tue, 2 Apr 2019 23:19:25 +0200
Subject: [PATCH] Merge tag 'VTM-4.2' into JVET-N0242

VTM version 4.2
---
 doc/software-manual.tex                       |   12 +
 source/App/EncoderApp/EncApp.cpp              |    4 +
 source/App/EncoderApp/EncAppCfg.cpp           |    8 +
 source/App/EncoderApp/EncAppCfg.h             |    4 +
 source/Lib/CommonLib/AdaptiveLoopFilter.cpp   |  184 +++
 source/Lib/CommonLib/AdaptiveLoopFilter.h     |   27 +
 source/Lib/CommonLib/TypeDef.h                |   29 +
 .../Lib/CommonLib/x86/AdaptiveLoopFilterX86.h |  856 ++++++++++++++
 source/Lib/DecoderLib/VLCReader.cpp           |   87 +-
 source/Lib/DecoderLib/VLCReader.h             |    4 +
 .../Lib/EncoderLib/EncAdaptiveLoopFilter.cpp  | 1018 ++++++++++++++++-
 source/Lib/EncoderLib/EncAdaptiveLoopFilter.h |  211 ++++
 source/Lib/EncoderLib/EncCfg.h                |   10 +
 source/Lib/EncoderLib/EncLib.cpp              |    4 +
 source/Lib/EncoderLib/VLCWriter.cpp           |   96 +-
 source/Lib/EncoderLib/VLCWriter.h             |    4 +
 16 files changed, 2555 insertions(+), 3 deletions(-)

diff --git a/doc/software-manual.tex b/doc/software-manual.tex
index 7bf160af9..cbe8e48af 100644
--- a/doc/software-manual.tex
+++ b/doc/software-manual.tex
@@ -1962,6 +1962,18 @@ luma TUs are also skipped.
 \par
 This option has no effect if TransformSkip is disabled.
 \\
+
+\Option{UseNonLinearAlfLuma} &
+%\ShortOption{\None} &
+\Default{true} &
+Enables optimization of non-linear ALF filters for the luma channel.
+\\
+
+\Option{UseNonLinearAlfChroma} &
+%\ShortOption{\None} &
+\Default{true} &
+Enables optimization of non-linear ALF filters for the chroma channels.
+\\
 \end{OptionTableNoShorthand}
 
 %%
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 626169456..8ee35e2bf 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -314,6 +314,10 @@ void EncApp::xInitLibCfg()
   m_cEncLib.setUseAMaxBT                                         ( m_useAMaxBT );
   m_cEncLib.setUseE0023FastEnc                                   ( m_e0023FastEnc );
   m_cEncLib.setUseContentBasedFastQtbt                           ( m_contentBasedFastQtbt );
+#if JVET_N0242_NON_LINEAR_ALF
+  m_cEncLib.setUseNonLinearAlfLuma                               ( m_useNonLinearAlfLuma );
+  m_cEncLib.setUseNonLinearAlfChroma                             ( m_useNonLinearAlfChroma );
+#endif
   m_cEncLib.setCrossComponentPredictionEnabledFlag               ( m_crossComponentPredictionEnabledFlag );
   m_cEncLib.setUseReconBasedCrossCPredictionEstimate             ( m_reconBasedCrossCPredictionEstimate );
   m_cEncLib.setLog2SaoOffsetScale                                ( CHANNEL_TYPE_LUMA  , m_log2SaoOffsetScale[CHANNEL_TYPE_LUMA]   );
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index 89aaccaa7..7b0368bde 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -899,6 +899,10 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("AMaxBT",                                          m_useAMaxBT,                                      false, "Adaptive maximal BT-size")
   ("E0023FastEnc",                                    m_e0023FastEnc,                                    true, "Fast encoding setting for QTBT (proposal E0023)")
   ("ContentBasedFastQtbt",                            m_contentBasedFastQtbt,                           false, "Signal based QTBT speed-up")
+#if JVET_N0242_NON_LINEAR_ALF
+  ("UseNonLinearAlfLuma",                             m_useNonLinearAlfLuma,                             true, "Non-linear adaptive loop filters for Luma Channel")
+  ("UseNonLinearAlfChroma",                           m_useNonLinearAlfChroma,                           true, "Non-linear adaptive loop filters for Chroma Channels")
+#endif
   // Unit definition parameters
   ("MaxCUWidth",                                      m_uiMaxCUWidth,                                     64u)
   ("MaxCUHeight",                                     m_uiMaxCUHeight,                                    64u)
@@ -3194,6 +3198,10 @@ void EncAppCfg::xPrintParameter()
   msg( VERBOSE, "AMaxBT:%d ", m_useAMaxBT );
   msg( VERBOSE, "E0023FastEnc:%d ", m_e0023FastEnc );
   msg( VERBOSE, "ContentBasedFastQtbt:%d ", m_contentBasedFastQtbt );
+#if JVET_N0242_NON_LINEAR_ALF
+  msg( VERBOSE, "UseNonLinearALFLuma:%d ", m_useNonLinearAlfLuma );
+  msg( VERBOSE, "UseNonLinearALFChroma:%d ", m_useNonLinearAlfChroma );
+#endif
 
   msg( VERBOSE, "NumSplitThreads:%d ", m_numSplitThreads );
   if( m_numSplitThreads > 1 )
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index 94a7e46c5..95724dd1c 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -292,6 +292,10 @@ protected:
   bool      m_useFastMrg;
   bool      m_e0023FastEnc;
   bool      m_contentBasedFastQtbt;
+#if JVET_N0242_NON_LINEAR_ALF
+  bool      m_useNonLinearAlfLuma;
+  bool      m_useNonLinearAlfChroma;
+#endif
 
 
   int       m_numSplitThreads;
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
index b1aef8429..a3c9e0dac 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
@@ -39,6 +39,14 @@
 
 #include "CodingStructure.h"
 #include "Picture.h"
+#if JVET_N0242_NON_LINEAR_ALF
+#include <array>
+#include <cmath>
+#endif
+
+#if JVET_N0242_NON_LINEAR_ALF
+constexpr int AdaptiveLoopFilter::AlfNumClippingValues[];
+#endif
 
 AdaptiveLoopFilter::AdaptiveLoopFilter()
   : m_classifier( nullptr )
@@ -83,6 +91,9 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic
     m_ctuEnableFlag[compIdx] = cs.picture->getAlfCtuEnableFlag( compIdx );
   }
   reconstructCoeff( alfSliceParam, CHANNEL_TYPE_LUMA );
+#if JVET_N0242_NON_LINEAR_ALF
+  if( alfSliceParam.enabledFlag[COMPONENT_Cb] || alfSliceParam.enabledFlag[COMPONENT_Cr] )
+#endif
   reconstructCoeff( alfSliceParam, CHANNEL_TYPE_CHROMA );
 
   PelUnitBuf recYuv = cs.getRecoBuf();
@@ -106,7 +117,11 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic
         deriveClassification( m_classifier, tmpYuv.get( COMPONENT_Y ), blk );
         Area blkPCM(xPos, yPos, width, height);
         resetPCMBlkClassInfo(cs, m_classifier, tmpYuv.get(COMPONENT_Y), blkPCM);
+#if JVET_N0242_NON_LINEAR_ALF
+        m_filter7x7Blk( m_classifier, recYuv, tmpYuv, blk, COMPONENT_Y, m_coeffFinal, m_clippFinal, m_clpRngs.comp[COMPONENT_Y], cs );
+#else
         m_filter7x7Blk(m_classifier, recYuv, tmpYuv, blk, COMPONENT_Y, m_coeffFinal, m_clpRngs.comp[COMPONENT_Y], cs );
+#endif
       }
 
       for( int compIdx = 1; compIdx < MAX_NUM_COMPONENT; compIdx++ )
@@ -119,7 +134,11 @@ void AdaptiveLoopFilter::ALFProcess( CodingStructure& cs, AlfSliceParam& alfSlic
         {
           Area blk( xPos >> chromaScaleX, yPos >> chromaScaleY, width >> chromaScaleX, height >> chromaScaleY );
 
+#if JVET_N0242_NON_LINEAR_ALF
+          m_filter5x5Blk( m_classifier, recYuv, tmpYuv, blk, compID, alfSliceParam.chromaCoeff, m_chromaClippFinal, m_clpRngs.comp[compIdx], cs );
+#else
           m_filter5x5Blk( m_classifier, recYuv, tmpYuv, blk, compID, alfSliceParam.chromaCoeff, m_clpRngs.comp[compIdx], cs );
+#endif
         }
       }
       ctuIdx++;
@@ -136,6 +155,9 @@ void AdaptiveLoopFilter::reconstructCoeff( AlfSliceParam& alfSliceParam, Channel
   int numCoeffMinus1 = numCoeff - 1;
   int numFilters = isLuma( channel ) ? alfSliceParam.numLumaFilters : 1;
   short* coeff = isLuma( channel ) ? alfSliceParam.lumaCoeff : alfSliceParam.chromaCoeff;
+#if JVET_N0242_NON_LINEAR_ALF
+  short* clipp = isLuma( channel ) ? alfSliceParam.lumaClipp : alfSliceParam.chromaClipp;
+#endif
 
   if( alfSliceParam.alfLumaCoeffDeltaPredictionFlag && isLuma( channel ) )
   {
@@ -150,16 +172,26 @@ void AdaptiveLoopFilter::reconstructCoeff( AlfSliceParam& alfSliceParam, Channel
 
   for( int filterIdx = 0; filterIdx < numFilters; filterIdx++ )
   {
+#if JVET_N0242_NON_LINEAR_ALF
+    coeff[filterIdx* MAX_NUM_ALF_LUMA_COEFF + numCoeffMinus1] = factor;
+#else
     int sum = 0;
     for( int i = 0; i < numCoeffMinus1; i++ )
     {
       sum += ( coeff[filterIdx* MAX_NUM_ALF_LUMA_COEFF + i] << 1 );
     }
     coeff[filterIdx* MAX_NUM_ALF_LUMA_COEFF + numCoeffMinus1] = factor - sum;
+#endif
   }
 
   if( isChroma( channel ) )
   {
+#if JVET_N0242_NON_LINEAR_ALF
+    for( int coeffIdx = 0; coeffIdx < numCoeffMinus1; ++coeffIdx )
+    {
+      m_chromaClippFinal[coeffIdx] = alfSliceParam.nonLinearFlag[channel] ? m_alfClippingValues[channel][clipp[coeffIdx]] : m_alfClippingValues[channel][0];
+    }
+#endif
     return;
   }
 
@@ -167,6 +199,12 @@ void AdaptiveLoopFilter::reconstructCoeff( AlfSliceParam& alfSliceParam, Channel
   {
     int filterIdx = alfSliceParam.filterCoeffDeltaIdx[classIdx];
     memcpy( m_coeffFinal + classIdx * MAX_NUM_ALF_LUMA_COEFF, coeff + filterIdx * MAX_NUM_ALF_LUMA_COEFF, sizeof( short ) * numCoeff );
+#if JVET_N0242_NON_LINEAR_ALF
+    for( int coeffIdx = 0; coeffIdx < numCoeffMinus1; ++coeffIdx )
+    {
+      (m_clippFinal + classIdx * MAX_NUM_ALF_LUMA_COEFF)[coeffIdx] = alfSliceParam.nonLinearFlag[channel] ? m_alfClippingValues[channel][(clipp + filterIdx * MAX_NUM_ALF_LUMA_COEFF)[coeffIdx]] : m_alfClippingValues[channel][0];
+    }
+#endif
   }
 
   if( bRedo && alfSliceParam.alfLumaCoeffDeltaPredictionFlag )
@@ -197,6 +235,31 @@ void AdaptiveLoopFilter::create( const int picWidth, const int picHeight, const
   m_filterShapes[CHANNEL_TYPE_LUMA].push_back( AlfFilterShape( 7 ) );
   m_filterShapes[CHANNEL_TYPE_CHROMA].push_back( AlfFilterShape( 5 ) );
 
+#if JVET_N0242_NON_LINEAR_ALF
+  static_assert( AlfNumClippingValues[CHANNEL_TYPE_LUMA] > 0, "AlfNumClippingValues[CHANNEL_TYPE_LUMA] must be at least one" );
+  for( int i = 0; i < AlfNumClippingValues[CHANNEL_TYPE_LUMA]; ++i )
+  {
+    m_alfClippingValues[CHANNEL_TYPE_LUMA][i] =
+      (Pel) std::round(
+        std::pow(
+          2.,
+          double( m_inputBitDepth[CHANNEL_TYPE_LUMA] * ( AlfNumClippingValues[CHANNEL_TYPE_LUMA] - i ) ) / AlfNumClippingValues[CHANNEL_TYPE_LUMA]
+          ) );
+  }
+  static_assert( AlfNumClippingValues[CHANNEL_TYPE_CHROMA] > 0, "AlfNumClippingValues[CHANNEL_TYPE_CHROMA] must be at least one" );
+  m_alfClippingValues[CHANNEL_TYPE_CHROMA][0] = 1 << m_inputBitDepth[CHANNEL_TYPE_CHROMA];
+  for( int i = 1; i < AlfNumClippingValues[CHANNEL_TYPE_CHROMA]; ++i )
+  {
+    m_alfClippingValues[CHANNEL_TYPE_CHROMA][i] =
+      (Pel) std::round(
+        std::pow(
+          2.,
+          m_inputBitDepth[CHANNEL_TYPE_CHROMA] - 8
+            + 8. * ( AlfNumClippingValues[CHANNEL_TYPE_CHROMA] - i - 1 ) / ( AlfNumClippingValues[CHANNEL_TYPE_CHROMA] - 1 )
+          ) );
+  }
+#endif
+
   m_tempBuf.destroy();
   m_tempBuf.create( format, Area( 0, 0, picWidth, picHeight ), maxCUWidth, MAX_ALF_FILTER_LENGTH >> 1, 0, false );
 
@@ -496,7 +559,11 @@ void AdaptiveLoopFilter::deriveClassificationBlk( AlfClassifier** classifier, in
 }
 
 template<AlfFilterType filtType>
+#if JVET_N0242_NON_LINEAR_ALF
+void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs )
+#else
 void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs )
+#endif
 {
   const bool bChroma = isChroma( compId );
   if( bChroma )
@@ -526,6 +593,9 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
   const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6;
 
   short *coef = filterSet;
+#if JVET_N0242_NON_LINEAR_ALF
+  short *clip = fClipSet;
+#endif
 
   const int shift = m_NUM_BITS - 1;
 
@@ -547,7 +617,12 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
   int dstStride2 = dstStride * clsSizeY;
   int srcStride2 = srcStride * clsSizeY;
 
+#if JVET_N0242_NON_LINEAR_ALF
+  std::array<int, MAX_NUM_ALF_LUMA_COEFF> filterCoeff;
+  std::array<int, MAX_NUM_ALF_LUMA_COEFF> filterClipp;
+#else
   std::vector<Pel> filterCoeff( MAX_NUM_ALF_LUMA_COEFF );
+#endif
 
   pImgYPad0 = src + startHeight * srcStride + startWidth;
   pImgYPad1 = pImgYPad0 + srcStride;
@@ -578,6 +653,9 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
           continue;
         }
         coef = filterSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF;
+#if JVET_N0242_NON_LINEAR_ALF
+        clip = fClipSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF;
+#endif
       }
       else if( isPCMFilterDisabled )
       {
@@ -609,18 +687,30 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
         if( transposeIdx == 1 )
         {
           filterCoeff = { coef[9], coef[4], coef[10], coef[8], coef[1], coef[5], coef[11], coef[7], coef[3], coef[0], coef[2], coef[6], coef[12] };
+#if JVET_N0242_NON_LINEAR_ALF
+          filterClipp = { clip[9], clip[4], clip[10], clip[8], clip[1], clip[5], clip[11], clip[7], clip[3], clip[0], clip[2], clip[6], clip[12] };
+#endif
         }
         else if( transposeIdx == 2 )
         {
           filterCoeff = { coef[0], coef[3], coef[2], coef[1], coef[8], coef[7], coef[6], coef[5], coef[4], coef[9], coef[10], coef[11], coef[12] };
+#if JVET_N0242_NON_LINEAR_ALF
+          filterClipp = { clip[0], clip[3], clip[2], clip[1], clip[8], clip[7], clip[6], clip[5], clip[4], clip[9], clip[10], clip[11], clip[12] };
+#endif
         }
         else if( transposeIdx == 3 )
         {
           filterCoeff = { coef[9], coef[8], coef[10], coef[4], coef[3], coef[7], coef[11], coef[5], coef[1], coef[0], coef[2], coef[6], coef[12] };
+#if JVET_N0242_NON_LINEAR_ALF
+          filterClipp = { clip[9], clip[8], clip[10], clip[4], clip[3], clip[7], clip[11], clip[5], clip[1], clip[0], clip[2], clip[6], clip[12] };
+#endif
         }
         else
         {
           filterCoeff = { coef[0], coef[1], coef[2], coef[3], coef[4], coef[5], coef[6], coef[7], coef[8], coef[9], coef[10], coef[11], coef[12] };
+#if JVET_N0242_NON_LINEAR_ALF
+          filterClipp = { clip[0], clip[1], clip[2], clip[3], clip[4], clip[5], clip[6], clip[7], clip[8], clip[9], clip[10], clip[11], clip[12] };
+#endif
         }
       }
       else
@@ -628,18 +718,30 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
         if( transposeIdx == 1 )
         {
           filterCoeff = { coef[4], coef[1], coef[5], coef[3], coef[0], coef[2], coef[6] };
+#if JVET_N0242_NON_LINEAR_ALF
+          filterClipp = { clip[4], clip[1], clip[5], clip[3], clip[0], clip[2], clip[6] };
+#endif
         }
         else if( transposeIdx == 2 )
         {
           filterCoeff = { coef[0], coef[3], coef[2], coef[1], coef[4], coef[5], coef[6] };
+#if JVET_N0242_NON_LINEAR_ALF
+          filterClipp = { clip[0], clip[3], clip[2], clip[1], clip[4], clip[5], clip[6] };
+#endif
         }
         else if( transposeIdx == 3 )
         {
           filterCoeff = { coef[4], coef[3], coef[5], coef[1], coef[0], coef[2], coef[6] };
+#if JVET_N0242_NON_LINEAR_ALF
+          filterClipp = { clip[4], clip[3], clip[5], clip[1], clip[0], clip[2], clip[6] };
+#endif
         }
         else
         {
           filterCoeff = { coef[0], coef[1], coef[2], coef[3], coef[4], coef[5], coef[6] };
+#if JVET_N0242_NON_LINEAR_ALF
+          filterClipp = { clip[0], clip[1], clip[2], clip[3], clip[4], clip[5], clip[6] };
+#endif
         }
       }
 
@@ -675,39 +777,121 @@ void AdaptiveLoopFilter::filterBlk( AlfClassifier** classifier, const PelUnitBuf
           }
 
           int sum = 0;
+#if JVET_N0242_NON_LINEAR_ALF
+          const Pel curr = pImg0[+0];
+#endif
           if( filtType == ALF_FILTER_7 )
           {
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[0] * ( pImg5[0] + pImg6[0] );
+#else
+            sum += filterCoeff[0] * ( clipALF(filterClipp[0], curr, pImg5[+0], pImg6[+0]) );
+#endif
 
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[1] * ( pImg3[+1] + pImg4[-1] );
+#else
+            sum += filterCoeff[1] * ( clipALF(filterClipp[1], curr, pImg3[+1], pImg4[-1]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[2] * ( pImg3[+0] + pImg4[+0] );
+#else
+            sum += filterCoeff[2] * ( clipALF(filterClipp[2], curr, pImg3[+0], pImg4[+0]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[3] * ( pImg3[-1] + pImg4[+1] );
+#else
+            sum += filterCoeff[3] * ( clipALF(filterClipp[3], curr, pImg3[-1], pImg4[+1]) );
+#endif
 
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[4] * ( pImg1[+2] + pImg2[-2] );
+#else
+            sum += filterCoeff[4] * ( clipALF(filterClipp[4], curr, pImg1[+2], pImg2[-2]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[5] * ( pImg1[+1] + pImg2[-1] );
+#else
+            sum += filterCoeff[5] * ( clipALF(filterClipp[5], curr, pImg1[+1], pImg2[-1]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[6] * ( pImg1[+0] + pImg2[+0] );
+#else
+            sum += filterCoeff[6] * ( clipALF(filterClipp[6], curr, pImg1[+0], pImg2[+0]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[7] * ( pImg1[-1] + pImg2[+1] );
+#else
+            sum += filterCoeff[7] * ( clipALF(filterClipp[7], curr, pImg1[-1], pImg2[+1]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[8] * ( pImg1[-2] + pImg2[+2] );
+#else
+            sum += filterCoeff[8] * ( clipALF(filterClipp[8], curr, pImg1[-2], pImg2[+2]) );
+#endif
 
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[9] * ( pImg0[+3] + pImg0[-3] );
+#else
+            sum += filterCoeff[9] * ( clipALF(filterClipp[9], curr, pImg0[+3], pImg0[-3]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[10] * ( pImg0[+2] + pImg0[-2] );
+#else
+            sum += filterCoeff[10] * ( clipALF(filterClipp[10], curr, pImg0[+2], pImg0[-2]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[11] * ( pImg0[+1] + pImg0[-1] );
+#else
+            sum += filterCoeff[11] * ( clipALF(filterClipp[11], curr, pImg0[+1], pImg0[-1]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[12] * ( pImg0[+0] );
+#endif
           }
           else
           {
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[0] * ( pImg3[+0] + pImg4[+0] );
+#else
+            sum += filterCoeff[0] * ( clipALF(filterClipp[0], curr, pImg3[+0], pImg4[+0]) );
+#endif
 
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[1] * ( pImg1[+1] + pImg2[-1] );
+#else
+            sum += filterCoeff[1] * ( clipALF(filterClipp[1], curr, pImg1[+1], pImg2[-1]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[2] * ( pImg1[+0] + pImg2[+0] );
+#else
+            sum += filterCoeff[2] * ( clipALF(filterClipp[2], curr, pImg1[+0], pImg2[+0]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[3] * ( pImg1[-1] + pImg2[+1] );
+#else
+            sum += filterCoeff[3] * ( clipALF(filterClipp[3], curr, pImg1[-1], pImg2[+1]) );
+#endif
 
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[4] * ( pImg0[+2] + pImg0[-2] );
+#else
+            sum += filterCoeff[4] * ( clipALF(filterClipp[4], curr, pImg0[+2], pImg0[-2]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[5] * ( pImg0[+1] + pImg0[-1] );
+#else
+            sum += filterCoeff[5] * ( clipALF(filterClipp[5], curr, pImg0[+1], pImg0[-1]) );
+#endif
+#if !JVET_N0242_NON_LINEAR_ALF
             sum += filterCoeff[6] * ( pImg0[+0] );
+#endif
           }
 
           sum = ( sum + offset ) >> shift;
+#if JVET_N0242_NON_LINEAR_ALF
+          sum += curr;
+#endif
           pRec1[jj] = ClipPel( sum, clpRng );
 
           pImg0++;
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.h b/source/Lib/CommonLib/AdaptiveLoopFilter.h
index 92928fee0..5d1da4788 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.h
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.h
@@ -42,6 +42,7 @@
 
 #include "Unit.h"
 #include "UnitTools.h"
+
 struct AlfClassifier
 {
   AlfClassifier() {}
@@ -66,6 +67,16 @@ enum Direction
 class AdaptiveLoopFilter
 {
 public:
+#if JVET_N0242_NON_LINEAR_ALF
+  static inline int clipALF(const int clip, const short ref, const short val0, const short val1) // Returns the sum of (val0 - ref) and (val1 - ref), each passed through Clip3 with bounds -clip and +clip; filterBlk passes the current sample as ref and the two samples of one tap pair as val0/val1.
+  {
+    return Clip3<int>(-clip, +clip, val0-ref) + Clip3<int>(-clip, +clip, val1-ref); // NOTE(review): assumes Clip3(lo, hi, x) clamps x to [lo, hi]; Clip3 is declared outside this patch - confirm. The short operands are promoted to int before the subtraction.
+  }
+
+  static constexpr int AlfNumClippingValues[MAX_NUM_CHANNEL_TYPE] = { 4, 4 };
+  static constexpr int MaxAlfNumClippingValues = 4;
+
+#endif
   static constexpr int   m_NUM_BITS = 8;
   static constexpr int   m_CLASSIFICATION_BLK_SIZE = 32;  //non-normative, local buffer size
   static constexpr int m_ALF_UNUSED_CLASSIDX = 255;
@@ -82,15 +93,24 @@ public:
   void deriveClassification( AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk );
   void resetPCMBlkClassInfo(CodingStructure & cs, AlfClassifier** classifier, const CPelBuf& srcLuma, const Area& blk);
   template<AlfFilterType filtType>
+#if JVET_N0242_NON_LINEAR_ALF
+  static void filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs );
+#else
   static void filterBlk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs );
+#endif
   inline static int getMaxGolombIdx( AlfFilterType filterType )
   {
     return filterType == ALF_FILTER_5 ? 2 : 3;
   }
 
   void( *m_deriveClassificationBlk )( AlfClassifier** classifier, int** laplacian[NUM_DIRECTIONS], const CPelBuf& srcLuma, const Area& blk, const int shift );
+#if JVET_N0242_NON_LINEAR_ALF
+  void( *m_filter5x5Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs );
+  void( *m_filter7x7Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs );
+#else
   void( *m_filter5x5Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs );
   void( *m_filter7x7Blk )( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs );
+#endif
 
 #ifdef TARGET_SIMD_X86
   void initAdaptiveLoopFilterX86();
@@ -99,9 +119,16 @@ public:
 #endif
 
 protected:
+#if JVET_N0242_NON_LINEAR_ALF
+  Pel                          m_alfClippingValues[MAX_NUM_CHANNEL_TYPE][MaxAlfNumClippingValues];
+#endif
   std::vector<AlfFilterShape>  m_filterShapes[MAX_NUM_CHANNEL_TYPE];
   AlfClassifier**              m_classifier;
   short                        m_coeffFinal[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
+#if JVET_N0242_NON_LINEAR_ALF
+  short                        m_clippFinal[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
+  short                        m_chromaClippFinal[MAX_NUM_ALF_LUMA_COEFF];
+#endif
   int**                        m_laplacian[NUM_DIRECTIONS];
   uint8_t*                       m_ctuEnableFlag[MAX_NUM_COMPONENT];
   PelStorage                   m_tempBuf;
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 043b21889..f96bea9de 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -50,6 +50,8 @@
 #include <assert.h>
 #include <cassert>
 
+#define JVET_N0242_NON_LINEAR_ALF                         1 // enable CE5-3.2, Non-linear ALF based on clipping function
+
 #define JVET_N0449_MMVD_SIMP                              1 // Configurable number of mmvd distance entries used
 
 #define JVET_N0137_DUALTREE_CHROMA_SIZE                   1
@@ -1555,8 +1557,17 @@ struct AlfFilterShape
 struct AlfSliceParam
 {
   bool                         enabledFlag[MAX_NUM_COMPONENT];                          // alf_slice_enable_flag, alf_chroma_idc
+#if JVET_N0242_NON_LINEAR_ALF
+  bool                         nonLinearFlag[MAX_NUM_CHANNEL_TYPE];                     // alf_nonlinear_enable_flag[Luma/Chroma]
+#endif
   short                        lumaCoeff[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; // alf_coeff_luma_delta[i][j]
+#if JVET_N0242_NON_LINEAR_ALF
+  short                        lumaClipp[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; // alf_clipp_luma_[i][j]
+#endif
   short                        chromaCoeff[MAX_NUM_ALF_CHROMA_COEFF];                   // alf_coeff_chroma[i]
+#if JVET_N0242_NON_LINEAR_ALF
+  short                        chromaClipp[MAX_NUM_ALF_CHROMA_COEFF];                   // alf_clipp_chroma[i]
+#endif
   short                        filterCoeffDeltaIdx[MAX_NUM_ALF_CLASSES];                // filter_coeff_delta[i]
   bool                         alfLumaCoeffFlag[MAX_NUM_ALF_CLASSES];                   // alf_luma_coeff_flag[i]
   int                          numLumaFilters;                                          // number_of_filters_minus1 + 1
@@ -1572,8 +1583,17 @@ struct AlfSliceParam
   void reset()
   {
     std::memset( enabledFlag, false, sizeof( enabledFlag ) );
+#if JVET_N0242_NON_LINEAR_ALF
+    std::memset( nonLinearFlag, false, sizeof( nonLinearFlag ) );
+#endif
     std::memset( lumaCoeff, 0, sizeof( lumaCoeff ) );
+#if JVET_N0242_NON_LINEAR_ALF
+    std::memset( lumaClipp, 0, sizeof( lumaClipp ) );
+#endif
     std::memset( chromaCoeff, 0, sizeof( chromaCoeff ) );
+#if JVET_N0242_NON_LINEAR_ALF
+    std::memset( chromaClipp, 0, sizeof( chromaClipp ) );
+#endif
     std::memset( filterCoeffDeltaIdx, 0, sizeof( filterCoeffDeltaIdx ) );
     std::memset( alfLumaCoeffFlag, true, sizeof( alfLumaCoeffFlag ) );
     numLumaFilters = 1;
@@ -1584,8 +1604,17 @@ struct AlfSliceParam
   const AlfSliceParam& operator = ( const AlfSliceParam& src )
   {
     std::memcpy( enabledFlag, src.enabledFlag, sizeof( enabledFlag ) );
+#if JVET_N0242_NON_LINEAR_ALF
+    std::memcpy( nonLinearFlag, src.nonLinearFlag, sizeof( nonLinearFlag ) );
+#endif
     std::memcpy( lumaCoeff, src.lumaCoeff, sizeof( lumaCoeff ) );
+#if JVET_N0242_NON_LINEAR_ALF
+    std::memcpy( lumaClipp, src.lumaClipp, sizeof( lumaClipp ) );
+#endif
     std::memcpy( chromaCoeff, src.chromaCoeff, sizeof( chromaCoeff ) );
+#if JVET_N0242_NON_LINEAR_ALF
+    std::memcpy( chromaClipp, src.chromaClipp, sizeof( chromaClipp ) );
+#endif
     std::memcpy( filterCoeffDeltaIdx, src.filterCoeffDeltaIdx, sizeof( filterCoeffDeltaIdx ) );
     std::memcpy( alfLumaCoeffFlag, src.alfLumaCoeffFlag, sizeof( alfLumaCoeffFlag ) );
     numLumaFilters = src.numLumaFilters;
diff --git a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
index ef368d78f..756e92a38 100644
--- a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
+++ b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
@@ -317,12 +317,18 @@ static void simdDeriveClassificationBlk( AlfClassifier** classifier, int** lapla
 }
 
 template<X86_VEXT vext>
+#if JVET_N0242_NON_LINEAR_ALF
+static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs )
+#else
 static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs )
+#endif
 {
+#if !JVET_N0242_NON_LINEAR_ALF
   static const unsigned char mask05[16] = { 8, 9, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
   static const unsigned char mask03[16] = { 4, 5, 2, 3, 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
   static const unsigned char mask_c[16] = { 0, 1, 8, 9, 4, 5, 14, 15, 2, 3, 10, 11, 12, 13, 6, 7 };
 
+#endif
   const bool bChroma = isChroma( compId );
 
   const SPS*     sps = cs.slice->getSPS();
@@ -336,6 +342,7 @@ static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recD
   const int srcStride = srcLuma.stride;
   const int dstStride = dstLuma.stride;
 
+#if !JVET_N0242_NON_LINEAR_ALF
   const Pel* srcExt = srcLuma.buf;
   Pel* dst = dstLuma.buf;
 
@@ -344,6 +351,7 @@ static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recD
   short *coef = filterSet;
   const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5;
 
+#endif
   const int numBitsMinus1 = AdaptiveLoopFilter::m_NUM_BITS - 1;
   const int offset = ( 1 << ( AdaptiveLoopFilter::m_NUM_BITS - 2 ) );
 
@@ -352,56 +360,162 @@ static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recD
   const int startWidth = blk.x;
   const int endWidth = blk.x + blk.width;
 
+#if JVET_N0242_NON_LINEAR_ALF
+  const Pel* src = srcLuma.buf;
+  Pel* dst = dstLuma.buf + startHeight * dstStride;
+
+  const Pel *pImgYPad0, *pImgYPad1, *pImgYPad2, *pImgYPad3, *pImgYPad4;
+  const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4;
+
+  short *coef[2] = { filterSet, filterSet };
+  short *clip[2] = { fClipSet, fClipSet };
+
+  int transposeIdx[2] = {0, 0};
+#else
   Pel* imgYRecPost = dst;
   imgYRecPost += startHeight * dstStride;
 
   int transposeIdx = 0;
 
+#endif
   const int clsSizeY = 4;
   const int clsSizeX = 4;
 
+#if JVET_N0242_NON_LINEAR_ALF
+  bool pcmFlags2x2[8] = {0,0,0,0,0,0,0,0};
+  Pel  pcmRec2x2[32];
+#else
   bool pcmFlags2x2[4] = {0,0,0,0};
   Pel  pcmRec2x2[16];
+#endif
 
   CHECK( startHeight % clsSizeY, "Wrong startHeight in filtering" );
   CHECK( startWidth % clsSizeX, "Wrong startWidth in filtering" );
   CHECK( ( endHeight - startHeight ) % clsSizeY, "Wrong endHeight in filtering" );
   CHECK( ( endWidth - startWidth ) % clsSizeX, "Wrong endWidth in filtering" );
 
+#if !JVET_N0242_NON_LINEAR_ALF
   const Pel* imgYRec = srcExt;
 
   Pel *pRec;
+#endif
   AlfClassifier *pClass = nullptr;
 
+#if JVET_N0242_NON_LINEAR_ALF
+  int dstStride2 = dstStride * clsSizeY;
+#endif
   int srcStride2 = srcStride * clsSizeY;
 
   const __m128i mmOffset = _mm_set1_epi32( offset );
+#if JVET_N0242_NON_LINEAR_ALF
+  const __m128i mmMin = _mm_set1_epi16( clpRng.min );
+  const __m128i mmMax = _mm_set1_epi16( clpRng.max );
+#else
   const __m128i mmMin = _mm_set1_epi32( clpRng.min );
   const __m128i mmMax = _mm_set1_epi32( clpRng.max );
+#endif
+
+#if JVET_N0242_NON_LINEAR_ALF
+  const unsigned char *filterCoeffIdx[2];
+  Pel filterCoeff[MAX_NUM_ALF_LUMA_COEFF][2];
+  Pel filterClipp[MAX_NUM_ALF_LUMA_COEFF][2];
 
+  pImgYPad0 = src + startHeight * srcStride + startWidth;
+#else
   const __m128i xmm10 = _mm_loadu_si128( ( __m128i* )mask03 );
   const __m128i mm_mask05 = _mm_loadu_si128( ( __m128i* )mask05 );
 
   pImgYPad0 = imgYRec + startHeight * srcStride + startWidth;
+#endif
   pImgYPad1 = pImgYPad0 + srcStride;
   pImgYPad2 = pImgYPad0 - srcStride;
   pImgYPad3 = pImgYPad1 + srcStride;
   pImgYPad4 = pImgYPad2 - srcStride;
+#if !JVET_N0242_NON_LINEAR_ALF
   pImgYPad5 = pImgYPad3 + srcStride;
+#endif
 
+#if JVET_N0242_NON_LINEAR_ALF
+  Pel* pRec0 = dst + startWidth;
+  Pel* pRec1;
+#else
   pRec = imgYRecPost + startWidth;
+#endif
 
+#if JVET_N0242_NON_LINEAR_ALF
+  for( int i = 0; i < endHeight - startHeight; i += clsSizeY )
+#else
   for( int i = 0; i < endHeight - startHeight; i += 4 )
+#endif
   {
+#if !JVET_N0242_NON_LINEAR_ALF
     pRec = imgYRecPost + startWidth + i * dstStride;
 
+#endif
     if( !bChroma )
     {
       pClass = classifier[startHeight + i] + startWidth;
     }
 
+#if JVET_N0242_NON_LINEAR_ALF
+    for( int j = 0; j < endWidth - startWidth; j += 8 )
+#else
     for( int j = 0; j < endWidth - startWidth; j += 4 )
+#endif
     {
+#if JVET_N0242_NON_LINEAR_ALF
+      for( int k = 0; k < 2; ++k )
+      {
+        if( !bChroma )
+        {
+          const AlfClassifier& cl = pClass[j+4*k];
+          transposeIdx[k] = cl.transposeIdx;
+          coef[k] = filterSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF;
+          clip[k] = fClipSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF;
+          if ( isPCMFilterDisabled && cl.classIdx == AdaptiveLoopFilter::m_ALF_UNUSED_CLASSIDX && transposeIdx[k] == AdaptiveLoopFilter::m_ALF_UNUSED_TRANSPOSIDX )
+          {
+            // Note that the last one (i.e. filterCoeff[6][k]) is not used with JVET_N0242_NON_LINEAR_ALF; could be simplified
+            static const unsigned char _filterCoeffIdx[7] = { 0, 0, 0, 0, 0, 0, 0 };
+            static short _identityFilterCoeff[] = { 0 };
+            static short _identityFilterClipp[] = { 0 };
+            filterCoeffIdx[k] = _filterCoeffIdx;
+            coef[k] = _identityFilterCoeff;
+            clip[k] = _identityFilterClipp;
+          }
+          else if( transposeIdx[k] == 1 )
+          {
+            static const unsigned char _filterCoeffIdx[7] = { 4, 1, 5, 3, 0, 2, 6 };
+            filterCoeffIdx[k] = _filterCoeffIdx;
+          }
+          else if( transposeIdx[k] == 2 )
+          {
+            static const unsigned char _filterCoeffIdx[7] = { 0, 3, 2, 1, 4, 5, 6 };
+            filterCoeffIdx[k] = _filterCoeffIdx;
+          }
+          else if( transposeIdx[k] == 3 )
+          {
+            static const unsigned char _filterCoeffIdx[7] = { 4, 3, 5, 1, 0, 2, 6 };
+            filterCoeffIdx[k] = _filterCoeffIdx;
+          }
+          else
+          {
+            static const unsigned char _filterCoeffIdx[7] = { 0, 1, 2, 3, 4, 5, 6 };
+            filterCoeffIdx[k] = _filterCoeffIdx;
+          }
+        }
+        else
+        {
+          static const unsigned char _filterCoeffIdx[7] = { 0, 1, 2, 3, 4, 5, 6 };
+          filterCoeffIdx[k] = _filterCoeffIdx;
+        }
+
+        for ( int i=0; i < 7; ++i )
+        {
+          filterCoeff[i][k] = coef[k][filterCoeffIdx[k][i]];
+          filterClipp[i][k] = clip[k][filterCoeffIdx[k][i]];
+        }
+      }
+#else
       if( !bChroma )
       {
         AlfClassifier& cl = pClass[j];
@@ -464,13 +578,223 @@ static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recD
         c0 = _mm_shuffle_epi8( c0, xmm10 );
       }
 
+#endif
       pImg0 = pImgYPad0 + j;
       pImg1 = pImgYPad1 + j;
       pImg2 = pImgYPad2 + j;
       pImg3 = pImgYPad3 + j;
       pImg4 = pImgYPad4 + j;
+#if !JVET_N0242_NON_LINEAR_ALF
       pImg5 = pImgYPad5 + j;
+#endif
+
+#if JVET_N0242_NON_LINEAR_ALF
+      pRec1 = pRec0 + j;
+
+      if ( bChroma && isPCMFilterDisabled )
+      {
+        int  blkX, blkY;
+        bool *flags  = pcmFlags2x2;
+        Pel  *pcmRec = pcmRec2x2;
+
+        // check which chroma 2x2 blocks use PCM
+        // chroma PCM may not be aligned with 4x4 ALF processing grid
+        for( blkY=0; blkY<4; blkY+=2 )
+        {
+          for( blkX=0; blkX<8; blkX+=2 )
+          {
+            Position pos(j+startWidth+blkX, i+startHeight+blkY);
+            CodingUnit* cu = isDualTree ? cs.getCU(pos, CH_C) : cs.getCU(recalcPosition(nChromaFormat, CH_C, CH_L, pos), CH_L);
+            *flags++ = cu->ipcm ? 1 : 0;
 
+            // save original samples from 2x2 PCM blocks
+            if( cu->ipcm )
+            {
+              *pcmRec++ = pRec1[(blkY+0)*dstStride + (blkX+0)];
+              *pcmRec++ = pRec1[(blkY+0)*dstStride + (blkX+1)];
+              *pcmRec++ = pRec1[(blkY+1)*dstStride + (blkX+0)];
+              *pcmRec++ = pRec1[(blkY+1)*dstStride + (blkX+1)];
+            }
+          }
+        }
+      }
+
+      __m128i xmmNull = _mm_setzero_si128();
+
+      for( int ii = 0; ii < clsSizeY; ii++ )
+      {
+        __m128i clipp, clipm;
+        __m128i coeffa, coeffb;
+        __m128i xmmCur = _mm_lddqu_si128( ( __m128i* ) ( pImg0 + 0 ) );
+
+        // coeff 0 and 1
+        __m128i xmm00 = _mm_lddqu_si128( ( __m128i* ) ( pImg3 + 0 ) );
+        xmm00 = _mm_sub_epi16( xmm00, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[0][0], filterClipp[0][0], filterClipp[0][0], filterClipp[0][0],
+                                filterClipp[0][1], filterClipp[0][1], filterClipp[0][1], filterClipp[0][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm00 = _mm_min_epi16( xmm00, clipp );
+        xmm00 = _mm_max_epi16( xmm00, clipm );
+
+        __m128i xmm01 = _mm_lddqu_si128( ( __m128i* ) ( pImg4 + 0 ) );
+        xmm01 = _mm_sub_epi16( xmm01, xmmCur );
+        xmm01 = _mm_min_epi16( xmm01, clipp );
+        xmm01 = _mm_max_epi16( xmm01, clipm );
+
+        xmm00 = _mm_add_epi16( xmm00, xmm01 );
+
+        __m128i xmm10 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 + 1 ) );
+        xmm10 = _mm_sub_epi16( xmm10, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[1][0], filterClipp[1][0], filterClipp[1][0], filterClipp[1][0],
+                                filterClipp[1][1], filterClipp[1][1], filterClipp[1][1], filterClipp[1][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm10 = _mm_min_epi16( xmm10, clipp );
+        xmm10 = _mm_max_epi16( xmm10, clipm );
+
+        __m128i xmm11 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 - 1 ) );
+        xmm11 = _mm_sub_epi16( xmm11, xmmCur );
+        xmm11 = _mm_min_epi16( xmm11, clipp );
+        xmm11 = _mm_max_epi16( xmm11, clipm );
+
+        xmm10 = _mm_add_epi16( xmm10, xmm11 );
+
+        // 4 first samples
+        coeffa = _mm_set1_epi16( filterCoeff[0][0] );
+        coeffb = _mm_set1_epi16( filterCoeff[1][0] );
+        __m128i xmm0 = _mm_unpacklo_epi16( xmm00, xmm10 );
+        __m128i xmmS0 = _mm_madd_epi16( xmm0, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        // 4 next samples
+        coeffa = _mm_set1_epi16( filterCoeff[0][1] );
+        coeffb = _mm_set1_epi16( filterCoeff[1][1] );
+        __m128i xmm1 = _mm_unpackhi_epi16( xmm00, xmm10 );
+        __m128i xmmS1 = _mm_madd_epi16( xmm1, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        // coeff 2 and 3
+        __m128i xmm20 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 + 0 ) );
+        xmm20 = _mm_sub_epi16( xmm20, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[2][0], filterClipp[2][0], filterClipp[2][0], filterClipp[2][0],
+                                filterClipp[2][1], filterClipp[2][1], filterClipp[2][1], filterClipp[2][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm20 = _mm_min_epi16( xmm20, clipp );
+        xmm20 = _mm_max_epi16( xmm20, clipm );
+
+        __m128i xmm21 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 + 0 ) );
+        xmm21 = _mm_sub_epi16( xmm21, xmmCur );
+        xmm21 = _mm_min_epi16( xmm21, clipp );
+        xmm21 = _mm_max_epi16( xmm21, clipm );
+
+        xmm20 = _mm_add_epi16( xmm20, xmm21 );
+
+        __m128i xmm30 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 - 1 ) );
+        xmm30 = _mm_sub_epi16( xmm30, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[3][0], filterClipp[3][0], filterClipp[3][0], filterClipp[3][0],
+                                filterClipp[3][1], filterClipp[3][1], filterClipp[3][1], filterClipp[3][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm30 = _mm_min_epi16( xmm30, clipp );
+        xmm30 = _mm_max_epi16( xmm30, clipm );
+
+        __m128i xmm31 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 + 1 ) );
+        xmm31 = _mm_sub_epi16( xmm31, xmmCur );
+        xmm31 = _mm_min_epi16( xmm31, clipp );
+        xmm31 = _mm_max_epi16( xmm31, clipm );
+
+        xmm30 = _mm_add_epi16( xmm30, xmm31 );
+
+        // 4 first samples
+        coeffa = _mm_set1_epi16( filterCoeff[2][0] );
+        coeffb = _mm_set1_epi16( filterCoeff[3][0] );
+        xmm0 = _mm_unpacklo_epi16( xmm20, xmm30 );
+        __m128i xmmSt = _mm_madd_epi16( xmm0, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS0 = _mm_add_epi32(xmmS0, xmmSt);
+
+        // 4 next samples
+        coeffa = _mm_set1_epi16( filterCoeff[2][1] );
+        coeffb = _mm_set1_epi16( filterCoeff[3][1] );
+        xmm1 = _mm_unpackhi_epi16( xmm20, xmm30 );
+        xmmSt = _mm_madd_epi16( xmm1, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS1 = _mm_add_epi32(xmmS1, xmmSt);
+
+        // coeff 4 and 5
+        __m128i xmm40 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 + 2 ) );
+        xmm40 = _mm_sub_epi16( xmm40, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[4][0], filterClipp[4][0], filterClipp[4][0], filterClipp[4][0],
+                                filterClipp[4][1], filterClipp[4][1], filterClipp[4][1], filterClipp[4][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm40 = _mm_min_epi16( xmm40, clipp );
+        xmm40 = _mm_max_epi16( xmm40, clipm );
+
+        __m128i xmm41 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 - 2 ) );
+        xmm41 = _mm_sub_epi16( xmm41, xmmCur );
+        xmm41 = _mm_min_epi16( xmm41, clipp );
+        xmm41 = _mm_max_epi16( xmm41, clipm );
+
+        xmm40 = _mm_add_epi16( xmm40, xmm41 );
+
+        __m128i xmm50 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 + 1 ) );
+        xmm50 = _mm_sub_epi16( xmm50, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[5][0], filterClipp[5][0], filterClipp[5][0], filterClipp[5][0],
+                                filterClipp[5][1], filterClipp[5][1], filterClipp[5][1], filterClipp[5][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm50 = _mm_min_epi16( xmm50, clipp );
+        xmm50 = _mm_max_epi16( xmm50, clipm );
+
+        __m128i xmm51 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 - 1 ) );
+        xmm51 = _mm_sub_epi16( xmm51, xmmCur );
+        xmm51 = _mm_min_epi16( xmm51, clipp );
+        xmm51 = _mm_max_epi16( xmm51, clipm );
+
+        xmm50 = _mm_add_epi16( xmm50, xmm51 );
+
+        // 4 first samples
+        coeffa = _mm_set1_epi16( filterCoeff[4][0] );
+        coeffb = _mm_set1_epi16( filterCoeff[5][0] );
+        xmm0 = _mm_unpacklo_epi16( xmm40, xmm50 );
+        xmmSt = _mm_madd_epi16( xmm0, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS0 = _mm_add_epi32(xmmS0, xmmSt);
+
+        // 4 next samples
+        coeffa = _mm_set1_epi16( filterCoeff[4][1] );
+        coeffb = _mm_set1_epi16( filterCoeff[5][1] );
+        xmm1 = _mm_unpackhi_epi16( xmm40, xmm50 );
+        xmmSt = _mm_madd_epi16( xmm1, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS1 = _mm_add_epi32(xmmS1, xmmSt);
+
+        // finish
+        xmmS0 = _mm_add_epi32( xmmS0, mmOffset );
+        xmmS0 = _mm_srai_epi32( xmmS0, numBitsMinus1 );
+        xmmS1 = _mm_add_epi32( xmmS1, mmOffset );
+        xmmS1 = _mm_srai_epi32( xmmS1, numBitsMinus1 );
+
+        xmmS0 = _mm_packs_epi32( xmmS0, xmmS1 );
+        // coeff 6
+        xmmS0 = _mm_add_epi16(xmmS0, xmmCur);
+        xmmS0 = _mm_min_epi16( mmMax, _mm_max_epi16( xmmS0, mmMin ) );
+
+        if( j + 8 <= endWidth - startWidth )
+        {
+          _mm_storeu_si128( ( __m128i* )( pRec1 ), xmmS0 );
+        }
+        else if( j + 6 == endWidth - startWidth )
+        {
+          xmmS0 = _mm_blend_epi16( xmmS0, xmmCur, 0xC0 );
+          _mm_storeu_si128( ( __m128i* )( pRec1 ), xmmS0 );
+        }
+        else if( j + 4 == endWidth - startWidth )
+        {
+          xmmS0 = _mm_blend_epi16( xmmS0, xmmCur, 0xF0 );
+          _mm_storeu_si128( ( __m128i* )( pRec1 ), xmmS0 );
+        }
+        else
+        {
+          xmmS0 = _mm_blend_epi16( xmmS0, xmmCur, 0xFC );
+          _mm_storeu_si128( ( __m128i* )( pRec1 ), xmmS0 );
+        }
+#else
       for( int k = 0; k < 4; k++ )
       {
         __m128i xmm4 = _mm_lddqu_si128( ( __m128i* ) ( pImg4 ) );
@@ -552,14 +876,43 @@ static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recD
         _mm_storel_epi64( ( __m128i* )( pRec ), xmm12 );
 
         pRec += dstStride;
+#endif
 
         pImg0 += srcStride;
         pImg1 += srcStride;
         pImg2 += srcStride;
         pImg3 += srcStride;
         pImg4 += srcStride;
+#if !JVET_N0242_NON_LINEAR_ALF
         pImg5 += srcStride;
+#endif
+
+#if JVET_N0242_NON_LINEAR_ALF
+        pRec1 += dstStride;
+      }
+      pRec1 -= dstStride2;
+      // restore 2x2 PCM chroma blocks
+      if( bChroma && isPCMFilterDisabled )
+      {
+        int  blkX, blkY;
+        bool *flags  = pcmFlags2x2;
+        Pel  *pcmRec = pcmRec2x2;
+        for( blkY=0; blkY<4; blkY+=2 )
+        {
+          for( blkX=0; blkX<8; blkX+=2 )
+          {
+            if( *flags++ )
+            {
+              pRec1[(blkY+0)*dstStride + (blkX+0)] = *pcmRec++;
+              pRec1[(blkY+0)*dstStride + (blkX+1)] = *pcmRec++;
+              pRec1[(blkY+1)*dstStride + (blkX+0)] = *pcmRec++;
+              pRec1[(blkY+1)*dstStride + (blkX+1)] = *pcmRec++;
+            }
+          }
+        }
+      }
 
+#else
       } //<-- end of k-loop
 
       pRec -= ( 4 * dstStride );
@@ -586,22 +939,34 @@ static void simdFilter5x5Blk( AlfClassifier** classifier, const PelUnitBuf &recD
       }
 
       pRec += 4;
+#endif
     }
 
+#if JVET_N0242_NON_LINEAR_ALF
+    pRec0 += dstStride2;
+#else
     pRec += 4 * dstStride;
+#endif
 
     pImgYPad0 += srcStride2;
     pImgYPad1 += srcStride2;
     pImgYPad2 += srcStride2;
     pImgYPad3 += srcStride2;
     pImgYPad4 += srcStride2;
+#if !JVET_N0242_NON_LINEAR_ALF
     pImgYPad5 += srcStride2;
+#endif
   }
 }
 
 template<X86_VEXT vext>
+#if JVET_N0242_NON_LINEAR_ALF
+static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, short* fClipSet, const ClpRng& clpRng, CodingStructure& cs )
+#else
 static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recDst, const CPelUnitBuf& recSrc, const Area& blk, const ComponentID compId, short* filterSet, const ClpRng& clpRng, CodingStructure& cs )
+#endif
 {
+#if !JVET_N0242_NON_LINEAR_ALF
   static const unsigned char mask0[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 6, 7, 4, 5, 2, 3 };
   static const unsigned char mask00[16] = { 2, 3, 0, 1, 0, 0, 0, 0, 8, 9, 0, 0, 0, 0, 0, 1 };
   static const unsigned char mask02[16] = { 0, 0, 0, 0, 2, 3, 10, 11, 0, 0, 10, 11, 2, 3, 0, 0 };
@@ -609,12 +974,15 @@ static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recD
   static const unsigned char mask22[16] = { 14, 15, 0, 0, 6, 7, 4, 5, 12, 13, 0, 0, 8, 9, 0, 1 };
   static const unsigned char mask35[16] = { 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7 };
 
+#endif
   const bool bChroma = isChroma( compId );
 
+#if !JVET_N0242_NON_LINEAR_ALF
   if( bChroma )
   {
     CHECK( 0, "Chroma doesn't support 7x7" );
   }
+#endif
   const SPS*     sps = cs.slice->getSPS();
   bool isDualTree = CS::isDualITree(cs);
   bool isPCMFilterDisabled = sps->getPCMFilterDisableFlag();
@@ -625,6 +993,7 @@ static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recD
   const int srcStride = srcLuma.stride;
   const int dstStride = dstLuma.stride;
 
+#if !JVET_N0242_NON_LINEAR_ALF
   const Pel* srcExt = srcLuma.buf;
   Pel* dst = dstLuma.buf;
 
@@ -634,6 +1003,7 @@ static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recD
   const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4;
   const Pel *pImg5, *pImg6;
 
+#endif
   const int numBitsMinus1 = AdaptiveLoopFilter::m_NUM_BITS - 1;
   const int offset = ( 1 << ( AdaptiveLoopFilter::m_NUM_BITS - 2 ) );
 
@@ -642,37 +1012,70 @@ static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recD
   const int startWidth = blk.x;
   const int endWidth = blk.x + blk.width;
 
+#if JVET_N0242_NON_LINEAR_ALF
+  const Pel* src = srcLuma.buf;
+  Pel* dst = dstLuma.buf + startHeight * dstStride;
+
+  const Pel *pImgYPad0, *pImgYPad1, *pImgYPad2, *pImgYPad3, *pImgYPad4, *pImgYPad5, *pImgYPad6;
+  const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6;
+
+  short *coef[2] = { filterSet, filterSet };
+  short *clip[2] = { fClipSet, fClipSet };
+
+  int transposeIdx[2] = {0, 0};
+#else
   Pel* imgYRecPost = dst;
   imgYRecPost += startHeight * dstStride;
 
   int transposeIdx = 0;
 
+#endif
   const int clsSizeY = 4;
   const int clsSizeX = 4;
 
+#if JVET_N0242_NON_LINEAR_ALF
+  bool pcmFlags2x2[8] = {0,0,0,0,0,0,0,0};
+  Pel  pcmRec2x2[32];
+#else
   bool pcmFlags2x2[4] = {0,0,0,0};
   Pel  pcmRec2x2[16];
+#endif
 
   CHECK( startHeight % clsSizeY, "Wrong startHeight in filtering" );
   CHECK( startWidth % clsSizeX, "Wrong startWidth in filtering" );
   CHECK( ( endHeight - startHeight ) % clsSizeY, "Wrong endHeight in filtering" );
   CHECK( ( endWidth - startWidth ) % clsSizeX, "Wrong endWidth in filtering" );
 
+#if !JVET_N0242_NON_LINEAR_ALF
   const Pel* imgYRec = srcExt;
 
   Pel *pRec;
+#endif
   AlfClassifier *pClass = nullptr;
 
   int dstStride2 = dstStride * clsSizeY;
   int srcStride2 = srcStride * clsSizeY;
 
   const __m128i mmOffset = _mm_set1_epi32( offset );
+#if JVET_N0242_NON_LINEAR_ALF
+  const __m128i mmMin = _mm_set1_epi16( clpRng.min );
+  const __m128i mmMax = _mm_set1_epi16( clpRng.max );
+#else
   const __m128i mmMin = _mm_set1_epi32( clpRng.min );
   const __m128i mmMax = _mm_set1_epi32( clpRng.max );
+#endif
 
+#if JVET_N0242_NON_LINEAR_ALF
+  const unsigned char *filterCoeffIdx[2];
+  Pel filterCoeff[MAX_NUM_ALF_LUMA_COEFF][2];
+  Pel filterClipp[MAX_NUM_ALF_LUMA_COEFF][2];
+
+  pImgYPad0 = src + startHeight * srcStride + startWidth;
+#else
   const __m128i xmm10 = _mm_loadu_si128( ( __m128i* )mask35 );
 
   pImgYPad0 = imgYRec + startHeight * srcStride + startWidth;
+#endif
   pImgYPad1 = pImgYPad0 + srcStride;
   pImgYPad2 = pImgYPad0 - srcStride;
   pImgYPad3 = pImgYPad1 + srcStride;
@@ -680,19 +1083,87 @@ static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recD
   pImgYPad5 = pImgYPad3 + srcStride;
   pImgYPad6 = pImgYPad4 - srcStride;
 
+#if JVET_N0242_NON_LINEAR_ALF
+  Pel* pRec0 = dst + startWidth;
+  Pel* pRec1;
+#else
   pRec = imgYRecPost + startWidth;
+#endif
 
+#if JVET_N0242_NON_LINEAR_ALF
+  for( int i = 0; i < endHeight - startHeight; i += clsSizeY )
+#else
   for( int i = 0; i < endHeight - startHeight; i += 4 )
+#endif
   {
+#if !JVET_N0242_NON_LINEAR_ALF
     pRec = imgYRecPost + startWidth + i * dstStride;
 
+#endif
     if( !bChroma )
     {
       pClass = classifier[startHeight + i] + startWidth;
     }
 
+#if JVET_N0242_NON_LINEAR_ALF
+    for( int j = 0; j < endWidth - startWidth; j += 8 )
+#else
     for( int j = 0; j < endWidth - startWidth; j += 4 )
+#endif
     {
+#if JVET_N0242_NON_LINEAR_ALF
+      for( int k = 0; k < 2; ++k )
+      {
+        if( !bChroma )
+        {
+          const AlfClassifier& cl = pClass[j+4*k];
+          transposeIdx[k] = cl.transposeIdx;
+          coef[k] = filterSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF;
+          clip[k] = fClipSet + cl.classIdx * MAX_NUM_ALF_LUMA_COEFF;
+          if ( isPCMFilterDisabled && cl.classIdx == AdaptiveLoopFilter::m_ALF_UNUSED_CLASSIDX && transposeIdx[k] == AdaptiveLoopFilter::m_ALF_UNUSED_TRANSPOSIDX )
+          {
+            // Note that the last one (i.e. filterCoeff[12][k]) is not used with JVET_N0242_NON_LINEAR_ALF; could be simplified
+            static const unsigned char _filterCoeffIdx[13] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+            static short _identityFilterCoeff[] = { 0 };
+            static short _identityFilterClipp[] = { 0 };
+            filterCoeffIdx[k] = _filterCoeffIdx;
+            coef[k] = _identityFilterCoeff;
+            clip[k] = _identityFilterClipp;
+          }
+          else if( transposeIdx[k] == 1 )
+          {
+            static const unsigned char _filterCoeffIdx[13] = { 9, 4, 10, 8, 1, 5, 11, 7, 3, 0, 2, 6, 12 };
+            filterCoeffIdx[k] = _filterCoeffIdx;
+          }
+          else if( transposeIdx[k] == 2 )
+          {
+            static const unsigned char _filterCoeffIdx[13] = { 0, 3, 2, 1, 8, 7, 6, 5, 4, 9, 10, 11, 12 };
+            filterCoeffIdx[k] = _filterCoeffIdx;
+          }
+          else if( transposeIdx[k] == 3 )
+          {
+            static const unsigned char _filterCoeffIdx[13] = { 9, 8, 10, 4, 3, 7, 11, 5, 1, 0, 2, 6, 12 };
+            filterCoeffIdx[k] = _filterCoeffIdx;
+          }
+          else
+          {
+            static const unsigned char _filterCoeffIdx[13] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
+            filterCoeffIdx[k] = _filterCoeffIdx;
+          }
+        }
+        else
+        {
+          static const unsigned char _filterCoeffIdx[13] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
+          filterCoeffIdx[k] = _filterCoeffIdx;
+        }
+
+        for ( int i=0; i < 13; ++i )
+        {
+          filterCoeff[i][k] = coef[k][filterCoeffIdx[k][i]];
+          filterClipp[i][k] = clip[k][filterCoeffIdx[k][i]];
+        }
+      }
+#else
       if( !bChroma )
       {
         AlfClassifier& cl = pClass[j];
@@ -773,6 +1244,7 @@ static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recD
       {
         c2 = _mm_shuffle_epi8( c2, xmm10 );
       }
+ #endif
 
       pImg0 = pImgYPad0 + j;
       pImg1 = pImgYPad1 + j;
@@ -782,6 +1254,357 @@ static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recD
       pImg5 = pImgYPad5 + j;
       pImg6 = pImgYPad6 + j;
 
+#if JVET_N0242_NON_LINEAR_ALF
+      pRec1 = pRec0 + j;
+
+      if ( bChroma && isPCMFilterDisabled )
+      {
+        int  blkX, blkY;
+        bool *flags  = pcmFlags2x2;
+        Pel  *pcmRec = pcmRec2x2;
+
+        // check which chroma 2x2 blocks use PCM
+        // chroma PCM may not be aligned with 4x4 ALF processing grid
+        for( blkY=0; blkY<4; blkY+=2 )
+        {
+          for( blkX=0; blkX<8; blkX+=2 )
+          {
+            Position pos(j+startWidth+blkX, i+startHeight+blkY);
+            CodingUnit* cu = isDualTree ? cs.getCU(pos, CH_C) : cs.getCU(recalcPosition(nChromaFormat, CH_C, CH_L, pos), CH_L);
+            *flags++ = cu->ipcm ? 1 : 0;
+
+            // save original samples from 2x2 PCM blocks
+            if( cu->ipcm )
+            {
+              *pcmRec++ = pRec1[(blkY+0)*dstStride + (blkX+0)];
+              *pcmRec++ = pRec1[(blkY+0)*dstStride + (blkX+1)];
+              *pcmRec++ = pRec1[(blkY+1)*dstStride + (blkX+0)];
+              *pcmRec++ = pRec1[(blkY+1)*dstStride + (blkX+1)];
+            }
+          }
+        }
+      }
+
+      __m128i xmmNull = _mm_setzero_si128();
+
+      for( int ii = 0; ii < clsSizeY; ii++ )
+      {
+        __m128i clipp, clipm;
+        __m128i coeffa, coeffb;
+        __m128i xmmCur = _mm_lddqu_si128( ( __m128i* ) ( pImg0 + 0 ) );
+
+        // coeff 0 and 1
+        __m128i xmm00 = _mm_lddqu_si128( ( __m128i* ) ( pImg5 + 0 ) );
+        xmm00 = _mm_sub_epi16( xmm00, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[0][0], filterClipp[0][0], filterClipp[0][0], filterClipp[0][0],
+                                filterClipp[0][1], filterClipp[0][1], filterClipp[0][1], filterClipp[0][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm00 = _mm_min_epi16( xmm00, clipp );
+        xmm00 = _mm_max_epi16( xmm00, clipm );
+
+        __m128i xmm01 = _mm_lddqu_si128( ( __m128i* ) ( pImg6 + 0 ) );
+        xmm01 = _mm_sub_epi16( xmm01, xmmCur );
+        xmm01 = _mm_min_epi16( xmm01, clipp );
+        xmm01 = _mm_max_epi16( xmm01, clipm );
+
+        xmm00 = _mm_add_epi16( xmm00, xmm01 );
+
+        __m128i xmm10 = _mm_lddqu_si128( ( __m128i* ) ( pImg3 + 1 ) );
+        xmm10 = _mm_sub_epi16( xmm10, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[1][0], filterClipp[1][0], filterClipp[1][0], filterClipp[1][0],
+                                filterClipp[1][1], filterClipp[1][1], filterClipp[1][1], filterClipp[1][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm10 = _mm_min_epi16( xmm10, clipp );
+        xmm10 = _mm_max_epi16( xmm10, clipm );
+
+        __m128i xmm11 = _mm_lddqu_si128( ( __m128i* ) ( pImg4 - 1 ) );
+        xmm11 = _mm_sub_epi16( xmm11, xmmCur );
+        xmm11 = _mm_min_epi16( xmm11, clipp );
+        xmm11 = _mm_max_epi16( xmm11, clipm );
+
+        xmm10 = _mm_add_epi16( xmm10, xmm11 );
+
+        // 4 first samples
+        coeffa = _mm_set1_epi16( filterCoeff[0][0] );
+        coeffb = _mm_set1_epi16( filterCoeff[1][0] );
+        __m128i xmm0 = _mm_unpacklo_epi16( xmm00, xmm10 );
+        __m128i xmmS0 = _mm_madd_epi16( xmm0, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        // 4 next samples
+        coeffa = _mm_set1_epi16( filterCoeff[0][1] );
+        coeffb = _mm_set1_epi16( filterCoeff[1][1] );
+        __m128i xmm1 = _mm_unpackhi_epi16( xmm00, xmm10 );
+        __m128i xmmS1 = _mm_madd_epi16( xmm1, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        // coeff 2 and 3
+        __m128i xmm20 = _mm_lddqu_si128( ( __m128i* ) ( pImg3 + 0 ) );
+        xmm20 = _mm_sub_epi16( xmm20, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[2][0], filterClipp[2][0], filterClipp[2][0], filterClipp[2][0],
+                                filterClipp[2][1], filterClipp[2][1], filterClipp[2][1], filterClipp[2][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm20 = _mm_min_epi16( xmm20, clipp );
+        xmm20 = _mm_max_epi16( xmm20, clipm );
+
+        __m128i xmm21 = _mm_lddqu_si128( ( __m128i* ) ( pImg4 + 0 ) );
+        xmm21 = _mm_sub_epi16( xmm21, xmmCur );
+        xmm21 = _mm_min_epi16( xmm21, clipp );
+        xmm21 = _mm_max_epi16( xmm21, clipm );
+
+        xmm20 = _mm_add_epi16( xmm20, xmm21 );
+
+        __m128i xmm30 = _mm_lddqu_si128( ( __m128i* ) ( pImg3 - 1 ) );
+        xmm30 = _mm_sub_epi16( xmm30, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[3][0], filterClipp[3][0], filterClipp[3][0], filterClipp[3][0],
+                                filterClipp[3][1], filterClipp[3][1], filterClipp[3][1], filterClipp[3][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm30 = _mm_min_epi16( xmm30, clipp );
+        xmm30 = _mm_max_epi16( xmm30, clipm );
+
+        __m128i xmm31 = _mm_lddqu_si128( ( __m128i* ) ( pImg4 + 1 ) );
+        xmm31 = _mm_sub_epi16( xmm31, xmmCur );
+        xmm31 = _mm_min_epi16( xmm31, clipp );
+        xmm31 = _mm_max_epi16( xmm31, clipm );
+
+        xmm30 = _mm_add_epi16( xmm30, xmm31 );
+
+        // 4 first samples
+        coeffa = _mm_set1_epi16( filterCoeff[2][0] );
+        coeffb = _mm_set1_epi16( filterCoeff[3][0] );
+        xmm0 = _mm_unpacklo_epi16( xmm20, xmm30 );
+        __m128i xmmSt = _mm_madd_epi16( xmm0, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS0 = _mm_add_epi32(xmmS0, xmmSt);
+
+        // 4 next samples
+        coeffa = _mm_set1_epi16( filterCoeff[2][1] );
+        coeffb = _mm_set1_epi16( filterCoeff[3][1] );
+        xmm1 = _mm_unpackhi_epi16( xmm20, xmm30 );
+        xmmSt = _mm_madd_epi16( xmm1, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS1 = _mm_add_epi32(xmmS1, xmmSt);
+
+        // coeff 4 and 5
+        __m128i xmm40 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 + 2 ) );
+        xmm40 = _mm_sub_epi16( xmm40, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[4][0], filterClipp[4][0], filterClipp[4][0], filterClipp[4][0],
+                                filterClipp[4][1], filterClipp[4][1], filterClipp[4][1], filterClipp[4][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm40 = _mm_min_epi16( xmm40, clipp );
+        xmm40 = _mm_max_epi16( xmm40, clipm );
+
+        __m128i xmm41 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 - 2 ) );
+        xmm41 = _mm_sub_epi16( xmm41, xmmCur );
+        xmm41 = _mm_min_epi16( xmm41, clipp );
+        xmm41 = _mm_max_epi16( xmm41, clipm );
+
+        xmm40 = _mm_add_epi16( xmm40, xmm41 );
+
+        __m128i xmm50 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 + 1 ) );
+        xmm50 = _mm_sub_epi16( xmm50, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[5][0], filterClipp[5][0], filterClipp[5][0], filterClipp[5][0],
+                                filterClipp[5][1], filterClipp[5][1], filterClipp[5][1], filterClipp[5][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm50 = _mm_min_epi16( xmm50, clipp );
+        xmm50 = _mm_max_epi16( xmm50, clipm );
+
+        __m128i xmm51 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 - 1 ) );
+        xmm51 = _mm_sub_epi16( xmm51, xmmCur );
+        xmm51 = _mm_min_epi16( xmm51, clipp );
+        xmm51 = _mm_max_epi16( xmm51, clipm );
+
+        xmm50 = _mm_add_epi16( xmm50, xmm51 );
+
+        // 4 first samples
+        coeffa = _mm_set1_epi16( filterCoeff[4][0] );
+        coeffb = _mm_set1_epi16( filterCoeff[5][0] );
+        xmm0 = _mm_unpacklo_epi16( xmm40, xmm50 );
+        xmmSt = _mm_madd_epi16( xmm0, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS0 = _mm_add_epi32(xmmS0, xmmSt);
+
+        // 4 next samples
+        coeffa = _mm_set1_epi16( filterCoeff[4][1] );
+        coeffb = _mm_set1_epi16( filterCoeff[5][1] );
+        xmm1 = _mm_unpackhi_epi16( xmm40, xmm50 );
+        xmmSt = _mm_madd_epi16( xmm1, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS1 = _mm_add_epi32(xmmS1, xmmSt);
+
+
+        // coeff 6 and 7
+        __m128i xmm60 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 + 0 ) );
+        xmm60 = _mm_sub_epi16( xmm60, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[6][0], filterClipp[6][0], filterClipp[6][0], filterClipp[6][0],
+                                filterClipp[6][1], filterClipp[6][1], filterClipp[6][1], filterClipp[6][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm60 = _mm_min_epi16( xmm60, clipp );
+        xmm60 = _mm_max_epi16( xmm60, clipm );
+
+        __m128i xmm61 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 + 0 ) );
+        xmm61 = _mm_sub_epi16( xmm61, xmmCur );
+        xmm61 = _mm_min_epi16( xmm61, clipp );
+        xmm61 = _mm_max_epi16( xmm61, clipm );
+
+        xmm60 = _mm_add_epi16( xmm60, xmm61 );
+
+        __m128i xmm70 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 - 1 ) );
+        xmm70 = _mm_sub_epi16( xmm70, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[7][0], filterClipp[7][0], filterClipp[7][0], filterClipp[7][0],
+                                filterClipp[7][1], filterClipp[7][1], filterClipp[7][1], filterClipp[7][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm70 = _mm_min_epi16( xmm70, clipp );
+        xmm70 = _mm_max_epi16( xmm70, clipm );
+
+        __m128i xmm71 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 + 1 ) );
+        xmm71 = _mm_sub_epi16( xmm71, xmmCur );
+        xmm71 = _mm_min_epi16( xmm71, clipp );
+        xmm71 = _mm_max_epi16( xmm71, clipm );
+
+        xmm70 = _mm_add_epi16( xmm70, xmm71 );
+
+        // 4 first samples
+        coeffa = _mm_set1_epi16( filterCoeff[6][0] );
+        coeffb = _mm_set1_epi16( filterCoeff[7][0] );
+        xmm0 = _mm_unpacklo_epi16( xmm60, xmm70 );
+        xmmSt = _mm_madd_epi16( xmm0, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS0 = _mm_add_epi32(xmmS0, xmmSt);
+
+        // 4 next samples
+        coeffa = _mm_set1_epi16( filterCoeff[6][1] );
+        coeffb = _mm_set1_epi16( filterCoeff[7][1] );
+        xmm1 = _mm_unpackhi_epi16( xmm60, xmm70 );
+        xmmSt = _mm_madd_epi16( xmm1, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS1 = _mm_add_epi32(xmmS1, xmmSt);
+
+
+        // coeff 8 and 9
+        __m128i xmm80 = _mm_lddqu_si128( ( __m128i* ) ( pImg1 - 2 ) );
+        xmm80 = _mm_sub_epi16( xmm80, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[8][0], filterClipp[8][0], filterClipp[8][0], filterClipp[8][0],
+                                filterClipp[8][1], filterClipp[8][1], filterClipp[8][1], filterClipp[8][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm80 = _mm_min_epi16( xmm80, clipp );
+        xmm80 = _mm_max_epi16( xmm80, clipm );
+
+        __m128i xmm81 = _mm_lddqu_si128( ( __m128i* ) ( pImg2 + 2 ) );
+        xmm81 = _mm_sub_epi16( xmm81, xmmCur );
+        xmm81 = _mm_min_epi16( xmm81, clipp );
+        xmm81 = _mm_max_epi16( xmm81, clipm );
+
+        xmm80 = _mm_add_epi16( xmm80, xmm81 );
+
+        __m128i xmm90 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 + 3 ) );
+        xmm90 = _mm_sub_epi16( xmm90, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[9][0], filterClipp[9][0], filterClipp[9][0], filterClipp[9][0],
+                                filterClipp[9][1], filterClipp[9][1], filterClipp[9][1], filterClipp[9][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm90 = _mm_min_epi16( xmm90, clipp );
+        xmm90 = _mm_max_epi16( xmm90, clipm );
+
+        __m128i xmm91 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 - 3 ) );
+        xmm91 = _mm_sub_epi16( xmm91, xmmCur );
+        xmm91 = _mm_min_epi16( xmm91, clipp );
+        xmm91 = _mm_max_epi16( xmm91, clipm );
+
+        xmm90 = _mm_add_epi16( xmm90, xmm91 );
+
+        // 4 first samples
+        coeffa = _mm_set1_epi16( filterCoeff[8][0] );
+        coeffb = _mm_set1_epi16( filterCoeff[9][0] );
+        xmm0 = _mm_unpacklo_epi16( xmm80, xmm90 );
+        xmmSt = _mm_madd_epi16( xmm0, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS0 = _mm_add_epi32(xmmS0, xmmSt);
+
+        // 4 next samples
+        coeffa = _mm_set1_epi16( filterCoeff[8][1] );
+        coeffb = _mm_set1_epi16( filterCoeff[9][1] );
+        xmm1 = _mm_unpackhi_epi16( xmm80, xmm90 );
+        xmmSt = _mm_madd_epi16( xmm1, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS1 = _mm_add_epi32(xmmS1, xmmSt);
+
+
+        // coeff 10 and 11
+        __m128i xmm100 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 + 2 ) );
+        xmm100 = _mm_sub_epi16( xmm100, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[10][0], filterClipp[10][0], filterClipp[10][0], filterClipp[10][0],
+                                filterClipp[10][1], filterClipp[10][1], filterClipp[10][1], filterClipp[10][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm100 = _mm_min_epi16( xmm100, clipp );
+        xmm100 = _mm_max_epi16( xmm100, clipm );
+
+        __m128i xmm101 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 - 2 ) );
+        xmm101 = _mm_sub_epi16( xmm101, xmmCur );
+        xmm101 = _mm_min_epi16( xmm101, clipp );
+        xmm101 = _mm_max_epi16( xmm101, clipm );
+
+        xmm100 = _mm_add_epi16( xmm100, xmm101 );
+
+        __m128i xmm110 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 + 1 ) );
+        xmm110 = _mm_sub_epi16( xmm110, xmmCur );
+        clipp = _mm_setr_epi16( filterClipp[11][0], filterClipp[11][0], filterClipp[11][0], filterClipp[11][0],
+                                filterClipp[11][1], filterClipp[11][1], filterClipp[11][1], filterClipp[11][1] );
+        clipm = _mm_sub_epi16( xmmNull, clipp );
+        xmm110 = _mm_min_epi16( xmm110, clipp );
+        xmm110 = _mm_max_epi16( xmm110, clipm );
+
+        __m128i xmm111 = _mm_lddqu_si128( ( __m128i* ) ( pImg0 - 1 ) );
+        xmm111 = _mm_sub_epi16( xmm111, xmmCur );
+        xmm111 = _mm_min_epi16( xmm111, clipp );
+        xmm111 = _mm_max_epi16( xmm111, clipm );
+
+        xmm110 = _mm_add_epi16( xmm110, xmm111 );
+
+        // 4 first samples
+        coeffa = _mm_set1_epi16( filterCoeff[10][0] );
+        coeffb = _mm_set1_epi16( filterCoeff[11][0] );
+        xmm0 = _mm_unpacklo_epi16( xmm100, xmm110 );
+        xmmSt = _mm_madd_epi16( xmm0, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS0 = _mm_add_epi32(xmmS0, xmmSt);
+
+        // 4 next samples
+        coeffa = _mm_set1_epi16( filterCoeff[10][1] );
+        coeffb = _mm_set1_epi16( filterCoeff[11][1] );
+        xmm1 = _mm_unpackhi_epi16( xmm100, xmm110 );
+        xmmSt = _mm_madd_epi16( xmm1, _mm_unpackhi_epi16( coeffa, coeffb ) );
+
+        xmmS1 = _mm_add_epi32(xmmS1, xmmSt);
+
+        // finish
+        xmmS0 = _mm_add_epi32( xmmS0, mmOffset );
+        xmmS0 = _mm_srai_epi32( xmmS0, numBitsMinus1 );
+        xmmS1 = _mm_add_epi32( xmmS1, mmOffset );
+        xmmS1 = _mm_srai_epi32( xmmS1, numBitsMinus1 );
+
+        xmmS0 = _mm_packs_epi32( xmmS0, xmmS1 );
+        // coeff 12
+        xmmS0 = _mm_add_epi16(xmmS0, xmmCur);
+        xmmS0 = _mm_min_epi16( mmMax, _mm_max_epi16( xmmS0, mmMin ) );
+
+        if( j + 8 <= endWidth - startWidth )
+        {
+          _mm_storeu_si128( ( __m128i* )( pRec1 ), xmmS0 );
+        }
+        else if( j + 6 == endWidth - startWidth )
+        {
+          xmmS0 = _mm_blend_epi16( xmmS0, xmmCur, 0xC0 );
+          _mm_storeu_si128( ( __m128i* )( pRec1 ), xmmS0 );
+        }
+        else if( j + 4 == endWidth - startWidth )
+        {
+          xmmS0 = _mm_blend_epi16( xmmS0, xmmCur, 0xF0 );
+          _mm_storeu_si128( ( __m128i* )( pRec1 ), xmmS0 );
+        }
+        else
+        {
+          xmmS0 = _mm_blend_epi16( xmmS0, xmmCur, 0xFC );
+          _mm_storeu_si128( ( __m128i* )( pRec1 ), xmmS0 );
+        }
+#else
       for( int k = 0; k < 4; k++ )
       {
         __m128i xmm6 = _mm_lddqu_si128( ( __m128i* ) pImg6 );
@@ -861,6 +1684,7 @@ static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recD
         _mm_storel_epi64( ( __m128i* )( pRec ), xmm12 );
 
         pRec += dstStride;
+#endif
 
         pImg0 += srcStride;
         pImg1 += srcStride;
@@ -869,6 +1693,33 @@ static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recD
         pImg4 += srcStride;
         pImg5 += srcStride;
         pImg6 += srcStride;
+#if JVET_N0242_NON_LINEAR_ALF
+
+        pRec1 += dstStride;
+      }
+      pRec1 -= dstStride2;
+      // restore 2x2 PCM chroma blocks
+      if( bChroma && isPCMFilterDisabled )
+      {
+        int  blkX, blkY;
+        bool *flags  = pcmFlags2x2;
+        Pel  *pcmRec = pcmRec2x2;
+        for( blkY=0; blkY<4; blkY+=2 )
+        {
+          for( blkX=0; blkX<8; blkX+=2 )
+          {
+            if( *flags++ )
+            {
+              pRec1[(blkY+0)*dstStride + (blkX+0)] = *pcmRec++;
+              pRec1[(blkY+0)*dstStride + (blkX+1)] = *pcmRec++;
+              pRec1[(blkY+1)*dstStride + (blkX+0)] = *pcmRec++;
+              pRec1[(blkY+1)*dstStride + (blkX+1)] = *pcmRec++;
+            }
+          }
+        }
+      }
+
+#else
       }
 
       pRec -= ( 4 * dstStride );
@@ -895,9 +1746,14 @@ static void simdFilter7x7Blk( AlfClassifier** classifier, const PelUnitBuf &recD
       }
 
       pRec += 4;
+#endif
     }
 
+#if JVET_N0242_NON_LINEAR_ALF
+    pRec0 += dstStride2;
+#else
     pRec += dstStride2;
+#endif
 
     pImgYPad0 += srcStride2;
     pImgYPad1 += srcStride2;
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index 896ca3944..b7549a782 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -625,6 +625,17 @@ void HLSyntaxReader::parseAPS(APS* aps)
   param.enabledFlag[COMPONENT_Cb] = alfChromaIdc >> 1;
   param.enabledFlag[COMPONENT_Cr] = alfChromaIdc & 1;
 
+#if JVET_N0242_NON_LINEAR_ALF
+  READ_FLAG( code, "alf_luma_clip" );
+  param.nonLinearFlag[CHANNEL_TYPE_LUMA] = code ? true : false;
+
+  if( alfChromaIdc )
+  {
+    READ_FLAG( code, "alf_chroma_clip" );
+    param.nonLinearFlag[CHANNEL_TYPE_CHROMA] = code ? true : false;
+  }
+#endif
+
   xReadTruncBinCode(code, MAX_NUM_ALF_CLASSES);  //number_of_filters_minus1
   param.numLumaFilters = code + 1;
   if (param.numLumaFilters > 1)
@@ -2522,8 +2533,11 @@ bool HLSyntaxReader::xMoreRbspData()
   return (cnt>0);
 }
 
-
+#if JVET_N0242_NON_LINEAR_ALF
+int HLSyntaxReader::alfGolombDecode( const int k, const bool signed_val )
+#else
 int HLSyntaxReader::alfGolombDecode( const int k )
+#endif
 {
   uint32_t uiSymbol;
   int q = -1;
@@ -2555,7 +2569,11 @@ int HLSyntaxReader::alfGolombDecode( const int k )
     }
   }
   nr += q * m;                    // add the bits and the multiple of M
+#if JVET_N0242_NON_LINEAR_ALF
+  if( signed_val && nr != 0 )
+#else
   if( nr != 0 )
+#endif
   {
 #if RExt__DECODER_DEBUG_BIT_STATISTICS
     xReadFlag( uiSymbol, "" );
@@ -2604,6 +2622,9 @@ void HLSyntaxReader::alfFilter( AlfSliceParam& alfSliceParam, const bool isChrom
   static int kMinTab[MAX_NUM_ALF_COEFF];
   const int numFilters = isChroma ? 1 : alfSliceParam.numLumaFilters;
   short* coeff = isChroma ? alfSliceParam.chromaCoeff : alfSliceParam.lumaCoeff;
+#if JVET_N0242_NON_LINEAR_ALF
+  short* clipp = isChroma ? alfSliceParam.chromaClipp : alfSliceParam.lumaClipp;
+#endif
 
   for( int idx = 0; idx < maxGolombIdx; idx++ )
   {
@@ -2639,6 +2660,70 @@ void HLSyntaxReader::alfFilter( AlfSliceParam& alfSliceParam, const bool isChrom
       coeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = alfGolombDecode( kMinTab[alfShape.golombIdx[i]] );
     }
   }
+#if JVET_N0242_NON_LINEAR_ALF
+
+  // Clipping values coding
+  if ( alfSliceParam.nonLinearFlag[isChroma] )
+  {
+    READ_UVLC( code, "clip_min_golomb_order" );
+
+    kMin = code + 1;
+
+    for( int idx = 0; idx < maxGolombIdx; idx++ )
+    {
+      READ_FLAG( code, "clip_golomb_order_increase_flag" );
+      CHECK( code > 1, "Wrong golomb_order_increase_flag" );
+      kMinTab[idx] = kMin + code;
+      kMin = kMinTab[idx];
+    }
+
+    short recCoeff[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
+    if( isChroma )
+    {
+      memcpy( recCoeff, coeff, sizeof(short) * MAX_NUM_ALF_CHROMA_COEFF );
+    }
+    else
+    {
+      memcpy( recCoeff, coeff, sizeof(short) * numFilters * MAX_NUM_ALF_LUMA_COEFF );
+
+      if( alfSliceParam.alfLumaCoeffDeltaPredictionFlag )
+      {
+        for( int i = 1; i < numFilters; i++ )
+        {
+          for( int j = 0; j < alfShape.numCoeff - 1; j++ )
+          {
+            recCoeff[i * MAX_NUM_ALF_LUMA_COEFF + j] += recCoeff[( i - 1 ) * MAX_NUM_ALF_LUMA_COEFF + j];
+          }
+        }
+      }
+    }
+
+    // Clipping values
+    for( int ind = 0; ind < numFilters; ++ind )
+    {
+      if( !isChroma && !alfSliceParam.alfLumaCoeffFlag[ind] && alfSliceParam.alfLumaCoeffDeltaFlag )
+      {
+        std::fill_n( clipp + ind * MAX_NUM_ALF_LUMA_COEFF, alfShape.numCoeff, 0 );
+        continue;
+      }
+
+      for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+      {
+        if( recCoeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] )
+          clipp[ind * MAX_NUM_ALF_LUMA_COEFF + i] = alfGolombDecode( kMinTab[alfShape.golombIdx[i]], false );
+        else
+          clipp[ind * MAX_NUM_ALF_LUMA_COEFF + i] = 0;
+      }
+    }
+  }
+  else
+  {
+    for( int ind = 0; ind < numFilters; ++ind )
+    {
+      std::fill_n( clipp + ind * MAX_NUM_ALF_LUMA_COEFF, alfShape.numCoeff, 0 );
+    }
+  }
+#endif
 }
 
 int HLSyntaxReader::truncatedUnaryEqProb( const int maxSymbol )
diff --git a/source/Lib/DecoderLib/VLCReader.h b/source/Lib/DecoderLib/VLCReader.h
index 01117b9f8..cb4908840 100644
--- a/source/Lib/DecoderLib/VLCReader.h
+++ b/source/Lib/DecoderLib/VLCReader.h
@@ -177,7 +177,11 @@ public:
 private:
   int truncatedUnaryEqProb( const int maxSymbol );
   void xReadTruncBinCode( uint32_t& ruiSymbol, const int uiMaxSymbol );
+#if JVET_N0242_NON_LINEAR_ALF
+  int  alfGolombDecode( const int k, const bool signed_val=true );
+#else
   int  alfGolombDecode( const int k );
+#endif
 
 protected:
   bool  xMoreRbspData();
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
index 8259f1758..c96fd1f23 100644
--- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
+++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
@@ -42,6 +42,370 @@
 #define AlfCtx(c) SubCtx( Ctx::ctbAlfFlag, c )
 std::vector<double> EncAdaptiveLoopFilter::m_lumaLevelToWeightPLUT;
 
+#if JVET_N0242_NON_LINEAR_ALF
+void AlfCovariance::getClipMax(const AlfFilterShape& alfShape, int *clip_max) const // For each tap k below numCoeff-1, stores in clip_max[k] the index reached by stepping up from 0 while the next index has identical statistics; alfShape is not read.
+{
+  for( int k = 0; k < numCoeff-1; ++k ) // every tap except the last one
+  {
+    clip_max[k] = 0;
+
+    bool inc = true; // cleared as soon as one E entry differs between the current and the next index
+    while (clip_max[k]+1 < numBins && y[clip_max[k]+1][k] == y[clip_max[k]][k]) // only advance while the next index has the same y entry for tap k
+    {
+      for (int l = 0; l < numCoeff; ++l) // compare row k of E against every tap l, with l held at clipping index 0
+        if (E[clip_max[k]][0][k][l] != E[clip_max[k]+1][0][k][l])
+        {
+          inc = false;
+          break;
+        }
+      if (!inc)
+      {
+        break;
+      }
+      ++clip_max[k];
+    }
+  }
+  clip_max[numCoeff-1] = 0; // the last tap is always given index 0
+}
+
+void AlfCovariance::reduceClipCost(const AlfFilterShape& alfShape, int *clip) const // Lowers each clip[k] (k below numCoeff-1) in place for as long as the statistics selected by the current clip[] stay identical; alfShape is not read.
+{
+  for( int k = 0; k < numCoeff-1; ++k ) // the last tap is left untouched
+  {
+    bool dec = true; // cleared as soon as one E entry differs between clip[k] and clip[k]-1
+    while (clip[k] > 0 && y[clip[k]-1][k] == y[clip[k]][k]) // only step down while the lower index has the same y entry for tap k
+    {
+      for (int l=0; l<numCoeff; ++l) // compare row k of E against every tap l at l's current clipping index
+        if (E[clip[k]][clip[l]][k][l] != E[clip[k]-1][clip[l]][k][l])
+        {
+          dec = false;
+          break;
+        }
+      if (!dec)
+      {
+        break;
+      }
+      --clip[k]; // taps processed later see this already-lowered index through clip[l]
+    }
+  }
+}
+
+double AlfCovariance::optimizeFilter(const AlfFilterShape& alfShape, int* clip, double *f, bool optimize_clip) const // Solves for the filter f at the clipping indices in clip[]; when optimize_clip is set, clip[] is refined in place by a greedy search first. Returns the error of the retained solution.
+{
+  const int size = alfShape.numCoeff;
+  int clip_max[MAX_NUM_ALF_LUMA_COEFF]; // only written and read on the optimize_clip paths
+
+  double err_best, err_last;
+
+  TE kE; // working matrix passed to gnsSolveByChol
+  Ty ky; // working right-hand side passed to gnsSolveByChol
+
+  if( optimize_clip )
+  {
+    // Get the per-tap bound from getClipMax, then force every clip[k] into [clip_max[k], numBins-1]
+    getClipMax(alfShape, clip_max);
+    for (int k=0; k<size; ++k)
+    {
+      clip[k] = std::max(clip_max[k], clip[k]);
+      clip[k] = std::min(clip[k], numBins-1);
+    }
+  }
+
+  setEyFromClip( clip, kE, ky, size ); // helper defined elsewhere; presumably loads kE/ky from E/y at the current clip indices
+
+  gnsSolveByChol( kE, ky, f, size );
+  err_best = calculateError( clip, f, size ); // error of the starting configuration
+
+  int step = optimize_clip ? (numBins+1)/2 : 0; // a step of 0 skips the search loop below
+
+  while( step > 0 ) // each pass tries moving every tap by -step and +step and keeps the single best move
+  {
+    double err_min = err_best;
+    int idx_min = -1; // tap of the best improving move found in this pass, -1 if none
+    int inc_min = 0;  // signed amount of that move
+
+    for( int k = 0; k < size-1; ++k )
+    {
+      if( clip[k] - step >= clip_max[k] ) // trial: lower clip[k], never below clip_max[k]
+      {
+        clip[k] -= step;
+        ky[k] = y[clip[k]][k];
+        for( int l = 0; l < size; l++ ) // only row k and column k depend on clip[k]
+        {
+          kE[k][l] = E[clip[k]][clip[l]][k][l];
+          kE[l][k] = E[clip[l]][clip[k]][l][k];
+        }
+
+        gnsSolveByChol( kE, ky, f, size );
+        err_last = calculateError( clip, f, size );
+
+        if( err_last < err_min )
+        {
+          err_min = err_last;
+          idx_min = k;
+          inc_min = -step;
+        }
+        clip[k] += step; // undo the trial
+      }
+      if( clip[k] + step < numBins ) // trial: raise clip[k], staying inside the table
+      {
+        clip[k] += step;
+        ky[k] = y[clip[k]][k];
+        for( int l = 0; l < size; l++ )
+        {
+          kE[k][l] = E[clip[k]][clip[l]][k][l];
+          kE[l][k] = E[clip[l]][clip[k]][l][k];
+        }
+
+        gnsSolveByChol( kE, ky, f, size );
+        err_last = calculateError( clip, f, size );
+
+        if( err_last < err_min )
+        {
+          err_min = err_last;
+          idx_min = k;
+          inc_min = step;
+        }
+        clip[k] -= step; // undo the trial
+
+      }
+      ky[k] = y[clip[k]][k]; // put entry k, row k and column k back to the unmodified clip[k] before trying the next tap
+      for( int l = 0; l < size; l++ )
+      {
+        kE[k][l] = E[clip[k]][clip[l]][k][l];
+        kE[l][k] = E[clip[l]][clip[k]][l][k];
+      }
+    }
+
+    if( idx_min >= 0 ) // an improving move was found: commit it and run another pass with the same step
+    {
+      err_best = err_min;
+      clip[idx_min] += inc_min;
+      ky[idx_min] = y[clip[idx_min]][idx_min];
+      for( int l = 0; l < size; l++ )
+      {
+        kE[idx_min][l] = E[clip[idx_min]][clip[l]][idx_min][l];
+        kE[l][idx_min] = E[clip[l]][clip[idx_min]][l][idx_min];
+      }
+    }
+    else
+    {
+      --step; // nothing improved at this step size: try a smaller one
+    }
+  }
+
+  if( optimize_clip ) {
+    // compare against the candidate with index 0 on every tap below size-1 (clip_max[size-1] keeps the value getClipMax left there)
+    for( int k = 0; k < size-1; ++k )
+    {
+      clip_max[k] = 0;
+    }
+    TE kE_max;
+    Ty ky_max;
+    setEyFromClip( clip_max, kE_max, ky_max, size );
+
+    gnsSolveByChol( kE_max, ky_max, f, size ); // overwrites f with this candidate's solution
+    err_last = calculateError( clip_max, f, size );
+    if( err_last < err_best ) // the candidate wins: adopt its indices, f already holds its solution
+    {
+      err_best = err_last;
+      for (int k=0; k<size; ++k)
+      {
+        clip[k] = clip_max[k];
+      }
+    }
+    else
+    {
+      // update clip to reduce coding cost
+      reduceClipCost(alfShape, clip);
+
+      // f currently holds the rejected candidate's solution: recompute it from the search result kept in kE/ky
+      gnsSolveByChol( kE, ky, f, size );
+    }
+  }
+
+  return err_best;
+}
+
+double AlfCovariance::calcErrorForCoeffs( const int *clip, const int *coeff, const int numCoeff, const int bitDepth ) const // Returns ( c'Ec / factor - 2 y'c ) / factor for integer coefficients c, with E and y taken at the clip[] indices; the numCoeff parameter shadows the member of the same name.
+{
+  double factor = 1 << ( bitDepth - 1 ); // 2^(bitDepth-1); presumably the fixed-point scale of coeff[] - confirm with callers
+  double error = 0;
+
+  for( int i = 0; i < numCoeff; i++ )   // row i: diagonal term plus twice the terms right of the diagonal
+  {
+    double sum = 0;
+    for( int j = i + 1; j < numCoeff; j++ )
+    {
+      // E[j][i] = E[i][j], sum will be multiplied by 2 later
+      sum += E[clip[i]][clip[j]][i][j] * coeff[j];
+    }
+    error += ( ( E[clip[i]][clip[i]][i][i] * coeff[i] + sum * 2 ) / factor - 2 * y[clip[i]][i] ) * coeff[i];
+  }
+
+  return error / factor;
+}
+
+double AlfCovariance::calculateError( const int *clip, const double *coeff, const int numCoeff ) const // Returns pixAcc minus the dot product of coeff[] with the y entries selected by clip[].
+{
+  double sum = 0;
+  for( int i = 0; i < numCoeff; i++ ) // the numCoeff parameter shadows the member of the same name
+  {
+    sum += coeff[i] * y[clip[i]][i]; // y entry of tap i at clipping index clip[i]
+  }
+
+  return pixAcc - sum;
+}
+
+double AlfCovariance::calculateError( const int *clip ) const // Returns the value of optimizeFilter for the given clip[], discarding the coefficients it computes.
+{
+  Ty c; // scratch buffer for the coefficients; not returned to the caller
+
+  return optimizeFilter( clip, c, numCoeff ); // three-argument overload, not defined in this part of the file
+}
+//********************************
+// Cholesky decomposition
+//********************************
+
+#define ROUND(a)  (((a) < 0)? (int)((a) - 0.5) : (int)((a) + 0.5)) // nearest int, halves rounded away from zero; evaluates its argument twice
+#define REG              0.0001 // added to every diagonal entry by gnsSolveByChol when the first decomposition fails
+#define REG_SQR          0.0000001 // gnsCholeskyDec gives up when a diagonal term is at or below this value
+
+// Cholesky factorisation of the leading numEq x numEq part of inpMatr into the upper-triangular outMatr; returns 1 on success, 0 when a diagonal term is <= REG_SQR.
+int AlfCovariance::gnsCholeskyDec( TE inpMatr, TE outMatr, int numEq ) const
+{
+  Ty invDiag;  /* Vector of the inverse of diagonal entries of outMatr */
+
+  for( int i = 0; i < numEq; i++ )
+  {
+    for( int j = i; j < numEq; j++ ) // only entries on or right of the diagonal of inpMatr are read
+    {
+      /* Compute the scaling factor */
+      double scale = inpMatr[i][j];
+      if( i > 0 )
+      {
+        for( int k = i - 1; k >= 0; k-- ) // subtract the contribution of the rows already factorised
+        {
+          scale -= outMatr[k][j] * outMatr[k][i];
+        }
+      }
+
+      /* Compute i'th row of outMatr */
+      if( i == j )
+      {
+        if( scale <= REG_SQR ) // if(scale <= 0 )  /* If inpMatr is singular */
+        {
+          return 0; // outMatr is left partially filled
+        }
+        else              /* Normal operation */
+          invDiag[i] = 1.0 / ( outMatr[i][i] = sqrt( scale ) ); // stores the diagonal entry and caches its reciprocal in one statement
+      }
+      else
+      {
+        outMatr[i][j] = scale * invDiag[i]; /* Upper triangular part          */
+        outMatr[j][i] = 0.0;              /* Lower triangular part set to 0 */
+      }
+    }
+  }
+  return 1; /* Signal that Cholesky factorization is successfully performed */
+}
+
+void AlfCovariance::gnsTransposeBacksubstitution( TE U, double* rhs, double* x, int order ) const // Solves U' * x = rhs for x, where U is upper triangular, working from row 0 upwards.
+{
+  /* Substitution starts with row 0 and proceeds towards row order-1 */
+  x[0] = rhs[0] / U[0][0];               /* First row of U'                   */
+  for( int i = 1; i < order; i++ )
+  {         /* For the rows 1..order-1           */
+
+    double sum = 0; // accumulates the contribution of the unknowns solved so far
+
+    for( int j = 0; j < i; j++ ) /* Backsubst already solved unknowns */
+    {
+      sum += x[j] * U[j][i]; // U[j][i] is element (i,j) of U'
+    }
+
+    x[i] = ( rhs[i] - sum ) / U[i][i];       /* i'th component of solution vect.  */
+  }
+}
+
+void AlfCovariance::gnsBacksubstitution( TE R, double* z, int size, double* A ) const // Solves R * A = z for A, where R is upper triangular, working from the last row back to row 0.
+{
+  size--; // from here on, size is the index of the last row
+  A[size] = z[size] / R[size][size];
+
+  for( int i = size - 1; i >= 0; i-- )
+  {
+    double sum = 0; // contribution of the unknowns already solved (indices above i)
+
+    for( int j = i + 1; j <= size; j++ )
+    {
+      sum += R[i][j] * A[j];
+    }
+
+    A[i] = ( z[i] - sum ) / R[i][i];
+  }
+}
+
+int AlfCovariance::gnsSolveByChol( const int *clip, double *x, int numEq ) const // Convenience overload: builds the system for the given clip[] and forwards to the matrix overload, returning its result.
+{
+  TE LHS;
+  Ty rhs;
+
+  setEyFromClip( clip, LHS, rhs, numEq ); // helper defined elsewhere; presumably fills LHS/rhs from E/y at the clip[] indices
+  return gnsSolveByChol( LHS, rhs, x, numEq );
+}
+
+int AlfCovariance::gnsSolveByChol( TE LHS, double* rhs, double *x, int numEq ) const // Solves LHS * x = rhs via Cholesky; returns 1 if a factorisation succeeded (possibly after regularisation), 0 if both attempts failed and x was zero-filled.
+{
+  Ty aux;     /* Auxiliary vector */
+  TE U;    /* Upper triangular Cholesky factor of LHS */
+
+  int res = 1;  // Signal that Cholesky factorization is successfully performed
+
+                /* The equation to be solved is LHSx = rhs */
+
+                /* Compute upper triangular U such that U'*U = LHS */
+  if( gnsCholeskyDec( LHS, U, numEq ) ) /* If Cholesky decomposition has been successful */
+  {
+    /* Now, the equation is  U'*U*x = rhs, where U is upper triangular
+    * Solve U'*aux = rhs for aux
+    */
+    gnsTransposeBacksubstitution( U, rhs, aux, numEq );
+
+    /* The equation is now U*x = aux, solve it for x (the solution vector) */
+    gnsBacksubstitution( U, aux, numEq, x );
+
+  }
+  else /* gnsCholeskyDec rejected LHS (a diagonal term was <= REG_SQR) */
+  {
+    res = 0;
+
+    /* Regularize LHS in place; NOTE(review): if TE is an array type the caller's matrix keeps the added REG - confirm */
+    for( int i = 0; i < numEq; i++ )
+    {
+      LHS[i][i] += REG;
+    }
+
+    /* Compute upper triangular U such that U'*U = regularized LHS */
+    res = gnsCholeskyDec( LHS, U, numEq );
+
+    if( !res ) // still not factorisable: hand back an all-zero solution
+    {
+      std::memset( x, 0, sizeof( double )*numEq );
+      return 0;
+    }
+
+    /* Solve  U'*aux = rhs for aux */
+    gnsTransposeBacksubstitution( U, rhs, aux, numEq );
+
+    /* Solve U*x = aux for x */
+    gnsBacksubstitution( U, aux, numEq, x );
+  }
+  return res;
+}
+//////////////////////////////////////////////////////////////////////////////////////////
+
+#endif
 EncAdaptiveLoopFilter::EncAdaptiveLoopFilter()
   : m_CABACEstimator( nullptr )
 {
@@ -53,16 +417,29 @@ EncAdaptiveLoopFilter::EncAdaptiveLoopFilter()
   {
     m_alfCovarianceFrame[i] = nullptr;
   }
+#if !JVET_N0242_NON_LINEAR_ALF
   m_filterCoeffQuant = nullptr;
+#endif
   m_filterCoeffSet = nullptr;
+#if JVET_N0242_NON_LINEAR_ALF
+  m_filterClippSet = nullptr;
+#endif
   m_diffFilterCoeff = nullptr;
 
   m_alfWSSD = 0;
 }
 
+#if JVET_N0242_NON_LINEAR_ALF
+void EncAdaptiveLoopFilter::create( const EncCfg* encCfg, const int picWidth, const int picHeight, const ChromaFormat chromaFormatIDC, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] )
+#else
 void EncAdaptiveLoopFilter::create( const int picWidth, const int picHeight, const ChromaFormat chromaFormatIDC, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] )
+#endif
 {
   AdaptiveLoopFilter::create( picWidth, picHeight, chromaFormatIDC, maxCUWidth, maxCUHeight, maxCUDepth, inputBitDepth );
+#if JVET_N0242_NON_LINEAR_ALF
+  CHECK( encCfg == nullptr, "encCfg must not be null" );
+  m_encCfg = encCfg;
+#endif
 
   for( int channelIdx = 0; channelIdx < MAX_NUM_CHANNEL_TYPE; channelIdx++ )
   {
@@ -109,13 +486,21 @@ void EncAdaptiveLoopFilter::create( const int picWidth, const int picHeight, con
     }
   }
 
+#if !JVET_N0242_NON_LINEAR_ALF
   m_filterCoeffQuant = new int[MAX_NUM_ALF_LUMA_COEFF];
+#endif
   m_filterCoeffSet = new int*[MAX_NUM_ALF_CLASSES];
+#if JVET_N0242_NON_LINEAR_ALF
+  m_filterClippSet = new int*[MAX_NUM_ALF_CLASSES];
+#endif
   m_diffFilterCoeff = new int*[MAX_NUM_ALF_CLASSES];
 
   for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ )
   {
     m_filterCoeffSet[i] = new int[MAX_NUM_ALF_LUMA_COEFF];
+#if JVET_N0242_NON_LINEAR_ALF
+    m_filterClippSet[i] = new int[MAX_NUM_ALF_LUMA_COEFF];
+#endif
     m_diffFilterCoeff[i] = new int[MAX_NUM_ALF_LUMA_COEFF];
   }
 }
@@ -195,6 +580,19 @@ void EncAdaptiveLoopFilter::destroy()
     m_filterCoeffSet = nullptr;
   }
 
+#if JVET_N0242_NON_LINEAR_ALF
+  if( m_filterClippSet )
+  {
+    for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ )
+    {
+      delete[] m_filterClippSet[i];
+      m_filterClippSet[i] = nullptr;
+    }
+    delete[] m_filterClippSet;
+    m_filterClippSet = nullptr;
+  }
+
+#endif
   if( m_diffFilterCoeff )
   {
     for( int i = 0; i < MAX_NUM_ALF_CLASSES; i++ )
@@ -206,9 +604,11 @@ void EncAdaptiveLoopFilter::destroy()
     m_diffFilterCoeff = nullptr;
   }
 
+#if !JVET_N0242_NON_LINEAR_ALF
   delete[] m_filterCoeffQuant;
   m_filterCoeffQuant = nullptr;
 
+#endif
   AdaptiveLoopFilter::destroy();
 }
 
@@ -382,11 +782,26 @@ void EncAdaptiveLoopFilter::alfEncoder( CodingStructure& cs, AlfSliceParam& alfS
       setCtuEnableFlag( m_ctuEnableFlagTmp, channel, 0 );
     }
 
+#if JVET_N0242_NON_LINEAR_ALF
+    const int nonLinearFlagMax =
+      ( isLuma( channel ) ? m_encCfg->getUseNonLinearAlfLuma() : m_encCfg->getUseNonLinearAlfChroma() )
+      ? 2 : 1;
+
+    for( int nonLinearFlag = 0; nonLinearFlag < nonLinearFlagMax; nonLinearFlag++ )
+    {
+#endif
     //2. all CTUs are on
     setEnableFlag( m_alfSliceParamTemp, channel, true );
+#if JVET_N0242_NON_LINEAR_ALF
+    m_alfSliceParamTemp.nonLinearFlag[channel] = nonLinearFlag;
+#endif
     m_CABACEstimator->getCtx() = AlfCtx( ctxStart );
     setCtuEnableFlag( m_ctuEnableFlag, channel, 1 );
+#if JVET_N0242_NON_LINEAR_ALF
+    cost = getFilterCoeffAndCost( cs, 0, channel, nonLinearFlag != 0, iShapeIdx, uiCoeffBits );
+#else
     cost = getFilterCoeffAndCost( cs, 0, channel, false, iShapeIdx, uiCoeffBits );
+#endif
 
     if( cost < costMin )
     {
@@ -425,6 +840,9 @@ void EncAdaptiveLoopFilter::alfEncoder( CodingStructure& cs, AlfSliceParam& alfS
         cost = getFilterCoeffAndCost(cs, distUnfilter, channel, true, iShapeIdx, uiCoeffBits);
       }
     }//for iter
+#if JVET_N0242_NON_LINEAR_ALF
+    }// for nonLinearFlag
+#endif
   }//for shapeIdx
   m_CABACEstimator->getCtx() = AlfCtx( ctxBest );
   copyCtuEnableFlag( m_ctuEnableFlag, m_ctuEnableFlagTmp, channel );
@@ -443,6 +861,9 @@ void EncAdaptiveLoopFilter::alfEncoder( CodingStructure& cs, AlfSliceParam& alfS
       const int chromaScaleY = getComponentScaleY( compID, recBuf.chromaFormat );
       AlfFilterType filterType = isLuma( compID ) ? ALF_FILTER_7 : ALF_FILTER_5;
       short* coeff = isLuma( compID ) ? m_coeffFinal : alfSliceParam.chromaCoeff;
+#if JVET_N0242_NON_LINEAR_ALF
+      short* clipp = isLuma( compID ) ? m_clippFinal : m_chromaClippFinal; //alfSliceParam.chromaClipp;
+#endif
 
       for( int yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight )
       {
@@ -456,11 +877,19 @@ void EncAdaptiveLoopFilter::alfEncoder( CodingStructure& cs, AlfSliceParam& alfS
           {
             if( filterType == ALF_FILTER_5 )
             {
+#if JVET_N0242_NON_LINEAR_ALF
+              m_filter5x5Blk( m_classifier, recBuf, recExtBuf, blk, compID, coeff, clipp, m_clpRngs.comp[compIdx], cs );
+#else
               m_filter5x5Blk( m_classifier, recBuf, recExtBuf, blk, compID, coeff, m_clpRngs.comp[compIdx], cs );
+#endif
             }
             else if( filterType == ALF_FILTER_7 )
             {
+#if JVET_N0242_NON_LINEAR_ALF
+              m_filter7x7Blk( m_classifier, recBuf, recExtBuf, blk, compID, coeff, clipp, m_clpRngs.comp[compIdx], cs );
+#else
               m_filter7x7Blk( m_classifier, recBuf, recExtBuf, blk, compID, coeff, m_clpRngs.comp[compIdx], cs );
+#endif
            }
             else
             {
@@ -482,9 +911,15 @@ void EncAdaptiveLoopFilter::copyAlfSliceParam( AlfSliceParam& alfSliceParamDst,
   }
   else
   {
+#if JVET_N0242_NON_LINEAR_ALF
+    alfSliceParamDst.nonLinearFlag[channel] = alfSliceParamSrc.nonLinearFlag[channel];
+#endif
     alfSliceParamDst.enabledFlag[COMPONENT_Cb] = alfSliceParamSrc.enabledFlag[COMPONENT_Cb];
     alfSliceParamDst.enabledFlag[COMPONENT_Cr] = alfSliceParamSrc.enabledFlag[COMPONENT_Cr];
     memcpy( alfSliceParamDst.chromaCoeff, alfSliceParamSrc.chromaCoeff, sizeof( alfSliceParamDst.chromaCoeff ) );
+#if JVET_N0242_NON_LINEAR_ALF
+    memcpy( alfSliceParamDst.chromaClipp, alfSliceParamSrc.chromaClipp, sizeof( alfSliceParamDst.chromaClipp ) );
+#endif
   }
 }
 double EncAdaptiveLoopFilter::getFilterCoeffAndCost( CodingStructure& cs, double distUnfilter, ChannelType channel, bool bReCollectStat, int iShapeIdx, int& uiCoeffBits )
@@ -502,19 +937,37 @@ double EncAdaptiveLoopFilter::getFilterCoeffAndCost( CodingStructure& cs, double
   //get filter coeff
   if( isLuma( channel ) )
   {
+#if JVET_N0242_NON_LINEAR_ALF
+    std::fill_n(m_alfClipMerged[iShapeIdx][0][0], MAX_NUM_ALF_LUMA_COEFF*MAX_NUM_ALF_CLASSES*MAX_NUM_ALF_CLASSES, m_alfSliceParamTemp.nonLinearFlag[channel] ? AlfNumClippingValues[CHANNEL_TYPE_LUMA] / 2 : 0);
+    // Reset Merge Tmp Cov
+    m_alfCovarianceMerged[iShapeIdx][MAX_NUM_ALF_CLASSES].reset(AlfNumClippingValues[channel]);
     //distortion
+    dist += mergeFiltersAndCost( m_alfSliceParamTemp, alfFilterShape, m_alfCovarianceFrame[channel][iShapeIdx], m_alfCovarianceMerged[iShapeIdx], m_alfClipMerged[iShapeIdx], uiCoeffBits );
+#else
     dist += mergeFiltersAndCost( m_alfSliceParamTemp, alfFilterShape, m_alfCovarianceFrame[channel][iShapeIdx], m_alfCovarianceMerged[iShapeIdx], uiCoeffBits );
+#endif
   }
   else
   {
     //distortion
+#if JVET_N0242_NON_LINEAR_ALF
+    assert(alfFilterShape.numCoeff == m_alfCovarianceFrame[channel][iShapeIdx][0].numCoeff);
+    std::fill_n(m_filterClippSet[0], MAX_NUM_ALF_CHROMA_COEFF, m_alfSliceParamTemp.nonLinearFlag[channel] ? AlfNumClippingValues[CHANNEL_TYPE_CHROMA] / 2 : 0);
+    dist += m_alfCovarianceFrame[channel][iShapeIdx][0].pixAcc + deriveCoeffQuant( m_filterClippSet[0], m_filterCoeffSet[0], m_alfCovarianceFrame[channel][iShapeIdx][0], alfFilterShape, m_NUM_BITS, m_alfSliceParamTemp.nonLinearFlag[channel] );
+#else
     dist += m_alfCovarianceFrame[channel][iShapeIdx][0].pixAcc + deriveCoeffQuant( m_filterCoeffQuant, m_alfCovarianceFrame[channel][iShapeIdx][0].E, m_alfCovarianceFrame[channel][iShapeIdx][0].y, alfFilterShape.numCoeff, alfFilterShape.weights, m_NUM_BITS, true );
     memcpy( m_filterCoeffSet[0], m_filterCoeffQuant, sizeof( *m_filterCoeffQuant ) * alfFilterShape.numCoeff );
+#endif
     //setEnableFlag( m_alfSliceParamTemp, channel, m_ctuEnableFlag );
     const int alfChromaIdc = m_alfSliceParamTemp.enabledFlag[COMPONENT_Cb] * 2 + m_alfSliceParamTemp.enabledFlag[COMPONENT_Cr];
     for( int i = 0; i < MAX_NUM_ALF_CHROMA_COEFF; i++ )
     {
+#if JVET_N0242_NON_LINEAR_ALF
+      m_alfSliceParamTemp.chromaCoeff[i] = m_filterCoeffSet[0][i];
+      m_alfSliceParamTemp.chromaClipp[i] = m_filterClippSet[0][i];
+#else
       m_alfSliceParamTemp.chromaCoeff[i] = m_filterCoeffQuant[i];
+#endif
     }
     uiCoeffBits += getCoeffRate( m_alfSliceParamTemp, true );
     uiSliceFlag = lengthTruncatedUnary(alfChromaIdc, 3);
@@ -530,6 +983,9 @@ double EncAdaptiveLoopFilter::getFilterCoeffAndCost( CodingStructure& cs, double
 int EncAdaptiveLoopFilter::getCoeffRate( AlfSliceParam& alfSliceParam, bool isChroma )
 {
   int iBits = 0;
+#if JVET_N0242_NON_LINEAR_ALF
+  assert( isChroma );
+#else
   if( !isChroma )
   {
     iBits++;                                               // alf_coefficients_delta_flag
@@ -541,10 +997,29 @@ int EncAdaptiveLoopFilter::getCoeffRate( AlfSliceParam& alfSliceParam, bool isCh
       }
     }
   }
+#endif
 
   memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) );
+#if JVET_N0242_NON_LINEAR_ALF
+  AlfFilterShape alfShape( 5 );
+#else
   AlfFilterShape alfShape( isChroma ? 5 : 7 );
+#endif
   const int maxGolombIdx = AdaptiveLoopFilter::getMaxGolombIdx( alfShape.filterType );
+#if JVET_N0242_NON_LINEAR_ALF
+  const int numFilters = 1;
+
+  // vlc for all
+  for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+  {
+    int coeffVal = abs( alfSliceParam.chromaCoeff[i] );
+
+    for( int k = 1; k < 15; k++ )
+    {
+      m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( coeffVal, k );
+    }
+  }
+#else
   const short* coeff = isChroma ? alfSliceParam.chromaCoeff : alfSliceParam.lumaCoeff;
   const int numFilters = isChroma ? 1 : alfSliceParam.numLumaFilters;
 
@@ -564,6 +1039,7 @@ int EncAdaptiveLoopFilter::getCoeffRate( AlfSliceParam& alfSliceParam, bool isCh
       }
     }
   }
+#endif
 
   int kMin = getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan );
 
@@ -579,6 +1055,13 @@ int EncAdaptiveLoopFilter::getCoeffRate( AlfSliceParam& alfSliceParam, bool isCh
     kMin = m_kMinTab[idx];
   }
 
+#if JVET_N0242_NON_LINEAR_ALF
+  // Filter coefficients
+  for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+  {
+    iBits += lengthGolomb( alfSliceParam.chromaCoeff[i], m_kMinTab[alfShape.golombIdx[i]] );  // alf_coeff_chroma[i], alf_coeff_luma_delta[i][j]
+  }
+#else
   if( !isChroma )
   {
     if( alfSliceParam.alfLumaCoeffDeltaFlag )
@@ -600,6 +1083,48 @@ int EncAdaptiveLoopFilter::getCoeffRate( AlfSliceParam& alfSliceParam, bool isCh
       iBits += lengthGolomb( coeff[ind* MAX_NUM_ALF_LUMA_COEFF + i], m_kMinTab[alfShape.golombIdx[i]] );  // alf_coeff_chroma[i], alf_coeff_luma_delta[i][j]
     }
   }
+#endif
+
+#if JVET_N0242_NON_LINEAR_ALF
+  if( m_alfSliceParamTemp.nonLinearFlag[isChroma] )
+  {
+    memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) );
+    // vlc for all
+    for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+    {
+      if( !abs( alfSliceParam.chromaCoeff[i] ) )
+        continue;
+      int coeffVal = abs( alfSliceParam.chromaClipp[i] );
+
+      for( int k = 1; k < 15; k++ )
+      {
+        m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( coeffVal, k, false );
+      }
+    }
+
+    kMin = getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan );
+
+    // Golomb parameters
+    iBits += lengthUvlc( kMin - 1 );  // "min_golomb_order"
+    golombOrderIncreaseFlag = 0;
+
+    for( int idx = 0; idx < maxGolombIdx; idx++ )
+    {
+      golombOrderIncreaseFlag = ( m_kMinTab[idx] != kMin ) ? 1 : 0;
+      CHECK( !( m_kMinTab[idx] <= kMin + 1 ), "ALF Golomb parameter not consistent" );
+      iBits += golombOrderIncreaseFlag;                           //golomb_order_increase_flag
+      kMin = m_kMinTab[idx];
+    }
+
+    // Filter coefficients
+    for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+    {
+      if( !abs( alfSliceParam.chromaCoeff[i] ) )
+        continue;
+      iBits += lengthGolomb( alfSliceParam.chromaClipp[i], m_kMinTab[alfShape.golombIdx[i]], false );  // alf_coeff_chroma[i], alf_coeff_luma_delta[i][j]
+    }
+  }
+#endif
   return iBits;
 }
 
@@ -634,13 +1159,21 @@ double EncAdaptiveLoopFilter::getFilteredDistortion( AlfCovariance* cov, const i
   for( int classIdx = 0; classIdx < numClasses; classIdx++ )
   {
     int filterIdx = numClasses == 1 ? 0 : m_filterIndices[numFiltersMinus1][classIdx];
+#if JVET_N0242_NON_LINEAR_ALF
+    dist += cov[classIdx].calcErrorForCoeffs( m_filterClippSet[filterIdx], m_filterCoeffSet[filterIdx], numCoeff, m_NUM_BITS );
+#else
     dist += calcErrorForCoeffs( cov[classIdx].E, cov[classIdx].y, m_filterCoeffSet[filterIdx], numCoeff, m_NUM_BITS );
+#endif
   }
 
   return dist;
 }
 
+#if JVET_N0242_NON_LINEAR_ALF
+double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfSliceParam& alfSliceParam, AlfFilterShape& alfShape, AlfCovariance* covFrame, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], int& uiCoeffBits )
+#else
 double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfSliceParam& alfSliceParam, AlfFilterShape& alfShape, AlfCovariance* covFrame, AlfCovariance* covMerged, int& uiCoeffBits )
+#endif
 {
   int numFiltersBest = 0;
   int numFilters = MAX_NUM_ALF_CLASSES;
@@ -650,11 +1183,19 @@ double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfSliceParam& alfSliceParam,
   double cost, cost0, dist, distForce0, costMin = MAX_DOUBLE;
   int predMode = 0, bestPredMode = 0, coeffBits, coeffBitsForce0;
 
+#if JVET_N0242_NON_LINEAR_ALF
+  mergeClasses( alfShape, covFrame, covMerged, clipMerged, MAX_NUM_ALF_CLASSES, m_filterIndices );
+#else
   mergeClasses( covFrame, covMerged, MAX_NUM_ALF_CLASSES, m_filterIndices );
+#endif
 
   while( numFilters >= 1 )
   {
+#if JVET_N0242_NON_LINEAR_ALF
+    dist = deriveFilterCoeffs( covFrame, covMerged, clipMerged, alfShape, m_filterIndices[numFilters - 1], numFilters, errorForce0CoeffTab );
+#else
     dist = deriveFilterCoeffs( covFrame, covMerged, alfShape, m_filterIndices[numFilters - 1], numFilters, errorForce0CoeffTab );
+#endif
     // filter coeffs are stored in m_filterCoeffSet
     distForce0 = getDistForce0( alfShape, numFilters, errorForce0CoeffTab, codedVarBins );
     coeffBits = deriveFilterCoefficientsPredictionMode( alfShape, m_filterCoeffSet, m_diffFilterCoeff, numFilters, predMode );
@@ -677,7 +1218,11 @@ double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfSliceParam& alfSliceParam,
     numFilters--;
   }
 
+#if JVET_N0242_NON_LINEAR_ALF
+  dist = deriveFilterCoeffs( covFrame, covMerged, clipMerged, alfShape, m_filterIndices[numFiltersBest - 1], numFiltersBest, errorForce0CoeffTab );
+#else
   dist = deriveFilterCoeffs( covFrame, covMerged, alfShape, m_filterIndices[numFiltersBest - 1], numFiltersBest, errorForce0CoeffTab );
+#endif
   coeffBits = deriveFilterCoefficientsPredictionMode( alfShape, m_filterCoeffSet, m_diffFilterCoeff, numFiltersBest, predMode );
   distForce0 = getDistForce0( alfShape, numFiltersBest, errorForce0CoeffTab, codedVarBins );
   coeffBitsForce0 = getCostFilterCoeffForce0( alfShape, m_filterCoeffSet, numFiltersBest, codedVarBins );
@@ -707,6 +1252,9 @@ double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfSliceParam& alfSliceParam,
       if( codedVarBins[varInd] == 0 )
       {
         memset( m_filterCoeffSet[varInd], 0, sizeof( int )*MAX_NUM_ALF_LUMA_COEFF );
+#if JVET_N0242_NON_LINEAR_ALF
+        memset( m_filterClippSet[varInd], 0, sizeof( int )*MAX_NUM_ALF_LUMA_COEFF );
+#endif
       }
     }
   }
@@ -723,6 +1271,9 @@ double EncAdaptiveLoopFilter::mergeFiltersAndCost( AlfSliceParam& alfSliceParam,
       {
         alfSliceParam.lumaCoeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] = m_filterCoeffSet[ind][i];
       }
+#if JVET_N0242_NON_LINEAR_ALF
+      alfSliceParam.lumaClipp[ind * MAX_NUM_ALF_LUMA_COEFF + i] = m_filterClippSet[ind][i];
+#endif
     }
   }
 
@@ -847,6 +1398,52 @@ int EncAdaptiveLoopFilter::getCostFilterCoeffForce0( AlfFilterShape& alfShape, i
     }
   }
 
+#if JVET_N0242_NON_LINEAR_ALF
+  if( m_alfSliceParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] )
+  {
+    memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) );
+
+    for( int ind = 0; ind < numFilters; ++ind )
+    {
+      if( !codedVarBins[ind] )
+      {
+        continue;
+      }
+      for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+      {
+        if( !abs( pDiffQFilterCoeffIntPP[ind][i] ) )
+          continue;
+        int coeffVal = abs( m_filterClippSet[ind][i] );
+        for( int k = 1; k < 15; k++ )
+        {
+          m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( coeffVal, k, false );
+        }
+      }
+    }
+
+    kMin = getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan );
+
+    // Coding parameters
+    len += kMin           //min_golomb_order
+        + maxGolombIdx   //golomb_order_increase_flag
+      ;
+
+    // Filter coefficients
+    for( int ind = 0; ind < numFilters; ++ind )
+    {
+      if( codedVarBins[ind] )
+      {
+        for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+        {
+          if( !abs( pDiffQFilterCoeffIntPP[ind][i] ) )
+            continue;
+          len += lengthGolomb( abs( m_filterClippSet[ind][i] ), m_kMinTab[alfShape.golombIdx[i]], false ); // alf_coeff_luma_delta[i][j]
+        }
+      }
+    }
+  }
+
+#endif
   return len;
 }
 
@@ -873,8 +1470,16 @@ int EncAdaptiveLoopFilter::deriveFilterCoefficientsPredictionMode( AlfFilterShap
 
   predMode = ( ratePredMode1 < ratePredMode0 && numFilters > 1 ) ? 1 : 0;
 
+#if JVET_N0242_NON_LINEAR_ALF
+  int rateClipp = m_alfSliceParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? getCostFilterClipp( alfShape, filterSet, numFilters ) : 0;
+
+  return ( numFilters > 1 ? 1 : 0 )        // coeff_delta_pred_mode_flag
+       + rateClipp
+       + ( predMode ? ratePredMode1 : ratePredMode0 ); // min_golomb_order, golomb_order_increase_flag, alf_coeff_luma_delta
+#else
   return ( numFilters > 1 ? 1 : 0 )        // coeff_delta_pred_mode_flag
        + ( predMode ? ratePredMode1 : ratePredMode0 ); // min_golomb_order, golomb_order_increase_flag, alf_coeff_luma_delta
+#endif
 }
 
 int EncAdaptiveLoopFilter::getCostFilterCoeff( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters )
@@ -907,6 +1512,30 @@ int EncAdaptiveLoopFilter::getCostFilterCoeff( AlfFilterShape& alfShape, int **p
   return len;
 }
 
+#if JVET_N0242_NON_LINEAR_ALF
+// Bit estimate for the clip values of numFilters filters: Golomb order terms plus the coded clip values themselves.
+int EncAdaptiveLoopFilter::getCostFilterClipp( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters )
+{
+  memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) );
+  for( int filterIdx = 0; filterIdx < numFilters; ++filterIdx )
+  {
+    for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+    {
+      if( !abs( pDiffQFilterCoeffIntPP[filterIdx][i] ) ) continue; // no clip value is counted where the coefficient is zero
+      int clippVal = abs( m_filterClippSet[filterIdx][i] );
+      for( int k = 1; k < 15; k++ )
+      {
+        m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( clippVal, k, false ); // no sign bit, matching lengthFilterClipps
+      }
+    }
+  }
+  int len = getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan ); // presumably also fills m_kMinTab, which is read below
+  return len           //min_golomb_order
+          + getMaxGolombIdx( alfShape.filterType ) //golomb_order_increase_flag
+          + lengthFilterClipps( alfShape, numFilters, pDiffQFilterCoeffIntPP, m_kMinTab ); // Filter clippings
+}
+
+#endif
 int EncAdaptiveLoopFilter::lengthFilterCoeffs( AlfFilterShape& alfShape, const int numFilters, int **FilterCoeff, int* kMinTab )
 {
   int bitCnt = 0;
@@ -921,6 +1550,24 @@ int EncAdaptiveLoopFilter::lengthFilterCoeffs( AlfFilterShape& alfShape, const i
   return bitCnt;
 }
 
+#if JVET_N0242_NON_LINEAR_ALF
+int EncAdaptiveLoopFilter::lengthFilterClipps( AlfFilterShape& alfShape, const int numFilters, int **FilterCoeff, int* kMinTab ) // returns the summed Golomb length of the clip values of all numFilters filters
+{
+  int bitCnt = 0;
+
+  for( int ind = 0; ind < numFilters; ++ind )
+  {
+    for( int i = 0; i < alfShape.numCoeff - 1; i++ ) // the last coefficient position is not visited
+    {
+      if( !abs( FilterCoeff[ind][i] ) )
+        continue; // positions whose coefficient is zero contribute no bits
+      bitCnt += lengthGolomb( abs( m_filterClippSet[ind][i] ), kMinTab[alfShape.golombIdx[i]], false ); // value comes from m_filterClippSet (FilterCoeff only gates); signed_coeff is false
+    }
+  }
+  return bitCnt;
+}
+
+#endif
 double EncAdaptiveLoopFilter::getDistForce0( AlfFilterShape& alfShape, const int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2], bool* codedVarBins )
 {
   static int bitsVarBin[MAX_NUM_ALF_CLASSES];
@@ -949,6 +1596,38 @@ double EncAdaptiveLoopFilter::getDistForce0( AlfFilterShape& alfShape, const int
     }
   }
 
+#if JVET_N0242_NON_LINEAR_ALF
+  if( m_alfSliceParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] )
+  {
+    memset( m_bitsCoeffScan, 0, sizeof( m_bitsCoeffScan ) );
+    for( int ind = 0; ind < numFilters; ++ind )
+    {
+      for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+      {
+        if( !abs( m_filterCoeffSet[ind][i] ) )
+          continue;
+        int coeffVal = abs( m_filterClippSet[ind][i] );
+        for( int k = 1; k < 15; k++ )
+        {
+          m_bitsCoeffScan[alfShape.golombIdx[i]][k] += lengthGolomb( coeffVal, k, false );
+        }
+      }
+    }
+
+    getGolombKMin( alfShape, numFilters, m_kMinTab, m_bitsCoeffScan );
+
+    for( int ind = 0; ind < numFilters; ++ind )
+    {
+      for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+      {
+        if( !abs( m_filterCoeffSet[ind][i] ) )
+          continue;
+        bitsVarBin[ind] += lengthGolomb( abs( m_filterClippSet[ind][i] ), m_kMinTab[alfShape.golombIdx[i]], false );
+      }
+    }
+  }
+
+#endif
   double distForce0 = getDistCoeffForce0( codedVarBins, errorTabForce0Coeff, bitsVarBin, numFilters );
 
   return distForce0;
@@ -1035,11 +1714,19 @@ int EncAdaptiveLoopFilter::lengthUvlc( int uiCode )
   return ( uiLength >> 1 ) + ( ( uiLength + 1 ) >> 1 );
 }
 
+#if JVET_N0242_NON_LINEAR_ALF
+int EncAdaptiveLoopFilter::lengthGolomb( int coeffVal, int k, bool signed_coeff )
+#else
 int EncAdaptiveLoopFilter::lengthGolomb( int coeffVal, int k )
+#endif
 {
   int m = 2 << ( k - 1 );
   int q = coeffVal / m;
+#if JVET_N0242_NON_LINEAR_ALF
+  if( signed_coeff && coeffVal != 0 )
+#else
   if( coeffVal != 0 )
+#endif
   {
     return q + 2 + k;
   }
@@ -1049,47 +1736,127 @@ int EncAdaptiveLoopFilter::lengthGolomb( int coeffVal, int k )
   }
 }
 
+#if JVET_N0242_NON_LINEAR_ALF
+double EncAdaptiveLoopFilter::deriveFilterCoeffs( AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2] )
+#else
 double EncAdaptiveLoopFilter::deriveFilterCoeffs( AlfCovariance* cov, AlfCovariance* covMerged, AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2] )
+#endif
 {
   double error = 0.0;
   AlfCovariance& tmpCov = covMerged[MAX_NUM_ALF_CLASSES];
   for( int filtIdx = 0; filtIdx < numFilters; filtIdx++ )
   {
     tmpCov.reset();
+#if JVET_N0242_NON_LINEAR_ALF
+    bool found_clip = false; // becomes true once a clip set has been copied for this filter
+#endif
     for( int classIdx = 0; classIdx < MAX_NUM_ALF_CLASSES; classIdx++ )
     {
       if( filterIndices[classIdx] == filtIdx )
       {
         tmpCov += cov[classIdx];
+#if JVET_N0242_NON_LINEAR_ALF
+        if( !found_clip )
+        {
+          found_clip = true; // the clip set is taken from the first (lowest-index) class mapped to this filter
+          memcpy(m_filterClippSet[filtIdx], clipMerged[numFilters-1][classIdx], sizeof(int[MAX_NUM_ALF_LUMA_COEFF])); // copies MAX_NUM_ALF_LUMA_COEFF ints from the numFilters-1 entry of clipMerged
+        }
+#endif
       }
     }
 
     // Find coeffcients
+#if JVET_N0242_NON_LINEAR_ALF
+    assert(alfShape.numCoeff == tmpCov.numCoeff);
+    errorTabForce0Coeff[filtIdx][1] = tmpCov.pixAcc + deriveCoeffQuant( m_filterClippSet[filtIdx], m_filterCoeffSet[filtIdx], tmpCov, alfShape, m_NUM_BITS, false ); // coefficients are written directly into m_filterCoeffSet[filtIdx]; last argument (optimizeClip) is false
+#else
     errorTabForce0Coeff[filtIdx][1] = tmpCov.pixAcc + deriveCoeffQuant( m_filterCoeffQuant, tmpCov.E, tmpCov.y, alfShape.numCoeff, alfShape.weights, m_NUM_BITS );
+#endif
     errorTabForce0Coeff[filtIdx][0] = tmpCov.pixAcc;
     error += errorTabForce0Coeff[filtIdx][1];
+#if !JVET_N0242_NON_LINEAR_ALF
 
     // store coeff
     memcpy( m_filterCoeffSet[filtIdx], m_filterCoeffQuant, sizeof( int )*alfShape.numCoeff );
+#endif
   }
   return error;
 }
 
+#if JVET_N0242_NON_LINEAR_ALF
+double EncAdaptiveLoopFilter::deriveCoeffQuant( int *filterClipp, int *filterCoeffQuant, const AlfCovariance& cov, const AlfFilterShape& shape, const int bitDepth, const bool optimizeClip )
+#else
 double EncAdaptiveLoopFilter::deriveCoeffQuant( int *filterCoeffQuant, double **E, double *y, const int numCoeff, std::vector<int>& weights, const int bitDepth, const bool bChroma )
+#endif
 {
   const int factor = 1 << ( bitDepth - 1 );
+#if JVET_N0242_NON_LINEAR_ALF
+const int numCoeff = shape.numCoeff;
+#else
   static int filterCoeffQuantMod[MAX_NUM_ALF_LUMA_COEFF];
+#endif
   static double filterCoeff[MAX_NUM_ALF_LUMA_COEFF];
 
+#if JVET_N0242_NON_LINEAR_ALF
+  cov.optimizeFilter( shape, filterClipp, filterCoeff, optimizeClip );
+#else
   gnsSolveByChol( E, y, filterCoeff, numCoeff );
+#endif
   roundFiltCoeff( filterCoeffQuant, filterCoeff, numCoeff, factor );
+#if JVET_N0242_NON_LINEAR_ALF
+
+  const int max_value = factor - 1;
+  const int min_value = -factor;
+
+  for ( int i = 0; i < numCoeff - 1; i++ )
+  {
+    filterCoeffQuant[i] = std::min( max_value, std::max( min_value, filterCoeffQuant[i] ) );
+  }
+  filterCoeffQuant[numCoeff - 1] = 0;
+
+  int modified=1;
+
+  double errRef=cov.calcErrorForCoeffs( filterClipp, filterCoeffQuant, numCoeff, bitDepth );
+  while( modified )
+  {
+    modified=0;
+    for( int sign: {1, -1} )
+    {
+      double errMin = MAX_DOUBLE;
+      int minInd = -1;
+
+      for( int k = 0; k < numCoeff-1; k++ )
+      {
+        if( filterCoeffQuant[k] - sign > max_value || filterCoeffQuant[k] - sign < min_value )
+          continue;
+
+        filterCoeffQuant[k] -= sign;
+
+        double error = cov.calcErrorForCoeffs( filterClipp, filterCoeffQuant, numCoeff, bitDepth );
+        if( error < errMin )
+        {
+          errMin = error;
+          minInd = k;
+        }
+        filterCoeffQuant[k] += sign;
+      }
+      if( errMin < errRef )
+      {
+        filterCoeffQuant[minInd] -= sign;
+        modified++;
+        errRef = errMin;
+      }
+    }
+  }
+
+  return errRef;
+#else
   const int targetCoeffSumInt = 0;
   int quantCoeffSum = 0;
   for( int i = 0; i < numCoeff; i++ )
   {
     quantCoeffSum += weights[i] * filterCoeffQuant[i];
   }
-
   int count = 0;
   while( quantCoeffSum != targetCoeffSumInt && count < 10 )
   {
@@ -1219,8 +1986,10 @@ double EncAdaptiveLoopFilter::deriveCoeffQuant( int *filterCoeffQuant, double **
 
   double error = calcErrorForCoeffs( E, y, filterCoeffQuant, numCoeff, bitDepth );
   return error;
+#endif
 }
 
+#if !JVET_N0242_NON_LINEAR_ALF
 double EncAdaptiveLoopFilter::calcErrorForCoeffs( double **E, double *y, int *coeff, const int numCoeff, const int bitDepth )
 {
   double factor = 1 << ( bitDepth - 1 );
@@ -1240,6 +2009,7 @@ double EncAdaptiveLoopFilter::calcErrorForCoeffs( double **E, double *y, int *co
   return error / factor;
 }
 
+#endif
 void EncAdaptiveLoopFilter::roundFiltCoeff( int *filterCoeffQuant, double *filterCoeff, const int numCoeff, const int factor )
 {
   for( int i = 0; i < numCoeff; i++ )
@@ -1249,8 +2019,18 @@ void EncAdaptiveLoopFilter::roundFiltCoeff( int *filterCoeffQuant, double *filte
   }
 }
 
+#if JVET_N0242_NON_LINEAR_ALF
+void EncAdaptiveLoopFilter::mergeClasses( const AlfFilterShape& alfShape, AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES] )
+#else
 void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* covMerged, const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES] )
+#endif
 {
+#if JVET_N0242_NON_LINEAR_ALF
+  static int tmpClip[MAX_NUM_ALF_LUMA_COEFF];
+  static int bestMergeClip[MAX_NUM_ALF_LUMA_COEFF];
+  static double err[MAX_NUM_ALF_CLASSES];
+  static double bestMergeErr;
+#endif
   static bool availableClass[MAX_NUM_ALF_CLASSES];
   static uint8_t indexList[MAX_NUM_ALF_CLASSES];
   static uint8_t indexListTemp[MAX_NUM_ALF_CLASSES];
@@ -1264,14 +2044,38 @@ void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* cov
     indexList[i] = i;
     availableClass[i] = true;
     covMerged[i] = cov[i];
+#if JVET_N0242_NON_LINEAR_ALF
+    covMerged[i].numBins = m_alfSliceParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? AlfNumClippingValues[COMPONENT_Y] : 1;
+#endif
   }
 
   // Try merging different covariance matrices
 
   // temporal AlfCovariance structure is allocated as the last element in covMerged array, the size of covMerged is MAX_NUM_ALF_CLASSES + 1
   AlfCovariance& tmpCov = covMerged[MAX_NUM_ALF_CLASSES];
+#if JVET_N0242_NON_LINEAR_ALF
+  tmpCov.numBins = m_alfSliceParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? AlfNumClippingValues[COMPONENT_Y] : 1;
+
+  // init Clip
+  for( int i = 0; i < numClasses; i++ )
+  {
+    std::fill_n(clipMerged[numRemaining-1][i], MAX_NUM_ALF_LUMA_COEFF, m_alfSliceParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? AlfNumClippingValues[CHANNEL_TYPE_LUMA] / 2 : 0);
+    if ( m_alfSliceParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] )
+    {
+      err[i] = covMerged[i].optimizeFilterClip( alfShape, clipMerged[numRemaining-1][i] );
+    }
+    else
+    {
+      err[i] = covMerged[i].calculateError( clipMerged[numRemaining-1][i] );
+    }
+  }
+#endif
 
+#if JVET_N0242_NON_LINEAR_ALF
+  while( numRemaining >= 2 )
+#else
   while( numRemaining > 2 )
+#endif
   {
     double errorMin = std::numeric_limits<double>::max();
     int bestToMergeIdx1 = 0, bestToMergeIdx2 = 1;
@@ -1284,14 +2088,32 @@ void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* cov
         {
           if( availableClass[j] )
           {
+#if JVET_N0242_NON_LINEAR_ALF
+            double error1 = err[i];
+            double error2 = err[j];
+#else
             double error1 = calculateError( covMerged[i] );
             double error2 = calculateError( covMerged[j] );
+#endif
 
             tmpCov.add( covMerged[i], covMerged[j] );
+#if JVET_N0242_NON_LINEAR_ALF
+            for( int l = 0; l < MAX_NUM_ALF_LUMA_COEFF; ++l )
+            {
+              tmpClip[l] = (clipMerged[numRemaining-1][i][l] + clipMerged[numRemaining-1][j][l] + 1 ) >> 1;
+            }
+            double errorMerged = m_alfSliceParamTemp.nonLinearFlag[CHANNEL_TYPE_LUMA] ? tmpCov.optimizeFilterClip( alfShape, tmpClip ) : tmpCov.calculateError( tmpClip );
+            double error = errorMerged - error1 - error2;
+#else
             double error = calculateError( tmpCov ) - error1 - error2;
+#endif
 
             if( error < errorMin )
             {
+#if JVET_N0242_NON_LINEAR_ALF
+              bestMergeErr = errorMerged;
+              memcpy(bestMergeClip, tmpClip, sizeof(bestMergeClip));
+#endif
               errorMin = error;
               bestToMergeIdx1 = i;
               bestToMergeIdx2 = j;
@@ -1302,6 +2124,11 @@ void EncAdaptiveLoopFilter::mergeClasses( AlfCovariance* cov, AlfCovariance* cov
     }
 
     covMerged[bestToMergeIdx1] += covMerged[bestToMergeIdx2];
+#if JVET_N0242_NON_LINEAR_ALF
+    memcpy(clipMerged[numRemaining-2], clipMerged[numRemaining-1], sizeof(int[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF]));
+    memcpy(clipMerged[numRemaining-2][bestToMergeIdx1], bestMergeClip, sizeof(bestMergeClip));
+    err[bestToMergeIdx1] = bestMergeErr;
+#endif
     availableClass[bestToMergeIdx2] = false;
 
     for( int i = 0; i < numClasses; i++ )
@@ -1354,7 +2181,11 @@ void EncAdaptiveLoopFilter::getFrameStats( ChannelType channel, int iShapeIdx )
   int numClasses = isLuma( channel ) ? MAX_NUM_ALF_CLASSES : 1;
   for( int i = 0; i < numClasses; i++ )
   {
+#if JVET_N0242_NON_LINEAR_ALF
+    m_alfCovarianceFrame[channel][iShapeIdx][i].reset(AlfNumClippingValues[channel]);
+#else
     m_alfCovarianceFrame[channel][iShapeIdx][i].reset();
+#endif
   }
   if( isLuma( channel ) )
   {
@@ -1398,7 +2229,11 @@ void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnit
       {
         for( int ctuIdx = 0; ctuIdx < m_numCTUsInPic; ctuIdx++ )
         {
+#if JVET_N0242_NON_LINEAR_ALF
+          m_alfCovariance[compIdx][shape][ctuIdx][classIdx].reset(AlfNumClippingValues[toChannelType( compID )]);
+#else
           m_alfCovariance[compIdx][shape][ctuIdx][classIdx].reset();
+#endif
         }
       }
     }
@@ -1415,7 +2250,11 @@ void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnit
     {
       for( int classIdx = 0; classIdx < numClasses; classIdx++ )
       {
+#if JVET_N0242_NON_LINEAR_ALF
+        m_alfCovarianceFrame[channelIdx][shape][classIdx].reset(AlfNumClippingValues[channelID]);
+#else
         m_alfCovarianceFrame[channelIdx][shape][classIdx].reset();
+#endif
       }
     }
   }
@@ -1443,7 +2282,11 @@ void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnit
 
         for( int shape = 0; shape != m_filterShapes[chType].size(); shape++ )
         {
+#if JVET_N0242_NON_LINEAR_ALF
+          getBlkStats( m_alfCovariance[compIdx][shape][ctuRsAddr], m_filterShapes[chType][shape], compIdx ? nullptr : m_classifier, org, orgStride, rec, recStride, compArea, chType );
+#else
           getBlkStats( m_alfCovariance[compIdx][shape][ctuRsAddr], m_filterShapes[chType][shape], compIdx ? nullptr : m_classifier, org, orgStride, rec, recStride, compArea );
+#endif
 
           const int numClasses = isLuma( compID ) ? MAX_NUM_ALF_CLASSES : 1;
 
@@ -1458,9 +2301,19 @@ void EncAdaptiveLoopFilter::deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnit
   }
 }
 
+#if JVET_N0242_NON_LINEAR_ALF
+void EncAdaptiveLoopFilter::getBlkStats( AlfCovariance* alfCovariance, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& area, const ChannelType channel )
+#else
 void EncAdaptiveLoopFilter::getBlkStats( AlfCovariance* alfCovariace, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& area )
+#endif
 {
+#if JVET_N0242_NON_LINEAR_ALF
+  static int ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues];
+
+  const int numBins = AlfNumClippingValues[channel];
+#else
   static int ELocal[MAX_NUM_ALF_LUMA_COEFF];
+#endif
 
   int transposeIdx = 0;
   int classIdx = 0;
@@ -1473,7 +2326,11 @@ void EncAdaptiveLoopFilter::getBlkStats( AlfCovariance* alfCovariace, const AlfF
       {
         continue;
       }
+#if JVET_N0242_NON_LINEAR_ALF
+      std::memset( ELocal, 0, sizeof( ELocal ) );
+#else
       std::memset( ELocal, 0, shape.numCoeff * sizeof( int ) );
+#endif
       if( classifier )
       {
         AlfClassifier& cl = classifier[area.y + i][area.x + j];
@@ -1487,31 +2344,76 @@ void EncAdaptiveLoopFilter::getBlkStats( AlfCovariance* alfCovariace, const AlfF
         weight = m_lumaLevelToWeightPLUT[org[j]];
       }
       int yLocal = org[j] - rec[j];
+#if JVET_N0242_NON_LINEAR_ALF
+      calcCovariance( ELocal, rec + j, recStride, shape, transposeIdx, channel );
+#else
       calcCovariance( ELocal, rec + j, recStride, shape.pattern.data(), shape.filterLength >> 1, transposeIdx );
+#endif
       for( int k = 0; k < shape.numCoeff; k++ )
       {
         for( int l = k; l < shape.numCoeff; l++ )
         {
+#if JVET_N0242_NON_LINEAR_ALF
+          for( int b0 = 0; b0 < numBins; b0++ )
+          {
+            for( int b1 = 0; b1 < numBins; b1++ )
+            {
+              if (m_alfWSSD)
+              {
+                alfCovariance[classIdx].E[b0][b1][k][l] += weight * (double)(ELocal[k][b0] * ELocal[l][b1]);
+              }
+              else
+              {
+                alfCovariance[classIdx].E[b0][b1][k][l] += ELocal[k][b0] * ELocal[l][b1];
+              }
+            }
+          }
+#else
           if (m_alfWSSD)
           {
             alfCovariace[classIdx].E[k][l] += weight * (double)(ELocal[k] * ELocal[l]);
           }
           else
           alfCovariace[classIdx].E[k][l] += ELocal[k] * ELocal[l];
+#endif
+        }
+#if JVET_N0242_NON_LINEAR_ALF
+        for( int b = 0; b < numBins; b++ )
+        {
+          if (m_alfWSSD)
+          {
+            alfCovariance[classIdx].y[b][k] += weight * (double)(ELocal[k][b] * yLocal);
+          }
+          else
+          {
+            alfCovariance[classIdx].y[b][k] += ELocal[k][b] * yLocal;
+          }
         }
+#else
         if (m_alfWSSD)
         {
           alfCovariace[classIdx].y[k] += weight * (double)(ELocal[k] * yLocal);
         }
         else
         alfCovariace[classIdx].y[k] += ELocal[k] * yLocal;
+#endif
       }
       if (m_alfWSSD)
       {
+#if JVET_N0242_NON_LINEAR_ALF
+        alfCovariance[classIdx].pixAcc += weight * (double)(yLocal * yLocal);
+#else
         alfCovariace[classIdx].pixAcc += weight * (double)(yLocal * yLocal);
+#endif
       }
       else
+#if JVET_N0242_NON_LINEAR_ALF
+      {
+        alfCovariance[classIdx].pixAcc += yLocal * yLocal;
+      }
+#else
       alfCovariace[classIdx].pixAcc += yLocal * yLocal;
+#endif
     }
     org += orgStride;
     rec += recStride;
@@ -1524,16 +2426,41 @@ void EncAdaptiveLoopFilter::getBlkStats( AlfCovariance* alfCovariace, const AlfF
     {
       for( int l = 0; l < k; l++ )
       {
+#if JVET_N0242_NON_LINEAR_ALF
+        for( int b0 = 0; b0 < numBins; b0++ )
+        {
+          for( int b1 = 0; b1 < numBins; b1++ )
+          {
+            alfCovariance[classIdx].E[b0][b1][k][l] = alfCovariance[classIdx].E[b1][b0][l][k];
+          }
+        }
+#else
         alfCovariace[classIdx].E[k][l] = alfCovariace[classIdx].E[l][k];
+#endif
       }
     }
   }
 }
 
+#if JVET_N0242_NON_LINEAR_ALF
+void EncAdaptiveLoopFilter::calcCovariance( int ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues], const Pel *rec, const int stride, const AlfFilterShape& shape, const int transposeIdx, const ChannelType channel )
+#else
 void EncAdaptiveLoopFilter::calcCovariance( int *ELocal, const Pel *rec, const int stride, const int *filterPattern, const int halfFilterLength, const int transposeIdx )
+#endif
 {
+#if JVET_N0242_NON_LINEAR_ALF
+  const int *filterPattern = shape.pattern.data();
+  const int halfFilterLength = shape.filterLength >> 1;
+  const Pel* clip = m_alfClippingValues[channel];
+  const int numBins = AlfNumClippingValues[channel];
+
+#endif
   int k = 0;
 
+#if JVET_N0242_NON_LINEAR_ALF
+  const short curr = rec[0];
+#endif
+
   if( transposeIdx == 0 )
   {
     for( int i = -halfFilterLength; i < 0; i++ )
@@ -1541,15 +2468,35 @@ void EncAdaptiveLoopFilter::calcCovariance( int *ELocal, const Pel *rec, const i
       const Pel* rec0 = rec + i * stride;
       const Pel* rec1 = rec - i * stride;
 
+#if JVET_N0242_NON_LINEAR_ALF
+      for( int j = -halfFilterLength - i; j <= halfFilterLength + i; j++, k++ )
+      {
+        for( int b = 0; b < numBins; b++ )
+        {
+          ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec0[j], rec1[-j]);
+        }
+      }
+#else
       for( int j = -halfFilterLength - i; j <= halfFilterLength + i; j++ )
       {
         ELocal[filterPattern[k++]] += rec0[j] + rec1[-j];
       }
+#endif
+    }
+#if JVET_N0242_NON_LINEAR_ALF
+    for( int j = -halfFilterLength; j < 0; j++, k++ )
+    {
+      for( int b = 0; b < numBins; b++ )
+      {
+        ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec[j], rec[-j]);
+      }
     }
+#else
     for( int j = -halfFilterLength; j < 0; j++ )
     {
       ELocal[filterPattern[k++]] += rec[j] + rec[-j];
     }
+#endif
   }
   else if( transposeIdx == 1 )
   {
@@ -1558,15 +2505,35 @@ void EncAdaptiveLoopFilter::calcCovariance( int *ELocal, const Pel *rec, const i
       const Pel* rec0 = rec + j;
       const Pel* rec1 = rec - j;
 
+#if JVET_N0242_NON_LINEAR_ALF
+      for( int i = -halfFilterLength - j; i <= halfFilterLength + j; i++, k++ )
+      {
+        for( int b = 0; b < numBins; b++ )
+        {
+          ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec0[i * stride], rec1[-i * stride]);
+        }
+      }
+#else
       for( int i = -halfFilterLength - j; i <= halfFilterLength + j; i++ )
       {
         ELocal[filterPattern[k++]] += rec0[i * stride] + rec1[-i * stride];
       }
+#endif
+    }
+#if JVET_N0242_NON_LINEAR_ALF
+    for( int i = -halfFilterLength; i < 0; i++, k++ )
+    {
+      for( int b = 0; b < numBins; b++ )
+      {
+        ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec[i*stride], rec[-i * stride]);
+      }
     }
+#else
     for( int i = -halfFilterLength; i < 0; i++ )
     {
       ELocal[filterPattern[k++]] += rec[i*stride] + rec[-i * stride];
     }
+#endif
   }
   else if( transposeIdx == 2 )
   {
@@ -1575,15 +2542,35 @@ void EncAdaptiveLoopFilter::calcCovariance( int *ELocal, const Pel *rec, const i
       const Pel* rec0 = rec + i * stride;
       const Pel* rec1 = rec - i * stride;
 
+#if JVET_N0242_NON_LINEAR_ALF
+      for( int j = halfFilterLength + i; j >= -halfFilterLength - i; j--, k++ )
+      {
+        for( int b = 0; b < numBins; b++ )
+        {
+          ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec0[j], rec1[-j]);
+        }
+      }
+#else
       for( int j = halfFilterLength + i; j >= -halfFilterLength - i; j-- )
       {
         ELocal[filterPattern[k++]] += rec0[j] + rec1[-j];
       }
+#endif
+    }
+#if JVET_N0242_NON_LINEAR_ALF
+    for( int j = -halfFilterLength; j < 0; j++, k++ )
+    {
+      for( int b = 0; b < numBins; b++ )
+      {
+        ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec[j], rec[-j]);
+      }
     }
+#else
     for( int j = -halfFilterLength; j < 0; j++ )
     {
       ELocal[filterPattern[k++]] += rec[j] + rec[-j];
     }
+#endif
   }
   else
   {
@@ -1592,21 +2579,49 @@ void EncAdaptiveLoopFilter::calcCovariance( int *ELocal, const Pel *rec, const i
       const Pel* rec0 = rec + j;
       const Pel* rec1 = rec - j;
 
+#if JVET_N0242_NON_LINEAR_ALF
+      for( int i = halfFilterLength + j; i >= -halfFilterLength - j; i--, k++ )
+      {
+        for( int b = 0; b < numBins; b++ )
+        {
+          ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec0[i * stride], rec1[-i * stride]);
+        }
+      }
+#else
       for( int i = halfFilterLength + j; i >= -halfFilterLength - j; i-- )
       {
         ELocal[filterPattern[k++]] += rec0[i * stride] + rec1[-i * stride];
       }
+#endif
+    }
+#if JVET_N0242_NON_LINEAR_ALF
+    for( int i = -halfFilterLength; i < 0; i++, k++ )
+    {
+      for( int b = 0; b < numBins; b++ )
+      {
+        ELocal[filterPattern[k]][b] += clipALF(clip[b], curr, rec[i*stride], rec[-i * stride]);
+      }
     }
+#else
     for( int i = -halfFilterLength; i < 0; i++ )
     {
       ELocal[filterPattern[k++]] += rec[i*stride] + rec[-i * stride];
     }
+#endif
+  }
+#if JVET_N0242_NON_LINEAR_ALF
+  for( int b = 0; b < numBins; b++ )
+  {
+    ELocal[filterPattern[k]][b] += curr;
   }
+#else
   ELocal[filterPattern[k++]] += rec[0];
+#endif
 }
 
 
 
+#if !JVET_N0242_NON_LINEAR_ALF
 double EncAdaptiveLoopFilter::calculateError( AlfCovariance& cov )
 {
   static double c[MAX_NUM_ALF_COEFF];
@@ -1753,6 +2768,7 @@ int EncAdaptiveLoopFilter::gnsSolveByChol( double **LHS, double *rhs, double *x,
   return res;
 }
 //////////////////////////////////////////////////////////////////////////////////////////
+#endif
 void EncAdaptiveLoopFilter::setEnableFlag( AlfSliceParam& alfSlicePara, ChannelType channel, bool val )
 {
   if( channel == CHANNEL_TYPE_LUMA )
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h
index d2b02d902..f8b766729 100644
--- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h
+++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h
@@ -41,20 +41,46 @@
 #include "CommonLib/AdaptiveLoopFilter.h"
 
 #include "CABACWriter.h"
+#if JVET_N0242_NON_LINEAR_ALF
+#include "EncCfg.h"
+#endif
 
 struct AlfCovariance
 {
+#if JVET_N0242_NON_LINEAR_ALF
+  static constexpr int MaxAlfNumClippingValues = AdaptiveLoopFilter::MaxAlfNumClippingValues;
+  using TE = double[MAX_NUM_ALF_LUMA_COEFF][MAX_NUM_ALF_LUMA_COEFF];
+  using Ty = double[MAX_NUM_ALF_LUMA_COEFF];
+  using TKE = TE[AdaptiveLoopFilter::MaxAlfNumClippingValues][AdaptiveLoopFilter::MaxAlfNumClippingValues];
+  using TKy = Ty[AdaptiveLoopFilter::MaxAlfNumClippingValues];
+#endif
+
   int numCoeff;
+#if JVET_N0242_NON_LINEAR_ALF
+  int numBins;
+  TKy y;
+  TKE E;
+#else
   double *y;
   double **E;
+#endif
   double pixAcc;
 
   AlfCovariance() {}
   ~AlfCovariance() {}
 
+#if JVET_N0242_NON_LINEAR_ALF
+  void create( int size, int num_bins = MaxAlfNumClippingValues )
+#else
   void create( int size )
+#endif
   {
     numCoeff = size;
+#if JVET_N0242_NON_LINEAR_ALF
+    numBins = num_bins;
+    std::memset( y, 0, sizeof( y ) );
+    std::memset( E, 0, sizeof( E ) );
+#else
 
     y = new double[numCoeff];
     E = new double*[numCoeff];
@@ -63,10 +89,12 @@ struct AlfCovariance
     {
       E[i] = new double[numCoeff];
     }
+#endif
   }
 
   void destroy()
   {
+#if !JVET_N0242_NON_LINEAR_ALF
     for( int i = 0; i < numCoeff; i++ )
     {
       delete[] E[i];
@@ -78,25 +106,46 @@ struct AlfCovariance
 
     delete[] y;
     y = nullptr;
+#endif
   }
 
+#if JVET_N0242_NON_LINEAR_ALF
+  void reset( int num_bins = -1 )
+#else
   void reset()
+#endif
   {
+#if JVET_N0242_NON_LINEAR_ALF
+    if ( num_bins > 0 )
+      numBins = num_bins;
+#endif
     pixAcc = 0;
+#if JVET_N0242_NON_LINEAR_ALF
+    std::memset( y, 0, sizeof( y ) );
+    std::memset( E, 0, sizeof( E ) );
+#else
     std::memset( y, 0, sizeof( *y ) * numCoeff );
     for( int i = 0; i < numCoeff; i++ )
     {
       std::memset( E[i], 0, sizeof( *E[i] ) * numCoeff );
     }
+#endif
   }
 
   const AlfCovariance& operator=( const AlfCovariance& src )
   {
+#if JVET_N0242_NON_LINEAR_ALF
+    numCoeff = src.numCoeff;
+    numBins = src.numBins;
+    std::memcpy( E, src.E, sizeof( E ) );
+    std::memcpy( y, src.y, sizeof( y ) );
+#else
     for( int i = 0; i < numCoeff; i++ )
     {
       std::memcpy( E[i], src.E[i], sizeof( *E[i] ) * numCoeff );
     }
     std::memcpy( y, src.y, sizeof( *y ) * numCoeff );
+#endif
     pixAcc = src.pixAcc;
 
     return *this;
@@ -104,6 +153,30 @@ struct AlfCovariance
 
   void add( const AlfCovariance& lhs, const AlfCovariance& rhs )
   {
+#if JVET_N0242_NON_LINEAR_ALF
+    numCoeff = lhs.numCoeff;
+    numBins = lhs.numBins;
+    for( int b0 = 0; b0 < numBins; b0++ )
+    {
+      for( int b1 = 0; b1 < numBins; b1++ )
+      {
+        for( int j = 0; j < numCoeff; j++ )
+        {
+          for( int i = 0; i < numCoeff; i++ )
+          {
+            E[b0][b1][j][i] = lhs.E[b0][b1][j][i] + rhs.E[b0][b1][j][i];
+          }
+        }
+      }
+    }
+    for( int b = 0; b < numBins; b++ )
+    {
+      for( int j = 0; j < numCoeff; j++ )
+      {
+        y[b][j] = lhs.y[b][j] + rhs.y[b][j];
+      }
+    }
+#else
     for( int j = 0; j < numCoeff; j++ )
     {
       for( int i = 0; i < numCoeff; i++ )
@@ -112,11 +185,34 @@ struct AlfCovariance
       }
       y[j] = lhs.y[j] + rhs.y[j];
     }
+#endif
     pixAcc = lhs.pixAcc + rhs.pixAcc;
   }
 
   const AlfCovariance& operator+= ( const AlfCovariance& src )
   {
+#if JVET_N0242_NON_LINEAR_ALF
+    for( int b0 = 0; b0 < numBins; b0++ )
+    {
+      for( int b1 = 0; b1 < numBins; b1++ )
+      {
+        for( int j = 0; j < numCoeff; j++ )
+        {
+          for( int i = 0; i < numCoeff; i++ )
+          {
+            E[b0][b1][j][i] += src.E[b0][b1][j][i];
+          }
+        }
+      }
+    }
+    for( int b = 0; b < numBins; b++ )
+    {
+      for( int j = 0; j < numCoeff; j++ )
+      {
+        y[b][j] += src.y[b][j];
+      }
+    }
+#else
     for( int j = 0; j < numCoeff; j++ )
     {
       for( int i = 0; i < numCoeff; i++ )
@@ -125,6 +221,7 @@ struct AlfCovariance
       }
       y[j] += src.y[j];
     }
+#endif
     pixAcc += src.pixAcc;
 
     return *this;
@@ -132,6 +229,28 @@ struct AlfCovariance
 
   const AlfCovariance& operator-= ( const AlfCovariance& src )
   {
+#if JVET_N0242_NON_LINEAR_ALF
+    for( int b0 = 0; b0 < numBins; b0++ )
+    {
+      for( int b1 = 0; b1 < numBins; b1++ )
+      {
+        for( int j = 0; j < numCoeff; j++ )
+        {
+          for( int i = 0; i < numCoeff; i++ )
+          {
+            E[b0][b1][j][i] -= src.E[b0][b1][j][i];
+          }
+        }
+      }
+    }
+    for( int b = 0; b < numBins; b++ )
+    {
+      for( int j = 0; j < numCoeff; j++ )
+      {
+        y[b][j] -= src.y[b][j];
+      }
+    }
+#else
     for( int j = 0; j < numCoeff; j++ )
     {
       for( int i = 0; i < numCoeff; i++ )
@@ -140,10 +259,55 @@ struct AlfCovariance
       }
       y[j] -= src.y[j];
     }
+#endif
     pixAcc -= src.pixAcc;
 
     return *this;
   }
+
+#if JVET_N0242_NON_LINEAR_ALF
+  void setEyFromClip(const int* clip, TE _E, Ty _y, int size) const
+  {
+    for (int k=0; k<size; k++)
+    {
+      _y[k] = y[clip[k]][k];
+      for (int l=0; l<size; l++)
+      {
+        _E[k][l] = E[clip[k]][clip[l]][k][l];
+      }
+    }
+  }
+
+  double optimizeFilter(const int* clip, double *f, int size) const
+  {
+    gnsSolveByChol( clip, f, size );
+    return calculateError( clip, f );
+  }
+
+  double optimizeFilter(const AlfFilterShape& alfShape, int* clip, double *f, bool optimize_clip) const;
+  double optimizeFilterClip(const AlfFilterShape& alfShape, int* clip) const
+  {
+    Ty f;
+    return optimizeFilter(alfShape, clip, f, true);
+  }
+
+  double calculateError( const int *clip ) const;
+  double calculateError( const int *clip, const double *coeff ) const { return calculateError(clip, coeff, numCoeff); }
+  double calculateError( const int *clip, const double *coeff, const int numCoeff ) const;
+  double calcErrorForCoeffs( const int *clip, const int *coeff, const int numCoeff, const int bitDepth ) const;
+
+  void getClipMax(const AlfFilterShape& alfShape, int *clip_max) const;
+  void reduceClipCost(const AlfFilterShape& alfShape, int *clip) const;
+
+private:
+  // Cholesky decomposition
+
+  int  gnsSolveByChol( const int *clip, double *x, int numEq ) const;
+  int  gnsSolveByChol( TE LHS, double* rhs, double *x, int numEq ) const;
+  void gnsBacksubstitution( TE R, double* z, int size, double* A ) const;
+  void gnsTransposeBacksubstitution( TE U, double* rhs, double* x, int order ) const;
+  int  gnsCholeskyDec( TE inpMatr, TE outMatr, int numEq ) const;
+#endif
 };
 
 class EncAdaptiveLoopFilter : public AdaptiveLoopFilter
@@ -157,6 +321,9 @@ public:
   inline std::vector<double>& getLumaLevelWeightTable() { return m_lumaLevelToWeightPLUT; }
 
 private:
+#if JVET_N0242_NON_LINEAR_ALF
+  const EncCfg*          m_encCfg;
+#endif
   AlfCovariance***       m_alfCovariance[MAX_NUM_COMPONENT];          // [compIdx][shapeIdx][ctbAddr][classIdx]
   AlfCovariance**        m_alfCovarianceFrame[MAX_NUM_CHANNEL_TYPE];   // [CHANNEL][shapeIdx][classIdx]
   uint8_t*                 m_ctuEnableFlagTmp[MAX_NUM_COMPONENT];
@@ -164,13 +331,21 @@ private:
   //for RDO
   AlfSliceParam          m_alfSliceParamTemp;
   AlfCovariance          m_alfCovarianceMerged[ALF_NUM_OF_FILTER_TYPES][MAX_NUM_ALF_CLASSES + 1];
+#if JVET_N0242_NON_LINEAR_ALF
+  int                    m_alfClipMerged[ALF_NUM_OF_FILTER_TYPES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF];
+#endif
   CABACWriter*           m_CABACEstimator;
   CtxCache*              m_CtxCache;
   double                 m_lambda[MAX_NUM_COMPONENT];
   const double           FracBitsScale = 1.0 / double( 1 << SCALE_BITS );
 
+#if !JVET_N0242_NON_LINEAR_ALF
   int*                   m_filterCoeffQuant;
+#endif
   int**                  m_filterCoeffSet;
+#if JVET_N0242_NON_LINEAR_ALF
+  int**                  m_filterClippSet;
+#endif
   int**                  m_diffFilterCoeff;
   int                    m_kMinTab[MAX_NUM_ALF_LUMA_COEFF];
   int                    m_bitsCoeffScan[m_MAX_SCAN_VAL][m_MAX_EXP_GOLOMB];
@@ -186,9 +361,17 @@ public:
 #endif
                    AlfSliceParam& alfSliceParam );
   void initCABACEstimator( CABACEncoder* cabacEncoder, CtxCache* ctxCache, Slice* pcSlice );
+#if JVET_N0242_NON_LINEAR_ALF
+  void create( const EncCfg* encCfg, const int picWidth, const int picHeight, const ChromaFormat chromaFormatIDC, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] );
+#else
   void create( const int picWidth, const int picHeight, const ChromaFormat chromaFormatIDC, const int maxCUWidth, const int maxCUHeight, const int maxCUDepth, const int inputBitDepth[MAX_NUM_CHANNEL_TYPE], const int internalBitDepth[MAX_NUM_CHANNEL_TYPE] );
+#endif
   void destroy();
+#if JVET_N0242_NON_LINEAR_ALF
+  static int lengthGolomb( int coeffVal, int k, bool signed_coeff=true );
+#else
   static int lengthGolomb( int coeffVal, int k );
+#endif
   static int getGolombKMin( AlfFilterShape& alfShape, const int numFilters, int kMinTab[MAX_NUM_ALF_LUMA_COEFF], int bitsCoeffScan[m_MAX_SCAN_VAL][m_MAX_EXP_GOLOMB] );
 
 private:
@@ -199,21 +382,41 @@ private:
                    );
 
   void   copyAlfSliceParam( AlfSliceParam& alfSliceParamDst, AlfSliceParam& alfSliceParamSrc, ChannelType channel );
+#if JVET_N0242_NON_LINEAR_ALF
+  double mergeFiltersAndCost( AlfSliceParam& alfSliceParam, AlfFilterShape& alfShape, AlfCovariance* covFrame, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], int& uiCoeffBits );
+#else
   double mergeFiltersAndCost( AlfSliceParam& alfSliceParam, AlfFilterShape& alfShape, AlfCovariance* covFrame, AlfCovariance* covMerged, int& uiCoeffBits );
+#endif
 
   void   getFrameStats( ChannelType channel, int iShapeIdx );
   void   getFrameStat( AlfCovariance* frameCov, AlfCovariance** ctbCov, uint8_t* ctbEnableFlags, const int numClasses );
   void   deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnitBuf& recYuv );
+#if JVET_N0242_NON_LINEAR_ALF
+  void   getBlkStats( AlfCovariance* alfCovariace, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& area, const ChannelType channel );
+  void   calcCovariance( int ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues], const Pel *rec, const int stride, const AlfFilterShape& shape, const int transposeIdx, const ChannelType channel );
+  void   mergeClasses( const AlfFilterShape& alfShape, AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES] );
+#else
   void   getBlkStats( AlfCovariance* alfCovariace, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& area );
   void   calcCovariance( int *ELocal, const Pel *rec, const int stride, const int *filterPattern, const int halfFilterLength, const int transposeIdx );
   void   mergeClasses( AlfCovariance* cov, AlfCovariance* covMerged, const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES] );
+#endif
 
+#if !JVET_N0242_NON_LINEAR_ALF
   double calculateError( AlfCovariance& cov );
   double calcErrorForCoeffs( double **E, double *y, int *coeff, const int numCoeff, const int bitDepth );
+#endif
   double getFilterCoeffAndCost( CodingStructure& cs, double distUnfilter, ChannelType channel, bool bReCollectStat, int iShapeIdx, int& uiCoeffBits );
+#if JVET_N0242_NON_LINEAR_ALF
+  double deriveFilterCoeffs( AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2] );
+#else
   double deriveFilterCoeffs( AlfCovariance* cov, AlfCovariance* covMerged, AlfFilterShape& alfShape, short* filterIndices, int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2] );
+#endif
   int    deriveFilterCoefficientsPredictionMode( AlfFilterShape& alfShape, int **filterSet, int** filterCoeffDiff, const int numFilters, int& predMode );
+#if JVET_N0242_NON_LINEAR_ALF
+  double deriveCoeffQuant( int *filterClipp, int *filterCoeffQuant, const AlfCovariance& cov, const AlfFilterShape& shape, const int bitDepth, const bool optimizeClip );
+#else
   double deriveCoeffQuant( int *filterCoeffQuant, double **E, double *y, const int numCoeff, std::vector<int>& weights, const int bitDepth, const bool bChroma = false );
+#endif
   double deriveCtbAlfEnableFlags( CodingStructure& cs, const int iShapeIdx, ChannelType channel,
 #if ENABLE_QPA
                                   const double chromaWeight,
@@ -229,7 +432,13 @@ private:
 
   int    getCostFilterCoeffForce0( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters, bool* codedVarBins );
   int    getCostFilterCoeff( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters );
+#if JVET_N0242_NON_LINEAR_ALF
+  int    getCostFilterClipp( AlfFilterShape& alfShape, int **pDiffQFilterCoeffIntPP, const int numFilters );
+#endif
   int    lengthFilterCoeffs( AlfFilterShape& alfShape, const int numFilters, int **FilterCoeff, int* kMinTab );
+#if JVET_N0242_NON_LINEAR_ALF
+  int    lengthFilterClipps( AlfFilterShape& alfShape, const int numFilters, int **FilterCoeff, int* kMinTab );
+#endif
   double getDistForce0( AlfFilterShape& alfShape, const int numFilters, double errorTabForce0Coeff[MAX_NUM_ALF_CLASSES][2], bool* codedVarBins );
   int    getCoeffRate( AlfSliceParam& alfSliceParam, bool isChroma );
 
@@ -237,12 +446,14 @@ private:
   double getUnfilteredDistortion( AlfCovariance* cov, const int numClasses );
   double getFilteredDistortion( AlfCovariance* cov, const int numClasses, const int numFiltersMinus1, const int numCoeff );
 
+#if !JVET_N0242_NON_LINEAR_ALF
   // Cholesky decomposition
   int  gnsSolveByChol( double **LHS, double *rhs, double *x, int numEq );
   void gnsBacksubstitution( double R[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], double* z, int size, double* A );
   void gnsTransposeBacksubstitution( double U[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], double* rhs, double* x, int order );
   int  gnsCholeskyDec( double **inpMatr, double outMatr[MAX_NUM_ALF_COEFF][MAX_NUM_ALF_COEFF], int numEq );
 
+#endif
   void setEnableFlag( AlfSliceParam& alfSlicePara, ChannelType channel, bool val );
   void setEnableFlag( AlfSliceParam& alfSlicePara, ChannelType channel, uint8_t** ctuFlags );
   void setCtuEnableFlag( uint8_t** ctuFlags, ChannelType channel, uint8_t val );
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index ceb945663..f1ece8c0e 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -266,6 +266,10 @@ protected:
   bool      m_useAMaxBT;
   bool      m_e0023FastEnc;
   bool      m_contentBasedFastQtbt;
+#if JVET_N0242_NON_LINEAR_ALF
+  bool      m_useNonLinearAlfLuma;
+  bool      m_useNonLinearAlfChroma;
+#endif
 
 #if MAX_TB_SIZE_SIGNALLING
   uint32_t  m_log2MaxTbSize;
@@ -822,6 +826,12 @@ public:
   bool      getUseE0023FastEnc              () const         { return m_e0023FastEnc; }
   void      setUseContentBasedFastQtbt      ( bool b )       { m_contentBasedFastQtbt = b; }
   bool      getUseContentBasedFastQtbt      () const         { return m_contentBasedFastQtbt; }
+#if JVET_N0242_NON_LINEAR_ALF
+  void      setUseNonLinearAlfLuma          ( bool b )       { m_useNonLinearAlfLuma = b; }
+  bool      getUseNonLinearAlfLuma          () const         { return m_useNonLinearAlfLuma; }
+  void      setUseNonLinearAlfChroma        ( bool b )       { m_useNonLinearAlfChroma = b; }
+  bool      getUseNonLinearAlfChroma        () const         { return m_useNonLinearAlfChroma; }
+#endif
 
 #if MAX_TB_SIZE_SIGNALLING
   void      setLog2MaxTbSize                ( uint32_t  u )   { m_log2MaxTbSize = u; }
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index dcc383dc2..fbb1edeb1 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -135,7 +135,11 @@ void EncLib::create ()
   }
   if( m_alf )
   {
+#if JVET_N0242_NON_LINEAR_ALF
+    m_cEncALF.create( this, getSourceWidth(), getSourceHeight(), m_chromaFormatIDC, m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth, m_bitDepth, m_inputBitDepth );
+#else
     m_cEncALF.create( getSourceWidth(), getSourceHeight(), m_chromaFormatIDC, m_maxCUWidth, m_maxCUHeight, m_maxTotalCUDepth, m_bitDepth, m_inputBitDepth );
+#endif
   }
 
 #if ENABLE_SPLIT_PARALLELISM || ENABLE_WPP_PARALLELISM
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index ae81af9f9..57b20baad 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -377,6 +377,14 @@ void HLSWriter::codeAPS( APS* pcAPS)
   const int alfChromaIdc = param.enabledFlag[COMPONENT_Cb] * 2 + param.enabledFlag[COMPONENT_Cr];
   truncatedUnaryEqProb(alfChromaIdc, 3);   // alf_chroma_idc
 
+#if JVET_N0242_NON_LINEAR_ALF
+  WRITE_FLAG( param.nonLinearFlag[CHANNEL_TYPE_LUMA], "alf_luma_clip" );
+  if( alfChromaIdc )
+  {
+    WRITE_FLAG( param.nonLinearFlag[CHANNEL_TYPE_CHROMA], "alf_chroma_clip" );
+  }
+#endif
+
   xWriteTruncBinCode(param.numLumaFilters - 1, MAX_NUM_ALF_CLASSES);  //number_of_filters_minus1
   if (param.numLumaFilters > 1)
   {
@@ -1793,8 +1801,11 @@ bool HLSWriter::xFindMatchingLTRP(Slice* pcSlice, uint32_t *ltrpsIndex, int ltrp
   return false;
 }
 
-
+#if JVET_N0242_NON_LINEAR_ALF
+void HLSWriter::alfGolombEncode( int coeff, int k, const bool signed_coeff )
+#else
 void HLSWriter::alfGolombEncode( int coeff, int k )
+#endif
 {
   int symbol = abs( coeff );
 
@@ -1814,7 +1825,11 @@ void HLSWriter::alfGolombEncode( int coeff, int k )
     symbol >>= 1;
   }
 
+#if JVET_N0242_NON_LINEAR_ALF
+  if( signed_coeff && coeff != 0 )
+#else
   if( coeff != 0 )
+#endif
   {
     int sign = ( coeff > 0 ) ? 1 : 0;
     xWriteFlag( sign );
@@ -1840,6 +1855,9 @@ void HLSWriter::alfFilter( const AlfSliceParam& alfSliceParam, const bool isChro
   AlfFilterShape alfShape( isChroma ? 5 : 7 );
   const int maxGolombIdx = AdaptiveLoopFilter::getMaxGolombIdx( alfShape.filterType );
   const short* coeff = isChroma ? alfSliceParam.chromaCoeff : alfSliceParam.lumaCoeff;
+#if JVET_N0242_NON_LINEAR_ALF
+  const short* clipp = isChroma ? alfSliceParam.chromaClipp : alfSliceParam.lumaClipp;
+#endif
   const int numFilters = isChroma ? 1 : alfSliceParam.numLumaFilters;
 
   // vlc for all
@@ -1897,6 +1915,82 @@ void HLSWriter::alfFilter( const AlfSliceParam& alfSliceParam, const bool isChro
       alfGolombEncode( coeff[ind* MAX_NUM_ALF_LUMA_COEFF + i], kMinTab[alfShape.golombIdx[i]] );  // alf_coeff_chroma[i], alf_coeff_luma_delta[i][j]
     }
   }
+#if JVET_N0242_NON_LINEAR_ALF
+
+  // Clipping values coding
+  if( alfSliceParam.nonLinearFlag[isChroma] )
+  {
+    memset( bitsCoeffScan, 0, sizeof( bitsCoeffScan ) );
+
+    short recCoeff[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
+    if( isChroma )
+    {
+      memcpy( recCoeff, coeff, sizeof(short) * MAX_NUM_ALF_CHROMA_COEFF );
+    }
+    else
+    {
+      memcpy( recCoeff, coeff, sizeof(short) * numFilters * MAX_NUM_ALF_LUMA_COEFF );
+
+      if( alfSliceParam.alfLumaCoeffDeltaPredictionFlag )
+      {
+        for( int i = 1; i < numFilters; i++ )
+        {
+          for( int j = 0; j < alfShape.numCoeff - 1; j++ )
+          {
+            recCoeff[i * MAX_NUM_ALF_LUMA_COEFF + j] += recCoeff[( i - 1 ) * MAX_NUM_ALF_LUMA_COEFF + j];
+          }
+        }
+      }
+    }
+    // Gather, per Golomb index and candidate order k, the bit cost of every clipping value that will be coded
+    for( int ind = 0; ind < numFilters; ++ind )
+    {
+      if( isChroma || !alfSliceParam.alfLumaCoeffDeltaFlag || alfSliceParam.alfLumaCoeffFlag[ind] )
+      {
+        for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+        {
+          if( !abs( recCoeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] ) )
+            continue;
+          int coeffVal = abs( clipp[ind * MAX_NUM_ALF_LUMA_COEFF + i] );
+
+          for( int k = 1; k < 15; k++ )
+          {
+            bitsCoeffScan[alfShape.golombIdx[i]][k] += EncAdaptiveLoopFilter::lengthGolomb( coeffVal, k, false );
+          }
+        }
+      }
+    }
+
+    kMin = EncAdaptiveLoopFilter::getGolombKMin( alfShape, numFilters, kMinTab, bitsCoeffScan );
+
+    // Golomb parameters
+    WRITE_UVLC( kMin - 1, "clip_min_golomb_order" );
+
+    for( int idx = 0; idx < maxGolombIdx; idx++ )
+    {
+      bool golombOrderIncreaseFlag = ( kMinTab[idx] != kMin ) ? true : false;
+      CHECK( !( kMinTab[idx] <= kMin + 1 ), "ALF Golomb parameter not consistent" );
+      WRITE_FLAG( golombOrderIncreaseFlag, "clip_golomb_order_increase_flag" );
+      kMin = kMinTab[idx];
+    }
+
+    // Clipping values
+    for( int ind = 0; ind < numFilters; ++ind )
+    {
+      if( !isChroma && !alfSliceParam.alfLumaCoeffFlag[ind] && alfSliceParam.alfLumaCoeffDeltaFlag )
+      {
+        continue;
+      }
+
+      for( int i = 0; i < alfShape.numCoeff - 1; i++ )
+      {
+        if( !abs( recCoeff[ind * MAX_NUM_ALF_LUMA_COEFF + i] ) )
+          continue;
+        alfGolombEncode( clipp[ind* MAX_NUM_ALF_LUMA_COEFF + i], kMinTab[alfShape.golombIdx[i]], false );  // clipping value of coefficient i in filter ind, coded without a sign bit
+      }
+    }
+  }
+#endif
 }
 
 void HLSWriter::xWriteTruncBinCode( uint32_t uiSymbol, const int uiMaxSymbol )
diff --git a/source/Lib/EncoderLib/VLCWriter.h b/source/Lib/EncoderLib/VLCWriter.h
index 2ec729bde..b4ddd26eb 100644
--- a/source/Lib/EncoderLib/VLCWriter.h
+++ b/source/Lib/EncoderLib/VLCWriter.h
@@ -149,7 +149,11 @@ public:
 
 private:
   void xWriteTruncBinCode( uint32_t uiSymbol, const int uiMaxSymbol );
+#if JVET_N0242_NON_LINEAR_ALF
+  void alfGolombEncode( const int coeff, const int k, const bool signed_coeff=true );
+#else
   void alfGolombEncode( const int coeff, const int k );
+#endif
   void truncatedUnaryEqProb( int symbol, int maxSymbol );
 
   void  codeReshaper            ( const SliceReshapeInfo& pSliceReshaperInfo, const SPS* pcSPS, const bool isIntra);
-- 
GitLab