diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index a2881df31554040e244d372d2ca78c24a3912e29..6713cb839e84e1f97994f5ff32603b8f670e9489 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -1256,6 +1256,9 @@ void EncApp::xInitLibCfg()
 #endif
 #if JVET_AH0057_CCALF_COEFF_PRECISION
   m_cEncLib.setUseCCALFPrecision                                 ( m_ccalfPrecision );
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  m_cEncLib.setAlfLumaFixedFilterAdjust                          ( m_alfLumaFixedFilterAdjust );
 #endif
   m_cEncLib.setTestSAODisableAtPictureLevel                      ( m_bTestSAODisableAtPictureLevel );
   m_cEncLib.setSaoEncodingRate                                   ( m_saoEncodingRate );
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index 737460da0d59fcf4437eb1a11f688ece2af21650..49aa88e1dbec1b60d4ee66748bfa6f026006bc3d 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -1567,6 +1567,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
 #endif
 #if JVET_AH0057_CCALF_COEFF_PRECISION
   ("CCALFPrecision",                                  m_ccalfPrecision,                                  true, "Cross-component Alf with variable precision coefficients")
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  ("AlfLumaFixedFilterAdjust",                        m_alfLumaFixedFilterAdjust,                         true, "Alf Luma Fixed Filter Adjustment" )
 #endif
   ("TestSAODisableAtPictureLevel",                    m_bTestSAODisableAtPictureLevel,                  false, "Enables the testing of disabling SAO at the picture level after having analysed all blocks")
   ("SaoEncodingRate",                                 m_saoEncodingRate,                                 0.75, "When >0 SAO early picture termination is enabled for luma and chroma")
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index 9a478c16a0901ac17536b60ee3dbf14f38c0769d..2b1b81859dbbe74e8f9f24fbd01cc41e9871808d 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -756,6 +756,9 @@ protected:
 #endif
 #if JVET_AH0057_CCALF_COEFF_PRECISION
   bool      m_ccalfPrecision;
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  bool      m_alfLumaFixedFilterAdjust;
 #endif
   bool      m_bTestSAODisableAtPictureLevel;
   double    m_saoEncodingRate;                                ///< When >0 SAO early picture termination is enabled for luma and chroma
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
index 4a8a3c36089e9cc4441f4dead5b38e76c28f1669..ceaa4d3d184ca1d214f7bc3878b571862737a2b8 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
@@ -64,6 +64,12 @@ AdaptiveLoopFilter::AdaptiveLoopFilter()
   {
     m_classifier[i] = nullptr;
   }
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  for( int i = 0; i < 1; i++ )
+  {
+    m_classifierCodingInfo[i] = nullptr;
+  }
 #endif
   for (size_t i = 0; i < NUM_DIRECTIONS; i++)
   {
@@ -163,6 +169,10 @@ AdaptiveLoopFilter::AdaptiveLoopFilter()
 #else
   m_deriveClassificationBlk = deriveClassificationBlk;
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  m_textureClassMapping = textureClassMapping;
+  m_calcAlfLumaCodingInfoBlk = calcAlfLumaCodingInfoBlk;
+#endif
 
 #if ENABLE_SIMD_OPT_ALF
 #ifdef TARGET_SIMD_X86
@@ -671,6 +681,9 @@ void AdaptiveLoopFilter::ALFProcess(CodingStructure& cs)
 #endif
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
   memset(m_ctuPadFlag, 0, sizeof(uint8_t) * m_numCTUsInPic);
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  PelUnitBuf tmpYuvCodingInfo = m_tempBufCodingInfo.getBuf( cs.area );
 #endif
   const PreCalcValues& pcv = *cs.pcv;
 
@@ -769,6 +782,23 @@ void AdaptiveLoopFilter::ALFProcess(CodingStructure& cs)
 #endif
             buf = buf.subBuf( UnitArea( cs.area.chromaFormat, Area( clipL ? 0 : MAX_ALF_PADDING_SIZE, clipT ? 0 : MAX_ALF_PADDING_SIZE, w, h ) ) );
 
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+            PelUnitBuf bufCodingInfo = m_tempBufCodingInfo2.subBuf( UnitArea( CHROMA_400, Area( 0, 0, wBuf, hBuf ) ) );
+            bufCodingInfo.copyFrom( tmpYuvCodingInfo.subBuf( UnitArea( CHROMA_400, Area( xStart - ( clipL ? 0 : MAX_ALF_PADDING_SIZE ), yStart - ( clipT ? 0 : MAX_ALF_PADDING_SIZE ), wBuf, hBuf ) ) ) );
+            // pad top-left unavailable samples for raster slice
+            if( xStart == xPos && yStart == yPos && ( rasterSliceAlfPad & 1 ) )
+            {
+              bufCodingInfo.padBorderPel( MAX_ALF_PADDING_SIZE, 1 );
+            }
+
+            // pad bottom-right unavailable samples for raster slice
+            if( xEnd == xPos + width && yEnd == yPos + height && ( rasterSliceAlfPad & 2 ) )
+            {
+              bufCodingInfo.padBorderPel( MAX_ALF_PADDING_SIZE, 2 );
+            }
+            mirroredPaddingForAlf(cs, bufCodingInfo, MAX_ALF_PADDING_SIZE, true, false);
+            bufCodingInfo = bufCodingInfo.subBuf( UnitArea( CHROMA_400, Area( clipL ? 0 : MAX_ALF_PADDING_SIZE, clipT ? 0 : MAX_ALF_PADDING_SIZE, w, h ) ) );
+#endif
 #if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT
 #if JVET_AF0197_LUMA_RESIDUAL_TAP_IN_CCALF
             PelUnitBuf bufResi = m_tempBufResi2.subBuf(UnitArea(CHROMA_400, Area(0, 0, wBuf, hBuf)));
@@ -817,6 +847,9 @@ void AdaptiveLoopFilter::ALFProcess(CodingStructure& cs)
               bufDb.extendBorderPel(NUM_DB_PAD);
               bufDb = bufDb.subBuf(UnitArea(CHROMA_400, Area(clipL ? 0 : NUM_DB_PAD, clipT ? 0 : NUM_DB_PAD, w, h)));
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+              calcAlfLumaCodingInfoBlk(cs, m_classifierCodingInfo[0], blkDst, blkSrc, buf.get(COMPONENT_Y), 2, 2, m_inputBitDepth[CHANNEL_TYPE_LUMA], bufResi.get(COMPONENT_Y), m_laplacian[0], bufCodingInfo.get(COMPONENT_Y) );
+#endif
 #if JVET_X0071_ALF_BAND_CLASSIFIER
               deriveClassification( m_classifier, buf.get(COMPONENT_Y), 
 #if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT
@@ -1213,6 +1246,9 @@ void AdaptiveLoopFilter::ALFProcess(CodingStructure& cs)
         {
           Area blk( xPos, yPos, width, height );
           short filterSetIndex = alfCtuFilterIndex[ctuIdx];
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+          calcAlfLumaCodingInfoBlk(cs, m_classifierCodingInfo[0], blk, blk, recYuv.get(COMPONENT_Y), 2, 2, m_inputBitDepth[CHANNEL_TYPE_LUMA], tmpYuvResi.get(COMPONENT_Y), m_laplacian[0], tmpYuvCodingInfo.get(COMPONENT_Y) );
+#endif
 #if JVET_X0071_ALF_BAND_CLASSIFIER
           deriveClassification( m_classifier, tmpYuv.get(COMPONENT_Y),
 #if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT
@@ -1232,13 +1268,21 @@ void AdaptiveLoopFilter::ALFProcess(CodingStructure& cs)
           if( filterSetIndex != 0 )
           {
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
-            deriveFixedFilterResultsCtuBoundary( m_classifier, m_fixFilterResult[COMPONENT_Y], tmpYuv.get( COMPONENT_Y ), tmpYuvBeforeDb.get( COMPONENT_Y ), blk, m_inputBitDepth[CHANNEL_TYPE_LUMA], cs, m_clpRngs.comp[COMPONENT_Y], m_alfClippingValues[CHANNEL_TYPE_LUMA], cs.slice->getSliceQp(), fixedFilterSetIdx, m_mappingDir, m_laplacian, m_ctuEnableFlag[COMPONENT_Y], m_ctuEnableOnlineLumaFlag, ctuIdx, 0 );
+            deriveFixedFilterResultsCtuBoundary( m_classifier, m_fixFilterResult[COMPONENT_Y], tmpYuv.get( COMPONENT_Y ), tmpYuvBeforeDb.get( COMPONENT_Y ), blk, m_inputBitDepth[CHANNEL_TYPE_LUMA], cs, m_clpRngs.comp[COMPONENT_Y], m_alfClippingValues[CHANNEL_TYPE_LUMA], cs.slice->getSliceQp(), fixedFilterSetIdx, m_mappingDir, m_laplacian, m_ctuEnableFlag[COMPONENT_Y], m_ctuEnableOnlineLumaFlag, ctuIdx, 0
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+              , tmpYuvCodingInfo.get(COMPONENT_Y), tmpYuvResi.get( COMPONENT_Y )
+#endif
+              );
 #else
             deriveFixedFilterResultsCtuBoundary( m_classifier, m_fixFilterResult, tmpYuv.get( COMPONENT_Y ), tmpYuvBeforeDb.get( COMPONENT_Y ), blk, m_inputBitDepth[CHANNEL_TYPE_LUMA], cs, m_clpRngs.comp[COMPONENT_Y], m_alfClippingValues[CHANNEL_TYPE_LUMA], cs.slice->getSliceQp(), fixedFilterSetIdx, m_mappingDir, m_laplacian, m_ctuEnableFlag[COMPONENT_Y], m_ctuEnableOnlineLumaFlag, ctuIdx, 0 );
 #endif
             deriveFixedFilterResults( m_classifier, tmpYuv.get( COMPONENT_Y ), m_tempBufBeforeDb.get( COMPONENT_Y ), blk, blk, cs, 1, fixedFilterSetIdx );
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
-            deriveFixedFilterResultsCtuBoundary( m_classifier, m_fixFilterResult[COMPONENT_Y], tmpYuv.get( COMPONENT_Y ), tmpYuvBeforeDb.get( COMPONENT_Y ), blk, m_inputBitDepth[CHANNEL_TYPE_LUMA], cs, m_clpRngs.comp[COMPONENT_Y], m_alfClippingValues[CHANNEL_TYPE_LUMA], cs.slice->getSliceQp(), fixedFilterSetIdx, m_mappingDir, m_laplacian, m_ctuEnableFlag[COMPONENT_Y], m_ctuEnableOnlineLumaFlag, ctuIdx, 1 );
+            deriveFixedFilterResultsCtuBoundary( m_classifier, m_fixFilterResult[COMPONENT_Y], tmpYuv.get( COMPONENT_Y ), tmpYuvBeforeDb.get( COMPONENT_Y ), blk, m_inputBitDepth[CHANNEL_TYPE_LUMA], cs, m_clpRngs.comp[COMPONENT_Y], m_alfClippingValues[CHANNEL_TYPE_LUMA], cs.slice->getSliceQp(), fixedFilterSetIdx, m_mappingDir, m_laplacian, m_ctuEnableFlag[COMPONENT_Y], m_ctuEnableOnlineLumaFlag, ctuIdx, 1
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+              , tmpYuvCodingInfo.get(COMPONENT_Y), tmpYuvResi.get( COMPONENT_Y )
+#endif
+              );
 #else
             deriveFixedFilterResultsCtuBoundary( m_classifier, m_fixFilterResult, tmpYuv.get( COMPONENT_Y ), tmpYuvBeforeDb.get( COMPONENT_Y ), blk, m_inputBitDepth[CHANNEL_TYPE_LUMA], cs, m_clpRngs.comp[COMPONENT_Y], m_alfClippingValues[CHANNEL_TYPE_LUMA], cs.slice->getSliceQp(), fixedFilterSetIdx, m_mappingDir, m_laplacian, m_ctuEnableFlag[COMPONENT_Y], m_ctuEnableOnlineLumaFlag, ctuIdx, 1 );
 #endif
@@ -1842,6 +1886,12 @@ void AdaptiveLoopFilter::create(const int picWidth, const int picHeight, const C
   m_tempBufSAO2.destroy();
   m_tempBufSAO2.create(format, Area(0, 0, maxCUWidth + (MAX_ALF_PADDING_SIZE << 1), maxCUHeight + (MAX_ALF_PADDING_SIZE << 1)), maxCUWidth, MAX_ALF_PADDING_SIZE, 0, false);
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  m_tempBufCodingInfo.destroy();
+  m_tempBufCodingInfo.create(CHROMA_400, Area(0, 0, picWidth, picHeight), maxCUWidth, MAX_FILTER_LENGTH_FIXED, 0, false);
+  m_tempBufCodingInfo2.destroy();
+  m_tempBufCodingInfo2.create(CHROMA_400, Area( 0, 0, maxCUWidth + (MAX_ALF_PADDING_SIZE << 1), maxCUHeight + (MAX_ALF_PADDING_SIZE << 1) ), maxCUWidth, MAX_ALF_PADDING_SIZE, 0, false );
+#endif
 
 #if ALF_IMPROVEMENT
   int numFixedFilters = EXT_LENGTH << 1;
@@ -2142,6 +2192,21 @@ void AdaptiveLoopFilter::create(const int picWidth, const int picHeight, const C
     }
   }
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  for( int classifier = 0; classifier < 1; classifier++ )
+  {
+    if( m_classifierCodingInfo[classifier] == nullptr )
+    {
+      m_classifierCodingInfo[classifier] = new AlfClassifier*[picHeight];
+      m_classifierCodingInfo[classifier][0] = new AlfClassifier[picWidth * picHeight];
+
+      for( int i = 1; i < picHeight; i++ )
+      {
+        m_classifierCodingInfo[classifier][i] = m_classifierCodingInfo[classifier][0] + i * picWidth;
+      }
+    }
+  }
+#endif
 #if !ALF_IMPROVEMENT
   for (int filterSetIndex = 0; filterSetIndex < NUM_FIXED_FILTER_SETS; filterSetIndex++)
   {
@@ -2199,6 +2264,17 @@ void AdaptiveLoopFilter::destroy()
     m_classifier = nullptr;
   }
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  for (int classifier = 0; classifier < 1; classifier++)
+  {
+    if (m_classifierCodingInfo[classifier] )
+    {
+      delete[] m_classifierCodingInfo[classifier][0];
+      delete[] m_classifierCodingInfo[classifier];
+      m_classifierCodingInfo[classifier] = nullptr;
+    }
+  }
+#endif
 
 #if ALF_IMPROVEMENT
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
@@ -2391,6 +2467,10 @@ void AdaptiveLoopFilter::destroy()
 #if JVET_AI0166_CCALF_CHROMA_SAO_INPUT
   m_tempBufSAO.destroy();
   m_tempBufSAO2.destroy();
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  m_tempBufCodingInfo.destroy();
+  m_tempBufCodingInfo2.destroy();
 #endif
   m_filterShapes[CHANNEL_TYPE_LUMA].clear();
   m_filterShapes[CHANNEL_TYPE_CHROMA].clear();
@@ -3127,7 +3207,11 @@ void AdaptiveLoopFilter::deriveFixFilterResultsBlkChroma(AlfClassifier ***classi
   {
     if (fixedFiltSetInd == targetFixedFilterSetInd || targetFixedFilterSetInd == -1)
     {
-      alfFixedFilterBlk(classifier[ALF_NUM_CLASSIFIER + 1], src, blk, blkDst, srcBeforeDb, fixedFilterResults, m_picWidth, fixedFiltInd, fixedFiltSetInd, 0, clpRng, clippingValues, false);
+      alfFixedFilterBlk(classifier[ALF_NUM_CLASSIFIER + 1], src, blk, blkDst, srcBeforeDb, fixedFilterResults, m_picWidth, fixedFiltInd, fixedFiltSetInd, 0, clpRng, clippingValues, false
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        , cs
+#endif
+        );
     }
     fixedFiltInd++;
   }
@@ -3485,54 +3569,89 @@ void AdaptiveLoopFilter::calcClass0Var( AlfClassifier **classifier, const Area &
       {
         for( int jj = posXDst + j; jj < posXDst + j + subBlkSize; jj++ )
         {
+          classifier[ii][jj] = (actDirInd << 2) + transposeIdx;
+        }
+      }
 #else
       for( int ii = curBlk.y + i; ii < curBlk.y + i + subBlkSize; ii++ )
       {
         for( int jj = curBlk.x + j; jj < curBlk.x + j + subBlkSize; jj++ )
         {
-#endif
           classifier[ii][jj] = (actDirInd << 2) + transposeIdx;
         }
       }
+#endif
     }
   }
 }
 
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
-void AdaptiveLoopFilter::alfFixedFilterBlkNonSimd( AlfClassifier **classifier, const CPelBuf &src, const Area &curBlk, const Area &blkDst, const CPelBuf &srcBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4], bool isLuma )
+void AdaptiveLoopFilter::alfFixedFilterBlkNonSimd( AlfClassifier **classifier, const CPelBuf &src, const Area &curBlk, const Area &blkDst, const CPelBuf &srcBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4], bool isLuma
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  , CodingStructure &cs
+#endif
+  )
 {
   if( isLuma )
   {
     if( dirWindSize == 0 )
     { 
-      fixedFilterBlk<ALF_FIXED_FILTER_9_DB_9>( classifier, src, curBlk, blkDst, srcBeforeDb, fixedFilterResults, picWidth, fixedFiltInd, m_classIdnFixedFilter9Db9[fixedFiltQpInd], fixedFiltQpInd, dirWindSize, clpRng, clippingValues );
+      fixedFilterBlk<ALF_FIXED_FILTER_9_DB_9>( classifier, src, curBlk, blkDst, srcBeforeDb, fixedFilterResults, picWidth, fixedFiltInd, m_classIdnFixedFilter9Db9[fixedFiltQpInd], fixedFiltQpInd, dirWindSize, clpRng, clippingValues
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        , isLuma, cs, m_classifierCodingInfo[0]
+#endif
+        );
     }
     else
     {
-      fixedFilterBlk<ALF_FIXED_FILTER_13_DB_9>( classifier, src, curBlk, blkDst, srcBeforeDb, fixedFilterResults, picWidth, fixedFiltInd, m_classIdnFixedFilter13Db9[fixedFiltQpInd], fixedFiltQpInd, dirWindSize, clpRng, clippingValues );
+      fixedFilterBlk<ALF_FIXED_FILTER_13_DB_9>( classifier, src, curBlk, blkDst, srcBeforeDb, fixedFilterResults, picWidth, fixedFiltInd, m_classIdnFixedFilter13Db9[fixedFiltQpInd], fixedFiltQpInd, dirWindSize, clpRng, clippingValues
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        , isLuma, cs, m_classifierCodingInfo[0]
+#endif
+        );
     }
   }
   else
   {
-    fixedFilterBlk<ALF_FIXED_FILTER_9_DB_9>(classifier, src, curBlk, blkDst, srcBeforeDb, fixedFilterResults, picWidth, fixedFiltInd, m_classIdnFixedFilter9Db9[fixedFiltQpInd], fixedFiltQpInd, dirWindSize, clpRng, clippingValues);
+    fixedFilterBlk<ALF_FIXED_FILTER_9_DB_9>(classifier, src, curBlk, blkDst, srcBeforeDb, fixedFilterResults, picWidth, fixedFiltInd, m_classIdnFixedFilter9Db9[fixedFiltQpInd], fixedFiltQpInd, dirWindSize, clpRng, clippingValues
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      , isLuma, cs, m_classifierCodingInfo[0]
+#endif
+      );
   }
 }
-void AdaptiveLoopFilter::alfFixedFilterBlk( AlfClassifier **classifier, const CPelBuf &src, const Area &curBlk, const Area &blkDst, const CPelBuf &srcBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4], bool isLuma )
+void AdaptiveLoopFilter::alfFixedFilterBlk( AlfClassifier **classifier, const CPelBuf &src, const Area &curBlk, const Area &blkDst, const CPelBuf &srcBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4], bool isLuma
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  , CodingStructure &cs
+#endif
+  )
 {
   if( isLuma )
   {
     if( dirWindSize == 0 )
     { 
-      m_fixFilter9x9Db9Blk( classifier, src, curBlk, blkDst, srcBeforeDb, fixedFilterResults, picWidth, fixedFiltInd, m_classIdnFixedFilter9Db9[fixedFiltQpInd], fixedFiltQpInd, dirWindSize, clpRng, clippingValues );
+      m_fixFilter9x9Db9Blk( classifier, src, curBlk, blkDst, srcBeforeDb, fixedFilterResults, picWidth, fixedFiltInd, m_classIdnFixedFilter9Db9[fixedFiltQpInd], fixedFiltQpInd, dirWindSize, clpRng, clippingValues
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        , isLuma, cs, m_classifierCodingInfo[0]
+#endif
+        );
     }
     else
     {
-      m_fixFilter13x13Db9Blk( classifier, src, curBlk, blkDst, srcBeforeDb, fixedFilterResults, picWidth, fixedFiltInd, m_classIdnFixedFilter13Db9[fixedFiltQpInd], fixedFiltQpInd, dirWindSize, clpRng, clippingValues );
+      m_fixFilter13x13Db9Blk( classifier, src, curBlk, blkDst, srcBeforeDb, fixedFilterResults, picWidth, fixedFiltInd, m_classIdnFixedFilter13Db9[fixedFiltQpInd], fixedFiltQpInd, dirWindSize, clpRng, clippingValues
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        , isLuma, cs, m_classifierCodingInfo[0]
+#endif
+        );
     }
   }
   else
   {
-    m_fixFilter9x9Db9Blk( classifier, src, curBlk, blkDst, srcBeforeDb, fixedFilterResults, picWidth, fixedFiltInd, m_classIdnFixedFilter9Db9[fixedFiltQpInd], fixedFiltQpInd, dirWindSize, clpRng, clippingValues );
+    m_fixFilter9x9Db9Blk( classifier, src, curBlk, blkDst, srcBeforeDb, fixedFilterResults, picWidth, fixedFiltInd, m_classIdnFixedFilter9Db9[fixedFiltQpInd], fixedFiltQpInd, dirWindSize, clpRng, clippingValues
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      , isLuma, cs, m_classifierCodingInfo[0]
+#endif
+      );
   }
 }
 #else
@@ -3562,7 +3681,11 @@ void AdaptiveLoopFilter::alfFixedFilterBlk( AlfClassifier **classifier, const CP
 
 template<AlfFixedFilterType filtType>
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
-void AdaptiveLoopFilter::fixedFilterBlk( AlfClassifier **classifier, const CPelBuf &src, const Area &curBlk, const Area &blkDst, const CPelBuf &srcBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4] )
+void AdaptiveLoopFilter::fixedFilterBlk( AlfClassifier **classifier, const CPelBuf &src, const Area &curBlk, const Area &blkDst, const CPelBuf &srcBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  , bool applyCodingInfo, CodingStructure &cs, AlfClassifier** classifierCodingInfo
+#endif
+  )
 #else
 void AdaptiveLoopFilter::fixedFilterBlk( AlfClassifier **classifier, const CPelBuf &srcLuma, const Area &curBlk, const Area &blkDst, const CPelBuf &srcLumaBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4] )
 #endif
@@ -3604,6 +3727,15 @@ void AdaptiveLoopFilter::fixedFilterBlk( AlfClassifier **classifier, const CPelB
   const Pel *pImgYBeforeDbPad0 = srcLumaBeforeDb.buf + posY * srcBeforeDbStride + posX;
 #endif
   const int srcBeforeDbStride2 = srcBeforeDbStride * clsSizeY;
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool isIntraSlice = cs.slice->isIntra();
+  const bool isSpsAdjust  = cs.sps->getAlfLumaFixedFilterAdjust();
+  const bool useCodingInfo = true;
+
+  const bool useBounCondition = applyCodingInfo && !( !isSpsAdjust && isIntraSlice ) && useCodingInfo;
+  const bool useResiCondition = applyCodingInfo && (isSpsAdjust || !isSpsAdjust) && !isIntraSlice && useCodingInfo;
+  const int offsetClipValue = 1 << ( clpRng.bd - 1 );
+#endif
   int fixedFiltIndF0 = -1;
 
   int numCoeff;
@@ -3635,7 +3767,25 @@ void AdaptiveLoopFilter::fixedFilterBlk( AlfClassifier **classifier, const CPelB
       int classIdx = classifier[posY + i][posX + j] >> 2;
       int transposeIdx = classifier[posY + i][posX + j] & 0x3;
 #endif
-      int filterIdx = classIndFixed[classIdx];    
+      int filterIdx = classIndFixed[classIdx];
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      int classIdxBs = 0;
+      if( useBounCondition )
+      {
+        // classifierCodingInfo value A: 0 = BS0/Resi0, 1 = BS0/Resi1, 2 = BS1/Resi0, 3 = BS1/Resi1
+        // BS   = A >> 1
+        // Resi = A - ( A >> 1 ) * 2; only the BS part is extracted here
+        classIdxBs = classifierCodingInfo[posYDst + i][posXDst + j] >> 1;
+      }
+      int classIdxResi = 0;
+      if( useResiCondition )
+      {
+        // classifierCodingInfo value A: 0 = BS0/Resi0, 1 = BS0/Resi1, 2 = BS1/Resi0, 3 = BS1/Resi1
+        // BS   = A >> 1
+        // Resi = A - ( A >> 1 ) * 2; only the Resi part is extracted here
+        classIdxResi = classifierCodingInfo[posYDst + i][posXDst + j] - ((classifierCodingInfo[posYDst + i][posXDst + j] >> 1 ) * 2);
+      }
+#endif
       const short* coeff;
       const short* clipp;
 
@@ -3889,6 +4039,26 @@ void AdaptiveLoopFilter::fixedFilterBlk( AlfClassifier **classifier, const CPelB
             CHECK( 1, "not supported" );
           }
           sum = ( sum + offset ) >> shift;
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+          if( useBounCondition )
+          {
+            sum = Clip3(-offsetClipValue, +offsetClipValue, sum);
+
+            int sign = sum < 0 ? -1 : +1;
+
+            int boundaryStrengthFactor = isIntraSlice ? 4 : 3;
+            sum = classIdxBs ? sign * ((abs(sum) * (16 + boundaryStrengthFactor) + 8 ) >> 4) : sum;
+          }
+
+          if( useResiCondition )
+          {
+            sum = Clip3(-offsetClipValue, +offsetClipValue, sum);
+
+            int sign = sum < 0 ? -1 : +1;
+            int resiStrengthFactor = isIntraSlice ? 0 >> ( !isSpsAdjust ? 1 : 0) : 3 >> (!isSpsAdjust ? 1 : 0);
+            sum = classIdxResi ? sign * ((abs(sum) * (16 + resiStrengthFactor) + 8 ) >> 4) : sum;
+          }
+#endif
           sum += curr;
 #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
@@ -3918,6 +4088,9 @@ void AdaptiveLoopFilter::fixedFilterBlk( AlfClassifier **classifier, const CPelB
 void AdaptiveLoopFilter::calcClassNew( AlfClassifier **classifier, const Area &blkDst, const Area &curBlk, const CPelBuf& srcLuma, int subBlkSize, AlfClassifier **classifier0, int classifierIdx, int bitDepth
 #if JVET_AD0222_ALF_RESI_CLASS
   , const CPelBuf& srcLumaResi, uint32_t **buffer
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  , AlfClassifier ** classifierCodingInfo
 #endif
   )
 {
@@ -3939,7 +4112,21 @@ void AdaptiveLoopFilter::calcClassNew( AlfClassifier **classifier, const Area &b
       const Pel *pY0 = src0 + xOffset;
       const Pel *pY1 = src1 + xOffset;
       int sum = pY0[0] + pY0[1] + pY1[0] + pY1[1];
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      int boundShift = classifierCodingInfo[curBlk.y + i][curBlk.x + j ] >> 1;
+
+      int classIdx = 0;
+      if( boundShift == 0 )
+      {
+        classIdx = ( sum * 12 ) >> (bitDepth + 2);
+      }
+      else
+      {
+        classIdx = 12 + (( sum * 12 ) >> (bitDepth + 2));
+      }
+#else
       int classIdx = (sum * ALF_NUM_CLASSES_CLASSIFIER[classifierIdx]) >> (bitDepth + 2);
+#endif
       for (int ii = curBlk.y + i; ii < curBlk.y + i + subBlkSize; ii++)
       {
         for (int jj = curBlk.x + j; jj < curBlk.x + j + subBlkSize; jj++)
@@ -3996,11 +4183,34 @@ void AdaptiveLoopFilter::calcClassNew( AlfClassifier **classifier, const Area &b
         int j2 = j >> 1;
         int sum = buffer[i2][j2] + buffer[i2][j2 + 2] + buffer[i2 + 2][j2] + buffer[i2 + 2][j2 + 2];
         int shiftOffset = ALF_RESI_SHIFT_OFFSET;
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        int boundShift = classifierCodingInfo[curBlk.y + i][curBlk.x + j] >> 1;
+
+        int classIdx = sum >> (bitDepth - shiftOffset);
+        // Merge neighbouring classes (halve the index), then cap at 11 before the boundary-group offset of 12 is added
+        classIdx >>= 1;
+        if( boundShift == 0)
+        {
+          if( classIdx > 12 - 1)
+          {
+            classIdx = 12 - 1;
+          }
+        }
+        else
+        {
+          if( classIdx > 12 - 1)
+          {
+            classIdx = 12 - 1;
+          }
+          classIdx +=  12;
+        }
+#else
         int classIdx = sum >> (bitDepth - shiftOffset);
         if (classIdx > 24)
         {
           classIdx = 24;
         }
+#endif
         for (int ii = curBlk.y + i; ii < curBlk.y + i + subBlkSize; ii++)
         {
           for (int jj = curBlk.x + j; jj < curBlk.x + j + subBlkSize; jj++)
@@ -4223,15 +4433,18 @@ void AdaptiveLoopFilter::calcClass(AlfClassifier **classifier, const Area &blkDs
       {
         for (int jj = posXDst + j; jj < posXDst + j + subBlkSize; jj++)
         {
+          classifier[ii][jj] = (actDirInd << 2) + transposeIdx;
+        }
+      }
 #else
       for (int ii = curBlk.y + i; ii < curBlk.y + i + subBlkSize; ii++)
       {
         for (int jj = curBlk.x + j; jj < curBlk.x + j + subBlkSize; jj++)
         {
-#endif
           classifier[ii][jj] = (actDirInd << 2) + transposeIdx;
         }
       }
+#endif
     }
   }
 }
@@ -4411,7 +4624,11 @@ void AdaptiveLoopFilter::fixedFilteringResi(AlfClassifier **classifier, const CP
 #endif
                                             Pel ***fixedFilterResiResults, int picWidth, const int fixedFiltInd,
                                             const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd,
-                                            int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4])
+                                            int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  , bool applyCodingInfo, CodingStructure &cs, AlfClassifier** classifierCodingInfo
+#endif
+  )
 {
   const int shift     = m_NUM_BITS_FIXED_FILTER - 1;
   const int offset    = 1 << (shift - 1);
@@ -4432,6 +4649,14 @@ void AdaptiveLoopFilter::fixedFilteringResi(AlfClassifier **classifier, const CP
   const int  clsSizeY   = 2;
   const int  clsSizeX   = 2;
   const int  srcStride2 = srcStride * clsSizeY;
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool isIntraSlice = cs.slice->isIntra();
+  const bool isSpsAdjust = cs.sps->getAlfLumaFixedFilterAdjust();
+  const bool useCodingInfo = isSpsAdjust ? true : false;
+  const bool useBounCondition = applyCodingInfo && !( !isSpsAdjust && isIntraSlice ) && useCodingInfo;
+  const bool useResiCondition = applyCodingInfo && (isSpsAdjust || !isSpsAdjust) && !isIntraSlice && useCodingInfo;
+  const int offsetClipValue =  1 << ( clpRng.bd - 1 );
+#endif
 
   for (int i = 0; i < curBlk.height; i += clsSizeY)
   {
@@ -4445,6 +4670,25 @@ void AdaptiveLoopFilter::fixedFilteringResi(AlfClassifier **classifier, const CP
       int transposeIdx = classifier[posY + i][posX + j] & 0x3;
 #endif
       int filterIdx = classIndFixed[classIdx];
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      int classIdxBs = 0;
+      if( useBounCondition )
+      {
+        // classifierCodingInfo value A: 0 = BS0/Resi0, 1 = BS0/Resi1, 2 = BS1/Resi0, 3 = BS1/Resi1
+        // BS   = A >> 1
+        // Resi = A - ( A >> 1 ) * 2; only the BS part is extracted here
+        classIdxBs = classifierCodingInfo[posYDst + i][posXDst + j] >> 1;
+      }
+
+      int classIdxResi = 0;
+      if( useResiCondition )
+      {
+        // classifierCodingInfo value A: 0 = BS0/Resi0, 1 = BS0/Resi1, 2 = BS1/Resi0, 3 = BS1/Resi1
+        // BS   = A >> 1
+        // Resi = A - ( A >> 1 ) * 2; only the Resi part is extracted here
+        classIdxResi = classifierCodingInfo[posYDst + i][posXDst + j] - ((classifierCodingInfo[posYDst + i][posXDst + j] >> 1 ) * 2);
+      }
+#endif
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
       std::array<short, FIX_FILTER_NUM_COEFF_9_DB_9> filterCoeff;
       std::array<short, FIX_FILTER_NUM_COEFF_9_DB_9> filterClipp;
@@ -4656,6 +4900,25 @@ void AdaptiveLoopFilter::fixedFilteringResi(AlfClassifier **classifier, const CP
 #endif
 
           sum = (sum + offset) >> shift;
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+          if( useBounCondition )
+          {
+            sum = Clip3(-offsetClipValue, +offsetClipValue, sum);
+
+            int sign = sum < 0 ? -1 : +1;
+
+            int boundaryStrengthFactor = isIntraSlice ? 4 : 3;
+            sum = classIdxBs ? sign * ((abs(sum) * (16 + boundaryStrengthFactor) + 8 ) >> 4) : sum;
+          }
+          if( useResiCondition )
+          {
+            sum = Clip3(-offsetClipValue, +offsetClipValue, sum);
+
+            int sign = sum < 0 ? -1 : +1;
+            int resiStrengthFactor = isIntraSlice ? 0 >> (!isSpsAdjust ? 1 : 0) : 3 >> (!isSpsAdjust ? 1 : 0);
+            sum = classIdxResi ? sign * ((abs(sum) * (16 + resiStrengthFactor) + 8 ) >> 4) : sum;
+          }
+#endif
           sum += curr;
 
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
@@ -4901,7 +5164,11 @@ void AdaptiveLoopFilter::deriveClassificationAndFixFilterResultsBlk( AlfClassifi
 #if JVET_X0071_ALF_BAND_CLASSIFIER
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
-            alfFixedFilterBlk( classifier[storeIdx], srcLuma, blkNew, blkDstNew, srcLumaBeforeDb, fixedFilterResults, m_picWidth, fixedFiltInd, fixedFiltSetInd, dirWindSize, clpRng, clippingValues, true );
+            alfFixedFilterBlk( classifier[storeIdx], srcLuma, blkNew, blkDstNew, srcLumaBeforeDb, fixedFilterResults, m_picWidth, fixedFiltInd, fixedFiltSetInd, dirWindSize, clpRng, clippingValues, true
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+              , cs
+#endif
+              );
 #else
             alfFixedFilterBlk( classifier[storeIdx], srcLuma, blkNew, blkDstNew, srcLumaBeforeDb, fixedFilterResults, m_picWidth, fixedFiltInd, fixedFiltSetInd, dirWindSize, clpRng, clippingValues );
 #endif
@@ -4969,7 +5236,11 @@ void AdaptiveLoopFilter::deriveClassificationAndFixFilterResultsBlk( AlfClassifi
           if (bResiFixed && dirWindSize == 0)
           {
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
-            m_filterResi9x9Blk(classifier[0], srcResiLuma, blk, blkDst, fixedFilterResiResults, m_picWidth, fixedFiltInd, m_classIdnFixedFilter9Db9[fixedFiltSetInd], fixedFiltSetInd, dirWindSize, clpRng, clippingValues);
+            m_filterResi9x9Blk(classifier[0], srcResiLuma, blk, blkDst, fixedFilterResiResults, m_picWidth, fixedFiltInd, m_classIdnFixedFilter9Db9[fixedFiltSetInd], fixedFiltSetInd, dirWindSize, clpRng, clippingValues
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+              , true, cs, m_classifierCodingInfo[0]
+#endif
+              );
 #else
             m_filterResi13x13Blk(classifier[0], srcResiLuma,
 #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
@@ -5005,6 +5276,9 @@ void AdaptiveLoopFilter::deriveClassificationAndFixFilterResultsBlk( AlfClassifi
 #else
       m_calcClass1(classifier[0], blkDst, Area(blkDst.pos().x, blkDst.pos().y, blkDst.width, blkDst.height), 5, 0, 5, 5,
                    bits, 2, mappingDir, laplacian);
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      m_textureClassMapping(classifier[0], blkDst, 0, 2, m_classifierCodingInfo[0] );
 #endif
     }
     for( int curClassifierIdx = 1; curClassifierIdx < ALF_NUM_CLASSIFIER; curClassifierIdx++ )
@@ -5017,7 +5291,11 @@ void AdaptiveLoopFilter::deriveClassificationAndFixFilterResultsBlk( AlfClassifi
         {
           continue;
         }
-        m_calcClass2( classifier[curClassifierIdx], blk, Area( blkDst.pos().x, blkDst.pos().y, blkDst.width, blkDst.height ), srcLuma, 2, classifier[0], curClassifierIdx, bits, srcResiLuma, laplacian[0] );
+        m_calcClass2( classifier[curClassifierIdx], blk, Area( blkDst.pos().x, blkDst.pos().y, blkDst.width, blkDst.height ), srcLuma, 2, classifier[0], curClassifierIdx, bits, srcResiLuma, laplacian[0]
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+          , m_classifierCodingInfo[0]
+#endif
+          );
 #else
         m_calcClass2( classifier[curClassifierIdx], blk, Area( blkDst.pos().x, blkDst.pos().y, blkDst.width, blkDst.height ), srcLuma, 2, classifier[0], curClassifierIdx, bits );
 #endif
@@ -6855,7 +7133,11 @@ void AdaptiveLoopFilter::deriveFixedFilterResultsBlk(AlfClassifier*** classifier
     if (fixedFiltSetInd == targetFixedFilterSetInd || targetFixedFilterSetInd == -1)
     {
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
-      alfFixedFilterBlk(winIdx == 0 ? classifier[0] : classifier[ALF_NUM_CLASSIFIER], srcLuma, blk, blkDst, srcLumaBeforeDb, m_fixFilterResult[COMPONENT_Y], m_picWidth, fixedFiltInd, fixedFiltSetInd, winIdx, clpRng, clippingValues, true);
+      alfFixedFilterBlk(winIdx == 0 ? classifier[0] : classifier[ALF_NUM_CLASSIFIER], srcLuma, blk, blkDst, srcLumaBeforeDb, m_fixFilterResult[COMPONENT_Y], m_picWidth, fixedFiltInd, fixedFiltSetInd, winIdx, clpRng, clippingValues, true
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        , cs
+#endif
+        );
 #else
       alfFixedFilterBlk(winIdx == 0 ? classifier[0] : classifier[ALF_NUM_CLASSIFIER], srcLuma, blk, blkDst, srcLumaBeforeDb, m_fixFilterResult, m_picWidth, fixedFiltInd, fixedFiltSetInd, winIdx, clpRng, clippingValues);
 #endif
@@ -6905,14 +7187,22 @@ void AdaptiveLoopFilter::deriveFixedFilterResultsPerBlkChroma(AlfClassifier ***c
     m_deriveVariance(src, blk, blk, laplacian);
     m_deriveClassificationLaplacian(src, blk, blk, laplacian, ALF_CLASSIFIER_FL_CHROMA);
     m_calcClass0(classifier[ALF_NUM_CLASSIFIER + 1], blk, blk, ALF_CLASSIFIER_FL_CHROMA + 10, 1, NUM_DIR_FIX, NUM_ACT_FIX, bits, 2, mappingDir, laplacian);
-    alfFixedFilterBlk(classifier[ALF_NUM_CLASSIFIER + 1], src, blk, blk, srcBeforeDb, fixedFilterResults, m_picWidth, fixedFilterSetIdx, targetFixedFilterSetInd, 0, clpRng, clippingValues, false);
+    alfFixedFilterBlk(classifier[ALF_NUM_CLASSIFIER + 1], src, blk, blk, srcBeforeDb, fixedFilterResults, m_picWidth, fixedFilterSetIdx, targetFixedFilterSetInd, 0, clpRng, clippingValues, false
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      , cs
+#endif
+      );
   }
   else
   {
     deriveVariance(src, blk, blk, laplacian);
     deriveClassificationLaplacian(src, blk, blk, laplacian, ALF_CLASSIFIER_FL_CHROMA);
     calcClass0Var(classifier[ALF_NUM_CLASSIFIER + 1], blk, blk, ALF_CLASSIFIER_FL_CHROMA + 10, 1, NUM_DIR_FIX, NUM_ACT_FIX, bits, 2, mappingDir, laplacian);
-    alfFixedFilterBlkNonSimd(classifier[ALF_NUM_CLASSIFIER + 1], src, blk, blk, srcBeforeDb, fixedFilterResults, m_picWidth, fixedFilterSetIdx, targetFixedFilterSetInd, 0, clpRng, clippingValues, false);
+    alfFixedFilterBlkNonSimd(classifier[ALF_NUM_CLASSIFIER + 1], src, blk, blk, srcBeforeDb, fixedFilterResults, m_picWidth, fixedFilterSetIdx, targetFixedFilterSetInd, 0, clpRng, clippingValues, false
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      , cs
+#endif
+      );
   }
 }
 
@@ -7000,7 +7290,11 @@ void AdaptiveLoopFilter::deriveFixedFilterResultsCtuBoundaryChroma(AlfClassifier
 }
 #endif
 
-void AdaptiveLoopFilter::deriveFixedFilterResultsCtuBoundary(AlfClassifier ***classifier, Pel ***fixedFilterResults, const CPelBuf &srcLuma, const CPelBuf &srcLumaBeforeDb, const Area &blkDst, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS], uint8_t* ctuEnableFlagLuma, uint8_t* ctuEnableOnlineLuma, int ctuIdx, int classifierIdx)
+void AdaptiveLoopFilter::deriveFixedFilterResultsCtuBoundary(AlfClassifier ***classifier, Pel ***fixedFilterResults, const CPelBuf &srcLuma, const CPelBuf &srcLumaBeforeDb, const Area &blkDst, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS], uint8_t* ctuEnableFlagLuma, uint8_t* ctuEnableOnlineLuma, int ctuIdx, int classifierIdx
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  , const CPelBuf& srcCodingInfo, const CPelBuf& srcResi
+#endif
+  )
 #else
 void AdaptiveLoopFilter::deriveFixedFilterResultsCtuBoundary(AlfClassifier **classifier, Pel ***fixedFilterResults, const CPelBuf &srcLuma, const Area &blkDst, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS], uint8_t* ctuEnableFlagLuma, uint8_t* ctuEnableOnlineLuma, int ctuIdx )
 #endif
@@ -7080,6 +7374,13 @@ void AdaptiveLoopFilter::deriveFixedFilterResultsCtuBoundary(AlfClassifier **cla
   {
     if(isBoundaryValid[boundaryIdx] && !isNeighborAvai[boundaryIdx])
     {
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      //classifierIdx = 1 can reuse coding info results
+      if( classifierIdx == 0 )
+      {
+        calcAlfLumaCodingInfoBlk(cs, m_classifierCodingInfo[0], blkCur[boundaryIdx], blkCur[boundaryIdx], srcLuma, 2, 2, m_inputBitDepth[CHANNEL_TYPE_LUMA], srcResi, m_laplacian[0], srcCodingInfo);
+      }
+#endif
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
       if (boundaryIdx == 1)
       {
@@ -7429,11 +7730,19 @@ void AdaptiveLoopFilter::deriveFixedFilterResultsPerBlk( AlfClassifier **classif
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
     if( useSimd)
     {
-      alfFixedFilterBlk( classifier[ALF_NUM_CLASSIFIER], srcLuma, blkCur, blkCur, srcLumaBeforeDb, fixedFilterResults, m_picWidth, classifierIdx * NUM_FIXED_FILTER_SETS + fixedFilterSetIdx, targetFixedFilterSetInd, classifierIdx, clpRng, clippingValues, true );
+      alfFixedFilterBlk( classifier[ALF_NUM_CLASSIFIER], srcLuma, blkCur, blkCur, srcLumaBeforeDb, fixedFilterResults, m_picWidth, classifierIdx * NUM_FIXED_FILTER_SETS + fixedFilterSetIdx, targetFixedFilterSetInd, classifierIdx, clpRng, clippingValues, true
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+       , cs
+#endif
+       );
     }
     else
     {
-      alfFixedFilterBlkNonSimd( classifier[ALF_NUM_CLASSIFIER], srcLuma, blkCur, blkCur, srcLumaBeforeDb, fixedFilterResults, m_picWidth, classifierIdx * NUM_FIXED_FILTER_SETS + fixedFilterSetIdx, targetFixedFilterSetInd, classifierIdx, clpRng, clippingValues, true );
+      alfFixedFilterBlkNonSimd( classifier[ALF_NUM_CLASSIFIER], srcLuma, blkCur, blkCur, srcLumaBeforeDb, fixedFilterResults, m_picWidth, classifierIdx * NUM_FIXED_FILTER_SETS + fixedFilterSetIdx, targetFixedFilterSetInd, classifierIdx, clpRng, clippingValues, true
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+       , cs
+#endif
+       );
     }
 #else
     if( useSimd)
@@ -7456,7 +7765,11 @@ void AdaptiveLoopFilter::deriveFixedFilterResultsPerBlk( AlfClassifier **classif
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
       m_calcClass0( classifier[0], blkCur, blkCur, usedWindowIdx[0], 1, NUM_DIR_FIX, NUM_ACT_FIX, bits, 2, mappingDir, laplacian);
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
-      alfFixedFilterBlk( classifier[0], srcLuma, blkCur, blkCur, srcLumaBeforeDb, fixedFilterResults, m_picWidth, classifierIdx * NUM_FIXED_FILTER_SETS + fixedFilterSetIdx, targetFixedFilterSetInd, classifierIdx, clpRng, clippingValues, true );
+      alfFixedFilterBlk( classifier[0], srcLuma, blkCur, blkCur, srcLumaBeforeDb, fixedFilterResults, m_picWidth, classifierIdx * NUM_FIXED_FILTER_SETS + fixedFilterSetIdx, targetFixedFilterSetInd, classifierIdx, clpRng, clippingValues, true
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        , cs
+#endif
+        );
 #else
       alfFixedFilterBlk(classifier[0], srcLuma, blkCur, blkCur, srcLumaBeforeDb, fixedFilterResults, m_picWidth, classifierIdx * NUM_FIXED_FILTER_SETS + fixedFilterSetIdx, targetFixedFilterSetInd, classifierIdx, clpRng, clippingValues );
 #endif
@@ -7471,7 +7784,11 @@ void AdaptiveLoopFilter::deriveFixedFilterResultsPerBlk( AlfClassifier **classif
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
       calcClass0Var( classifier[0], blkCur, blkCur, usedWindowIdx[0], 1, NUM_DIR_FIX, NUM_ACT_FIX, bits, 2, mappingDir, laplacian );
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
-      alfFixedFilterBlkNonSimd( classifier[0], srcLuma, blkCur, blkCur, srcLumaBeforeDb, fixedFilterResults, m_picWidth, classifierIdx * NUM_FIXED_FILTER_SETS + fixedFilterSetIdx, targetFixedFilterSetInd, classifierIdx, clpRng, clippingValues, true );
+      alfFixedFilterBlkNonSimd( classifier[0], srcLuma, blkCur, blkCur, srcLumaBeforeDb, fixedFilterResults, m_picWidth, classifierIdx * NUM_FIXED_FILTER_SETS + fixedFilterSetIdx, targetFixedFilterSetInd, classifierIdx, clpRng, clippingValues, true
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+       , cs
+#endif
+      );
 #else
       alfFixedFilterBlkNonSimd( classifier[0], srcLuma, blkCur, blkCur, srcLumaBeforeDb, fixedFilterResults, m_picWidth, classifierIdx * NUM_FIXED_FILTER_SETS + fixedFilterSetIdx, targetFixedFilterSetInd, classifierIdx, clpRng, clippingValues );
 #endif
@@ -7779,15 +8096,27 @@ void AdaptiveLoopFilter::deriveGaussResultsBlk(Pel ***gaussPic, const CPelBuf &s
   bool useSimd = blkDst.size().width % 8 == 0 ? true : false;
   if( useSimd )
   {
-    m_gaussFiltering(cs, gaussPic, srcLuma, blkDst, blk, clpRng, clippingValues, filterSetIdx, storeIdx);
+    m_gaussFiltering(cs, gaussPic, srcLuma, blkDst, blk, clpRng, clippingValues, filterSetIdx, storeIdx
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , true, m_classifierCodingInfo[0]
+#endif
+    );
   }
   else
   {
-    gaussFiltering(cs, gaussPic, srcLuma, blkDst, blk, clpRng, clippingValues, filterSetIdx, storeIdx);
+    gaussFiltering(cs, gaussPic, srcLuma, blkDst, blk, clpRng, clippingValues, filterSetIdx, storeIdx
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , true, m_classifierCodingInfo[0]
+#endif
+    );
   }
 }
 
-void AdaptiveLoopFilter::gaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const ClpRng &clpRng, const Pel clippingValues[4], int filterSetIdx, int storeIdx )
+void AdaptiveLoopFilter::gaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const ClpRng &clpRng, const Pel clippingValues[4], int filterSetIdx, int storeIdx
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  , bool applyCodingInfo, AlfClassifier** classifierCodingInfo
+#endif
+  )
 {
   int strideSrc = srcLuma.stride;
   int xPosSrc = blk.pos().x;
@@ -7798,6 +8127,13 @@ void AdaptiveLoopFilter::gaussFiltering(CodingStructure &cs, Pel ***gaussPic, co
   int shift = 10;
   const int numCoeff = 12;
   int diffTH = 32;
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool isIntraSlice = cs.slice->isIntra();
+  const bool isSpsAdjust  = cs.sps->getAlfLumaFixedFilterAdjust();
+  const bool useBounCondition = applyCodingInfo && !(!isSpsAdjust && isIntraSlice);
+  const bool useResiCondition = applyCodingInfo && (isSpsAdjust || !isSpsAdjust) && !isIntraSlice && false;
+  const int offsetClipValue = 1 << ( clpRng.bd - 1 );
+#endif
 
   int gaussTable[NUM_GAUSS_FILTERED_SOURCE][25] =
   {
@@ -7827,6 +8163,24 @@ void AdaptiveLoopFilter::gaussFiltering(CodingStructure &cs, Pel ***gaussPic, co
   {
     for (int j = 0; j < width; j++)
     {
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      int classIdxBs = 0;
+      if( useBounCondition )
+      {
+        //0: BS0 Resi0, 1: BS0 Resi1, 2: BS1 Resi0, 3: BS1 Resi1
+        // A >> 1 = BS
+        // A - ( A >> 1) * 2 = Resi
+        classIdxBs = classifierCodingInfo[blkDst.y + i][blkDst.x + j] >> 1;
+      }
+      int classIdxResi = 0;
+      if( useResiCondition )
+      {
+        //0: BS0 Resi0, 1: BS0 Resi1, 2: BS1 Resi0, 3: BS1 Resi1
+        // A >> 1 = BS
+        // A - ( A >> 1) * 2 = Resi
+        classIdxResi = classifierCodingInfo[blkDst.y + i][blkDst.x + j] - ((classifierCodingInfo[blkDst.y + i][blkDst.x + j] >> 1 ) * 2);
+      }
+#endif
       int dstPosY = blkDst.y + i + padSize;
       int dstPosX = blkDst.x + j + padSize;
 
@@ -7867,6 +8221,27 @@ void AdaptiveLoopFilter::gaussFiltering(CodingStructure &cs, Pel ***gaussPic, co
 
       sum += 1 << (shift - 1);
       sum >>= shift;
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      if( useBounCondition )
+      {
+        sum = Clip3(-offsetClipValue, +offsetClipValue, sum);
+
+        int sign = sum < 0 ? -1 : +1;
+
+        int boundaryStrengthFactor = isIntraSlice ? 4 + 2 : 3 + 2;
+
+        sum = classIdxBs ? sign * ((abs(sum) * (16 + boundaryStrengthFactor) + 8 ) >> 4) : sum;
+      }
+
+      if( useResiCondition )
+      {
+        sum = Clip3(-offsetClipValue, +offsetClipValue, sum);
+
+        int sign = sum < 0 ? -1 : +1;
+        int resiStrengthFactor = isIntraSlice ? 0 >> (!isSpsAdjust ? 1 : 0) : 3 >> (!isSpsAdjust ? 1 : 0);
+        sum = classIdxResi ? sign * ((abs(sum) * (16 + resiStrengthFactor) + 8 ) >> 4) : sum;
+      }
+#endif
 
       int diff = Clip3<int>(-diffTH, +diffTH, sum);
       sum = curr + diff;
@@ -7876,3 +8251,143 @@ void AdaptiveLoopFilter::gaussFiltering(CodingStructure &cs, Pel ***gaussPic, co
   }//height
 }
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+void AdaptiveLoopFilter::textureClassMapping(AlfClassifier **classifier, const Area& blk, int classifierIdx, int subBlkSize, AlfClassifier **classifierCodingInfo )
+{
+  CHECK(classifierIdx != 0, "Wrong Classifier Index for DBF-BS Mapping");
+  // Remaps, in place, the class index of every subBlkSize x subBlkSize unit of blk in classifier, using bsMappingTable and the coding-info value (>> 1) of that unit.
+  int bsMappingTable[2][25] =
+  {
+    //A (table rows, 5 -> 4 groups): (0)(12)(3)(4)
+    //D (table columns, 5 -> 3 groups): (0)(12)(34)
+    //4A x 3D = 12 output classes (0..11)
+    {
+      0, 1, 1, 2, 2,
+      3, 4, 4, 5, 5,
+      3, 4, 4, 5, 5,
+      6, 7, 7, 8, 8,
+      9, 10, 10, 11, 11,
+    },
+    //4A x 3D (currently identical to the first row set)
+    {
+      0, 1, 1, 2, 2,
+      3, 4, 4, 5, 5,
+      3, 4, 4, 5, 5,
+      6, 7, 7, 8, 8,
+      9, 10, 10, 11, 11,
+    },
+  };
+
+  int width  = blk.width;
+  int height = blk.height;
+  int posY   = blk.pos().y;
+  int posX   = blk.pos().x;
+
+  int bsUnit       = 0;
+  int transposeIdx = 0;
+  int classIdx     = 0;
+  int classIdxMap  = 0;
+  int bsClassOffet = 12; // added once per bsUnit, so units with bsUnit == 1 map to classes 12..23
+
+  for(int y = 0; y < height; y += subBlkSize)
+  {
+    for(int x = 0; x < width; x += subBlkSize)
+    {
+      bsUnit = classifierCodingInfo[posY + y][posX + x] >> 1;
+      // Only the unit's top-left sample is read; low two bits = transpose index, remaining bits = class index.
+      transposeIdx = classifier[posY + y][posX + x] & 0x3;
+      classIdx     = classifier[posY + y][posX + x] >> 2;
+      // NOTE(review): bsUnit indexes a 2-entry table and classIdx a 25-entry row; neither is range-checked here - confirm callers guarantee it.
+      classIdxMap = bsMappingTable[bsUnit][classIdx] + bsUnit * bsClassOffet;
+      // Re-pack: mapped class in the upper bits, original transpose index in the low two bits.
+      classIdxMap = (classIdxMap << 2) + transposeIdx;
+      // Write the packed value to every sample of the unit.
+      for(int ii = 0; ii < subBlkSize; ii++)
+      {
+        for(int jj = 0; jj < subBlkSize; jj++)
+        {
+          int curPosY = posY + y + ii;
+          int curPosX = posX + x + jj;
+          classifier[curPosY][curPosX] = classIdxMap;
+        }
+      }
+      //Unit is subBlkSize x subBlkSize (2x2 when subBlkSize == 2)
+    }
+  }
+}
+
+void AdaptiveLoopFilter::calcAlfLumaCodingInfoBlk( CodingStructure& cs, AlfClassifier** classifier, const Area &blkDst, const Area &blkSrc, const CPelBuf& srcLuma, int subBlkSize, int classifierIdx, int bitDepth, const CPelBuf& srcLumaResi, uint32_t **buffer, const CPelBuf& srcCodingInfo )
+{
+  // Writes a 2-bit code (coding-info flag * 2 + residual flag) into classifier for each subBlkSize x subBlkSize unit of blkDst; srcLuma and classifierIdx are not used in this body.
+  const bool isIntraSlice = cs.slice->isIntra();
+  const bool isSpsAdjust = cs.sps->getAlfLumaFixedFilterAdjust();
+  const bool calcResi = !isIntraSlice && (isSpsAdjust || !isSpsAdjust); // second operand is always true, so this equals !isIntraSlice
+
+  const Pel *srcResiPtr = srcLumaResi.buf;
+  int        srcResiStride = srcLumaResi.stride;
+  int        yOffset = blkSrc.pos().y * srcResiStride;
+  const Pel *srcResi0 = &srcResiPtr[yOffset];
+  int srcResiStride2 = srcResiStride * 2;
+  // Residual rows are addressed starting one sample above and one sample left of the blkSrc origin.
+  const Pel *srcResiUp = srcResi0 - 1 * srcResiStride + blkSrc.pos().x - 1;
+  // srcResiDn is the row directly below srcResiUp.
+  const Pel *srcResiDn = srcResiUp + srcResiStride;
+
+  if( calcResi )
+  {
+    //2x2 sum of absolute residuals; buffer[r][c] covers the 2x2 cell whose top-left is (2r - 1, 2c - 1) relative to the blkSrc origin
+    for (int i = 0; i < blkSrc.height + 1 * 2; i += 2)
+    {
+      for (int j = 0; j < blkSrc.width + 1 * 2; j += 2)
+      {
+        buffer[i >> 1][j >> 1] = abs(srcResiUp[j]) + abs(srcResiUp[j + 1]) + abs(srcResiDn[j]) + abs(srcResiDn[j + 1]);
+      }
+      srcResiUp += srcResiStride2;
+      srcResiDn += srcResiStride2;
+    }
+  }
+  // The coding-info plane is addressed from the blkSrc origin itself (no one-sample offset, unlike the residual plane).
+  const int srcCodingInfoStride = srcCodingInfo.stride;
+  const Pel* srcCodingInfoPtr = srcCodingInfo.buf + blkSrc.pos().y * srcCodingInfoStride + blkSrc.pos().x;
+  const Pel* srcCodingInfoPtr0 = srcCodingInfoPtr;
+  const Pel* srcCodingInfoPtr1 = srcCodingInfoPtr;
+
+  for (int i = 0; i < blkDst.height; i += subBlkSize)
+  {
+    for (int j = 0; j < blkDst.width; j += subBlkSize)
+    {
+      int classIdxResi = 0;
+      if( calcResi )
+      {
+        int i2 = i >> 1;
+        int j2 = j >> 1;
+        // For even i, j these four cells form a 4x4 residual window starting one sample up-left of the unit.
+        int sum = buffer[i2][j2] + buffer[i2][j2 + 1] + buffer[i2 + 1][j2] + buffer[i2 + 1][j2 + 1];
+        int shiftOffset = 6;
+        // NOTE(review): the shift count is bitDepth - 6, which assumes bitDepth >= 6 - confirm.
+        int avgResi = sum >> (bitDepth - shiftOffset);
+        int highResiTh = +4;
+        classIdxResi = avgResi > highResiTh ? 1 : 0;
+      }
+      // Coding-info flag: 1 when the sum of the 2x2 samples at (i, j) is > 0; the read is always 2x2, independent of subBlkSize.
+      int classIdxCodingInfo = 0;
+      srcCodingInfoPtr0 = srcCodingInfoPtr + i * srcCodingInfoStride + j;
+      srcCodingInfoPtr1 = srcCodingInfoPtr0 + srcCodingInfoStride;
+      classIdxCodingInfo = ( srcCodingInfoPtr0[+0] + srcCodingInfoPtr0[+1] + srcCodingInfoPtr1[+0] + srcCodingInfoPtr1[+1] ) > 0 ? 1 : 0;
+      // Store the same code for every sample of the unit, at blkDst coordinates.
+      for (int ii = blkDst.y + i; ii < blkDst.y + i + subBlkSize; ii++)
+      {
+        for (int jj = blkDst.x + j; jj < blkDst.x + j + subBlkSize; jj++)
+        {
+          //0: BS0 Resi0, 1: BS0 Resi1, 2: BS1 Resi0, 3: BS1 Resi1
+          // A >> 1 = BS
+          // A - ( A >> 1) * 2 = Resi
+          classifier[ii][jj] = classIdxCodingInfo * 2 + classIdxResi;
+        }
+      }
+
+    }
+  }
+
+}
+#endif
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.h b/source/Lib/CommonLib/AdaptiveLoopFilter.h
index 9808eb42a09f654bd07fcb88dbe22f97414cf661..d2dacb490aee9c719ad26b12472efe41bcd319eb 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.h
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.h
@@ -113,6 +113,9 @@ public:
 #if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT
   void copyResiData(CodingStructure &cs) { m_tempBufResi.bufs[COMPONENT_Y].copyFrom(cs.getResiBuf().bufs[COMPONENT_Y]); }
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  PelUnitBuf callCodingInfoBuf( CodingStructure &cs ) { return m_tempBufCodingInfo; }
+#endif
 
   static constexpr int AlfNumClippingValues[MAX_NUM_CHANNEL_TYPE] = { 4, 4 };
   static constexpr int MaxAlfNumClippingValues = 4;
@@ -160,7 +163,11 @@ public:
 #endif
                                  Pel ***fixedFilterResiResults, int picWidth, const int fixedFiltInd,
                                  const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize,
-                                 const ClpRng &clpRng, const Pel clippingValues[4]);
+                                 const ClpRng &clpRng, const Pel clippingValues[4]
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  , bool applyCodingInfo, CodingStructure &cs, AlfClassifier** classifierCodingInfo
+#endif
+  );
 #endif
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
@@ -174,7 +181,11 @@ public:
   void deriveFixedFilterResults( AlfClassifier*** classifier, const CPelBuf& srcLuma, const CPelBuf& srcLumaBeforeDb, const Area& blkDst, const Area& blk, CodingStructure &cs, int winIdx, int fixedFilterSetIdx );
   static void calcClass0Var( AlfClassifier **classifier, const Area &blkDst, const Area &cu, int dirWindSize, int classDir, int noDir, int noAct, int bitDepth, int subBlkSize, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS] );
   static void deriveVariance( const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, uint32_t ***laplacian );
-  void deriveFixedFilterResultsCtuBoundary( AlfClassifier ***classifier, Pel ***fixedFilterResults, const CPelBuf &srcLuma, const CPelBuf &srcLumaBeforeDb, const Area &blkDst, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS], uint8_t* ctuEnableFlagLuma, uint8_t* ctuEnableOnlineLuma, int ctuIdx, int classifierIdx );
+  void deriveFixedFilterResultsCtuBoundary( AlfClassifier ***classifier, Pel ***fixedFilterResults, const CPelBuf &srcLuma, const CPelBuf &srcLumaBeforeDb, const Area &blkDst, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS], uint8_t* ctuEnableFlagLuma, uint8_t* ctuEnableOnlineLuma, int ctuIdx, int classifierIdx
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , const CPelBuf& srcCodingInfo, const CPelBuf& srcResi
+#endif
+    );
   void deriveFixedFilterResultsPerBlk( AlfClassifier ***classifier, Pel ***fixedFilterResults, const CPelBuf &srcLuma, const CPelBuf &srcLumaBeforeDb, const Area &blkCur, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS], const int classifierIdx );
   void(*m_deriveVariance)(const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, uint32_t ***variance);
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
@@ -182,17 +193,37 @@ public:
   void deriveFixedFilterResultsPerBlkChroma(AlfClassifier ***classifier, Pel ***fixedFilterResults, const CPelBuf &src, const CPelBuf &srcBeforeDb, const Area &blk, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS]);
   void deriveFixFilterResultsBlkChroma( AlfClassifier ***classifier, Pel ***fixedFilterResults, const CPelBuf &src, const CPelBuf &srcBeforeDb, const Area &blkDst, const Area &blk, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS] );
   void deriveFixedFilterChroma(AlfClassifier*** classifier, const PelUnitBuf& src, const PelUnitBuf& srcBeforeDb, const Area& blkDst, const Area& blk, CodingStructure &cs, const int classifierIdx, ComponentID compID);
-  void alfFixedFilterBlkNonSimd(AlfClassifier **classifier, const CPelBuf &src, const Area &curBlk, const Area &blkDst, const CPelBuf &srcBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4], bool isLuma);
-  void alfFixedFilterBlk(AlfClassifier **classifier, const CPelBuf &src, const Area &curBlk, const Area &blkDst, const CPelBuf &srcBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4], bool isLuma);
+  void alfFixedFilterBlkNonSimd(AlfClassifier **classifier, const CPelBuf &src, const Area &curBlk, const Area &blkDst, const CPelBuf &srcBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4], bool isLuma
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , CodingStructure &cs
+#endif
+    );
+  void alfFixedFilterBlk(AlfClassifier **classifier, const CPelBuf &src, const Area &curBlk, const Area &blkDst, const CPelBuf &srcBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4], bool isLuma
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , CodingStructure &cs
+#endif
+    );
   template<AlfFixedFilterType filtType>
 #else
   void alfFixedFilterBlkNonSimd(AlfClassifier **classifier, const CPelBuf &srcLuma, const Area &curBlk, const Area &blkDst, const CPelBuf &srcLumaBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]);
   void alfFixedFilterBlk(AlfClassifier **classifier, const CPelBuf &srcLuma, const Area &curBlk, const Area &blkDst, const CPelBuf &srcLumaBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]);
   template<AlfFixedFilterType filtType>
 #endif
-  static void fixedFilterBlk(AlfClassifier **classifier, const CPelBuf &srcLuma, const Area &curBlk, const Area &blkDst, const CPelBuf &srcLumaBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]);
-  void(*m_fixFilter13x13Db9Blk)(AlfClassifier **classifier, const CPelBuf &srcLuma, const Area &curBlk, const Area &blkDst, const CPelBuf &srcLumaBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]);
-  void(*m_fixFilter9x9Db9Blk)(AlfClassifier **classifier, const CPelBuf &srcLuma, const Area &curBlk, const Area &blkDst, const CPelBuf &srcLumaBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]);
+  static void fixedFilterBlk(AlfClassifier **classifier, const CPelBuf &srcLuma, const Area &curBlk, const Area &blkDst, const CPelBuf &srcLumaBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , bool applyCodingInfo, CodingStructure &cs, AlfClassifier** classifierCodingInfo
+#endif
+    );
+  void(*m_fixFilter13x13Db9Blk)(AlfClassifier **classifier, const CPelBuf &srcLuma, const Area &curBlk, const Area &blkDst, const CPelBuf &srcLumaBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , bool applyCodingInfo, CodingStructure &cs, AlfClassifier** classifierCodingInfo
+#endif
+    );
+  void(*m_fixFilter9x9Db9Blk)(AlfClassifier **classifier, const CPelBuf &srcLuma, const Area &curBlk, const Area &blkDst, const CPelBuf &srcLumaBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , bool applyCodingInfo, CodingStructure &cs, AlfClassifier** classifierCodingInfo
+#endif
+    );
 #else
   void paddingFixedFilterResultsCtu(Pel*** fixedFilterResultsPic, Pel*** fixedFilterResultsCtu, const int fixedFilterSetIdx, const Area &blk);
   void deriveFixedFilterResultsCtuBoundary(AlfClassifier **classifier, Pel ***fixedFilterResults, const CPelBuf &srcLuma, const Area &blkDst, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS], uint8_t* ctuEnableFlagLuma, uint8_t* ctuEnableOnlineLuma, int ctuIdx);
@@ -206,8 +237,23 @@ public:
   void deriveGaussResultsBlk( Pel*** gaussPic, const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int filterSetIdx, const int storeIdx);
   void deriveGaussResults(const CPelBuf& srcLumaDb, const Area& blkDst, const Area& blk, CodingStructure &cs, const int filterSetIdx, const int storeIdx );
 
-  static void gaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const ClpRng &clpRng, const Pel clippingValues[4], int filterSetIdx, int storeIdx );
-  void(*m_gaussFiltering)   (CodingStructure &cs, Pel ***gaussPic, const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const ClpRng &clpRng, const Pel clippingValues[4], int filterSetIdx, int storeIdx );
+  static void gaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const ClpRng &clpRng, const Pel clippingValues[4], int filterSetIdx, int storeIdx
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , bool applyCodingInfo, AlfClassifier** classifierCodingInfo
+#endif
+    );
+  void(*m_gaussFiltering)   (CodingStructure &cs, Pel ***gaussPic, const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const ClpRng &clpRng, const Pel clippingValues[4], int filterSetIdx, int storeIdx
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , bool applyCodingInfo, AlfClassifier** classifierCodingInfo
+#endif
+    );
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  static void textureClassMapping(AlfClassifier **classifier, const Area& blk, int classifierIdx, int subBlkSize, AlfClassifier **classifierCodingInfo);
+  void( *m_textureClassMapping ) (AlfClassifier **classifier, const Area& blk, int classifierIdx, int subBlkSize, AlfClassifier **classifierCodingInfo);
+
+  static void calcAlfLumaCodingInfoBlk( CodingStructure& cs, AlfClassifier** classifier, const Area &blkDst, const Area &blkSrc, const CPelBuf& srcLuma, int subBlkSize, int classifierIdx, int bitDepth, const CPelBuf& srcLumaResi, uint32_t **buffer, const CPelBuf& srcCodingInfo );
+  void(  *m_calcAlfLumaCodingInfoBlk )( CodingStructure& cs, AlfClassifier** classifier, const Area &blkDst, const Area &blkSrc, const CPelBuf& srcLuma, int subBlkSize, int classifierIdx, int bitDepth, const CPelBuf& srcLumaResi, uint32_t **buffer, const CPelBuf& srcCodingInfo );
 #endif
 
   int assignAct(int avg_varPrec, int shift, int noAct);
@@ -230,6 +276,9 @@ public:
   static void calcClassNew( AlfClassifier **classifier, const Area &blkDst, const Area &cu, const CPelBuf& srcLuma, int subBlkSize, AlfClassifier **classifier0, int classifierIdx, int bitDepth
 #if JVET_AD0222_ALF_RESI_CLASS
     , const CPelBuf& srcResiLuma, uint32_t **buffer
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+   , AlfClassifier **classifierCodingInfo
 #endif
   );
 #else
@@ -292,7 +341,11 @@ public:
   void(*m_calcClass1)(AlfClassifier **classifier, const Area &blkDst, const Area &cu, int dirWindSize, int classDir, int noDir, int noAct, int bitDepth, int subBlkSize, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS]);
 #if JVET_X0071_ALF_BAND_CLASSIFIER
 #if JVET_AD0222_ALF_RESI_CLASS
-  void(*m_calcClass2)(AlfClassifier **classifier, const Area &blkDst, const Area &cu, const CPelBuf& srcLuma, int subBlkSize, AlfClassifier **classifier0, int classifierIdx, int bitDepth, const CPelBuf& srcLumaResi, uint32_t **buffer);
+  void(*m_calcClass2)(AlfClassifier **classifier, const Area &blkDst, const Area &cu, const CPelBuf& srcLuma, int subBlkSize, AlfClassifier **classifier0, int classifierIdx, int bitDepth, const CPelBuf& srcLumaResi, uint32_t **buffer
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , AlfClassifier **classifierCodingInfo
+#endif
+    );
 #else
   void(*m_calcClass2)(AlfClassifier **classifier, const Area &blkDst, const Area &cu, const CPelBuf& srcLuma, int subBlkSize, AlfClassifier **classifier0, int classifierIdx, int bitDepth);
 #endif
@@ -318,7 +371,11 @@ public:
 #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
     const Area &blkDst,
 #endif
-    Pel ***fixedFilterResiResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]);
+    Pel ***fixedFilterResiResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , bool applyCodingInfo, CodingStructure &cs, AlfClassifier** classifierCodingInfo
+#endif
+    );
 #else
   void (*m_filterResi13x13Blk)(AlfClassifier **classifier, const CPelBuf &srcResiLuma, const Area &curBlk,
 #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
@@ -783,6 +840,9 @@ protected:
 #else
   AlfClassifier**              m_classifier;
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  AlfClassifier**              m_classifierCodingInfo[1]; // one-element array of classifier tables; NOTE(review): presumably the table handed over as classifierCodingInfo - confirm
+#endif
 #if ALF_IMPROVEMENT
   int                          m_numLumaAltAps[ALF_CTB_MAX_NUM_APS];
   short                        m_coeffApsLuma[ALF_CTB_MAX_NUM_APS][MAX_NUM_ALF_ALTERNATIVES_LUMA][MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES];
@@ -861,6 +921,10 @@ protected:
 #if JVET_AI0166_CCALF_CHROMA_SAO_INPUT
   PelStorage                   m_tempBufSAO;
   PelStorage                   m_tempBufSAO2;
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  PelStorage                   m_tempBufCodingInfo;
+  PelStorage                   m_tempBufCodingInfo2;
 #endif
   int                          m_inputBitDepth[MAX_NUM_CHANNEL_TYPE];
   int                          m_picWidth;
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index a5a6c32fc81feba3cbfcafb07e19305a1a398b66..2eb2493d741c3cb8b6f8aaec6e689be212e07ca1 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -417,7 +417,11 @@ static const int MAX_CCSAO_BAND_NUM_U_BAND_BITS = 4;
 #endif
 
 static const int MAX_NUM_ALF_ALTERNATIVES_CHROMA =                  8;
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+static const int MAX_NUM_ALF_CLASSES         =                     24; // one class fewer than the 25 used when this macro is disabled
+#else
 static const int MAX_NUM_ALF_CLASSES         =                     25;
+#endif
 #if ALF_IMPROVEMENT
 static const int MAX_NUM_ALF_ALTERNATIVES_LUMA = 4;
 static const int EXT_LENGTH = 2;
@@ -544,8 +548,13 @@ static const int ALF_RESI_SHIFT_OFFSET       =                      4;
 static const int NUM_RESI_ABS_PAD            =                      8;
 static const int ALF_PADDING_SIZE_PRED       =                      3;
 static const int ALF_NUM_CLASSIFIER          =                      3;
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+static const int ALF_CLASSES_RESI            =                     24; // same count as MAX_NUM_ALF_CLASSES under this macro
+static const int ALF_CLASSES_NEW             =                     24; // same count as MAX_NUM_ALF_CLASSES under this macro
+#else
 static const int ALF_CLASSES_RESI            =                     25;
 static const int ALF_CLASSES_NEW             =                     25;
+#endif
 static const int ALF_NUM_CLASSES_CLASSIFIER[ALF_NUM_CLASSIFIER] = { MAX_NUM_ALF_CLASSES, ALF_CLASSES_NEW, ALF_CLASSES_RESI };
 #else
 static const int ALF_NUM_CLASSIFIER          =                      2;
diff --git a/source/Lib/CommonLib/LoopFilter.cpp b/source/Lib/CommonLib/LoopFilter.cpp
index a5c2be2e0d7590bf3a54564b77804397f0f3000e..cb24c5113b75d1420bdcbaf86f0ce8d46e16d8f9 100644
--- a/source/Lib/CommonLib/LoopFilter.cpp
+++ b/source/Lib/CommonLib/LoopFilter.cpp
@@ -151,6 +151,9 @@ void LoopFilter::destroy()
  \param  pcPic   picture class (Pic) pointer
  */
 void LoopFilter::loopFilterPic( CodingStructure& cs
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  , PelUnitBuf& alfCodingInfo, bool storeInfo // both are passed through unchanged to the xDeblockCU calls in this function
+#endif
                                 )
 {
   const PreCalcValues& pcv = *cs.pcv;
@@ -193,7 +196,11 @@ void LoopFilter::loopFilterPic( CodingStructure& cs
           continue;
         }
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        xDeblockCU( currCU, EDGE_VER, storeInfo, alfCodingInfo );
+#else
         xDeblockCU( currCU, EDGE_VER );
+#endif
       }
 
 
@@ -215,7 +222,11 @@ void LoopFilter::loopFilterPic( CodingStructure& cs
             continue;
           }
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+          xDeblockCU( currCU, EDGE_VER, storeInfo, alfCodingInfo );
+#else
           xDeblockCU( currCU, EDGE_VER );
+#endif
         }
       }
     }
@@ -245,7 +256,11 @@ void LoopFilter::loopFilterPic( CodingStructure& cs
           continue;
         }
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        xDeblockCU( currCU, EDGE_HOR, storeInfo, alfCodingInfo );
+#else
         xDeblockCU( currCU, EDGE_HOR );
+#endif
       }
 
 #if JVET_AI0136_ADAPTIVE_DUAL_TREE
@@ -266,7 +281,11 @@ void LoopFilter::loopFilterPic( CodingStructure& cs
             continue;
           }
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+          xDeblockCU( currCU, EDGE_HOR, storeInfo, alfCodingInfo );
+#else
           xDeblockCU( currCU, EDGE_HOR );
+#endif
         }
       }
     }
@@ -299,7 +318,11 @@ void LoopFilter::resetFilterLengths()
  \param cu               the CU to be deblocked
  \param edgeDir          the direction of the edge in block boundary (horizontal/vertical), which is added newly
 */
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir, bool storeInfo, PelUnitBuf& alfCodingInfo ) // storeInfo: also write luma edge flags into alfCodingInfo
+#else
 void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
+#endif
 {
   const PreCalcValues& pcv = *cu.cs->pcv;
   const Area area          = cu.Y().valid() ? cu.Y() : Area( recalcPosition( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].pos() ), recalcSize( cu.chromaFormat, cu.chType, CHANNEL_TYPE_LUMA, cu.blocks[cu.chType].size() ) );
@@ -425,6 +448,46 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
   }
 
   const unsigned uiPelsInPart = pcv.minCUWidth;
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  // Flags, in plane `comp` of bsBuf, the samples that touch the deblocking edge segment starting at `pos`.
+  // The band is (uiPelsInPart >> scale) samples of `comp` long and two samples wide: one on each side of
+  // the edge. A sample is raised to 1 when the boundary strength extracted for `comp` is non-zero; it is
+  // never lowered, so flags written for earlier edges survive. `pos` is divided by the component's
+  // subsampling factors before indexing.
+  // cu is captured by reference (only chromaFormat is read): the former by-value capture copied the whole
+  // CodingUnit on every xDeblockCU call. The unused `this` capture is dropped as well.
+  auto storeBoundaryInfo = [&cu, uiPelsInPart, edgeDir]( char bs, Position pos, ComponentID comp, PelUnitBuf& bsBuf) -> void
+  {
+    const int scaleX = getComponentScaleX( comp, cu.chromaFormat );
+    const int scaleY = getComponentScaleY( comp, cu.chromaFormat );
+
+    // Footprint in samples of `comp`: the long side runs along the edge, two samples lie across it.
+    const bool isHorEdge = ( edgeDir == EDGE_HOR );
+    const int  numCols   = isHorEdge ? int( uiPelsInPart >> scaleX ) : 2;
+    const int  numRows   = isHorEdge ? 2 : int( uiPelsInPart >> scaleY );
+
+    // Start one `comp` sample before the edge so that both sides of it are covered.
+    const Position posPQ = isHorEdge ? Position( pos.x, pos.y - ( 1 << scaleY ) )
+                                     : Position( pos.x - ( 1 << scaleX ), pos.y );
+
+    // Only whether the strength of this component is non-zero is stored, not the strength itself.
+    const auto bsComp = BsGet(bs, comp);
+    const Pel  toFill = bsComp > 0 ? 1 : 0;
+
+    // Address of the first flagged sample inside the component plane.
+    const int bsStride = bsBuf.get(comp).stride;
+    Pel*      bsPtr    = bsBuf.get(comp).buf + (posPQ.y >> scaleY) * bsStride + (posPQ.x >> scaleX);
+
+    for( int y = 0; y < numRows; y++ )
+    {
+      for( int x = 0; x < numCols; x++ )
+      {
+        bsPtr[x] = std::max( bsPtr[x], toFill );
+      }
+      bsPtr += bsStride;
+    }
+  };
+#endif
 
   for( int y = 0; y < area.height; y += uiPelsInPart )
   {
@@ -442,6 +505,12 @@ void LoopFilter::xDeblockCU( CodingUnit& cu, const DeblockEdgeDir edgeDir )
 #endif
         {
           bS |= xGetBoundaryStrengthSingle( cu, edgeDir, localPos, CHANNEL_TYPE_LUMA );
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+          if( cu.blocks[COMPONENT_Y].valid() && storeInfo ) // only on request, and only for CUs that carry a luma block
+          {
+            storeBoundaryInfo(bS, Position(area.x + x, area.y + y), COMPONENT_Y, alfCodingInfo); // position = area origin plus the loop offsets
+          }
+#endif
         }
 #if !INTRA_RM_SMALL_BLOCK_SIZE_CONSTRAINTS
         if(cu.treeType != TREE_L && cu.chromaFormat != CHROMA_400 && cu.blocks[COMPONENT_Cb].valid())
diff --git a/source/Lib/CommonLib/LoopFilter.h b/source/Lib/CommonLib/LoopFilter.h
index 0059914d42b3a59d22aafab0fbf6747f0e20e3d6..db56b9cea17a8343cf73863054b3f59ff6be63f1 100644
--- a/source/Lib/CommonLib/LoopFilter.h
+++ b/source/Lib/CommonLib/LoopFilter.h
@@ -129,7 +129,11 @@ public:
   ~LoopFilter();
 
   /// CU-level deblocking function
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  void xDeblockCU(CodingUnit& cu, const DeblockEdgeDir edgeDir, bool storeInfo, PelUnitBuf& alfCodingInfo); // storeInfo enables writing luma edge flags into alfCodingInfo (see the definition in LoopFilter.cpp)
+#else
   void xDeblockCU(CodingUnit& cu, const DeblockEdgeDir edgeDir);
+#endif
   void  initEncPicYuvBuffer(ChromaFormat chromaFormat, const Size &size, const unsigned maxCUSize);
   PelStorage& getDbEncPicYuvBuffer() { return m_encPicYuvBuffer; }
   void  setEnc(bool b) { m_enc = b; }
@@ -139,6 +143,9 @@ public:
 
   /// picture-level deblocking filter
   void loopFilterPic              ( CodingStructure& cs
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  , PelUnitBuf& alfCodingInfo, bool storeInfo // forwarded to xDeblockCU by the definition in LoopFilter.cpp
+#endif
                                     );
 
   static int getBeta              ( const int qp )
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index 4e132f78c0cf89d346a997752c0b1f695e89ea10..f523b5be70dd2b47289ac351bde04b2c0728083c 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -1775,6 +1775,9 @@ private:
 
   bool              m_alfEnabledFlag;
   bool              m_ccalfEnabledFlag;
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  bool              m_alfLumaFixedFilterAdjust; // NOTE(review): presumably mirrors the encoder option "AlfLumaFixedFilterAdjust" - confirm where it is initialised
+#endif
   bool              m_wrapAroundEnabledFlag;
   unsigned          m_IBCFlag;
 #if JVET_AD0208_IBC_ADAPT_FOR_CAM_CAPTURED_CONTENTS
@@ -2239,6 +2242,10 @@ public:
   void                    setALFEnabledFlag( bool b )                                                     { m_alfEnabledFlag = b; }
 bool                    getCCALFEnabledFlag() const                                                       { return m_ccalfEnabledFlag; }
 void                    setCCALFEnabledFlag( bool b )                                                     { m_ccalfEnabledFlag = b; }
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  bool                    getAlfLumaFixedFilterAdjust() const                                             { return m_alfLumaFixedFilterAdjust; } // returns the stored flag
+  void                    setAlfLumaFixedFilterAdjust( bool b )                                           { m_alfLumaFixedFilterAdjust = b; } // overwrites the stored flag
+#endif
   void                    setJointCbCrEnabledFlag(bool bVal)                                              { m_JointCbCrEnabledFlag = bVal; }
   bool                    getJointCbCrEnabledFlag() const                                                 { return m_JointCbCrEnabledFlag; }
 
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index c2c5e192c06ced35c0536cd3819d17e6a412e109..6a8daf3b5f335ec492032221c60387793009695f 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -482,6 +482,7 @@
 #define JVET_AI0084_ALF_RESIDUALS_SCALING                 1 // JVET_AI0084: non-fixed ALF residuals scaling
 #define JVET_AI0058_ALF_RELAXED_RDO_LUMA                  1 // JVET-AI0058: Relaxed ALF Luma RDO
 #define JVET_AI0166_CCALF_CHROMA_SAO_INPUT                1 // JVET-AI0166: CCALF with Chroma inputs
+#define JVET_AJ0188_CODING_INFO_CLASSIFICATION            1 // JVET-AJ0188: Coding Information based Classification for ALF
 
 // SIMD optimizations
 #if IF_12TAP
diff --git a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
index fd0efa2db34a06577a2e59f6067a90a99ca4216f..bd42c0c21426d14f05823d270766a21357508e72 100644
--- a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
+++ b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
@@ -1193,14 +1193,302 @@ static void simdFilter9x9Blk(AlfClassifier **classifier, const PelUnitBuf &recDs
   const Pel *src = srcBuffer.buf + blk.y * srcStride + blk.x;
   Pel *      dst = dstBuffer.buf + blkDst.y * dstStride + blkDst.x;
 
+#if !( USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION )
   const __m128i mmOffset = _mm_set1_epi32(round);
   const __m128i mmMin = _mm_set1_epi16(clpRng.min);
   const __m128i mmMax = _mm_set1_epi16(clpRng.max);
+#endif
 
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
   const int padSize = ALF_PADDING_SIZE_FIXED_RESULTS;
 #endif
 
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool use256BitSimd = vext >= AVX2 && blkDst.width % 16 == 0 ? true : false; // the 256-bit path loads/stores 16 samples at a time, hence the multiple-of-16 width requirement
+
+  if( use256BitSimd )
+  {
+    const __m256i mmOffset = _mm256_set1_epi32(round); // initial value of both 32-bit accumulators
+    const __m256i mmMin    = _mm256_set1_epi16(clpRng.min); // lower bound applied to the filtered samples
+    const __m256i mmMax    = _mm256_set1_epi16(clpRng.max); // upper bound applied to the filtered samples
+    for (size_t i = 0; i < height; i += stepY)
+    {
+      const AlfClassifier *pClass = isChroma(compId) ? nullptr : classifier[blkDst.y + i] + blkDst.x;
+      for (size_t j = 0; j < width; j += stepX * 2)
+      {
+#if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
+        __m256i params[2][2][13];
+#else
+        __m256i params[2][2][10];
+#endif
+        for (int k = 0; k < 2; k++)
+        {
+#if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
+          __m128i rawCoeffTmp[2][2][4], rawClipTmp[2][2][4], s0Tmp[2], s1Tmp[2], s2Tmp[2], s3Tmp[2];
+          __m256i rawCoeff[2][4], rawClip[2][4];
+#else
+          __m128i rawCoeffTmp[2][2][3], rawClipTmp[2][2][3], s0Tmp[2], s1Tmp[2], s2Tmp[2], s3Tmp[2];
+          __m256i rawCoeff[2][3], rawClip[2][3];
+#endif
+
+          for (int l = 0; l < 2; l++)
+          {
+            const int transposeIdx0 = pClass ? (pClass[j + 4 * k + 2 * l + 0] & 0x3) : 0;
+            const int classIdx0     = pClass ? (pClass[j + 4 * k + 2 * l + 0] >> 2) : 0;
+
+            rawCoeffTmp[0][l][0] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF));
+            rawCoeffTmp[0][l][1] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 8));
+#if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
+            rawCoeffTmp[0][l][2] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 16));
+            rawCoeffTmp[0][l][3] = _mm_loadl_epi64((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 24));
+#else
+            rawCoeffTmp[0][l][2] = _mm_loadl_epi64((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 16));
+#endif
+            rawClipTmp[0][l][0] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF));
+            rawClipTmp[0][l][1] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 8));
+#if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
+            rawClipTmp[0][l][2] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 16));
+            rawClipTmp[0][l][3] = _mm_loadl_epi64((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 24));
+#else
+            rawClipTmp[0][l][2] = _mm_loadl_epi64((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 16));
+#endif
+
+            for (int m = 0; m < shuffleTime9[transposeIdx0]; m++)
+            {
+              int op0 = shuffleOp9[transposeIdx0][m][0];
+              int op1 = shuffleOp9[transposeIdx0][m][1];
+
+              s0Tmp[0] = _mm_loadu_si128((const __m128i *) shuffleTab9[transposeIdx0][m][0]);
+              s1Tmp[0] = _mm_xor_si128(s0Tmp[0], _mm_set1_epi8((char) 0x80));
+              s2Tmp[0] = _mm_loadu_si128((const __m128i *) shuffleTab9[transposeIdx0][m][1]);
+              s3Tmp[0] = _mm_xor_si128(s2Tmp[0], _mm_set1_epi8((char) 0x80));
+
+              __m128i rawTmp0 = _mm_or_si128(_mm_shuffle_epi8(rawCoeffTmp[0][l][op0], s0Tmp[0]), _mm_shuffle_epi8(rawCoeffTmp[0][l][op1], s1Tmp[0]));
+              __m128i rawTmp1 = _mm_or_si128(_mm_shuffle_epi8(rawCoeffTmp[0][l][op0], s2Tmp[0]), _mm_shuffle_epi8(rawCoeffTmp[0][l][op1], s3Tmp[0]));
+              rawCoeffTmp[0][l][op0] = rawTmp0;
+              rawCoeffTmp[0][l][op1] = rawTmp1;
+
+              rawTmp0 = _mm_or_si128(_mm_shuffle_epi8(rawClipTmp[0][l][op0], s0Tmp[0]), _mm_shuffle_epi8(rawClipTmp[0][l][op1], s1Tmp[0]));
+              rawTmp1 = _mm_or_si128(_mm_shuffle_epi8(rawClipTmp[0][l][op0], s2Tmp[0]), _mm_shuffle_epi8(rawClipTmp[0][l][op1], s3Tmp[0]));
+              rawClipTmp[0][l][op0] = rawTmp0;
+              rawClipTmp[0][l][op1] = rawTmp1;
+            }
+
+            const int transposeIdx1 = pClass ? (pClass[j + 4 * k + 2 * l + 8] & 0x3) : 0;
+            const int classIdx1     = pClass ? (pClass[j + 4 * k + 2 * l + 8] >> 2) : 0;
+
+            rawCoeffTmp[1][l][0] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF));
+            rawCoeffTmp[1][l][1] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 8));
+#if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
+            rawCoeffTmp[1][l][2] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 16));
+            rawCoeffTmp[1][l][3] = _mm_loadl_epi64((const __m128i *) (filterSet + classIdx1* MAX_NUM_ALF_LUMA_COEFF + 24));
+#else
+            rawCoeffTmp[1][l][2] = _mm_loadl_epi64((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 16));
+#endif
+            rawClipTmp[1][l][0] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF));
+            rawClipTmp[1][l][1] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 8));
+#if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
+            rawClipTmp[1][l][2] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 16));
+            rawClipTmp[1][l][3] = _mm_loadl_epi64((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 24));
+#else
+            rawClipTmp[1][l][2] = _mm_loadl_epi64((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 16));
+#endif
+
+            for (int m = 0; m < shuffleTime9[transposeIdx1]; m++)
+            {
+              int op0 = shuffleOp9[transposeIdx1][m][0];
+              int op1 = shuffleOp9[transposeIdx1][m][1];
+
+              s0Tmp[1] = _mm_loadu_si128((const __m128i *) shuffleTab9[transposeIdx1][m][0]);
+              s1Tmp[1] = _mm_xor_si128(s0Tmp[1], _mm_set1_epi8((char) 0x80));
+              s2Tmp[1] = _mm_loadu_si128((const __m128i *) shuffleTab9[transposeIdx1][m][1]);
+              s3Tmp[1] = _mm_xor_si128(s2Tmp[1], _mm_set1_epi8((char) 0x80));
+
+              __m128i rawTmp0        = _mm_or_si128(_mm_shuffle_epi8(rawCoeffTmp[1][l][op0], s0Tmp[1]), _mm_shuffle_epi8(rawCoeffTmp[1][l][op1], s1Tmp[1]));
+              __m128i rawTmp1        = _mm_or_si128(_mm_shuffle_epi8(rawCoeffTmp[1][l][op0], s2Tmp[1]), _mm_shuffle_epi8(rawCoeffTmp[1][l][op1], s3Tmp[1]));
+              rawCoeffTmp[1][l][op0] = rawTmp0;
+              rawCoeffTmp[1][l][op1] = rawTmp1;
+
+              rawTmp0               = _mm_or_si128(_mm_shuffle_epi8(rawClipTmp[1][l][op0], s0Tmp[1]), _mm_shuffle_epi8(rawClipTmp[1][l][op1], s1Tmp[1]));
+              rawTmp1               = _mm_or_si128(_mm_shuffle_epi8(rawClipTmp[1][l][op0], s2Tmp[1]), _mm_shuffle_epi8(rawClipTmp[1][l][op1], s3Tmp[1]));
+              rawClipTmp[1][l][op0] = rawTmp0;
+              rawClipTmp[1][l][op1] = rawTmp1;
+            }
+
+            rawCoeff[l][0] = _mm256_castsi128_si256( rawCoeffTmp[0][l][0]);
+            rawCoeff[l][0] = _mm256_insertf128_si256(rawCoeff[l][0], rawCoeffTmp[1][l][0], 1);
+            rawCoeff[l][1] = _mm256_castsi128_si256(rawCoeffTmp[0][l][1]);
+            rawCoeff[l][1] = _mm256_insertf128_si256(rawCoeff[l][1], rawCoeffTmp[1][l][1], 1);
+            rawCoeff[l][2] = _mm256_castsi128_si256(rawCoeffTmp[0][l][2]);
+            rawCoeff[l][2] = _mm256_insertf128_si256(rawCoeff[l][2], rawCoeffTmp[1][l][2], 1);
+            rawCoeff[l][3] = _mm256_castsi128_si256(rawCoeffTmp[0][l][3]);
+            rawCoeff[l][3] = _mm256_insertf128_si256(rawCoeff[l][3], rawCoeffTmp[1][l][3], 1);
+
+            rawClip[l][0] = _mm256_castsi128_si256(rawClipTmp[0][l][0]);
+            rawClip[l][0] = _mm256_insertf128_si256(rawClip[l][0], rawClipTmp[1][l][0], 1);
+            rawClip[l][1] = _mm256_castsi128_si256(rawClipTmp[0][l][1]);
+            rawClip[l][1] = _mm256_insertf128_si256(rawClip[l][1], rawClipTmp[1][l][1], 1);
+            rawClip[l][2] = _mm256_castsi128_si256(rawClipTmp[0][l][2]);
+            rawClip[l][2] = _mm256_insertf128_si256(rawClip[l][2], rawClipTmp[1][l][2], 1);
+            rawClip[l][3] = _mm256_castsi128_si256(rawClipTmp[0][l][3]);
+            rawClip[l][3] = _mm256_insertf128_si256(rawClip[l][3], rawClipTmp[1][l][3], 1);
+          }
+
+          params[k][0][0] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][0], 0x00), _mm256_shuffle_epi32(rawCoeff[1][0], 0x00));
+          params[k][0][1] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][0], 0x55), _mm256_shuffle_epi32(rawCoeff[1][0], 0x55));
+          params[k][0][2] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][0], 0xaa), _mm256_shuffle_epi32(rawCoeff[1][0], 0xaa));
+          params[k][0][3] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][0], 0xff), _mm256_shuffle_epi32(rawCoeff[1][0], 0xff));
+          params[k][0][4] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][1], 0x00), _mm256_shuffle_epi32(rawCoeff[1][1], 0x00));
+          params[k][0][5] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][1], 0x55), _mm256_shuffle_epi32(rawCoeff[1][1], 0x55));
+          params[k][0][6] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][1], 0xaa), _mm256_shuffle_epi32(rawCoeff[1][1], 0xaa));
+          params[k][0][7] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][1], 0xff), _mm256_shuffle_epi32(rawCoeff[1][1], 0xff));
+          params[k][0][8] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][2], 0x00), _mm256_shuffle_epi32(rawCoeff[1][2], 0x00));
+          params[k][0][9] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][2], 0x55), _mm256_shuffle_epi32(rawCoeff[1][2], 0x55));
+#if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
+          params[k][0][10] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][2], 0xaa), _mm256_shuffle_epi32(rawCoeff[1][2], 0xaa));
+          params[k][0][11] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][2], 0xff), _mm256_shuffle_epi32(rawCoeff[1][2], 0xff));
+          params[k][0][12] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoeff[0][3], 0x00), _mm256_shuffle_epi32(rawCoeff[1][3], 0x00));
+#endif
+
+          params[k][1][0] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][0], 0x00), _mm256_shuffle_epi32(rawClip[1][0], 0x00));
+          params[k][1][1] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][0], 0x55), _mm256_shuffle_epi32(rawClip[1][0], 0x55));
+          params[k][1][2] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][0], 0xaa), _mm256_shuffle_epi32(rawClip[1][0], 0xaa));
+          params[k][1][3] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][0], 0xff), _mm256_shuffle_epi32(rawClip[1][0], 0xff));
+          params[k][1][4] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][1], 0x00), _mm256_shuffle_epi32(rawClip[1][1], 0x00));
+          params[k][1][5] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][1], 0x55), _mm256_shuffle_epi32(rawClip[1][1], 0x55));
+          params[k][1][6] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][1], 0xaa), _mm256_shuffle_epi32(rawClip[1][1], 0xaa));
+          params[k][1][7] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][1], 0xff), _mm256_shuffle_epi32(rawClip[1][1], 0xff));
+          params[k][1][8] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][2], 0x00), _mm256_shuffle_epi32(rawClip[1][2], 0x00));
+          params[k][1][9] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][2], 0x55), _mm256_shuffle_epi32(rawClip[1][2], 0x55));
+#if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
+          params[k][1][10] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][2], 0xaa), _mm256_shuffle_epi32(rawClip[1][2], 0xaa));
+          params[k][1][11] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][2], 0xff), _mm256_shuffle_epi32(rawClip[1][2], 0xff));
+          params[k][1][12] = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][3], 0x00), _mm256_shuffle_epi32(rawClip[1][3], 0x00));
+#endif
+        }
+
+        for (size_t ii = 0; ii < stepY; ii++)
+        {
+          const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6, *pImg7, *pImg8;
+
+          pImg0 = src + j + ii * srcStride;
+          pImg1 = pImg0 + srcStride;
+          pImg2 = pImg0 - srcStride;
+          pImg3 = pImg1 + srcStride;
+          pImg4 = pImg2 - srcStride;
+          pImg5 = pImg3 + srcStride;
+          pImg6 = pImg4 - srcStride;
+          pImg7 = pImg5 + srcStride;
+          pImg8 = pImg6 - srcStride;
+
+#if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
+          const Pel *pImg0FixedBased = fixedFilterResults[fixedFilterSetIdx][blkDst.y + i + ii + padSize + 0] + blkDst.x + j + padSize;
+          const Pel *pImg1FixedBased = fixedFilterResults[fixedFilterSetIdx][blkDst.y + i + ii + padSize + 1] + blkDst.x + j + padSize;
+          const Pel *pImg2FixedBased = fixedFilterResults[fixedFilterSetIdx][blkDst.y + i + ii + padSize - 1] + blkDst.x + j + padSize;
+          const Pel *pImg3FixedBased = fixedFilterResults[fixedFilterSetIdx][blkDst.y + i + ii + padSize + 2] + blkDst.x + j + padSize;
+          const Pel *pImg4FixedBased = fixedFilterResults[fixedFilterSetIdx][blkDst.y + i + ii + padSize - 2] + blkDst.x + j + padSize;
+#endif
+          __m256i cur    = _mm256_loadu_si256((const __m256i *) pImg0);
+          __m256i accumA = mmOffset;
+          __m256i accumB = mmOffset;
+
+          auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3) // adds the contribution of coefficient/clip pair i to accumA/accumB; this i shadows the outer row index i
+          {
+            const __m256i val00 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr0), cur); // differences of 16 neighbour samples to the centre samples in cur
+            const __m256i val10 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr2), cur);
+            const __m256i val01 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr1), cur);
+            const __m256i val11 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr3), cur);
+
+            __m256i val01A = _mm256_unpacklo_epi16(val00, val10); // interleave ptr0/ptr2 differences (16-bit pairs) as input for madd
+            __m256i val01B = _mm256_unpackhi_epi16(val00, val10);
+            __m256i val01C = _mm256_unpacklo_epi16(val01, val11); // same interleaving for ptr1/ptr3
+            __m256i val01D = _mm256_unpackhi_epi16(val01, val11);
+
+            __m256i limit01A = params[0][1][i]; // clip values prepared for k = 0
+            __m256i limit01B = params[1][1][i]; // clip values prepared for k = 1
+
+            val01A = _mm256_min_epi16(val01A, limit01A); // upper clip: min(diff, +limit)
+            val01B = _mm256_min_epi16(val01B, limit01B);
+            val01C = _mm256_min_epi16(val01C, limit01A);
+            val01D = _mm256_min_epi16(val01D, limit01B);
+
+            limit01A = _mm256_sub_epi16(_mm256_setzero_si256(), limit01A); // negate the limits
+            limit01B = _mm256_sub_epi16(_mm256_setzero_si256(), limit01B);
+
+            val01A = _mm256_max_epi16(val01A, limit01A); // lower clip: max(diff, -limit)
+            val01B = _mm256_max_epi16(val01B, limit01B);
+            val01C = _mm256_max_epi16(val01C, limit01A);
+            val01D = _mm256_max_epi16(val01D, limit01B);
+
+            val01A = _mm256_add_epi16(val01A, val01C); // ptr0+ptr1 and ptr2+ptr3 are summed before being weighted
+            val01B = _mm256_add_epi16(val01B, val01D);
+
+            const __m256i coeff01A = params[0][0][i]; // coefficients prepared for k = 0
+            const __m256i coeff01B = params[1][0][i]; // coefficients prepared for k = 1
+
+            accumA = _mm256_add_epi32(accumA, _mm256_madd_epi16(val01A, coeff01A)); // multiply 16-bit pairs, add adjacent products, accumulate in 32 bit
+            accumB = _mm256_add_epi32(accumB, _mm256_madd_epi16(val01B, coeff01B));
+          };
+
+          process2coeffs(0, pImg7 + 0, pImg8 + 0, pImg5 + 1, pImg6 - 1);
+          process2coeffs(1, pImg5 + 0, pImg6 + 0, pImg5 - 1, pImg6 + 1);
+          process2coeffs(2, pImg3 + 2, pImg4 - 2, pImg3 + 1, pImg4 - 1);
+          process2coeffs(3, pImg3 + 0, pImg4 + 0, pImg3 - 1, pImg4 + 1);
+          process2coeffs(4, pImg3 - 2, pImg4 + 2, pImg1 + 3, pImg2 - 3);
+          process2coeffs(5, pImg1 + 2, pImg2 - 2, pImg1 + 1, pImg2 - 1);
+          process2coeffs(6, pImg1 + 0, pImg2 + 0, pImg1 - 1, pImg2 + 1);
+          process2coeffs(7, pImg1 - 2, pImg2 + 2, pImg1 - 3, pImg2 + 3);
+          process2coeffs(8, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+          process2coeffs(9, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+
+#if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
+          process2coeffs(10, pImg3FixedBased + 0, pImg4FixedBased + 0, pImg1FixedBased + 0, pImg2FixedBased + 0);
+          process2coeffs(11, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
+
+          __m256i val00 = _mm256_sub_epi16( _mm256_loadu_si256((const __m256i *) (fixedFilterResults[fixedFilterSetIdx][blkDst.y + i + ii + padSize] + blkDst.x + j + padSize)), cur);
+          __m256i val10    = _mm256_setzero_si256();
+          __m256i val01A   = _mm256_unpacklo_epi16(val00, val10);
+          __m256i val01B   = _mm256_unpackhi_epi16(val00, val10);
+          __m256i limit01A = params[0][1][12];
+          __m256i limit01B = params[1][1][12];
+
+          val01A   = _mm256_min_epi16(val01A, limit01A);
+          val01B   = _mm256_min_epi16(val01B, limit01B);
+          limit01A = _mm256_sub_epi16(_mm256_setzero_si256(), limit01A);
+          limit01B = _mm256_sub_epi16(_mm256_setzero_si256(), limit01B);
+          val01A   = _mm256_max_epi16(val01A, limit01A);
+          val01B   = _mm256_max_epi16(val01B, limit01B);
+
+          __m256i coeff01A = params[0][0][12];
+          __m256i coeff01B = params[1][0][12];
+          accumA           = _mm256_add_epi32(accumA, _mm256_madd_epi16(val01A, coeff01A));
+          accumB           = _mm256_add_epi32(accumB, _mm256_madd_epi16(val01B, coeff01B));
+#endif
+
+          accumA = _mm256_srai_epi32(accumA, shift);
+          accumB = _mm256_srai_epi32(accumB, shift);
+
+          accumA = _mm256_packs_epi32(accumA, accumB);
+          accumA = _mm256_add_epi16(accumA, cur);
+          accumA = _mm256_min_epi16(mmMax, _mm256_max_epi16(accumA, mmMin));
+
+          _mm256_storeu_si256((__m256i *) (dst + ii * dstStride + j), accumA);
+
+        }   // for (size_t ii = 0; ii < stepY; ii++)
+      } // for (size_t j = 0; j < width; j += stepX)
+      src += srcStride * stepY;
+      dst += dstStride * stepY;
+    }
+  }
+  else
+ {
+
+   const __m128i mmOffset = _mm_set1_epi32(round);
+   const __m128i mmMin = _mm_set1_epi16(clpRng.min);
+   const __m128i mmMax = _mm_set1_epi16(clpRng.max);
+#endif
+
   for (size_t i = 0; i < height; i += stepY)
   {
     const AlfClassifier *pClass = isChroma(compId) ? nullptr : classifier[blkDst.y + i] + blkDst.x;
@@ -1413,6 +1701,9 @@ static void simdFilter9x9Blk(AlfClassifier **classifier, const PelUnitBuf &recDs
     src += srcStride * stepY;
     dst += dstStride * stepY;
   }
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  }//use 256 Bit Simd
+#endif
 }
 #endif
 
@@ -3473,7 +3764,9 @@ static void simdFilter13x13BlkExtDbResiDirect(
   const Pel currBase = 512;
   int round = 1 << (shift - 1);
 
+#if !( USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION )
   __m128i curBase = _mm_set_epi16( currBase, currBase, currBase, currBase, currBase, currBase, currBase, currBase );
+#endif
 #else
 #if JVET_AG0158_ALF_LUMA_COEFF_PRECISION
   int shift = coeffBits;
@@ -3491,9 +3784,11 @@ static void simdFilter13x13BlkExtDbResiDirect(
   constexpr size_t stepX = 8;
   size_t           stepY = 1;
 
+#if !( USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION )
   const __m128i mmOffset = _mm_set1_epi32(round);
   const __m128i mmMin    = _mm_set1_epi16(clpRng.min);
   const __m128i mmMax    = _mm_set1_epi16(clpRng.max);
+#endif
 
   static_assert(sizeof(*filterSet) == 2, "ALF coeffs must be 16-bit wide");
   static_assert(sizeof(*fClipSet) == 2, "ALF clip values must be 16-bit wide");
@@ -3509,600 +3804,1798 @@ static void simdFilter13x13BlkExtDbResiDirect(
   const int padSizeGauss = ALF_PADDING_SIZE_GAUSS_RESULTS;
 #endif
 
-  for (size_t i = 0; i < height; i += stepY)
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool use256BitSimd = vext >= AVX2 && blkDst.width % 16 == 0 ? true : false;
+
+  if( use256BitSimd )
   {
-    const AlfClassifier *pClass = classifier[blkDst.y + i] + blkDst.x;
-    for (size_t j = 0; j < width; j += stepX)
+    const __m256i mmOffset = _mm256_set1_epi32(round);
+    const __m256i mmMin    = _mm256_set1_epi16(clpRng.min);
+    const __m256i mmMax    = _mm256_set1_epi16(clpRng.max);
+#if JVET_AI0084_ALF_RESIDUALS_SCALING
+    const __m256i curBase  = _mm256_set1_epi16(currBase);
+#endif
+
+    for (size_t i = 0; i < height; i += stepY)
     {
+      const AlfClassifier *pClass = classifier[blkDst.y + i] + blkDst.x;
+      for (size_t j = 0; j < width; j += stepX * 2)
+      {
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
 #if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      __m128i params[2][2][20];
+        __m256i params[2][2][20];
 #elif JVET_AD0222_ALF_LONG_FIXFILTER
-      __m128i params[2][2][17];
+        __m256i params[2][2][17];
 #elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      __m128i params[2][2][19];
+        __m256i params[2][2][19];
 #else
-      __m128i params[2][2][16];
+        __m256i params[2][2][16];
 #endif
 #else
-      __m128i params[2][2][13];
+        __m256i params[2][2][13];
 #endif
-      for (int k = 0; k < 2; k++)
-      {
+        for (int k = 0; k < 2; k++)
+        {
 #if JVET_AD0222_ALF_LONG_FIXFILTER || JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-        __m128i rawCoef[4][5], rawClip[4][5], s0, s1, s2, s3, rawTmp0, rawTmp1;
+          __m256i rawCoef[4][5], rawClip[4][5], s0, s1;
+          __m128i rawCoefTmp[2][4][5], rawClipTmp[2][4][5], s0Tmp[2], s1Tmp[2], s2Tmp[2], s3Tmp[2];
 #else
-        __m128i rawCoef[4][4], rawClip[4][4], s0, s1, s2, s3, rawTmp0, rawTmp1;
+          __m256i rawCoef[4][4], rawClip[4][4], s0, s1;
+          __m128i rawCoefTmp[2][4][4], rawClipTmp[2][4][4], s0Tmp[2], s1Tmp[2], s2Tmp[2], s3Tmp[2];
 #endif
-        for (int l = 0; l < 4; l++)
-        {
-          const int transposeIdx = pClass[j + 4 * k + l] & 0x3;
-          const int classIdx     = pClass[j + 4 * k + l] >> 2;
+          for (int l = 0; l < 4; l++)
+          {
+            const int transposeIdx0 = pClass[j + 4 * k + l + 0] & 0x3;
+            const int classIdx0     = pClass[j + 4 * k + l + 0] >> 2;
 
-          rawCoef[l][0] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF));
-          rawCoef[l][1] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 8));
-          rawCoef[l][2] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 16));
-          rawCoef[l][3] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 24));
+            rawCoefTmp[0][l][0] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF));
+            rawCoefTmp[0][l][1] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 8));
+            rawCoefTmp[0][l][2] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 16));
+            rawCoefTmp[0][l][3] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 24));
 #if JVET_AD0222_ALF_LONG_FIXFILTER || JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-          rawCoef[l][4] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 32));
+            rawCoefTmp[0][l][4] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 32));
 #endif
-          rawClip[l][0] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF));
-          rawClip[l][1] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 8));
-          rawClip[l][2] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 16));
-          rawClip[l][3] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 24));
+            rawClipTmp[0][l][0] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF));
+            rawClipTmp[0][l][1] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 8));
+            rawClipTmp[0][l][2] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 16));
+            rawClipTmp[0][l][3] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 24));
 #if JVET_AD0222_ALF_LONG_FIXFILTER || JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-          rawClip[l][4] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 32));
+            rawClipTmp[0][l][4] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 32));
 #endif
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-          for (int m = 0; m < shuffleTime13FixedBasedLongLength[transposeIdx]; m++)
+            for (int m = 0; m < shuffleTime13FixedBasedLongLength[transposeIdx0]; m++)
 #else
-          for (int m = 0; m < shuffleTime13LongLength[transposeIdx]; m++)
+            for (int m = 0; m < shuffleTime13LongLength[transposeIdx0]; m++)
 #endif
-          {
+            {
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-            int op0 = shuffleOp13FixedBasedLongLength[transposeIdx][m][0];
-            int op1 = shuffleOp13FixedBasedLongLength[transposeIdx][m][1];
+              int op0 = shuffleOp13FixedBasedLongLength[transposeIdx0][m][0];
+              int op1 = shuffleOp13FixedBasedLongLength[transposeIdx0][m][1];
 #else
-            int op0 = shuffleOp13LongLength[transposeIdx][m][0];
-            int op1 = shuffleOp13LongLength[transposeIdx][m][1];
+              int op0 = shuffleOp13LongLength[transposeIdx0][m][0];
+              int op1 = shuffleOp13LongLength[transposeIdx0][m][1];
 #endif
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-            s0 = _mm_loadu_si128((const __m128i *) shuffleTab13FixedBasedLongLength[transposeIdx][m][0]);
-            s1 = _mm_xor_si128(s0, _mm_set1_epi8((char) 0x80));
-            s2 = _mm_loadu_si128((const __m128i *) shuffleTab13FixedBasedLongLength[transposeIdx][m][1]);
-            s3 = _mm_xor_si128(s2, _mm_set1_epi8((char) 0x80));
+              s0Tmp[0] = _mm_loadu_si128((const __m128i *) shuffleTab13FixedBasedLongLength[transposeIdx0][m][0]);
+              s1Tmp[0] = _mm_xor_si128(s0Tmp[0], _mm_set1_epi8((char) 0x80));
+              s2Tmp[0] = _mm_loadu_si128((const __m128i *) shuffleTab13FixedBasedLongLength[transposeIdx0][m][1]);
+              s3Tmp[0] = _mm_xor_si128(s2Tmp[0], _mm_set1_epi8((char) 0x80));
+#else
+              s0Tmp[0] = _mm_loadu_si128((const __m128i *) shuffleTab13LongLength[transposeIdx0][m][0]);
+              s1Tmp[0] = _mm_xor_si128(s0Tmp[0], _mm_set1_epi8((char) 0x80));
+              s2Tmp[0] = _mm_loadu_si128((const __m128i *) shuffleTab13LongLength[transposeIdx0][m][1]);
+              s3Tmp[0] = _mm_xor_si128(s2Tmp[0], _mm_set1_epi8((char) 0x80));
+#endif
+
+             __m128i rawTmp0 = _mm_or_si128(_mm_shuffle_epi8(rawCoefTmp[0][l][op0], s0Tmp[0]), _mm_shuffle_epi8(rawCoefTmp[0][l][op1], s1Tmp[0]));
+             __m128i rawTmp1 = _mm_or_si128(_mm_shuffle_epi8(rawCoefTmp[0][l][op0], s2Tmp[0]), _mm_shuffle_epi8(rawCoefTmp[0][l][op1], s3Tmp[0]));
+              rawCoefTmp[0][l][op0] = rawTmp0;
+              rawCoefTmp[0][l][op1] = rawTmp1;
+
+              rawTmp0 = _mm_or_si128(_mm_shuffle_epi8(rawClipTmp[0][l][op0], s0Tmp[0]), _mm_shuffle_epi8(rawClipTmp[0][l][op1], s1Tmp[0]));
+              rawTmp1 = _mm_or_si128(_mm_shuffle_epi8(rawClipTmp[0][l][op0], s2Tmp[0]), _mm_shuffle_epi8(rawClipTmp[0][l][op1], s3Tmp[0]));
+              rawClipTmp[0][l][op0] = rawTmp0;
+              rawClipTmp[0][l][op1] = rawTmp1;
+            }
+
+            const int transposeIdx1 = pClass[j + 4 * k + l + 8] & 0x3;
+            const int classIdx1     = pClass[j + 4 * k + l + 8] >> 2;
+
+            rawCoefTmp[1][l][0] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF));
+            rawCoefTmp[1][l][1] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 8));
+            rawCoefTmp[1][l][2] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 16));
+            rawCoefTmp[1][l][3] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 24));
+#if JVET_AD0222_ALF_LONG_FIXFILTER || JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+            rawCoefTmp[1][l][4] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 32));
+#endif
+            rawClipTmp[1][l][0] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF));
+            rawClipTmp[1][l][1] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 8));
+            rawClipTmp[1][l][2] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 16));
+            rawClipTmp[1][l][3] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 24));
+#if JVET_AD0222_ALF_LONG_FIXFILTER || JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+            rawClipTmp[1][l][4] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 32));
+#endif
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+            for (int m = 0; m < shuffleTime13FixedBasedLongLength[transposeIdx1]; m++)
 #else
-            s0 = _mm_loadu_si128((const __m128i *) shuffleTab13LongLength[transposeIdx][m][0]);
-            s1 = _mm_xor_si128(s0, _mm_set1_epi8((char) 0x80));
-            s2 = _mm_loadu_si128((const __m128i *) shuffleTab13LongLength[transposeIdx][m][1]);
-            s3 = _mm_xor_si128(s2, _mm_set1_epi8((char) 0x80));
+            for (int m = 0; m < shuffleTime13LongLength[transposeIdx1]; m++)
 #endif
+            {
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+              int op0 = shuffleOp13FixedBasedLongLength[transposeIdx1][m][0];
+              int op1 = shuffleOp13FixedBasedLongLength[transposeIdx1][m][1];
+#else
+              int op0 = shuffleOp13LongLength[transposeIdx1][m][0];
+              int op1 = shuffleOp13LongLength[transposeIdx1][m][1];
+#endif
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+              s0Tmp[1] = _mm_loadu_si128((const __m128i *) shuffleTab13FixedBasedLongLength[transposeIdx1][m][0]);
+              s1Tmp[1] = _mm_xor_si128(s0Tmp[1], _mm_set1_epi8((char) 0x80));
+              s2Tmp[1] = _mm_loadu_si128((const __m128i *) shuffleTab13FixedBasedLongLength[transposeIdx1][m][1]);
+              s3Tmp[1] = _mm_xor_si128(s2Tmp[1], _mm_set1_epi8((char) 0x80));
+#else
+              s0Tmp[1] = _mm_loadu_si128((const __m128i *) shuffleTab13LongLength[transposeIdx1][m][0]);
+              s1Tmp[1] = _mm_xor_si128(s0Tmp[1], _mm_set1_epi8((char) 0x80));
+              s2Tmp[1] = _mm_loadu_si128((const __m128i *) shuffleTab13LongLength[transposeIdx1][m][1]);
+              s3Tmp[1] = _mm_xor_si128(s2Tmp[1], _mm_set1_epi8((char) 0x80));
+#endif
+
+              __m128i rawTmp0       = _mm_or_si128(_mm_shuffle_epi8(rawCoefTmp[1][l][op0], s0Tmp[1]), _mm_shuffle_epi8(rawCoefTmp[1][l][op1], s1Tmp[1]));
+              __m128i rawTmp1       = _mm_or_si128(_mm_shuffle_epi8(rawCoefTmp[1][l][op0], s2Tmp[1]), _mm_shuffle_epi8(rawCoefTmp[1][l][op1], s3Tmp[1]));
+              rawCoefTmp[1][l][op0] = rawTmp0;
+              rawCoefTmp[1][l][op1] = rawTmp1;
+
+              rawTmp0               = _mm_or_si128(_mm_shuffle_epi8(rawClipTmp[1][l][op0], s0Tmp[1]), _mm_shuffle_epi8(rawClipTmp[1][l][op1], s1Tmp[1]));
+              rawTmp1               = _mm_or_si128(_mm_shuffle_epi8(rawClipTmp[1][l][op0], s2Tmp[1]), _mm_shuffle_epi8(rawClipTmp[1][l][op1], s3Tmp[1]));
+              rawClipTmp[1][l][op0] = rawTmp0;
+              rawClipTmp[1][l][op1] = rawTmp1;
+            }
 
-            rawTmp0 = _mm_or_si128(_mm_shuffle_epi8(rawCoef[l][op0], s0), _mm_shuffle_epi8(rawCoef[l][op1], s1));
-            rawTmp1 = _mm_or_si128(_mm_shuffle_epi8(rawCoef[l][op0], s2), _mm_shuffle_epi8(rawCoef[l][op1], s3));
-            rawCoef[l][op0] = rawTmp0;
-            rawCoef[l][op1] = rawTmp1;
-
-            rawTmp0 = _mm_or_si128(_mm_shuffle_epi8(rawClip[l][op0], s0), _mm_shuffle_epi8(rawClip[l][op1], s1));
-            rawTmp1 = _mm_or_si128(_mm_shuffle_epi8(rawClip[l][op0], s2), _mm_shuffle_epi8(rawClip[l][op1], s3));
-            rawClip[l][op0] = rawTmp0;
-            rawClip[l][op1] = rawTmp1;
-          }
-        }   // for l
+            rawCoef[l][0] = _mm256_castsi128_si256(rawCoefTmp[0][l][0]);
+            rawCoef[l][0] = _mm256_insertf128_si256(rawCoef[l][0], rawCoefTmp[1][l][0], 1);
+            rawCoef[l][1] = _mm256_castsi128_si256(rawCoefTmp[0][l][1]);
+            rawCoef[l][1] = _mm256_insertf128_si256(rawCoef[l][1], rawCoefTmp[1][l][1], 1);
+            rawCoef[l][2] = _mm256_castsi128_si256(rawCoefTmp[0][l][2]);
+            rawCoef[l][2] = _mm256_insertf128_si256(rawCoef[l][2], rawCoefTmp[1][l][2], 1);
+            rawCoef[l][3] = _mm256_castsi128_si256(rawCoefTmp[0][l][3]);
+            rawCoef[l][3] = _mm256_insertf128_si256(rawCoef[l][3], rawCoefTmp[1][l][3], 1);
+            rawCoef[l][4] = _mm256_castsi128_si256(rawCoefTmp[0][l][4]);
+            rawCoef[l][4] = _mm256_insertf128_si256(rawCoef[l][4], rawCoefTmp[1][l][4], 1);
+
+            rawClip[l][0] = _mm256_castsi128_si256(rawClipTmp[0][l][0]);
+            rawClip[l][0] = _mm256_insertf128_si256(rawClip[l][0], rawClipTmp[1][l][0], 1);
+            rawClip[l][1] = _mm256_castsi128_si256(rawClipTmp[0][l][1]);
+            rawClip[l][1] = _mm256_insertf128_si256(rawClip[l][1], rawClipTmp[1][l][1], 1);
+            rawClip[l][2] = _mm256_castsi128_si256(rawClipTmp[0][l][2]);
+            rawClip[l][2] = _mm256_insertf128_si256(rawClip[l][2], rawClipTmp[1][l][2], 1);
+            rawClip[l][3] = _mm256_castsi128_si256(rawClipTmp[0][l][3]);
+            rawClip[l][3] = _mm256_insertf128_si256(rawClip[l][3], rawClipTmp[1][l][3], 1);
+            rawClip[l][4] = _mm256_castsi128_si256(rawClipTmp[0][l][4]);
+            rawClip[l][4] = _mm256_insertf128_si256(rawClip[l][4], rawClipTmp[1][l][4], 1);
+          }   // for l
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-        int limR, lim0, lim1, lim2, lim3;
+          int limR, lim0, lim1, lim2, lim3;
 #if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-        limR = 5, lim0 = 5, lim1 = 5, lim2 = 5, lim3 = 5;
+          limR = 5, lim0 = 5, lim1 = 5, lim2 = 5, lim3 = 5;
 #elif JVET_AD0222_ALF_LONG_FIXFILTER
-        limR = 5, lim0 = 5, lim1 = 4, lim2 = 4, lim3 = 4;
+          limR = 5, lim0 = 5, lim1 = 4, lim2 = 4, lim3 = 4;
 #elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-        limR = 5, lim0 = 5, lim1 = 5, lim2 = 5, lim3 = 4;
+          limR = 5, lim0 = 5, lim1 = 5, lim2 = 5, lim3 = 4;
 #else
-        limR = 4, lim0 = 4, lim1 = 4, lim2 = 4, lim3 = 4;
+          limR = 4, lim0 = 4, lim1 = 4, lim2 = 4, lim3 = 4;
 #endif
-        for (unsigned char l = 0; l < limR; l++)
+          for (unsigned char l = 0; l < limR; l++)
 #else
-        for (unsigned char l = 0; l < 4; l++)
+          for (unsigned char l = 0; l < 4; l++)
 #endif
-        {
-          int m = l << 2;
-#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-          if (l < lim0)
           {
+            int m = l << 2;
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+            if (l < lim0)
+            {
 #endif
-          s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[0][l], 0x00), _mm_shuffle_epi32(rawCoef[1][l], 0x00));
-          s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[2][l], 0x00), _mm_shuffle_epi32(rawCoef[3][l], 0x00));
-          params[k][0][0 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
-          s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[0][l], 0x00), _mm_shuffle_epi32(rawClip[1][l], 0x00));
-          s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[2][l], 0x00), _mm_shuffle_epi32(rawClip[3][l], 0x00));
-          params[k][1][0 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[0][l], 0x00), _mm256_shuffle_epi32(rawCoef[1][l], 0x00));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[2][l], 0x00), _mm256_shuffle_epi32(rawCoef[3][l], 0x00));
+              params[k][0][0 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][l], 0x00), _mm256_shuffle_epi32(rawClip[1][l], 0x00));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[2][l], 0x00), _mm256_shuffle_epi32(rawClip[3][l], 0x00));
+              params[k][1][0 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-          }
-          if (l < lim1)
-          {
+            }
+            if (l < lim1)
+            {
 #endif
-            s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[0][l], 0x55), _mm_shuffle_epi32(rawCoef[1][l], 0x55));
-            s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[2][l], 0x55), _mm_shuffle_epi32(rawCoef[3][l], 0x55));
-            params[k][0][1 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
-            s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[0][l], 0x55), _mm_shuffle_epi32(rawClip[1][l], 0x55));
-            s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[2][l], 0x55), _mm_shuffle_epi32(rawClip[3][l], 0x55));
-            params[k][1][1 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[0][l], 0x55), _mm256_shuffle_epi32(rawCoef[1][l], 0x55));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[2][l], 0x55), _mm256_shuffle_epi32(rawCoef[3][l], 0x55));
+              params[k][0][1 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][l], 0x55), _mm256_shuffle_epi32(rawClip[1][l], 0x55));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[2][l], 0x55), _mm256_shuffle_epi32(rawClip[3][l], 0x55));
+              params[k][1][1 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-          }
-          if (l < lim2)
-          {
+            }
+            if (l < lim2)
+            {
 #endif
-            s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[0][l], 0xaa), _mm_shuffle_epi32(rawCoef[1][l], 0xaa));
-            s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[2][l], 0xaa), _mm_shuffle_epi32(rawCoef[3][l], 0xaa));
-            params[k][0][2 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
-            s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[0][l], 0xaa), _mm_shuffle_epi32(rawClip[1][l], 0xaa));
-            s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[2][l], 0xaa), _mm_shuffle_epi32(rawClip[3][l], 0xaa));
-            params[k][1][2 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[0][l], 0xaa), _mm256_shuffle_epi32(rawCoef[1][l], 0xaa));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[2][l], 0xaa), _mm256_shuffle_epi32(rawCoef[3][l], 0xaa));
+              params[k][0][2 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][l], 0xaa), _mm256_shuffle_epi32(rawClip[1][l], 0xaa));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[2][l], 0xaa), _mm256_shuffle_epi32(rawClip[3][l], 0xaa));
+              params[k][1][2 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-          }
-          if (l < lim3)
-          {
+            }
+            if (l < lim3)
+            {
 #endif
-            s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[0][l], 0xff), _mm_shuffle_epi32(rawCoef[1][l], 0xff));
-            s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[2][l], 0xff), _mm_shuffle_epi32(rawCoef[3][l], 0xff));
-            params[k][0][3 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
-            s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[0][l], 0xff), _mm_shuffle_epi32(rawClip[1][l], 0xff));
-            s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[2][l], 0xff), _mm_shuffle_epi32(rawClip[3][l], 0xff));
-            params[k][1][3 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[0][l], 0xff), _mm256_shuffle_epi32(rawCoef[1][l], 0xff));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[2][l], 0xff), _mm256_shuffle_epi32(rawCoef[3][l], 0xff));
+              params[k][0][3 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][l], 0xff), _mm256_shuffle_epi32(rawClip[1][l], 0xff));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[2][l], 0xff), _mm256_shuffle_epi32(rawClip[3][l], 0xff));
+              params[k][1][3 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-          }
+            }
 #endif
-        }   // for l
-      }     // for k
+          }   // for l
+        }     // for k
 
-      const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6, *pImg7, *pImg8;
+        const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6, *pImg7, *pImg8;
 #if !JVET_AD0222_ALF_LONG_FIXFILTER
-      const Pel *pImg9, *pImg10, *pImg11, *pImg12;
+        const Pel *pImg9, *pImg10, *pImg11, *pImg12;
 #endif
-      const Pel *pImgP0;
+        const Pel *pImgP0;
 
-      pImg0  = src + j;
-      pImg1  = pImg0 + srcStride;
-      pImg2  = pImg0 - srcStride;
-      pImg3  = pImg1 + srcStride;
-      pImg4  = pImg2 - srcStride;
-      pImg5  = pImg3 + srcStride;
-      pImg6  = pImg4 - srcStride;
-      pImg7  = pImg5 + srcStride;
-      pImg8  = pImg6 - srcStride;
-#if !JVET_AD0222_ALF_LONG_FIXFILTER
-      pImg9  = pImg7 + srcStride;
-      pImg10 = pImg8 - srcStride;
-      pImg11 = pImg9 + srcStride;
-      pImg12 = pImg10 - srcStride;
-#endif
-#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+        pImg0 = src + j;
+        pImg1 = pImg0 + srcStride;
+        pImg2 = pImg0 - srcStride;
+        pImg3 = pImg1 + srcStride;
+        pImg4 = pImg2 - srcStride;
+        pImg5 = pImg3 + srcStride;
+        pImg6 = pImg4 - srcStride;
+        pImg7 = pImg5 + srcStride;
+        pImg8 = pImg6 - srcStride;
+#if !JVET_AD0222_ALF_LONG_FIXFILTER
+        pImg9  = pImg7 + srcStride;
+        pImg10 = pImg8 - srcStride;
+        pImg11 = pImg9 + srcStride;
+        pImg12 = pImg10 - srcStride;
+#endif
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
-      int        filterSetIdx = 2 + fixedFilterSetIdx;
+        int filterSetIdx = 2 + fixedFilterSetIdx;
 #else
-      int        filterSetIdx = 0 + fixedFilterSetIdx;
+        int filterSetIdx = 0 + fixedFilterSetIdx;
 #endif
-      const Pel *pImg0FixedBased, *pImg1FixedBased, *pImg2FixedBased, *pImg3FixedBased, *pImg4FixedBased;
+        const Pel *pImg0FixedBased, *pImg1FixedBased, *pImg2FixedBased, *pImg3FixedBased, *pImg4FixedBased;
 #if JVET_AD0222_ALF_LONG_FIXFILTER
-      const Pel *pImg5FixedBased, *pImg6FixedBased, *pImg7FixedBased, *pImg8FixedBased, *pImg9FixedBased, *pImg10FixedBased, *pImg11FixedBased, *pImg12FixedBased;
+        const Pel *pImg5FixedBased, *pImg6FixedBased, *pImg7FixedBased, *pImg8FixedBased, *pImg9FixedBased,
+          *pImg10FixedBased, *pImg11FixedBased, *pImg12FixedBased;
 #endif
-      if (isFixedFilterPaddedPerCtu)
-      {
-        pImg0FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 0] + j + padSize;
-        pImg1FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 1] + j + padSize;
-        pImg2FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 1] + j + padSize;
-        pImg3FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 2] + j + padSize;
-        pImg4FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 2] + j + padSize;
+        if (isFixedFilterPaddedPerCtu)
+        {
+          pImg0FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 0] + j + padSize;
+          pImg1FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 1] + j + padSize;
+          pImg2FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 1] + j + padSize;
+          pImg3FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 2] + j + padSize;
+          pImg4FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 2] + j + padSize;
 #if JVET_AD0222_ALF_LONG_FIXFILTER
-        pImg5FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 3] + j + padSize;
-        pImg6FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 3] + j + padSize;
-        pImg7FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 4] + j + padSize;
-        pImg8FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 4] + j + padSize;
-        pImg9FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 5] + j + padSize;
-        pImg10FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 5] + j + padSize;
-        pImg11FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 6] + j + padSize;
-        pImg12FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 6] + j + padSize;
+          pImg5FixedBased  = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 3] + j + padSize;
+          pImg6FixedBased  = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 3] + j + padSize;
+          pImg7FixedBased  = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 4] + j + padSize;
+          pImg8FixedBased  = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 4] + j + padSize;
+          pImg9FixedBased  = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 5] + j + padSize;
+          pImg10FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 5] + j + padSize;
+          pImg11FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 6] + j + padSize;
+          pImg12FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 6] + j + padSize;
 #endif
-      }
-      else
-      {
-        pImg0FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 0] + blkDst.x + j + padSize;
-        pImg1FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 1] + blkDst.x + j + padSize;
-        pImg2FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 1] + blkDst.x + j + padSize;
-        pImg3FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 2] + blkDst.x + j + padSize;
-        pImg4FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 2] + blkDst.x + j + padSize;
+        }
+        else
+        {
+          pImg0FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 0] + blkDst.x + j + padSize;
+          pImg1FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 1] + blkDst.x + j + padSize;
+          pImg2FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 1] + blkDst.x + j + padSize;
+          pImg3FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 2] + blkDst.x + j + padSize;
+          pImg4FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 2] + blkDst.x + j + padSize;
 #if JVET_AD0222_ALF_LONG_FIXFILTER
-        pImg5FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 3] + blkDst.x + j + padSize;
-        pImg6FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 3] + blkDst.x + j + padSize;
-        pImg7FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 4] + blkDst.x + j + padSize;
-        pImg8FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 4] + blkDst.x + j + padSize;
-        pImg9FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 5] + blkDst.x + j + padSize;
-        pImg10FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 5] + blkDst.x + j + padSize;
-        pImg11FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 6] + blkDst.x + j + padSize;
-        pImg12FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 6] + blkDst.x + j + padSize;
+          pImg5FixedBased  = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 3] + blkDst.x + j + padSize;
+          pImg6FixedBased  = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 3] + blkDst.x + j + padSize;
+          pImg7FixedBased  = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 4] + blkDst.x + j + padSize;
+          pImg8FixedBased  = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 4] + blkDst.x + j + padSize;
+          pImg9FixedBased  = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 5] + blkDst.x + j + padSize;
+          pImg10FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 5] + blkDst.x + j + padSize;
+          pImg11FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 6] + blkDst.x + j + padSize;
+          pImg12FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 6] + blkDst.x + j + padSize;
 #endif
-      }
+        }
 #endif
 #if JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      const Pel *pImg0Gauss[NUM_GAUSS_FILTERED_SOURCE];
-      const Pel *pImg1Gauss[NUM_GAUSS_FILTERED_SOURCE], *pImg2Gauss[NUM_GAUSS_FILTERED_SOURCE];
-      const Pel *pImg3Gauss[NUM_GAUSS_FILTERED_SOURCE], *pImg4Gauss[NUM_GAUSS_FILTERED_SOURCE];
+        const Pel *pImg0Gauss[NUM_GAUSS_FILTERED_SOURCE];
+        const Pel *pImg1Gauss[NUM_GAUSS_FILTERED_SOURCE], *pImg2Gauss[NUM_GAUSS_FILTERED_SOURCE];
+        const Pel *pImg3Gauss[NUM_GAUSS_FILTERED_SOURCE], *pImg4Gauss[NUM_GAUSS_FILTERED_SOURCE];
 
-      for( int gaussIdx = 0; gaussIdx < NUM_GAUSS_FILTERED_SOURCE; gaussIdx++ )
-      {
-        if( isFixedFilterPaddedPerCtu )
-        {
-          pImg0Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss + 0] + j + padSizeGauss;
-          pImg1Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss + 1] + j + padSizeGauss;
-          pImg2Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss - 1] + j + padSizeGauss;
-          pImg3Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss + 2] + j + padSizeGauss;
-          pImg4Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss - 2] + j + padSizeGauss;
-        }
-        else
+        for (int gaussIdx = 0; gaussIdx < NUM_GAUSS_FILTERED_SOURCE; gaussIdx++)
         {
-          pImg0Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss + 0] + blkDst.x + j + padSizeGauss;
-          pImg1Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss + 1] + blkDst.x + j + padSizeGauss;
-          pImg2Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss - 1] + blkDst.x + j + padSizeGauss;
-          pImg3Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss + 2] + blkDst.x + j + padSizeGauss;
-          pImg4Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss - 2] + blkDst.x + j + padSizeGauss;
+          if (isFixedFilterPaddedPerCtu)
+          {
+            pImg0Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss + 0] + j + padSizeGauss;
+            pImg1Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss + 1] + j + padSizeGauss;
+            pImg2Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss - 1] + j + padSizeGauss;
+            pImg3Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss + 2] + j + padSizeGauss;
+            pImg4Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss - 2] + j + padSizeGauss;
+          }
+          else
+          {
+            pImg0Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss + 0] + blkDst.x + j + padSizeGauss;
+            pImg1Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss + 1] + blkDst.x + j + padSizeGauss;
+            pImg2Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss - 1] + blkDst.x + j + padSizeGauss;
+            pImg3Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss + 2] + blkDst.x + j + padSizeGauss;
+            pImg4Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss - 2] + blkDst.x + j + padSizeGauss;
+          }
         }
-      }
 #endif
-      __m128i cur    = _mm_loadu_si128((const __m128i *) pImg0);
-      __m128i accumA = mmOffset;
-      __m128i accumB = mmOffset;
+        __m256i cur    = _mm256_loadu_si256((const __m256i *) pImg0);
+        __m256i accumA = mmOffset;
+        __m256i accumB = mmOffset;
 
-      auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3)
-      {
-        const __m128i val00 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr0), cur);
-        const __m128i val10 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr2), cur);
-        const __m128i val01 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr1), cur);
-        const __m128i val11 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr3), cur);
+        auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3)
+        {
+          const __m256i val00 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr0), cur);
+          const __m256i val10 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr2), cur);
+          const __m256i val01 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr1), cur);
+          const __m256i val11 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr3), cur);
 
-        __m128i val01A = _mm_unpacklo_epi16(val00, val10);
-        __m128i val01B = _mm_unpackhi_epi16(val00, val10);
-        __m128i val01C = _mm_unpacklo_epi16(val01, val11);
-        __m128i val01D = _mm_unpackhi_epi16(val01, val11);
+          __m256i val01A = _mm256_unpacklo_epi16(val00, val10);
+          __m256i val01B = _mm256_unpackhi_epi16(val00, val10);
+          __m256i val01C = _mm256_unpacklo_epi16(val01, val11);
+          __m256i val01D = _mm256_unpackhi_epi16(val01, val11);
 
-        __m128i limit01A = params[0][1][i];
-        __m128i limit01B = params[1][1][i];
+          __m256i limit01A = params[0][1][i];
+          __m256i limit01B = params[1][1][i];
 
-        val01A = _mm_min_epi16(val01A, limit01A);
-        val01B = _mm_min_epi16(val01B, limit01B);
-        val01C = _mm_min_epi16(val01C, limit01A);
-        val01D = _mm_min_epi16(val01D, limit01B);
+          val01A = _mm256_min_epi16(val01A, limit01A);
+          val01B = _mm256_min_epi16(val01B, limit01B);
+          val01C = _mm256_min_epi16(val01C, limit01A);
+          val01D = _mm256_min_epi16(val01D, limit01B);
 
-        limit01A = _mm_sub_epi16(_mm_setzero_si128(), limit01A);
-        limit01B = _mm_sub_epi16(_mm_setzero_si128(), limit01B);
+          limit01A = _mm256_sub_epi16(_mm256_setzero_si256(), limit01A);
+          limit01B = _mm256_sub_epi16(_mm256_setzero_si256(), limit01B);
 
-        val01A = _mm_max_epi16(val01A, limit01A);
-        val01B = _mm_max_epi16(val01B, limit01B);
-        val01C = _mm_max_epi16(val01C, limit01A);
-        val01D = _mm_max_epi16(val01D, limit01B);
+          val01A = _mm256_max_epi16(val01A, limit01A);
+          val01B = _mm256_max_epi16(val01B, limit01B);
+          val01C = _mm256_max_epi16(val01C, limit01A);
+          val01D = _mm256_max_epi16(val01D, limit01B);
 
-        val01A = _mm_add_epi16(val01A, val01C);
-        val01B = _mm_add_epi16(val01B, val01D);
+          val01A = _mm256_add_epi16(val01A, val01C);
+          val01B = _mm256_add_epi16(val01B, val01D);
 
-        const __m128i coeff01A = params[0][0][i];
-        const __m128i coeff01B = params[1][0][i];
+          const __m256i coeff01A = params[0][0][i];
+          const __m256i coeff01B = params[1][0][i];
 
-        accumA = _mm_add_epi32(accumA, _mm_madd_epi16(val01A, coeff01A));
-        accumB = _mm_add_epi32(accumB, _mm_madd_epi16(val01B, coeff01B));
-      };
+          accumA = _mm256_add_epi32(accumA, _mm256_madd_epi16(val01A, coeff01A));
+          accumB = _mm256_add_epi32(accumB, _mm256_madd_epi16(val01B, coeff01B));
+        };
 #if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      process2coeffs(0, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
-      process2coeffs(1, pImg3 + 0, pImg4 - 0, pImg1 + 1, pImg2 - 1);
-      process2coeffs(2, pImg1 + 0, pImg2 - 0, pImg1 - 1, pImg2 + 1);
-      process2coeffs(3, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
-      process2coeffs(4, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
-      process2coeffs(5, pImg12FixedBased - 0, pImg11FixedBased + 0, pImg10FixedBased - 0, pImg9FixedBased + 0);
-      process2coeffs(6, pImg8FixedBased - 0, pImg7FixedBased + 0, pImg6FixedBased - 0, pImg5FixedBased + 0);
-      process2coeffs(7, pImg4FixedBased - 1, pImg3FixedBased + 1, pImg4FixedBased - 0, pImg3FixedBased + 0);
-      process2coeffs(8, pImg4FixedBased + 1, pImg3FixedBased - 1, pImg2FixedBased - 2, pImg1FixedBased + 2);
-      process2coeffs(9, pImg2FixedBased - 1, pImg1FixedBased + 1, pImg2FixedBased - 0, pImg1FixedBased + 0);
-      process2coeffs(10, pImg2FixedBased + 1, pImg1FixedBased - 1, pImg2FixedBased + 2, pImg1FixedBased - 2);
-      process2coeffs(11, pImg0FixedBased - 6, pImg0FixedBased + 6, pImg0FixedBased - 5, pImg0FixedBased + 5);
-      process2coeffs(12, pImg0FixedBased - 4, pImg0FixedBased + 4, pImg0FixedBased - 3, pImg0FixedBased + 3);
-      process2coeffs(13, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
-      process2coeffs(14, pImg3Gauss[0] - 0, pImg4Gauss[0] + 0, pImg1Gauss[0] - 0, pImg2Gauss[0] + 0);
-      process2coeffs(15, pImg0Gauss[0] - 2, pImg0Gauss[0] + 2, pImg0Gauss[0] - 1, pImg0Gauss[0] + 1);
+        process2coeffs(0, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
+        process2coeffs(1, pImg3 + 0, pImg4 - 0, pImg1 + 1, pImg2 - 1);
+        process2coeffs(2, pImg1 + 0, pImg2 - 0, pImg1 - 1, pImg2 + 1);
+        process2coeffs(3, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+        process2coeffs(4, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+        process2coeffs(5, pImg12FixedBased - 0, pImg11FixedBased + 0, pImg10FixedBased - 0, pImg9FixedBased + 0);
+        process2coeffs(6, pImg8FixedBased - 0, pImg7FixedBased + 0, pImg6FixedBased - 0, pImg5FixedBased + 0);
+        process2coeffs(7, pImg4FixedBased - 1, pImg3FixedBased + 1, pImg4FixedBased - 0, pImg3FixedBased + 0);
+        process2coeffs(8, pImg4FixedBased + 1, pImg3FixedBased - 1, pImg2FixedBased - 2, pImg1FixedBased + 2);
+        process2coeffs(9, pImg2FixedBased - 1, pImg1FixedBased + 1, pImg2FixedBased - 0, pImg1FixedBased + 0);
+        process2coeffs(10, pImg2FixedBased + 1, pImg1FixedBased - 1, pImg2FixedBased + 2, pImg1FixedBased - 2);
+        process2coeffs(11, pImg0FixedBased - 6, pImg0FixedBased + 6, pImg0FixedBased - 5, pImg0FixedBased + 5);
+        process2coeffs(12, pImg0FixedBased - 4, pImg0FixedBased + 4, pImg0FixedBased - 3, pImg0FixedBased + 3);
+        process2coeffs(13, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
+        process2coeffs(14, pImg3Gauss[0] - 0, pImg4Gauss[0] + 0, pImg1Gauss[0] - 0, pImg2Gauss[0] + 0);
+        process2coeffs(15, pImg0Gauss[0] - 2, pImg0Gauss[0] + 2, pImg0Gauss[0] - 1, pImg0Gauss[0] + 1);
 #elif JVET_AD0222_ALF_LONG_FIXFILTER
-      process2coeffs(0, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
-      process2coeffs(1, pImg3 + 0, pImg4 - 0, pImg1 + 1, pImg2 - 1);
-      process2coeffs(2, pImg1 + 0, pImg2 - 0, pImg1 - 1, pImg2 + 1);
-      process2coeffs(3, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
-      process2coeffs(4, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
-      process2coeffs(5, pImg12FixedBased - 0, pImg11FixedBased + 0, pImg10FixedBased - 0, pImg9FixedBased + 0);
-      process2coeffs(6, pImg8FixedBased - 0, pImg7FixedBased + 0, pImg6FixedBased - 0, pImg5FixedBased + 0);
-      process2coeffs(7, pImg4FixedBased - 1, pImg3FixedBased + 1, pImg4FixedBased - 0, pImg3FixedBased + 0);
-      process2coeffs(8, pImg4FixedBased + 1, pImg3FixedBased - 1, pImg2FixedBased - 2, pImg1FixedBased + 2);
-      process2coeffs(9, pImg2FixedBased - 1, pImg1FixedBased + 1, pImg2FixedBased - 0, pImg1FixedBased + 0);
-      process2coeffs(10, pImg2FixedBased + 1, pImg1FixedBased - 1, pImg2FixedBased + 2, pImg1FixedBased - 2);
-      process2coeffs(11, pImg0FixedBased - 6, pImg0FixedBased + 6, pImg0FixedBased - 5, pImg0FixedBased + 5);
-      process2coeffs(12, pImg0FixedBased - 4, pImg0FixedBased + 4, pImg0FixedBased - 3, pImg0FixedBased + 3);
-      process2coeffs(13, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
+        process2coeffs(0, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
+        process2coeffs(1, pImg3 + 0, pImg4 - 0, pImg1 + 1, pImg2 - 1);
+        process2coeffs(2, pImg1 + 0, pImg2 - 0, pImg1 - 1, pImg2 + 1);
+        process2coeffs(3, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+        process2coeffs(4, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+        process2coeffs(5, pImg12FixedBased - 0, pImg11FixedBased + 0, pImg10FixedBased - 0, pImg9FixedBased + 0);
+        process2coeffs(6, pImg8FixedBased - 0, pImg7FixedBased + 0, pImg6FixedBased - 0, pImg5FixedBased + 0);
+        process2coeffs(7, pImg4FixedBased - 1, pImg3FixedBased + 1, pImg4FixedBased - 0, pImg3FixedBased + 0);
+        process2coeffs(8, pImg4FixedBased + 1, pImg3FixedBased - 1, pImg2FixedBased - 2, pImg1FixedBased + 2);
+        process2coeffs(9, pImg2FixedBased - 1, pImg1FixedBased + 1, pImg2FixedBased - 0, pImg1FixedBased + 0);
+        process2coeffs(10, pImg2FixedBased + 1, pImg1FixedBased - 1, pImg2FixedBased + 2, pImg1FixedBased - 2);
+        process2coeffs(11, pImg0FixedBased - 6, pImg0FixedBased + 6, pImg0FixedBased - 5, pImg0FixedBased + 5);
+        process2coeffs(12, pImg0FixedBased - 4, pImg0FixedBased + 4, pImg0FixedBased - 3, pImg0FixedBased + 3);
+        process2coeffs(13, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
 #elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      process2coeffs(0, pImg11 + 0, pImg12 - 0, pImg9 + 0, pImg10 - 0);
-      process2coeffs(1, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
-      process2coeffs(2, pImg3 + 2, pImg4 - 2, pImg3 + 1, pImg4 - 1);
-      process2coeffs(3, pImg3 + 0, pImg4 - 0, pImg3 - 1, pImg4 + 1);
-      process2coeffs(4, pImg3 - 2, pImg4 + 2, pImg1 + 2, pImg2 - 2);
-      process2coeffs(5, pImg1 + 1, pImg2 - 1, pImg1 + 0, pImg2 - 0);
-      process2coeffs(6, pImg1 - 1, pImg2 + 1, pImg1 - 2, pImg2 + 2);
-      process2coeffs(7, pImg0 + 6, pImg0 - 6, pImg0 + 5, pImg0 - 5);
-      process2coeffs(8, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
-      process2coeffs(9, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
-      process2coeffs(10, pImg4FixedBased - 0, pImg3FixedBased + 0, pImg2FixedBased - 1, pImg1FixedBased + 1);
-      process2coeffs(11, pImg2FixedBased - 0, pImg1FixedBased + 0, pImg2FixedBased + 1, pImg1FixedBased - 1);
-      process2coeffs(12, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
-      process2coeffs(13, pImg3Gauss[0] - 0, pImg4Gauss[0] + 0, pImg1Gauss[0] - 0, pImg2Gauss[0] + 0);
-      process2coeffs(14, pImg0Gauss[0] - 2, pImg0Gauss[0] + 2, pImg0Gauss[0] - 1, pImg0Gauss[0] + 1);
-#else
-      process2coeffs(0, pImg11 + 0, pImg12 - 0, pImg9 + 0, pImg10 - 0);
-      process2coeffs(1, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
-      process2coeffs(2, pImg3 + 2, pImg4 - 2, pImg3 + 1, pImg4 - 1);
-      process2coeffs(3, pImg3 + 0, pImg4 - 0, pImg3 - 1, pImg4 + 1);
-      process2coeffs(4, pImg3 - 2, pImg4 + 2, pImg1 + 2, pImg2 - 2);
-      process2coeffs(5, pImg1 + 1, pImg2 - 1, pImg1 + 0, pImg2 - 0);
-      process2coeffs(6, pImg1 - 1, pImg2 + 1, pImg1 - 2, pImg2 + 2);
-      process2coeffs(7, pImg0 + 6, pImg0 - 6, pImg0 + 5, pImg0 - 5);
-      process2coeffs(8, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
-      process2coeffs(9, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+        process2coeffs(0, pImg11 + 0, pImg12 - 0, pImg9 + 0, pImg10 - 0);
+        process2coeffs(1, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
+        process2coeffs(2, pImg3 + 2, pImg4 - 2, pImg3 + 1, pImg4 - 1);
+        process2coeffs(3, pImg3 + 0, pImg4 - 0, pImg3 - 1, pImg4 + 1);
+        process2coeffs(4, pImg3 - 2, pImg4 + 2, pImg1 + 2, pImg2 - 2);
+        process2coeffs(5, pImg1 + 1, pImg2 - 1, pImg1 + 0, pImg2 - 0);
+        process2coeffs(6, pImg1 - 1, pImg2 + 1, pImg1 - 2, pImg2 + 2);
+        process2coeffs(7, pImg0 + 6, pImg0 - 6, pImg0 + 5, pImg0 - 5);
+        process2coeffs(8, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+        process2coeffs(9, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+        process2coeffs(10, pImg4FixedBased - 0, pImg3FixedBased + 0, pImg2FixedBased - 1, pImg1FixedBased + 1);
+        process2coeffs(11, pImg2FixedBased - 0, pImg1FixedBased + 0, pImg2FixedBased + 1, pImg1FixedBased - 1);
+        process2coeffs(12, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
+        process2coeffs(13, pImg3Gauss[0] - 0, pImg4Gauss[0] + 0, pImg1Gauss[0] - 0, pImg2Gauss[0] + 0);
+        process2coeffs(14, pImg0Gauss[0] - 2, pImg0Gauss[0] + 2, pImg0Gauss[0] - 1, pImg0Gauss[0] + 1);
+#else
+        process2coeffs(0, pImg11 + 0, pImg12 - 0, pImg9 + 0, pImg10 - 0);
+        process2coeffs(1, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
+        process2coeffs(2, pImg3 + 2, pImg4 - 2, pImg3 + 1, pImg4 - 1);
+        process2coeffs(3, pImg3 + 0, pImg4 - 0, pImg3 - 1, pImg4 + 1);
+        process2coeffs(4, pImg3 - 2, pImg4 + 2, pImg1 + 2, pImg2 - 2);
+        process2coeffs(5, pImg1 + 1, pImg2 - 1, pImg1 + 0, pImg2 - 0);
+        process2coeffs(6, pImg1 - 1, pImg2 + 1, pImg1 - 2, pImg2 + 2);
+        process2coeffs(7, pImg0 + 6, pImg0 - 6, pImg0 + 5, pImg0 - 5);
+        process2coeffs(8, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+        process2coeffs(9, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-      process2coeffs(10, pImg4FixedBased - 0, pImg3FixedBased + 0, pImg2FixedBased - 1, pImg1FixedBased + 1);
-      process2coeffs(11, pImg2FixedBased - 0, pImg1FixedBased + 0, pImg2FixedBased + 1, pImg1FixedBased - 1);
-      process2coeffs(12, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
+        process2coeffs(10, pImg4FixedBased - 0, pImg3FixedBased + 0, pImg2FixedBased - 1, pImg1FixedBased + 1);
+        process2coeffs(11, pImg2FixedBased - 0, pImg1FixedBased + 0, pImg2FixedBased + 1, pImg1FixedBased - 1);
+        process2coeffs(12, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
 #endif
 #endif
-      pImg0 = srcBeforeDb + j;
-      pImg1 = pImg0 + srcBeforeDbStride;
-      pImg2 = pImg0 - srcBeforeDbStride;
+        pImg0 = srcBeforeDb + j;
+        pImg1 = pImg0 + srcBeforeDbStride;
+        pImg2 = pImg0 - srcBeforeDbStride;
 
-      pImgP0 = srcResi + j;
+        pImgP0 = srcResi + j;
 
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
 #if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      process2coeffs(16, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
+        process2coeffs(16, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
 #elif JVET_AD0222_ALF_LONG_FIXFILTER
-      process2coeffs(14, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
+        process2coeffs(14, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
 #elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      process2coeffs(15, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
+        process2coeffs(15, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
 #else
-      process2coeffs(13, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
+        process2coeffs(13, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
 #endif
 #else
-      process2coeffs(10, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
+        process2coeffs(10, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
 #endif
 
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-      __m128i val00 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *)(fixedFilterResults[0 + fixedFilterSetIdx][blkDst.y + i + padSize] + blkDst.x + j + padSize)),
-        cur);
-      __m128i val10 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *)(fixedFilterResults[2 + fixedFilterSetIdx][blkDst.y + i + padSize] + blkDst.x + j + padSize)),
-        cur);
+        __m256i val00 = _mm256_sub_epi16( _mm256_loadu_si256((const __m256i *) (fixedFilterResults[0 + fixedFilterSetIdx][blkDst.y + i + padSize] + blkDst.x + j + padSize)),  cur);
+        __m256i val10 = _mm256_sub_epi16( _mm256_loadu_si256((const __m256i *) (fixedFilterResults[2 + fixedFilterSetIdx][blkDst.y + i + padSize] + blkDst.x + j + padSize)),  cur);
 #else
-      __m128i val00 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) pImg0), cur);
-      __m128i val10 = _mm_sub_epi16(
-        _mm_loadu_si128((const __m128i *) (fixedFilterResults[fixedFilterSetIdx][blkDst.y + i] + blkDst.x + j)), cur);
+        __m256i val00 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) pImg0), cur);
+        __m256i val10 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) (fixedFilterResults[fixedFilterSetIdx][blkDst.y + i] + blkDst.x + j)), cur);
 #endif
-      __m128i val01A = _mm_unpacklo_epi16(val00, val10);
-      __m128i val01B = _mm_unpackhi_epi16(val00, val10);
+        __m256i val01A = _mm256_unpacklo_epi16(val00, val10);
+        __m256i val01B = _mm256_unpackhi_epi16(val00, val10);
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
 #if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      __m128i limit01A = params[0][1][17];
-      __m128i limit01B = params[1][1][17];
+        __m256i limit01A = params[0][1][17];
+        __m256i limit01B = params[1][1][17];
 #elif JVET_AD0222_ALF_LONG_FIXFILTER
-      __m128i limit01A = params[0][1][15];
-      __m128i limit01B = params[1][1][15];
+        __m256i limit01A = params[0][1][15];
+        __m256i limit01B = params[1][1][15];
 #elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      __m128i limit01A = params[0][1][16];
-      __m128i limit01B = params[1][1][16];
+        __m256i limit01A = params[0][1][16];
+        __m256i limit01B = params[1][1][16];
 #else
-      __m128i limit01A = params[0][1][14];
-      __m128i limit01B = params[1][1][14];
+        __m256i limit01A = params[0][1][14];
+        __m256i limit01B = params[1][1][14];
 #endif
 #else
-      __m128i limit01A = params[0][1][11];
-      __m128i limit01B = params[1][1][11];
+        __m256i limit01A = params[0][1][11];
+        __m256i limit01B = params[1][1][11];
 #endif
-      val01A   = _mm_min_epi16(val01A, limit01A);
-      val01B   = _mm_min_epi16(val01B, limit01B);
-      limit01A = _mm_sub_epi16(_mm_setzero_si128(), limit01A);
-      limit01B = _mm_sub_epi16(_mm_setzero_si128(), limit01B);
-      val01A   = _mm_max_epi16(val01A, limit01A);
-      val01B   = _mm_max_epi16(val01B, limit01B);
+        val01A   = _mm256_min_epi16(val01A, limit01A);
+        val01B   = _mm256_min_epi16(val01B, limit01B);
+        limit01A = _mm256_sub_epi16(_mm256_setzero_si256(), limit01A);
+        limit01B = _mm256_sub_epi16(_mm256_setzero_si256(), limit01B);
+        val01A   = _mm256_max_epi16(val01A, limit01A);
+        val01B   = _mm256_max_epi16(val01B, limit01B);
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
 #if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      __m128i coeff01A = params[0][0][17];
-      __m128i coeff01B = params[1][0][17];
+        __m256i coeff01A = params[0][0][17];
+        __m256i coeff01B = params[1][0][17];
 #elif JVET_AD0222_ALF_LONG_FIXFILTER
-      __m128i coeff01A = params[0][0][15];
-      __m128i coeff01B = params[1][0][15];
+        __m256i coeff01A = params[0][0][15];
+        __m256i coeff01B = params[1][0][15];
 #elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      __m128i coeff01A = params[0][0][16];
-      __m128i coeff01B = params[1][0][16];
+        __m256i coeff01A = params[0][0][16];
+        __m256i coeff01B = params[1][0][16];
 #else
-      __m128i coeff01A = params[0][0][14];
-      __m128i coeff01B = params[1][0][14];
+        __m256i coeff01A = params[0][0][14];
+        __m256i coeff01B = params[1][0][14];
 #endif
 #else
-      __m128i coeff01A = params[0][0][11];
-      __m128i coeff01B = params[1][0][11];
+        __m256i coeff01A = params[0][0][11];
+        __m256i coeff01B = params[1][0][11];
 #endif
-      accumA = _mm_add_epi32(accumA, _mm_madd_epi16(val01A, coeff01A));
-      accumB = _mm_add_epi32(accumB, _mm_madd_epi16(val01B, coeff01B));
+        accumA = _mm256_add_epi32(accumA, _mm256_madd_epi16(val01A, coeff01A));
+        accumB = _mm256_add_epi32(accumB, _mm256_madd_epi16(val01B, coeff01B));
 
-      // start prediction fixed filter
-      __m128i zero = _mm_setzero_si128();
-      val00        = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) (pImg0)), cur);
-      val10        = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) pImgP0), zero);
-      val01A       = _mm_unpacklo_epi16(val00, val10);
-      val01B       = _mm_unpackhi_epi16(val00, val10);
+        // start prediction fixed filter
+        __m256i zero = _mm256_setzero_si256();
+        val00        = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) (pImg0)), cur);
+        val10        = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) pImgP0), zero);
+        val01A       = _mm256_unpacklo_epi16(val00, val10);
+        val01B       = _mm256_unpackhi_epi16(val00, val10);
 #if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      limit01A = params[0][1][18];
-      limit01B = params[1][1][18];
+        limit01A = params[0][1][18];
+        limit01B = params[1][1][18];
 #elif JVET_AD0222_ALF_LONG_FIXFILTER
-      limit01A = params[0][1][16];
-      limit01B = params[1][1][16];
+        limit01A = params[0][1][16];
+        limit01B = params[1][1][16];
 #elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      limit01A = params[0][1][17];
-      limit01B = params[1][1][17];
+        limit01A = params[0][1][17];
+        limit01B = params[1][1][17];
 #else
-      limit01A = params[0][1][15];
-      limit01B = params[1][1][15];
+        limit01A = params[0][1][15];
+        limit01B = params[1][1][15];
 #endif
 
-      val01A   = _mm_min_epi16(val01A, limit01A);
-      val01B   = _mm_min_epi16(val01B, limit01B);
-      limit01A = _mm_sub_epi16(_mm_setzero_si128(), limit01A);
+        val01A   = _mm256_min_epi16(val01A, limit01A);
+        val01B   = _mm256_min_epi16(val01B, limit01B);
+        limit01A = _mm256_sub_epi16(_mm256_setzero_si256(), limit01A);
+        limit01B = _mm256_sub_epi16(_mm256_setzero_si256(), limit01B);
+        val01A   = _mm256_max_epi16(val01A, limit01A);
+        val01B   = _mm256_max_epi16(val01B, limit01B);
+#if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+        coeff01A = params[0][0][18];
+        coeff01B = params[1][0][18];
+#elif JVET_AD0222_ALF_LONG_FIXFILTER
+        coeff01A = params[0][0][16];
+        coeff01B = params[1][0][16];
+#elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+        coeff01A = params[0][0][17];
+        coeff01B = params[1][0][17];
+#else
+        coeff01A = params[0][0][15];
+        coeff01B = params[1][0][15];
+#endif
+
+        accumA = _mm256_add_epi32(accumA, _mm256_madd_epi16(val01A, coeff01A));
+        accumB = _mm256_add_epi32(accumB, _mm256_madd_epi16(val01B, coeff01B));
+        // end prediction fixed filter
+#if JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+        val00  = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) (pImg0Gauss[0])), cur);
+        val10  = _mm256_setzero_si256();
+        val01A = _mm256_unpacklo_epi16(val00, val10);
+        val01B = _mm256_unpackhi_epi16(val00, val10);
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+        limit01A = params[0][1][19];
+        limit01B = params[1][1][19];
+#else
+        limit01A = params[0][1][18];
+        limit01B = params[1][1][18];
+#endif
+
+        val01A   = _mm256_min_epi16(val01A, limit01A);
+        val01B   = _mm256_min_epi16(val01B, limit01B);
+        limit01A = _mm256_sub_epi16(_mm256_setzero_si256(), limit01A);
+        limit01B = _mm256_sub_epi16(_mm256_setzero_si256(), limit01B);
+        val01A   = _mm256_max_epi16(val01A, limit01A);
+        val01B   = _mm256_max_epi16(val01B, limit01B);
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+        coeff01A = params[0][0][19];
+        coeff01B = params[1][0][19];
+#else
+        coeff01A = params[0][0][18];
+        coeff01B = params[1][0][18];
+#endif
+        accumA = _mm256_add_epi32(accumA, _mm256_madd_epi16(val01A, coeff01A));
+        accumB = _mm256_add_epi32(accumB, _mm256_madd_epi16(val01B, coeff01B));
+#endif
+        accumA = _mm256_srai_epi32(accumA, shift);
+        accumB = _mm256_srai_epi32(accumB, shift);
+
+        accumA = _mm256_packs_epi32(accumA, accumB);
+#if JVET_AI0084_ALF_RESIDUALS_SCALING
+        if( bScalingCorr )
+        {
+          accumA = _mm256_add_epi16(accumA, curBase);
+        }
+        else
+#endif
+        accumA = _mm256_add_epi16(accumA, cur);
+        accumA = _mm256_min_epi16(mmMax, _mm256_max_epi16(accumA, mmMin));
+
+        _mm256_storeu_si256((__m256i *) (dst + j), accumA);
+      }   // for j
+      src += srcStride * stepY;
+      dst += dstStride * stepY;
+      srcBeforeDb += srcBeforeDbStride * stepY;
+      srcResi += srcResiStride * stepY;
+    }   // for i
+  }
+  else
+  {
+
+  const __m128i mmOffset = _mm_set1_epi32(round);
+  const __m128i mmMin    = _mm_set1_epi16(clpRng.min);
+  const __m128i mmMax    = _mm_set1_epi16(clpRng.max);
+#if JVET_AI0084_ALF_RESIDUALS_SCALING
+  const __m128i curBase  = _mm_set1_epi16(currBase);
+#endif
+#endif//Use AVX2 SIMD
+  for (size_t i = 0; i < height; i += stepY)
+  {
+    const AlfClassifier *pClass = classifier[blkDst.y + i] + blkDst.x;
+    for (size_t j = 0; j < width; j += stepX)
+    {
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+#if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      __m128i params[2][2][20];
+#elif JVET_AD0222_ALF_LONG_FIXFILTER
+      __m128i params[2][2][17];
+#elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      __m128i params[2][2][19];
+#else
+      __m128i params[2][2][16];
+#endif
+#else
+      __m128i params[2][2][13];
+#endif
+      for (int k = 0; k < 2; k++)
+      {
+#if JVET_AD0222_ALF_LONG_FIXFILTER || JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+        __m128i rawCoef[4][5], rawClip[4][5], s0, s1, s2, s3, rawTmp0, rawTmp1;
+#else
+        __m128i rawCoef[4][4], rawClip[4][4], s0, s1, s2, s3, rawTmp0, rawTmp1;
+#endif
+        for (int l = 0; l < 4; l++)
+        {
+          const int transposeIdx = pClass[j + 4 * k + l] & 0x3;
+          const int classIdx     = pClass[j + 4 * k + l] >> 2;
+
+          rawCoef[l][0] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF));
+          rawCoef[l][1] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 8));
+          rawCoef[l][2] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 16));
+          rawCoef[l][3] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 24));
+#if JVET_AD0222_ALF_LONG_FIXFILTER || JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+          rawCoef[l][4] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 32));
+#endif
+          rawClip[l][0] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF));
+          rawClip[l][1] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 8));
+          rawClip[l][2] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 16));
+          rawClip[l][3] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 24));
+#if JVET_AD0222_ALF_LONG_FIXFILTER || JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+          rawClip[l][4] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx * MAX_NUM_ALF_LUMA_COEFF + 32));
+#endif
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+          for (int m = 0; m < shuffleTime13FixedBasedLongLength[transposeIdx]; m++)
+#else
+          for (int m = 0; m < shuffleTime13LongLength[transposeIdx]; m++)
+#endif
+          {
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+            int op0 = shuffleOp13FixedBasedLongLength[transposeIdx][m][0];
+            int op1 = shuffleOp13FixedBasedLongLength[transposeIdx][m][1];
+#else
+            int op0 = shuffleOp13LongLength[transposeIdx][m][0];
+            int op1 = shuffleOp13LongLength[transposeIdx][m][1];
+#endif
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+            s0 = _mm_loadu_si128((const __m128i *) shuffleTab13FixedBasedLongLength[transposeIdx][m][0]);
+            s1 = _mm_xor_si128(s0, _mm_set1_epi8((char) 0x80));
+            s2 = _mm_loadu_si128((const __m128i *) shuffleTab13FixedBasedLongLength[transposeIdx][m][1]);
+            s3 = _mm_xor_si128(s2, _mm_set1_epi8((char) 0x80));
+#else
+            s0 = _mm_loadu_si128((const __m128i *) shuffleTab13LongLength[transposeIdx][m][0]);
+            s1 = _mm_xor_si128(s0, _mm_set1_epi8((char) 0x80));
+            s2 = _mm_loadu_si128((const __m128i *) shuffleTab13LongLength[transposeIdx][m][1]);
+            s3 = _mm_xor_si128(s2, _mm_set1_epi8((char) 0x80));
+#endif
+
+            rawTmp0 = _mm_or_si128(_mm_shuffle_epi8(rawCoef[l][op0], s0), _mm_shuffle_epi8(rawCoef[l][op1], s1));
+            rawTmp1 = _mm_or_si128(_mm_shuffle_epi8(rawCoef[l][op0], s2), _mm_shuffle_epi8(rawCoef[l][op1], s3));
+            rawCoef[l][op0] = rawTmp0;
+            rawCoef[l][op1] = rawTmp1;
+
+            rawTmp0 = _mm_or_si128(_mm_shuffle_epi8(rawClip[l][op0], s0), _mm_shuffle_epi8(rawClip[l][op1], s1));
+            rawTmp1 = _mm_or_si128(_mm_shuffle_epi8(rawClip[l][op0], s2), _mm_shuffle_epi8(rawClip[l][op1], s3));
+            rawClip[l][op0] = rawTmp0;
+            rawClip[l][op1] = rawTmp1;
+          }
+        }   // for l
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+        int limR, lim0, lim1, lim2, lim3;
+#if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+        limR = 5, lim0 = 5, lim1 = 5, lim2 = 5, lim3 = 5;
+#elif JVET_AD0222_ALF_LONG_FIXFILTER
+        limR = 5, lim0 = 5, lim1 = 4, lim2 = 4, lim3 = 4;
+#elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+        limR = 5, lim0 = 5, lim1 = 5, lim2 = 5, lim3 = 4;
+#else
+        limR = 4, lim0 = 4, lim1 = 4, lim2 = 4, lim3 = 4;
+#endif
+        for (unsigned char l = 0; l < limR; l++)
+#else
+        for (unsigned char l = 0; l < 4; l++)
+#endif
+        {
+          int m = l << 2;
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+          if (l < lim0)
+          {
+#endif
+          s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[0][l], 0x00), _mm_shuffle_epi32(rawCoef[1][l], 0x00));
+          s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[2][l], 0x00), _mm_shuffle_epi32(rawCoef[3][l], 0x00));
+          params[k][0][0 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
+          s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[0][l], 0x00), _mm_shuffle_epi32(rawClip[1][l], 0x00));
+          s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[2][l], 0x00), _mm_shuffle_epi32(rawClip[3][l], 0x00));
+          params[k][1][0 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+          }
+          if (l < lim1)
+          {
+#endif
+            s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[0][l], 0x55), _mm_shuffle_epi32(rawCoef[1][l], 0x55));
+            s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[2][l], 0x55), _mm_shuffle_epi32(rawCoef[3][l], 0x55));
+            params[k][0][1 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
+            s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[0][l], 0x55), _mm_shuffle_epi32(rawClip[1][l], 0x55));
+            s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[2][l], 0x55), _mm_shuffle_epi32(rawClip[3][l], 0x55));
+            params[k][1][1 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+          }
+          if (l < lim2)
+          {
+#endif
+            s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[0][l], 0xaa), _mm_shuffle_epi32(rawCoef[1][l], 0xaa));
+            s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[2][l], 0xaa), _mm_shuffle_epi32(rawCoef[3][l], 0xaa));
+            params[k][0][2 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
+            s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[0][l], 0xaa), _mm_shuffle_epi32(rawClip[1][l], 0xaa));
+            s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[2][l], 0xaa), _mm_shuffle_epi32(rawClip[3][l], 0xaa));
+            params[k][1][2 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+          }
+          if (l < lim3)
+          {
+#endif
+            s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[0][l], 0xff), _mm_shuffle_epi32(rawCoef[1][l], 0xff));
+            s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawCoef[2][l], 0xff), _mm_shuffle_epi32(rawCoef[3][l], 0xff));
+            params[k][0][3 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
+            s0 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[0][l], 0xff), _mm_shuffle_epi32(rawClip[1][l], 0xff));
+            s1 = _mm_unpacklo_epi64(_mm_shuffle_epi32(rawClip[2][l], 0xff), _mm_shuffle_epi32(rawClip[3][l], 0xff));
+            params[k][1][3 + m] = _mm_blend_epi16(_mm_shuffle_epi32(s0, 0x88), _mm_shuffle_epi32(s1, 0x88), 0xf0);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+          }
+#endif
+        }   // for l
+      }     // for k
+
+      const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6, *pImg7, *pImg8;
+#if !JVET_AD0222_ALF_LONG_FIXFILTER
+      const Pel *pImg9, *pImg10, *pImg11, *pImg12;
+#endif
+      const Pel *pImgP0;
+
+      pImg0  = src + j;
+      pImg1  = pImg0 + srcStride;
+      pImg2  = pImg0 - srcStride;
+      pImg3  = pImg1 + srcStride;
+      pImg4  = pImg2 - srcStride;
+      pImg5  = pImg3 + srcStride;
+      pImg6  = pImg4 - srcStride;
+      pImg7  = pImg5 + srcStride;
+      pImg8  = pImg6 - srcStride;
+#if !JVET_AD0222_ALF_LONG_FIXFILTER
+      pImg9  = pImg7 + srcStride;
+      pImg10 = pImg8 - srcStride;
+      pImg11 = pImg9 + srcStride;
+      pImg12 = pImg10 - srcStride;
+#endif
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+#if JVET_AE0139_ALF_IMPROVED_FIXFILTER
+      int        filterSetIdx = 2 + fixedFilterSetIdx;
+#else
+      int        filterSetIdx = 0 + fixedFilterSetIdx;
+#endif
+      const Pel *pImg0FixedBased, *pImg1FixedBased, *pImg2FixedBased, *pImg3FixedBased, *pImg4FixedBased;
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+      const Pel *pImg5FixedBased, *pImg6FixedBased, *pImg7FixedBased, *pImg8FixedBased, *pImg9FixedBased, *pImg10FixedBased, *pImg11FixedBased, *pImg12FixedBased;
+#endif
+      if (isFixedFilterPaddedPerCtu)
+      {
+        pImg0FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 0] + j + padSize;
+        pImg1FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 1] + j + padSize;
+        pImg2FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 1] + j + padSize;
+        pImg3FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 2] + j + padSize;
+        pImg4FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 2] + j + padSize;
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+        pImg5FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 3] + j + padSize;
+        pImg6FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 3] + j + padSize;
+        pImg7FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 4] + j + padSize;
+        pImg8FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 4] + j + padSize;
+        pImg9FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 5] + j + padSize;
+        pImg10FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 5] + j + padSize;
+        pImg11FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 6] + j + padSize;
+        pImg12FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 6] + j + padSize;
+#endif
+      }
+      else
+      {
+        pImg0FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 0] + blkDst.x + j + padSize;
+        pImg1FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 1] + blkDst.x + j + padSize;
+        pImg2FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 1] + blkDst.x + j + padSize;
+        pImg3FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 2] + blkDst.x + j + padSize;
+        pImg4FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 2] + blkDst.x + j + padSize;
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+        pImg5FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 3] + blkDst.x + j + padSize;
+        pImg6FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 3] + blkDst.x + j + padSize;
+        pImg7FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 4] + blkDst.x + j + padSize;
+        pImg8FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 4] + blkDst.x + j + padSize;
+        pImg9FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 5] + blkDst.x + j + padSize;
+        pImg10FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 5] + blkDst.x + j + padSize;
+        pImg11FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 6] + blkDst.x + j + padSize;
+        pImg12FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 6] + blkDst.x + j + padSize;
+#endif
+      }
+#endif
+#if JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      const Pel *pImg0Gauss[NUM_GAUSS_FILTERED_SOURCE];
+      const Pel *pImg1Gauss[NUM_GAUSS_FILTERED_SOURCE], *pImg2Gauss[NUM_GAUSS_FILTERED_SOURCE];
+      const Pel *pImg3Gauss[NUM_GAUSS_FILTERED_SOURCE], *pImg4Gauss[NUM_GAUSS_FILTERED_SOURCE];
+
+      for( int gaussIdx = 0; gaussIdx < NUM_GAUSS_FILTERED_SOURCE; gaussIdx++ )
+      {
+        if( isFixedFilterPaddedPerCtu )
+        {
+          pImg0Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss + 0] + j + padSizeGauss;
+          pImg1Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss + 1] + j + padSizeGauss;
+          pImg2Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss - 1] + j + padSizeGauss;
+          pImg3Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss + 2] + j + padSizeGauss;
+          pImg4Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss - 2] + j + padSizeGauss;
+        }
+        else
+        {
+          pImg0Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss + 0] + blkDst.x + j + padSizeGauss;
+          pImg1Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss + 1] + blkDst.x + j + padSizeGauss;
+          pImg2Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss - 1] + blkDst.x + j + padSizeGauss;
+          pImg3Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss + 2] + blkDst.x + j + padSizeGauss;
+          pImg4Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss - 2] + blkDst.x + j + padSizeGauss;
+        }
+      }
+#endif
+      __m128i cur    = _mm_loadu_si128((const __m128i *) pImg0);
+      __m128i accumA = mmOffset;
+      __m128i accumB = mmOffset;
+
+      auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3)
+      {   // Adds two clipped, weighted tap pairs to accumA/accumB (captured by reference); i selects the params slot and shadows the enclosing row index i.
+        const __m128i val00 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr0), cur);   // ptr0 samples minus cur, eight 16-bit lanes
+        const __m128i val10 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr2), cur);   // ptr2 is taken here because it is interleaved with ptr0 below
+        const __m128i val01 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr1), cur);   // ptr1 ends up sharing limit and coefficient lanes with ptr0
+        const __m128i val11 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) ptr3), cur);   // ptr3 ends up sharing limit and coefficient lanes with ptr2
+        // Interleave the differences lane by lane: A/B = low/high four lanes of (ptr0, ptr2), C/D = low/high four lanes of (ptr1, ptr3).
+        __m128i val01A = _mm_unpacklo_epi16(val00, val10);
+        __m128i val01B = _mm_unpackhi_epi16(val00, val10);
+        __m128i val01C = _mm_unpacklo_epi16(val01, val11);
+        __m128i val01D = _mm_unpackhi_epi16(val01, val11);
+        // Clip limits are read from params[*][1]; index 0 is applied to the low-lane vectors (A, C), index 1 to the high-lane vectors (B, D).
+        __m128i limit01A = params[0][1][i];
+        __m128i limit01B = params[1][1][i];
+        // Upper bound: every difference is capped at its limit.
+        val01A = _mm_min_epi16(val01A, limit01A);
+        val01B = _mm_min_epi16(val01B, limit01B);
+        val01C = _mm_min_epi16(val01C, limit01A);
+        val01D = _mm_min_epi16(val01D, limit01B);
+        // Negate the limits (0 - limit) to obtain the lower bounds.
+        limit01A = _mm_sub_epi16(_mm_setzero_si128(), limit01A);
+        limit01B = _mm_sub_epi16(_mm_setzero_si128(), limit01B);
+        // Lower bound: combined with the min above this yields max(min(v, limit), -limit) per lane.
+        val01A = _mm_max_epi16(val01A, limit01A);
+        val01B = _mm_max_epi16(val01B, limit01B);
+        val01C = _mm_max_epi16(val01C, limit01A);
+        val01D = _mm_max_epi16(val01D, limit01B);
+        // Sum the clipped differences that share a coefficient lane: (ptr0 + ptr1) and (ptr2 + ptr3) for each sample position.
+        val01A = _mm_add_epi16(val01A, val01C);
+        val01B = _mm_add_epi16(val01B, val01D);
+        // Coefficients are read from params[*][0], using the same slot i as the limits.
+        const __m128i coeff01A = params[0][0][i];
+        const __m128i coeff01B = params[1][0][i];
+        // _mm_madd_epi16 multiplies 16-bit lanes and adds adjacent products, so each 32-bit lane receives both weighted pair sums of one sample position.
+        accumA = _mm_add_epi32(accumA, _mm_madd_epi16(val01A, coeff01A));
+        accumB = _mm_add_epi32(accumB, _mm_madd_epi16(val01B, coeff01B));
+      };
+#if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      process2coeffs(0, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
+      process2coeffs(1, pImg3 + 0, pImg4 - 0, pImg1 + 1, pImg2 - 1);
+      process2coeffs(2, pImg1 + 0, pImg2 - 0, pImg1 - 1, pImg2 + 1);
+      process2coeffs(3, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+      process2coeffs(4, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+      process2coeffs(5, pImg12FixedBased - 0, pImg11FixedBased + 0, pImg10FixedBased - 0, pImg9FixedBased + 0);
+      process2coeffs(6, pImg8FixedBased - 0, pImg7FixedBased + 0, pImg6FixedBased - 0, pImg5FixedBased + 0);
+      process2coeffs(7, pImg4FixedBased - 1, pImg3FixedBased + 1, pImg4FixedBased - 0, pImg3FixedBased + 0);
+      process2coeffs(8, pImg4FixedBased + 1, pImg3FixedBased - 1, pImg2FixedBased - 2, pImg1FixedBased + 2);
+      process2coeffs(9, pImg2FixedBased - 1, pImg1FixedBased + 1, pImg2FixedBased - 0, pImg1FixedBased + 0);
+      process2coeffs(10, pImg2FixedBased + 1, pImg1FixedBased - 1, pImg2FixedBased + 2, pImg1FixedBased - 2);
+      process2coeffs(11, pImg0FixedBased - 6, pImg0FixedBased + 6, pImg0FixedBased - 5, pImg0FixedBased + 5);
+      process2coeffs(12, pImg0FixedBased - 4, pImg0FixedBased + 4, pImg0FixedBased - 3, pImg0FixedBased + 3);
+      process2coeffs(13, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
+      process2coeffs(14, pImg3Gauss[0] - 0, pImg4Gauss[0] + 0, pImg1Gauss[0] - 0, pImg2Gauss[0] + 0);
+      process2coeffs(15, pImg0Gauss[0] - 2, pImg0Gauss[0] + 2, pImg0Gauss[0] - 1, pImg0Gauss[0] + 1);
+#elif JVET_AD0222_ALF_LONG_FIXFILTER
+      process2coeffs(0, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
+      process2coeffs(1, pImg3 + 0, pImg4 - 0, pImg1 + 1, pImg2 - 1);
+      process2coeffs(2, pImg1 + 0, pImg2 - 0, pImg1 - 1, pImg2 + 1);
+      process2coeffs(3, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+      process2coeffs(4, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+      process2coeffs(5, pImg12FixedBased - 0, pImg11FixedBased + 0, pImg10FixedBased - 0, pImg9FixedBased + 0);
+      process2coeffs(6, pImg8FixedBased - 0, pImg7FixedBased + 0, pImg6FixedBased - 0, pImg5FixedBased + 0);
+      process2coeffs(7, pImg4FixedBased - 1, pImg3FixedBased + 1, pImg4FixedBased - 0, pImg3FixedBased + 0);
+      process2coeffs(8, pImg4FixedBased + 1, pImg3FixedBased - 1, pImg2FixedBased - 2, pImg1FixedBased + 2);
+      process2coeffs(9, pImg2FixedBased - 1, pImg1FixedBased + 1, pImg2FixedBased - 0, pImg1FixedBased + 0);
+      process2coeffs(10, pImg2FixedBased + 1, pImg1FixedBased - 1, pImg2FixedBased + 2, pImg1FixedBased - 2);
+      process2coeffs(11, pImg0FixedBased - 6, pImg0FixedBased + 6, pImg0FixedBased - 5, pImg0FixedBased + 5);
+      process2coeffs(12, pImg0FixedBased - 4, pImg0FixedBased + 4, pImg0FixedBased - 3, pImg0FixedBased + 3);
+      process2coeffs(13, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
+#elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      process2coeffs(0, pImg11 + 0, pImg12 - 0, pImg9 + 0, pImg10 - 0);
+      process2coeffs(1, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
+      process2coeffs(2, pImg3 + 2, pImg4 - 2, pImg3 + 1, pImg4 - 1);
+      process2coeffs(3, pImg3 + 0, pImg4 - 0, pImg3 - 1, pImg4 + 1);
+      process2coeffs(4, pImg3 - 2, pImg4 + 2, pImg1 + 2, pImg2 - 2);
+      process2coeffs(5, pImg1 + 1, pImg2 - 1, pImg1 + 0, pImg2 - 0);
+      process2coeffs(6, pImg1 - 1, pImg2 + 1, pImg1 - 2, pImg2 + 2);
+      process2coeffs(7, pImg0 + 6, pImg0 - 6, pImg0 + 5, pImg0 - 5);
+      process2coeffs(8, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+      process2coeffs(9, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+      process2coeffs(10, pImg4FixedBased - 0, pImg3FixedBased + 0, pImg2FixedBased - 1, pImg1FixedBased + 1);
+      process2coeffs(11, pImg2FixedBased - 0, pImg1FixedBased + 0, pImg2FixedBased + 1, pImg1FixedBased - 1);
+      process2coeffs(12, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
+      process2coeffs(13, pImg3Gauss[0] - 0, pImg4Gauss[0] + 0, pImg1Gauss[0] - 0, pImg2Gauss[0] + 0);
+      process2coeffs(14, pImg0Gauss[0] - 2, pImg0Gauss[0] + 2, pImg0Gauss[0] - 1, pImg0Gauss[0] + 1);
+#else
+      process2coeffs(0, pImg11 + 0, pImg12 - 0, pImg9 + 0, pImg10 - 0);
+      process2coeffs(1, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
+      process2coeffs(2, pImg3 + 2, pImg4 - 2, pImg3 + 1, pImg4 - 1);
+      process2coeffs(3, pImg3 + 0, pImg4 - 0, pImg3 - 1, pImg4 + 1);
+      process2coeffs(4, pImg3 - 2, pImg4 + 2, pImg1 + 2, pImg2 - 2);
+      process2coeffs(5, pImg1 + 1, pImg2 - 1, pImg1 + 0, pImg2 - 0);
+      process2coeffs(6, pImg1 - 1, pImg2 + 1, pImg1 - 2, pImg2 + 2);
+      process2coeffs(7, pImg0 + 6, pImg0 - 6, pImg0 + 5, pImg0 - 5);
+      process2coeffs(8, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+      process2coeffs(9, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+      process2coeffs(10, pImg4FixedBased - 0, pImg3FixedBased + 0, pImg2FixedBased - 1, pImg1FixedBased + 1);
+      process2coeffs(11, pImg2FixedBased - 0, pImg1FixedBased + 0, pImg2FixedBased + 1, pImg1FixedBased - 1);
+      process2coeffs(12, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
+#endif
+#endif
+      pImg0 = srcBeforeDb + j;
+      pImg1 = pImg0 + srcBeforeDbStride;
+      pImg2 = pImg0 - srcBeforeDbStride;
+
+      pImgP0 = srcResi + j;
+
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+#if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      process2coeffs(16, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
+#elif JVET_AD0222_ALF_LONG_FIXFILTER
+      process2coeffs(14, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
+#elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      process2coeffs(15, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
+#else
+      process2coeffs(13, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
+#endif
+#else
+      process2coeffs(10, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
+#endif
+
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+      __m128i val00 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *)(fixedFilterResults[0 + fixedFilterSetIdx][blkDst.y + i + padSize] + blkDst.x + j + padSize)),
+        cur);
+      __m128i val10 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *)(fixedFilterResults[2 + fixedFilterSetIdx][blkDst.y + i + padSize] + blkDst.x + j + padSize)),
+        cur);
+#else
+      __m128i val00 = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) pImg0), cur);
+      __m128i val10 = _mm_sub_epi16(
+        _mm_loadu_si128((const __m128i *) (fixedFilterResults[fixedFilterSetIdx][blkDst.y + i] + blkDst.x + j)), cur);
+#endif
+      __m128i val01A = _mm_unpacklo_epi16(val00, val10);
+      __m128i val01B = _mm_unpackhi_epi16(val00, val10);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+#if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      __m128i limit01A = params[0][1][17];
+      __m128i limit01B = params[1][1][17];
+#elif JVET_AD0222_ALF_LONG_FIXFILTER
+      __m128i limit01A = params[0][1][15];
+      __m128i limit01B = params[1][1][15];
+#elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      __m128i limit01A = params[0][1][16];
+      __m128i limit01B = params[1][1][16];
+#else
+      __m128i limit01A = params[0][1][14];
+      __m128i limit01B = params[1][1][14];
+#endif
+#else
+      __m128i limit01A = params[0][1][11];
+      __m128i limit01B = params[1][1][11];
+#endif
+      val01A   = _mm_min_epi16(val01A, limit01A);
+      val01B   = _mm_min_epi16(val01B, limit01B);
+      limit01A = _mm_sub_epi16(_mm_setzero_si128(), limit01A);
+      limit01B = _mm_sub_epi16(_mm_setzero_si128(), limit01B);
+      val01A   = _mm_max_epi16(val01A, limit01A);
+      val01B   = _mm_max_epi16(val01B, limit01B);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+#if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      __m128i coeff01A = params[0][0][17];
+      __m128i coeff01B = params[1][0][17];
+#elif JVET_AD0222_ALF_LONG_FIXFILTER
+      __m128i coeff01A = params[0][0][15];
+      __m128i coeff01B = params[1][0][15];
+#elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      __m128i coeff01A = params[0][0][16];
+      __m128i coeff01B = params[1][0][16];
+#else
+      __m128i coeff01A = params[0][0][14];
+      __m128i coeff01B = params[1][0][14];
+#endif
+#else
+      __m128i coeff01A = params[0][0][11];
+      __m128i coeff01B = params[1][0][11];
+#endif
+      accumA = _mm_add_epi32(accumA, _mm_madd_epi16(val01A, coeff01A));
+      accumB = _mm_add_epi32(accumB, _mm_madd_epi16(val01B, coeff01B));
+
+      // start prediction fixed filter
+      __m128i zero = _mm_setzero_si128();
+      val00        = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) (pImg0)), cur);
+      val10        = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) pImgP0), zero);
+      val01A       = _mm_unpacklo_epi16(val00, val10);
+      val01B       = _mm_unpackhi_epi16(val00, val10);
+#if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      limit01A = params[0][1][18];
+      limit01B = params[1][1][18];
+#elif JVET_AD0222_ALF_LONG_FIXFILTER
+      limit01A = params[0][1][16];
+      limit01B = params[1][1][16];
+#elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      limit01A = params[0][1][17];
+      limit01B = params[1][1][17];
+#else
+      limit01A = params[0][1][15];
+      limit01B = params[1][1][15];
+#endif
+
+      val01A   = _mm_min_epi16(val01A, limit01A);
+      val01B   = _mm_min_epi16(val01B, limit01B);
+      limit01A = _mm_sub_epi16(_mm_setzero_si128(), limit01A);
+      limit01B = _mm_sub_epi16(_mm_setzero_si128(), limit01B);
+      val01A   = _mm_max_epi16(val01A, limit01A);
+      val01B   = _mm_max_epi16(val01B, limit01B);
+#if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      coeff01A = params[0][0][18];
+      coeff01B = params[1][0][18];
+#elif JVET_AD0222_ALF_LONG_FIXFILTER
+      coeff01A = params[0][0][16];
+      coeff01B = params[1][0][16];
+#elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      coeff01A = params[0][0][17];
+      coeff01B = params[1][0][17];
+#else
+      coeff01A = params[0][0][15];
+      coeff01B = params[1][0][15];
+#endif
+
+      accumA = _mm_add_epi32(accumA, _mm_madd_epi16(val01A, coeff01A));
+      accumB = _mm_add_epi32(accumB, _mm_madd_epi16(val01B, coeff01B));
+      // end prediction fixed filter
+#if JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+      val00    = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) (pImg0Gauss[0])), cur);
+      val10    = _mm_setzero_si128();
+      val01A   = _mm_unpacklo_epi16(val00, val10);
+      val01B   = _mm_unpackhi_epi16(val00, val10);
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+      limit01A = params[0][1][19];
+      limit01B = params[1][1][19];
+#else
+      limit01A = params[0][1][18];
+      limit01B = params[1][1][18];
+#endif
+
+      val01A   = _mm_min_epi16(val01A, limit01A);
+      val01B   = _mm_min_epi16(val01B, limit01B);
+      limit01A = _mm_sub_epi16(_mm_setzero_si128(), limit01A);
       limit01B = _mm_sub_epi16(_mm_setzero_si128(), limit01B);
       val01A   = _mm_max_epi16(val01A, limit01A);
       val01B   = _mm_max_epi16(val01B, limit01B);
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+      coeff01A = params[0][0][19];
+      coeff01B = params[1][0][19];
+#else
+      coeff01A = params[0][0][18];
+      coeff01B = params[1][0][18];
+#endif
+      accumA = _mm_add_epi32(accumA, _mm_madd_epi16(val01A, coeff01A));
+      accumB = _mm_add_epi32(accumB, _mm_madd_epi16(val01B, coeff01B));
+#endif
+      accumA = _mm_srai_epi32(accumA, shift);
+      accumB = _mm_srai_epi32(accumB, shift);
+
+      accumA = _mm_packs_epi32(accumA, accumB);
+#if JVET_AI0084_ALF_RESIDUALS_SCALING
+      if ( bScalingCorr )
+      {
+        accumA = _mm_add_epi16(accumA, curBase);
+      }
+      else
+#endif
+      accumA = _mm_add_epi16(accumA, cur);
+      accumA = _mm_min_epi16(mmMax, _mm_max_epi16(accumA, mmMin));
+
+      _mm_storeu_si128((__m128i *) (dst + j), accumA);
+    }   // for j
+    src += srcStride * stepY;
+    dst += dstStride * stepY;
+    srcBeforeDb += srcBeforeDbStride * stepY;
+    srcResi += srcResiStride * stepY;
+  }   // for i
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  }//Use 256 Bit Simd
+ #endif
+}
+
+template<X86_VEXT vext>
+static void simdFilter13x13BlkExtDbResi(
+  AlfClassifier * *classifier, const PelUnitBuf &recDst, const PelUnitBuf &recBeforeDb, const PelUnitBuf &resi,
+  const CPelUnitBuf &recSrc, const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const Pel *fClipSet
+#else
+  const short *fClipSet
+#endif
+  ,const ClpRng &clpRng, CodingStructure &cs, Pel ***fixedFilterResults, Pel ***fixedFilterResiResults, int fixedFilterSetIdx
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+  ,Pel ***fixedFilterResultsPerCtu, bool isFixedFilterPaddedPerCtu
+#endif
+#if JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+  , Pel ***gaussPic, Pel ***gaussCtu
+#endif
+#if JVET_AG0158_ALF_LUMA_COEFF_PRECISION
+  , char coeffBits
+#endif
+)
+{
+  const CPelBuf srcBuffer         = recSrc.get(compId);
+  PelBuf        dstBuffer         = recDst.get(compId);
+  const CPelBuf scrBufferBeforeDb = recBeforeDb.get(compId);
+  const CPelBuf scrBufferResi     = resi.get(compId);
+
+  const size_t srcStride         = srcBuffer.stride;
+  const size_t dstStride         = dstBuffer.stride;
+  const size_t srcBeforeDbStride = scrBufferBeforeDb.stride;
+  const size_t srcResiStride     = scrBufferResi.stride;
+#if JVET_AI0084_ALF_RESIDUALS_SCALING
+  int adjustShift = coeffBits - 1;
+  const bool  bScalingCorr = isLuma(compId) && fixedFilterSetIdx < 0;
+  if ( bScalingCorr )
+  {
+    fixedFilterSetIdx = -fixedFilterSetIdx - 1;
+    adjustShift -= shiftPrecis; // add more precision
+  }
+  const int shift = adjustShift;
+  const Pel currBase = 512;
+  int round = 1 << (shift - 1);
+
+#if !( USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION )
+  __m128i curBase = _mm_set_epi16( currBase, currBase, currBase, currBase, currBase, currBase, currBase, currBase );
+#endif
+#else
+#if JVET_AG0158_ALF_LUMA_COEFF_PRECISION
+  int shift = coeffBits;
+  shift -= 1;
+  int round = 1 << (shift - 1);
+#else
+  constexpr int shift = AdaptiveLoopFilter::m_NUM_BITS - 1;
+  constexpr int round = 1 << (shift - 1);
+#endif
+#endif
+
+  const size_t width  = blk.width;
+  const size_t height = blk.height;
+
+  constexpr size_t stepX = 8;
+  size_t           stepY = 1;
+
+#if !( USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION )
+  const __m128i mmOffset = _mm_set1_epi32(round);
+  const __m128i mmMin    = _mm_set1_epi16(clpRng.min);
+  const __m128i mmMax    = _mm_set1_epi16(clpRng.max);
+#endif
+
+  static_assert(sizeof(*filterSet) == 2, "ALF coeffs must be 16-bit wide");
+  static_assert(sizeof(*fClipSet) == 2, "ALF clip values must be 16-bit wide");
+
+  const Pel *src         = srcBuffer.buf + blk.y * srcStride + blk.x;
+  Pel       *dst         = dstBuffer.buf + blkDst.y * dstStride + blkDst.x;
+  const Pel *srcBeforeDb = scrBufferBeforeDb.buf + blk.y * srcBeforeDbStride + blk.x;
+  const Pel *srcResi     = scrBufferResi.buf + blk.y * srcResiStride + blk.x;
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+  const int padSize = ALF_PADDING_SIZE_FIXED_RESULTS;
+#endif
+#if JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+  const int padSizeGauss = ALF_PADDING_SIZE_GAUSS_RESULTS;
+#endif
+
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool use256BitSimd = vext >= AVX2 && blkDst.width % 16 == 0 ? true : false;
+
+  if( use256BitSimd )
+  {
+    const __m256i mmOffset = _mm256_set1_epi32(round);
+    const __m256i mmMin    = _mm256_set1_epi16(clpRng.min);
+    const __m256i mmMax    = _mm256_set1_epi16(clpRng.max);
+#if JVET_AI0084_ALF_RESIDUALS_SCALING
+    const __m256i curBase  = _mm256_set1_epi16(currBase);
+#endif
+    for (size_t i = 0; i < height; i += stepY)
+    {
+      const AlfClassifier *pClass = classifier[blkDst.y + i] + blkDst.x;
+      for (size_t j = 0; j < width; j += stepX * 2)
+      {
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+        __m256i params[2][2][18];
+#else
+        __m256i params[2][2][17];
+#endif
+#else
+        __m256i params[2][2][13];
+#endif
+        for (int k = 0; k < 2; k++)
+        {
+          __m256i rawCoef[4][5], rawClip[4][5], s0, s1;
+          __m128i rawCoefTmp[2][4][5], rawClipTmp[2][4][5], s0Tmp[2], s1Tmp[2], s2Tmp[2], s3Tmp[2];
+          for (int l = 0; l < 4; l++)
+          {
+            const int transposeIdx0 = pClass[j + 4 * k + l + 0] & 0x3;
+            const int classIdx0     = pClass[j + 4 * k + l + 0] >> 2;
+
+            rawCoefTmp[0][l][0] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF));
+            rawCoefTmp[0][l][1] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 8));
+            rawCoefTmp[0][l][2] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 16));
+            rawCoefTmp[0][l][3] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 24));
+            rawCoefTmp[0][l][4] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 32));
+
+            rawClipTmp[0][l][0] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF));
+            rawClipTmp[0][l][1] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 8));
+            rawClipTmp[0][l][2] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 16));
+            rawClipTmp[0][l][3] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 24));
+            rawClipTmp[0][l][4] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx0 * MAX_NUM_ALF_LUMA_COEFF + 32));
+
+            const int transposeIdx1 = pClass[j + 4 * k + l + 8] & 0x3;
+            const int classIdx1     = pClass[j + 4 * k + l + 8] >> 2;
+
+            rawCoefTmp[1][l][0] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF));
+            rawCoefTmp[1][l][1] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 8));
+            rawCoefTmp[1][l][2] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 16));
+            rawCoefTmp[1][l][3] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 24));
+            rawCoefTmp[1][l][4] = _mm_loadu_si128((const __m128i *) (filterSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 32));
+
+            rawClipTmp[1][l][0] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF));
+            rawClipTmp[1][l][1] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 8));
+            rawClipTmp[1][l][2] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 16));
+            rawClipTmp[1][l][3] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 24));
+            rawClipTmp[1][l][4] = _mm_loadu_si128((const __m128i *) (fClipSet + classIdx1 * MAX_NUM_ALF_LUMA_COEFF + 32));
+
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+            for (int m = 0; m < shuffleTime13FixedBasedLongLength[transposeIdx0]; m++)
+#else
+            for (int m = 0; m < shuffleTime13LongLength[transposeIdx0]; m++)
+#endif
+            {
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+              int op0 = shuffleOp13FixedBasedLongLength[transposeIdx0][m][0];
+              int op1 = shuffleOp13FixedBasedLongLength[transposeIdx0][m][1];
+#else
+              int op0 = shuffleOp13LongLength[transposeIdx0][m][0];
+              int op1 = shuffleOp13LongLength[transposeIdx0][m][1];
+#endif
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+              s0Tmp[0] = _mm_loadu_si128((const __m128i *) shuffleTab13FixedBasedLongLength[transposeIdx0][m][0]);
+              s1Tmp[0] = _mm_xor_si128(s0Tmp[0], _mm_set1_epi8((char) 0x80));
+              s2Tmp[0] = _mm_loadu_si128((const __m128i *) shuffleTab13FixedBasedLongLength[transposeIdx0][m][1]);
+              s3Tmp[0] = _mm_xor_si128(s2Tmp[0], _mm_set1_epi8((char) 0x80));
+#else
+              s0Tmp[0] = _mm_loadu_si128((const __m128i *) shuffleTab13LongLength[transposeIdx0][m][0]);
+              s1Tmp[0] = _mm_xor_si128(s0Tmp[0], _mm_set1_epi8((char) 0x80));
+              s2Tmp[0] = _mm_loadu_si128((const __m128i *) shuffleTab13LongLength[transposeIdx0][m][1]);
+              s3Tmp[0] = _mm_xor_si128(s2Tmp[0], _mm_set1_epi8((char) 0x80));
+#endif
+
+              __m128i rawTmp0 = _mm_or_si128(_mm_shuffle_epi8(rawCoefTmp[0][l][op0], s0Tmp[0]), _mm_shuffle_epi8(rawCoefTmp[0][l][op1], s1Tmp[0]));
+              __m128i rawTmp1 = _mm_or_si128(_mm_shuffle_epi8(rawCoefTmp[0][l][op0], s2Tmp[0]), _mm_shuffle_epi8(rawCoefTmp[0][l][op1], s3Tmp[0]));
+              rawCoefTmp[0][l][op0] = rawTmp0;
+              rawCoefTmp[0][l][op1] = rawTmp1;
+
+              rawTmp0 = _mm_or_si128(_mm_shuffle_epi8(rawClipTmp[0][l][op0], s0Tmp[0]), _mm_shuffle_epi8(rawClipTmp[0][l][op1], s1Tmp[0]));
+              rawTmp1 = _mm_or_si128(_mm_shuffle_epi8(rawClipTmp[0][l][op0], s2Tmp[0]), _mm_shuffle_epi8(rawClipTmp[0][l][op1], s3Tmp[0]));
+              rawClipTmp[0][l][op0] = rawTmp0;
+              rawClipTmp[0][l][op1] = rawTmp1;
+            }
+
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+            for (int m = 0; m < shuffleTime13FixedBasedLongLength[transposeIdx1]; m++)
+#else
+            for (int m = 0; m < shuffleTime13LongLength[transposeIdx1]; m++)
+#endif
+            {
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+              int op0 = shuffleOp13FixedBasedLongLength[transposeIdx1][m][0];
+              int op1 = shuffleOp13FixedBasedLongLength[transposeIdx1][m][1];
+#else
+              int op0 = shuffleOp13LongLength[transposeIdx1][m][0];
+              int op1 = shuffleOp13LongLength[transposeIdx1][m][1];
+#endif
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+              s0Tmp[1] = _mm_loadu_si128((const __m128i *) shuffleTab13FixedBasedLongLength[transposeIdx1][m][0]);
+              s1Tmp[1] = _mm_xor_si128(s0Tmp[1], _mm_set1_epi8((char) 0x80));
+              s2Tmp[1] = _mm_loadu_si128((const __m128i *) shuffleTab13FixedBasedLongLength[transposeIdx1][m][1]);
+              s3Tmp[1] = _mm_xor_si128(s2Tmp[1], _mm_set1_epi8((char) 0x80));
+#else
+              s0Tmp[1] = _mm_loadu_si128((const __m128i *) shuffleTab13LongLength[transposeIdx1][m][0]);
+              s1Tmp[1] = _mm_xor_si128(s0Tmp[1], _mm_set1_epi8((char) 0x80));
+              s2Tmp[1] = _mm_loadu_si128((const __m128i *) shuffleTab13LongLength[transposeIdx1][m][1]);
+              s3Tmp[1] = _mm_xor_si128(s2Tmp[1], _mm_set1_epi8((char) 0x80));
+#endif
+
+              __m128i rawTmp0       = _mm_or_si128(_mm_shuffle_epi8(rawCoefTmp[1][l][op0], s0Tmp[1]), _mm_shuffle_epi8(rawCoefTmp[1][l][op1], s1Tmp[1]));
+              __m128i rawTmp1       = _mm_or_si128(_mm_shuffle_epi8(rawCoefTmp[1][l][op0], s2Tmp[1]), _mm_shuffle_epi8(rawCoefTmp[1][l][op1], s3Tmp[1]));
+              rawCoefTmp[1][l][op0] = rawTmp0;
+              rawCoefTmp[1][l][op1] = rawTmp1;
+
+              rawTmp0               = _mm_or_si128(_mm_shuffle_epi8(rawClipTmp[1][l][op0], s0Tmp[1]), _mm_shuffle_epi8(rawClipTmp[1][l][op1], s1Tmp[1]));
+              rawTmp1               = _mm_or_si128(_mm_shuffle_epi8(rawClipTmp[1][l][op0], s2Tmp[1]), _mm_shuffle_epi8(rawClipTmp[1][l][op1], s3Tmp[1]));
+              rawClipTmp[1][l][op0] = rawTmp0;
+              rawClipTmp[1][l][op1] = rawTmp1;
+            }
+
+            rawCoef[l][0] = _mm256_castsi128_si256(rawCoefTmp[0][l][0] );
+            rawCoef[l][0] = _mm256_insertf128_si256(rawCoef[l][0], rawCoefTmp[1][l][0], 1 );
+            rawCoef[l][1] = _mm256_castsi128_si256(rawCoefTmp[0][l][1]);
+            rawCoef[l][1] = _mm256_insertf128_si256(rawCoef[l][1], rawCoefTmp[1][l][1], 1);
+            rawCoef[l][2] = _mm256_castsi128_si256(rawCoefTmp[0][l][2]);
+            rawCoef[l][2] = _mm256_insertf128_si256(rawCoef[l][2], rawCoefTmp[1][l][2], 1);
+            rawCoef[l][3] = _mm256_castsi128_si256(rawCoefTmp[0][l][3]);
+            rawCoef[l][3] = _mm256_insertf128_si256(rawCoef[l][3], rawCoefTmp[1][l][3], 1);
+            rawCoef[l][4] = _mm256_castsi128_si256(rawCoefTmp[0][l][4]);
+            rawCoef[l][4] = _mm256_insertf128_si256(rawCoef[l][4], rawCoefTmp[1][l][4], 1);
+
+            rawClip[l][0] = _mm256_castsi128_si256(rawClipTmp[0][l][0]);
+            rawClip[l][0] = _mm256_insertf128_si256(rawClip[l][0], rawClipTmp[1][l][0], 1);
+            rawClip[l][1] = _mm256_castsi128_si256(rawClipTmp[0][l][1]);
+            rawClip[l][1] = _mm256_insertf128_si256(rawClip[l][1], rawClipTmp[1][l][1], 1);
+            rawClip[l][2] = _mm256_castsi128_si256(rawClipTmp[0][l][2]);
+            rawClip[l][2] = _mm256_insertf128_si256(rawClip[l][2], rawClipTmp[1][l][2], 1);
+            rawClip[l][3] = _mm256_castsi128_si256(rawClipTmp[0][l][3]);
+            rawClip[l][3] = _mm256_insertf128_si256(rawClip[l][3], rawClipTmp[1][l][3], 1);
+            rawClip[l][4] = _mm256_castsi128_si256(rawClipTmp[0][l][4]);
+            rawClip[l][4] = _mm256_insertf128_si256(rawClip[l][4], rawClipTmp[1][l][4], 1);
+          }   // for l
+
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+          int limR, lim0, lim1, lim2, lim3;
+#if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+          limR = 5, lim0 = 5, lim1 = 5, lim2 = 4, lim3 = 4;
+#elif JVET_AD0222_ALF_LONG_FIXFILTER
+          limR = 5, lim0 = 5, lim1 = 5, lim2 = 4, lim3 = 4;
+#elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+          limR = 5, lim0 = 5, lim1 = 4, lim2 = 4, lim3 = 4;
+#else
+          limR = 5, lim0 = 5, lim1 = 4, lim2 = 4, lim3 = 4;
+#endif
+          for (unsigned char l = 0; l < limR; l++)
+#else
+          for (unsigned char l = 0; l < 5; l++)
+#endif
+          {
+            int m = l << 2;
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+            if (l < lim0)
+            {
+#endif
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[0][l], 0x00), _mm256_shuffle_epi32(rawCoef[1][l], 0x00));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[2][l], 0x00), _mm256_shuffle_epi32(rawCoef[3][l], 0x00));
+              params[k][0][0 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][l], 0x00), _mm256_shuffle_epi32(rawClip[1][l], 0x00));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[2][l], 0x00), _mm256_shuffle_epi32(rawClip[3][l], 0x00));
+              params[k][1][0 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+            }
+            if (l < lim1)
+            {
+#endif
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[0][l], 0x55), _mm256_shuffle_epi32(rawCoef[1][l], 0x55));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[2][l], 0x55), _mm256_shuffle_epi32(rawCoef[3][l], 0x55));
+              params[k][0][1 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][l], 0x55), _mm256_shuffle_epi32(rawClip[1][l], 0x55));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[2][l], 0x55), _mm256_shuffle_epi32(rawClip[3][l], 0x55));
+              params[k][1][1 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+            }
+            if (l < lim2)
+            {
+#endif
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[0][l], 0xaa), _mm256_shuffle_epi32(rawCoef[1][l], 0xaa));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[2][l], 0xaa), _mm256_shuffle_epi32(rawCoef[3][l], 0xaa));
+              params[k][0][2 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][l], 0xaa), _mm256_shuffle_epi32(rawClip[1][l], 0xaa));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[2][l], 0xaa), _mm256_shuffle_epi32(rawClip[3][l], 0xaa));
+              params[k][1][2 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+            }
+            if (l < lim3)
+            {
+#endif
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[0][l], 0xff), _mm256_shuffle_epi32(rawCoef[1][l], 0xff));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[2][l], 0xff), _mm256_shuffle_epi32(rawCoef[3][l], 0xff));
+              params[k][0][3 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][l], 0xff), _mm256_shuffle_epi32(rawClip[1][l], 0xff));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[2][l], 0xff), _mm256_shuffle_epi32(rawClip[3][l], 0xff));
+              params[k][1][3 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+            }
+#endif
+          }   // for l
+        } // for k
+
+        const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6, *pImg7, *pImg8;
+#if !JVET_AD0222_ALF_LONG_FIXFILTER
+        const Pel *pImg9, *pImg10, *pImg11, *pImg12;
+#endif
+        const Pel *pImgP0;
+
+        pImg0 = src + j;
+        pImg1 = pImg0 + srcStride;
+        pImg2 = pImg0 - srcStride;
+        pImg3 = pImg1 + srcStride;
+        pImg4 = pImg2 - srcStride;
+        pImg5 = pImg3 + srcStride;
+        pImg6 = pImg4 - srcStride;
+        pImg7 = pImg5 + srcStride;
+        pImg8 = pImg6 - srcStride;
+#if !JVET_AD0222_ALF_LONG_FIXFILTER
+        pImg9  = pImg7 + srcStride;
+        pImg10 = pImg8 - srcStride;
+        pImg11 = pImg9 + srcStride;
+        pImg12 = pImg10 - srcStride;
+#endif
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+#if JVET_AE0139_ALF_IMPROVED_FIXFILTER
+        int filterSetIdx = 2 + fixedFilterSetIdx;
+#else
+        int filterSetIdx = 0 + fixedFilterSetIdx;
+#endif
+        const Pel *pImg0FixedBased, *pImg1FixedBased, *pImg2FixedBased, *pImg3FixedBased, *pImg4FixedBased;
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+        const Pel *pImg5FixedBased, *pImg6FixedBased, *pImg7FixedBased, *pImg8FixedBased, *pImg9FixedBased,
+          *pImg10FixedBased, *pImg11FixedBased, *pImg12FixedBased;
+#endif
+        if (isFixedFilterPaddedPerCtu)
+        {
+          pImg0FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 0] + j + padSize;
+          pImg1FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 1] + j + padSize;
+          pImg2FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 1] + j + padSize;
+          pImg3FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 2] + j + padSize;
+          pImg4FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 2] + j + padSize;
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+          pImg5FixedBased  = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 3] + j + padSize;
+          pImg6FixedBased  = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 3] + j + padSize;
+          pImg7FixedBased  = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 4] + j + padSize;
+          pImg8FixedBased  = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 4] + j + padSize;
+          pImg9FixedBased  = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 5] + j + padSize;
+          pImg10FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 5] + j + padSize;
+          pImg11FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize + 6] + j + padSize;
+          pImg12FixedBased = fixedFilterResultsPerCtu[filterSetIdx][i + padSize - 6] + j + padSize;
+#endif
+        }
+        else
+        {
+          pImg0FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 0] + blkDst.x + j + padSize;
+          pImg1FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 1] + blkDst.x + j + padSize;
+          pImg2FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 1] + blkDst.x + j + padSize;
+          pImg3FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 2] + blkDst.x + j + padSize;
+          pImg4FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 2] + blkDst.x + j + padSize;
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+          pImg5FixedBased  = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 3] + blkDst.x + j + padSize;
+          pImg6FixedBased  = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 3] + blkDst.x + j + padSize;
+          pImg7FixedBased  = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 4] + blkDst.x + j + padSize;
+          pImg8FixedBased  = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 4] + blkDst.x + j + padSize;
+          pImg9FixedBased  = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 5] + blkDst.x + j + padSize;
+          pImg10FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 5] + blkDst.x + j + padSize;
+          pImg11FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize + 6] + blkDst.x + j + padSize;
+          pImg12FixedBased = fixedFilterResults[filterSetIdx][blkDst.y + i + padSize - 6] + blkDst.x + j + padSize;
+#endif
+        }
+#endif
+#if JVET_AD0222_ADDITONAL_ALF_FIXFILTER
+        const Pel *pImg0Gauss[NUM_GAUSS_FILTERED_SOURCE];
+
+        for (int gaussIdx = 0; gaussIdx < NUM_GAUSS_FILTERED_SOURCE; gaussIdx++)
+        {
+          if (isFixedFilterPaddedPerCtu)
+          {
+            pImg0Gauss[gaussIdx] = gaussCtu[gaussIdx][i + padSizeGauss + 0] + j + padSizeGauss;
+          }
+          else
+          {
+            pImg0Gauss[gaussIdx] = gaussPic[gaussIdx][blkDst.y + i + padSizeGauss + 0] + blkDst.x + j + padSizeGauss;
+          }
+        }
+#endif
+        __m256i cur    = _mm256_loadu_si256((const __m256i *) pImg0);
+        __m256i accumA = mmOffset;
+        __m256i accumB = mmOffset;
+
+        auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3)   // adds the clipped, weighted differences of four sample positions to accumA/accumB; NOTE: parameter i is the params[][][i] index and shadows the enclosing row index i
+        {
+          const __m256i val00 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr0), cur);   // unaligned 256-bit load from ptr0, minus the captured centre vector cur (16-bit lanes)
+          const __m256i val10 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr2), cur);   // same for ptr2 (paired with ptr0 in the interleave below)
+          const __m256i val01 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr1), cur);   // same for ptr1
+          const __m256i val11 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr3), cur);   // same for ptr3 (paired with ptr1 in the interleave below)
+
+          __m256i val01A = _mm256_unpacklo_epi16(val00, val10);   // interleave ptr0/ptr2 differences, low words of each 128-bit lane
+          __m256i val01B = _mm256_unpackhi_epi16(val00, val10);   // interleave ptr0/ptr2 differences, high words of each 128-bit lane
+          __m256i val01C = _mm256_unpacklo_epi16(val01, val11);   // interleave ptr1/ptr3 differences, low words of each 128-bit lane
+          __m256i val01D = _mm256_unpackhi_epi16(val01, val11);   // interleave ptr1/ptr3 differences, high words of each 128-bit lane
+
+          __m256i limit01A = params[0][1][i];   // clip bounds for the "A" (unpacklo) half, read from params[0][1]
+          __m256i limit01B = params[1][1][i];   // clip bounds for the "B" (unpackhi) half, read from params[1][1]
+
+          val01A = _mm256_min_epi16(val01A, limit01A);   // upper clamp: difference <= +limit
+          val01B = _mm256_min_epi16(val01B, limit01B);
+          val01C = _mm256_min_epi16(val01C, limit01A);
+          val01D = _mm256_min_epi16(val01D, limit01B);
+
+          limit01A = _mm256_sub_epi16(_mm256_setzero_si256(), limit01A);   // negate the bounds (0 - limit) for the lower clamp
+          limit01B = _mm256_sub_epi16(_mm256_setzero_si256(), limit01B);
+
+          val01A = _mm256_max_epi16(val01A, limit01A);   // lower clamp: difference >= -limit
+          val01B = _mm256_max_epi16(val01B, limit01B);
+          val01C = _mm256_max_epi16(val01C, limit01A);
+          val01D = _mm256_max_epi16(val01D, limit01B);
+
+          val01A = _mm256_add_epi16(val01A, val01C);   // sum clipped ptr0+ptr1 and ptr2+ptr3 differences, so each pair shares one coefficient
+          val01B = _mm256_add_epi16(val01B, val01D);
+
+          const __m256i coeff01A = params[0][0][i];   // coefficients for the "A" half, read from params[0][0]
+          const __m256i coeff01B = params[1][0][i];   // coefficients for the "B" half, read from params[1][0]
+
+          accumA = _mm256_add_epi32(accumA, _mm256_madd_epi16(val01A, coeff01A));   // multiply 16-bit pairs, add adjacent products into 32-bit lanes, accumulate into captured accumA
+          accumB = _mm256_add_epi32(accumB, _mm256_madd_epi16(val01B, coeff01B));   // same for the "B" half into captured accumB
+        };
 #if JVET_AD0222_ALF_LONG_FIXFILTER && JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      coeff01A = params[0][0][18];
-      coeff01B = params[1][0][18];
+        process2coeffs(0, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
+        process2coeffs(1, pImg3 + 0, pImg4 - 0, pImg1 + 1, pImg2 - 1);
+        process2coeffs(2, pImg1 + 0, pImg2 - 0, pImg1 - 1, pImg2 + 1);
+        process2coeffs(3, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+        process2coeffs(4, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+        process2coeffs(5, pImg12FixedBased - 0, pImg11FixedBased + 0, pImg10FixedBased - 0, pImg9FixedBased + 0);
+        process2coeffs(6, pImg8FixedBased - 0, pImg7FixedBased + 0, pImg6FixedBased - 0, pImg5FixedBased + 0);
+        process2coeffs(7, pImg4FixedBased - 1, pImg3FixedBased + 1, pImg4FixedBased - 0, pImg3FixedBased + 0);
+        process2coeffs(8, pImg4FixedBased + 1, pImg3FixedBased - 1, pImg2FixedBased - 2, pImg1FixedBased + 2);
+        process2coeffs(9, pImg2FixedBased - 1, pImg1FixedBased + 1, pImg2FixedBased - 0, pImg1FixedBased + 0);
+        process2coeffs(10, pImg2FixedBased + 1, pImg1FixedBased - 1, pImg2FixedBased + 2, pImg1FixedBased - 2);
+        process2coeffs(11, pImg0FixedBased - 6, pImg0FixedBased + 6, pImg0FixedBased - 5, pImg0FixedBased + 5);
+        process2coeffs(12, pImg0FixedBased - 4, pImg0FixedBased + 4, pImg0FixedBased - 3, pImg0FixedBased + 3);
+        process2coeffs(13, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
 #elif JVET_AD0222_ALF_LONG_FIXFILTER
-      coeff01A = params[0][0][16];
-      coeff01B = params[1][0][16];
+        process2coeffs(0, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
+        process2coeffs(1, pImg3 + 0, pImg4 - 0, pImg1 + 1, pImg2 - 1);
+        process2coeffs(2, pImg1 + 0, pImg2 - 0, pImg1 - 1, pImg2 + 1);
+        process2coeffs(3, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+        process2coeffs(4, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+        process2coeffs(5, pImg12FixedBased - 0, pImg11FixedBased + 0, pImg10FixedBased - 0, pImg9FixedBased + 0);
+        process2coeffs(6, pImg8FixedBased - 0, pImg7FixedBased + 0, pImg6FixedBased - 0, pImg5FixedBased + 0);
+        process2coeffs(7, pImg4FixedBased - 1, pImg3FixedBased + 1, pImg4FixedBased - 0, pImg3FixedBased + 0);
+        process2coeffs(8, pImg4FixedBased + 1, pImg3FixedBased - 1, pImg2FixedBased - 2, pImg1FixedBased + 2);
+        process2coeffs(9, pImg2FixedBased - 1, pImg1FixedBased + 1, pImg2FixedBased - 0, pImg1FixedBased + 0);
+        process2coeffs(10, pImg2FixedBased + 1, pImg1FixedBased - 1, pImg2FixedBased + 2, pImg1FixedBased - 2);
+        process2coeffs(11, pImg0FixedBased - 6, pImg0FixedBased + 6, pImg0FixedBased - 5, pImg0FixedBased + 5);
+        process2coeffs(12, pImg0FixedBased - 4, pImg0FixedBased + 4, pImg0FixedBased - 3, pImg0FixedBased + 3);
+        process2coeffs(13, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
 #elif JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      coeff01A = params[0][0][17];
-      coeff01B = params[1][0][17];
+        process2coeffs(0, pImg11 + 0, pImg12 - 0, pImg9 + 0, pImg10 - 0);
+        process2coeffs(1, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
+        process2coeffs(2, pImg3 + 2, pImg4 - 2, pImg3 + 1, pImg4 - 1);
+        process2coeffs(3, pImg3 + 0, pImg4 - 0, pImg3 - 1, pImg4 + 1);
+        process2coeffs(4, pImg3 - 2, pImg4 + 2, pImg1 + 2, pImg2 - 2);
+        process2coeffs(5, pImg1 + 1, pImg2 - 1, pImg1 + 0, pImg2 - 0);
+        process2coeffs(6, pImg1 - 1, pImg2 + 1, pImg1 - 2, pImg2 + 2);
+        process2coeffs(7, pImg0 + 6, pImg0 - 6, pImg0 + 5, pImg0 - 5);
+        process2coeffs(8, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+        process2coeffs(9, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+        process2coeffs(10, pImg4FixedBased - 0, pImg3FixedBased + 0, pImg2FixedBased - 1, pImg1FixedBased + 1);
+        process2coeffs(11, pImg2FixedBased - 0, pImg1FixedBased + 0, pImg2FixedBased + 1, pImg1FixedBased - 1);
+        process2coeffs(12, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
 #else
-      coeff01A = params[0][0][15];
-      coeff01B = params[1][0][15];
+        process2coeffs(0, pImg11 + 0, pImg12 - 0, pImg9 + 0, pImg10 - 0);
+        process2coeffs(1, pImg7 + 0, pImg8 - 0, pImg5 + 0, pImg6 - 0);
+        process2coeffs(2, pImg3 + 2, pImg4 - 2, pImg3 + 1, pImg4 - 1);
+        process2coeffs(3, pImg3 + 0, pImg4 - 0, pImg3 - 1, pImg4 + 1);
+        process2coeffs(4, pImg3 - 2, pImg4 + 2, pImg1 + 2, pImg2 - 2);
+        process2coeffs(5, pImg1 + 1, pImg2 - 1, pImg1 + 0, pImg2 - 0);
+        process2coeffs(6, pImg1 - 1, pImg2 + 1, pImg1 - 2, pImg2 + 2);
+        process2coeffs(7, pImg0 + 6, pImg0 - 6, pImg0 + 5, pImg0 - 5);
+        process2coeffs(8, pImg0 + 4, pImg0 - 4, pImg0 + 3, pImg0 - 3);
+        process2coeffs(9, pImg0 + 2, pImg0 - 2, pImg0 + 1, pImg0 - 1);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+        process2coeffs(10, pImg4FixedBased - 0, pImg3FixedBased + 0, pImg2FixedBased - 1, pImg1FixedBased + 1);
+        process2coeffs(11, pImg2FixedBased - 0, pImg1FixedBased + 0, pImg2FixedBased + 1, pImg1FixedBased - 1);
+        process2coeffs(12, pImg0FixedBased - 2, pImg0FixedBased + 2, pImg0FixedBased - 1, pImg0FixedBased + 1);
 #endif
+#endif
+        pImg0  = srcBeforeDb + j;
+        pImg1  = pImg0 + srcBeforeDbStride;
+        pImg2  = pImg0 - srcBeforeDbStride;
+        pImgP0 = srcResi + j;
 
-      accumA = _mm_add_epi32(accumA, _mm_madd_epi16(val01A, coeff01A));
-      accumB = _mm_add_epi32(accumB, _mm_madd_epi16(val01B, coeff01B));
-      // end prediction fixed filter
-#if JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-      val00    = _mm_sub_epi16(_mm_loadu_si128((const __m128i *) (pImg0Gauss[0])), cur);
-      val10    = _mm_setzero_si128();
-      val01A   = _mm_unpacklo_epi16(val00, val10);
-      val01B   = _mm_unpackhi_epi16(val00, val10);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
 #if JVET_AD0222_ALF_LONG_FIXFILTER
-      limit01A = params[0][1][19];
-      limit01B = params[1][1][19];
+        process2coeffs(14, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
 #else
-      limit01A = params[0][1][18];
-      limit01B = params[1][1][18];
+        process2coeffs(13, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
+#endif
+#else
+        process2coeffs(10, pImg1 + 0, pImg2 + 0, pImg0 + 1, pImg0 - 1);
 #endif
 
-      val01A   = _mm_min_epi16(val01A, limit01A);
-      val01B   = _mm_min_epi16(val01B, limit01B);
-      limit01A = _mm_sub_epi16(_mm_setzero_si128(), limit01A);
-      limit01B = _mm_sub_epi16(_mm_setzero_si128(), limit01B);
-      val01A   = _mm_max_epi16(val01A, limit01A);
-      val01B   = _mm_max_epi16(val01B, limit01B);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+        __m256i val00 = _mm256_sub_epi16( _mm256_loadu_si256((const __m256i *) (fixedFilterResults[0 + fixedFilterSetIdx][blkDst.y + i + padSize] + blkDst.x + j + padSize)), cur);
+        __m256i val10 = _mm256_sub_epi16( _mm256_loadu_si256((const __m256i *) (fixedFilterResults[2 + fixedFilterSetIdx][blkDst.y + i + padSize] + blkDst.x + j + padSize)), cur);
+#else
+        __m256i val00 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) pImg0), cur);
+        __m256i val10 = _mm256_sub_epi16( _mm256_loadu_si256((const __m256i *) (fixedFilterResults[fixedFilterSetIdx][blkDst.y + i] + blkDst.x + j)), cur);
+#endif
+        __m256i val01A = _mm256_unpacklo_epi16(val00, val10);
+        __m256i val01B = _mm256_unpackhi_epi16(val00, val10);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
 #if JVET_AD0222_ALF_LONG_FIXFILTER
-      coeff01A = params[0][0][19];
-      coeff01B = params[1][0][19];
+        __m256i limit01A = params[0][1][15];
+        __m256i limit01B = params[1][1][15];
 #else
-      coeff01A = params[0][0][18];
-      coeff01B = params[1][0][18];
+        __m256i limit01A = params[0][1][14];
+        __m256i limit01B = params[1][1][14];
 #endif
-      accumA = _mm_add_epi32(accumA, _mm_madd_epi16(val01A, coeff01A));
-      accumB = _mm_add_epi32(accumB, _mm_madd_epi16(val01B, coeff01B));
+#else
+        __m256i limit01A = params[0][1][11];
+        __m256i limit01B = params[1][1][11];
 #endif
-      accumA = _mm_srai_epi32(accumA, shift);
-      accumB = _mm_srai_epi32(accumB, shift);
-
-      accumA = _mm_packs_epi32(accumA, accumB);
-#if JVET_AI0084_ALF_RESIDUALS_SCALING
-      if ( bScalingCorr )
-      {
-        accumA = _mm_add_epi16(accumA, curBase);
-      }
-      else
+        val01A   = _mm256_min_epi16(val01A, limit01A);
+        val01B   = _mm256_min_epi16(val01B, limit01B);
+        limit01A = _mm256_sub_epi16(_mm256_setzero_si256(), limit01A);
+        limit01B = _mm256_sub_epi16(_mm256_setzero_si256(), limit01B);
+        val01A   = _mm256_max_epi16(val01A, limit01A);
+        val01B   = _mm256_max_epi16(val01B, limit01B);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+        __m256i coeff01A = params[0][0][15];
+        __m256i coeff01B = params[1][0][15];
+#else
+        __m256i coeff01A = params[0][0][14];
+        __m256i coeff01B = params[1][0][14];
 #endif
-      accumA = _mm_add_epi16(accumA, cur);
-      accumA = _mm_min_epi16(mmMax, _mm_max_epi16(accumA, mmMin));
+#else
+        __m256i coeff01A = params[0][0][11];
+        __m256i coeff01B = params[1][0][11];
+#endif
+        accumA = _mm256_add_epi32(accumA, _mm256_madd_epi16(val01A, coeff01A));
+        accumB = _mm256_add_epi32(accumB, _mm256_madd_epi16(val01B, coeff01B));
 
-      _mm_storeu_si128((__m128i *) (dst + j), accumA);
-    }   // for j
-    src += srcStride * stepY;
-    dst += dstStride * stepY;
-    srcBeforeDb += srcBeforeDbStride * stepY;
-    srcResi += srcResiStride * stepY;
-  }   // for i
-}
+        // start residual fixed filter
+        __m256i zero = _mm256_setzero_si256();
+        val00 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) (fixedFilterResiResults[1 - fixedFilterSetIdx][blkDst.y + i] + blkDst.x + j)), zero);
+        val10  = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) (pImg0)), cur);
+        val01A = _mm256_unpacklo_epi16(val00, val10);
+        val01B = _mm256_unpackhi_epi16(val00, val10);
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+        limit01A = params[0][1][16];
+        limit01B = params[1][1][16];
+#else
+        limit01A = params[0][1][15];
+        limit01B = params[1][1][15];
+#endif
 
-template<X86_VEXT vext>
-static void simdFilter13x13BlkExtDbResi(
-  AlfClassifier * *classifier, const PelUnitBuf &recDst, const PelUnitBuf &recBeforeDb, const PelUnitBuf &resi,
-  const CPelUnitBuf &recSrc, const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
-#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
-  const Pel *fClipSet
+        val01A   = _mm256_min_epi16(val01A, limit01A);
+        val01B   = _mm256_min_epi16(val01B, limit01B);
+        limit01A = _mm256_sub_epi16(_mm256_setzero_si256(), limit01A);
+        limit01B = _mm256_sub_epi16(_mm256_setzero_si256(), limit01B);
+        val01A   = _mm256_max_epi16(val01A, limit01A);
+        val01B   = _mm256_max_epi16(val01B, limit01B);
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+        coeff01A = params[0][0][16];
+        coeff01B = params[1][0][16];
 #else
-  const short *fClipSet
+        coeff01A = params[0][0][15];
+        coeff01B = params[1][0][15];
 #endif
-  ,const ClpRng &clpRng, CodingStructure &cs, Pel ***fixedFilterResults, Pel ***fixedFilterResiResults, int fixedFilterSetIdx
+
+        accumA = _mm256_add_epi32(accumA, _mm256_madd_epi16(val01A, coeff01A));
+        accumB = _mm256_add_epi32(accumB, _mm256_madd_epi16(val01B, coeff01B));
+        // end residual fixed filter
+
 #if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-  ,Pel ***fixedFilterResultsPerCtu, bool isFixedFilterPaddedPerCtu
-#endif
+        val00 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) pImgP0), zero);
 #if JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-  , Pel ***gaussPic, Pel ***gaussCtu
-#endif
-#if JVET_AG0158_ALF_LUMA_COEFF_PRECISION
-  , char coeffBits
+        val10 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) (pImg0Gauss[0])), cur);
+#else
+        val10 = _mm256_sub_epi16(cur, cur);
 #endif
-)
-{
-  const CPelBuf srcBuffer         = recSrc.get(compId);
-  PelBuf        dstBuffer         = recDst.get(compId);
-  const CPelBuf scrBufferBeforeDb = recBeforeDb.get(compId);
-  const CPelBuf scrBufferResi     = resi.get(compId);
-
-  const size_t srcStride         = srcBuffer.stride;
-  const size_t dstStride         = dstBuffer.stride;
-  const size_t srcBeforeDbStride = scrBufferBeforeDb.stride;
-  const size_t srcResiStride     = scrBufferResi.stride;
-#if JVET_AI0084_ALF_RESIDUALS_SCALING
-  int adjustShift = coeffBits - 1;
-  const bool  bScalingCorr = isLuma(compId) && fixedFilterSetIdx < 0;
-  if ( bScalingCorr )
-  {
-    fixedFilterSetIdx = -fixedFilterSetIdx - 1;
-    adjustShift -= shiftPrecis; // add more precision
-  }
-  const int shift = adjustShift;
-  const Pel currBase = 512;
-  int round = 1 << (shift - 1);
 
-  __m128i curBase = _mm_set_epi16( currBase, currBase, currBase, currBase, currBase, currBase, currBase, currBase );
+        val01A = _mm256_unpacklo_epi16(val00, val10);
+        val01B = _mm256_unpackhi_epi16(val00, val10);
 #else
-#if JVET_AG0158_ALF_LUMA_COEFF_PRECISION
-  int shift = coeffBits;
-  shift -= 1;
-  int round = 1 << (shift - 1);
+        __m256i val = _mm256_sub_epi16( _mm256_loadu_si256( (const __m256i *) (fixedFilterResults[EXT_LENGTH + fixedFilterSetIdx][blkDst.y + i] + blkDst.x + j)), cur);
+        val01A = _mm256_shuffle_epi8(val, _mm256_setr_epi8(0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7));
+        val01B = _mm256_shuffle_epi8(val, _mm256_setr_epi8(8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 12, 13, 14, 15, 14, 15, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 12, 13, 14, 15, 14, 15));
+#endif
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+        limit01A = params[0][1][17];
+        limit01B = params[1][1][17];
 #else
-  constexpr int shift = AdaptiveLoopFilter::m_NUM_BITS - 1;
-  constexpr int round = 1 << (shift - 1);
+        limit01A = params[0][1][16];
+        limit01B = params[1][1][16];
+#endif
+#else
+        limit01A = params[0][1][12];
+        limit01B = params[1][1][12];
+#endif
+        val01A   = _mm256_min_epi16(val01A, limit01A);
+        val01B   = _mm256_min_epi16(val01B, limit01B);
+        limit01A = _mm256_sub_epi16(_mm256_setzero_si256(), limit01A);
+        limit01B = _mm256_sub_epi16(_mm256_setzero_si256(), limit01B);
+        val01A   = _mm256_max_epi16(val01A, limit01A);
+        val01B   = _mm256_max_epi16(val01B, limit01B);
+#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
+#if JVET_AD0222_ALF_LONG_FIXFILTER
+        coeff01A = params[0][0][17];
+        coeff01B = params[1][0][17];
+#else
+        coeff01A = params[0][0][16];
+        coeff01B = params[1][0][16];
 #endif
+#else
+        coeff01A = params[0][0][12];
+        coeff01B = params[1][0][12];
 #endif
+        accumA = _mm256_add_epi32(accumA, _mm256_madd_epi16(val01A, coeff01A));
+        accumB = _mm256_add_epi32(accumB, _mm256_madd_epi16(val01B, coeff01B));
 
-  const size_t width  = blk.width;
-  const size_t height = blk.height;
+        accumA = _mm256_srai_epi32(accumA, shift);
+        accumB = _mm256_srai_epi32(accumB, shift);
 
-  constexpr size_t stepX = 8;
-  size_t           stepY = 1;
+        accumA = _mm256_packs_epi32(accumA, accumB);
+#if JVET_AI0084_ALF_RESIDUALS_SCALING
+        if( bScalingCorr )
+        {
+          accumA = _mm256_add_epi16(accumA, curBase);
+        }
+        else
+#endif
+        accumA = _mm256_add_epi16(accumA, cur);
+        accumA = _mm256_min_epi16(mmMax, _mm256_max_epi16(accumA, mmMin));
+
+        _mm256_storeu_si256((__m256i *) (dst + j), accumA);
+      }   // for j
+      src += srcStride * stepY;
+      dst += dstStride * stepY;
+      srcBeforeDb += srcBeforeDbStride * stepY;
+      srcResi += srcResiStride * stepY;
+    }   // for i
+  }
+  else
+  {
 
   const __m128i mmOffset = _mm_set1_epi32(round);
   const __m128i mmMin    = _mm_set1_epi16(clpRng.min);
   const __m128i mmMax    = _mm_set1_epi16(clpRng.max);
-
-  static_assert(sizeof(*filterSet) == 2, "ALF coeffs must be 16-bit wide");
-  static_assert(sizeof(*fClipSet) == 2, "ALF clip values must be 16-bit wide");
-
-  const Pel *src         = srcBuffer.buf + blk.y * srcStride + blk.x;
-  Pel       *dst         = dstBuffer.buf + blkDst.y * dstStride + blkDst.x;
-  const Pel *srcBeforeDb = scrBufferBeforeDb.buf + blk.y * srcBeforeDbStride + blk.x;
-  const Pel *srcResi     = scrBufferResi.buf + blk.y * srcResiStride + blk.x;
-#if JVET_AB0184_ALF_MORE_FIXED_FILTER_OUTPUT_TAPS
-  const int padSize = ALF_PADDING_SIZE_FIXED_RESULTS;
-#endif
-#if JVET_AD0222_ADDITONAL_ALF_FIXFILTER
-  const int padSizeGauss = ALF_PADDING_SIZE_GAUSS_RESULTS;
+#if JVET_AI0084_ALF_RESIDUALS_SCALING
+  const __m128i curBase  = _mm_set1_epi16(currBase);
 #endif
+#endif //Use AVX2 SIMD
   for (size_t i = 0; i < height; i += stepY)
   {
     const AlfClassifier *pClass = classifier[blkDst.y + i] + blkDst.x;
@@ -4586,13 +6079,20 @@ static void simdFilter13x13BlkExtDbResi(
     srcBeforeDb += srcBeforeDbStride * stepY;
     srcResi += srcResiStride * stepY;
   }   // for i
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  }//Use 256 Bit Simd
+ #endif
 }
 #endif
 
 #if JVET_AD0222_ADDITONAL_ALF_FIXFILTER
 //Gauss Filter
 template<X86_VEXT vext>
-static void simdGaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const ClpRng &clpRng, const Pel clippingValues[4], int filterSetIdx, int storeIdx )
+static void simdGaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, const ClpRng &clpRng, const Pel clippingValues[4], int filterSetIdx, int storeIdx
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  , bool applyCodingInfo, AlfClassifier** classifierCodingInfo
+#endif
+  )
 {
   int16_t gaussCoefTable[NUM_GAUSS_FILTERED_SOURCE][25] =
   {
@@ -4617,8 +6117,18 @@ static void simdGaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelB
   }
   int16_t diffTH = 32;
 
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool isIntraSlice = cs.slice->isIntra();
+  const bool isSpsAdjust  = cs.sps->getAlfLumaFixedFilterAdjust();
+
+  const bool useBounCondition = applyCodingInfo && !(!isSpsAdjust && isIntraSlice);
+  const bool useResiCondition = applyCodingInfo && (isSpsAdjust || !isSpsAdjust) && !isIntraSlice && false;
+  const int offsetClipValue = 1 << ( clpRng.bd - 1 );
+#endif
+#if !( USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION )
   const __m128i offsetMax = _mm_set1_epi16(diffTH);
   const __m128i offsetMin = _mm_sub_epi16(_mm_setzero_si128(), offsetMax);
+#endif
 
   const CPelBuf srcBuffer = srcLuma;
   const int srcStride = srcBuffer.stride;
@@ -4632,9 +6142,11 @@ static void simdGaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelB
   constexpr int stepX = 8;
   int stepY = 1;
 
+#if !( USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION )
   const __m128i mmOffset = _mm_set1_epi32(round);
   const __m128i mmMin = _mm_set1_epi16(clpRng.min);
   const __m128i mmMax = _mm_set1_epi16(clpRng.max);
+#endif
 
   static_assert(sizeof(*gaussCoefTable[0]) == 2, "ALF coeffs must be 16-bit wide");
   static_assert(sizeof(*gaussClipTable   ) == 2, "ALF clip values must be 16-bit wide");
@@ -4642,8 +6154,296 @@ static void simdGaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelB
   const Pel *src = srcBuffer.buf + blk.y * srcStride + blk.x;
   const int padSizeGauss = ALF_PADDING_SIZE_GAUSS_RESULTS;
 
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool use256BitSimd = vext >= AVX2 && blkDst.width % 16 == 0 ? true : false;
+
+  if( use256BitSimd )
+  {
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    __m256i mmClassIdxBsP, mmClassIdxResiP, mmClassIdxBsN, mmClassIdxResiN, mmClassIdxTmp;
+    __m256i mmOriOffset;
+    __m256i mmSignOffsetP, mmSignOffsetN;
+    __m256i mmAbsOffset;
+    __m256i mmAdjOffset;
+    __m256i mmZeroVector = _mm256_set1_epi16( 0 );
+    __m256i mm01Vector   = _mm256_set1_epi16( 1 );
+    __m256i mm08Vector   = _mm256_set1_epi16( 8 );
+    __m256i mm16Vector   = _mm256_set1_epi16( 16 );
+    __m256i mmPOffsetClipVector = _mm256_set1_epi16( +offsetClipValue );
+    __m256i mmNOffsetClipVector = _mm256_set1_epi16( -offsetClipValue );
+    // Set Factor
+    __m256i mmBsFactor = isIntraSlice ? _mm256_set1_epi16( 4 + 2 ) : _mm256_set1_epi16( 3 + 2 );
+    __m256i mmResiFactor = isIntraSlice ? _mm256_set1_epi16( 0 >> (!isSpsAdjust ? 1 : 0)) : _mm256_set1_epi16( 3 >> (!isSpsAdjust ? 1 : 0));
+#endif
+    const __m256i offsetMax = _mm256_set1_epi16(diffTH);
+    const __m256i offsetMin = _mm256_sub_epi16(_mm256_set1_epi16( 0 ), offsetMax);
+    const __m256i mmOffset  = _mm256_set1_epi32(round);
+    const __m256i mmMin     = _mm256_set1_epi16(clpRng.min);
+    const __m256i mmMax     = _mm256_set1_epi16(clpRng.max);
+
+    for (int i = 0; i < height; i += stepY)
+    {
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      AlfClassifier *pClassCodingInfo = nullptr;
+      if( useBounCondition || useResiCondition )
+      {
+        pClassCodingInfo = classifierCodingInfo[blkDst.y + i] + blkDst.x;
+      }
+#endif
+      for (int j = 0; j < width; j += stepX * 2)
+      {
+        __m256i params[2][2][6];
+
+        for (int k = 0; k < 2; k++)
+        {
+          __m256i rawCoef[4][2], rawClip[4][2], s0, s1;
+          __m128i rawCoefTmp[4][2], rawClipTmp[4][2];
+
+          for (int l = 0; l < 4; l++)
+          {
+            rawCoefTmp[l][0] = _mm_loadu_si128((const __m128i *) (gaussCoefTable[filterSetIdx] + 0));
+            rawCoefTmp[l][1] = _mm_loadu_si128((const __m128i *) (gaussCoefTable[filterSetIdx] + 8));
+
+            rawClipTmp[l][0] = _mm_loadu_si128((const __m128i *) (gaussClipTable + 0));
+            rawClipTmp[l][1] = _mm_loadu_si128((const __m128i *) (gaussClipTable + 8));
+
+            rawCoef[l][0] = _mm256_castsi128_si256( rawCoefTmp[l][0]);
+            rawCoef[l][0] = _mm256_insertf128_si256(rawCoef[l][0], rawCoefTmp[l][0], 1);
+            rawCoef[l][1] = _mm256_castsi128_si256( rawCoefTmp[l][1]);
+            rawCoef[l][1] = _mm256_insertf128_si256(rawCoef[l][1], rawCoefTmp[l][1], 1);
+
+            rawClip[l][0] = _mm256_castsi128_si256(rawClipTmp[l][0]);
+            rawClip[l][0] = _mm256_insertf128_si256(rawClip[l][0], rawClipTmp[l][0], 1);
+            rawClip[l][1] = _mm256_castsi128_si256(rawClipTmp[l][1]);
+            rawClip[l][1] = _mm256_insertf128_si256(rawClip[l][1], rawClipTmp[l][1], 1);
+          }   // for l
+
+          for (unsigned char l = 0; l < 2; l++)
+          {
+            int m = l << 2;
+
+            s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[0][l], 0x00), _mm256_shuffle_epi32(rawCoef[1][l], 0x00));
+            s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[2][l], 0x00), _mm256_shuffle_epi32(rawCoef[3][l], 0x00));
+            params[k][0][0 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+            s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][l], 0x00), _mm256_shuffle_epi32(rawClip[1][l], 0x00));
+            s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[2][l], 0x00), _mm256_shuffle_epi32(rawClip[3][l], 0x00));
+            params[k][1][0 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+
+            s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[0][l], 0x55), _mm256_shuffle_epi32(rawCoef[1][l], 0x55));
+            s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[2][l], 0x55), _mm256_shuffle_epi32(rawCoef[3][l], 0x55));
+            params[k][0][1 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+            s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][l], 0x55), _mm256_shuffle_epi32(rawClip[1][l], 0x55));
+            s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[2][l], 0x55), _mm256_shuffle_epi32(rawClip[3][l], 0x55));
+            params[k][1][1 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+
+            if (l < 1)
+            {
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[0][l], 0xaa), _mm256_shuffle_epi32(rawCoef[1][l], 0xaa));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[2][l], 0xaa), _mm256_shuffle_epi32(rawCoef[3][l], 0xaa));
+              params[k][0][2 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][l], 0xaa), _mm256_shuffle_epi32(rawClip[1][l], 0xaa));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[2][l], 0xaa), _mm256_shuffle_epi32(rawClip[3][l], 0xaa));
+              params[k][1][2 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[0][l], 0xff), _mm256_shuffle_epi32(rawCoef[1][l], 0xff));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawCoef[2][l], 0xff), _mm256_shuffle_epi32(rawCoef[3][l], 0xff));
+              params[k][0][3 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+              s0 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[0][l], 0xff), _mm256_shuffle_epi32(rawClip[1][l], 0xff));
+              s1 = _mm256_unpacklo_epi64(_mm256_shuffle_epi32(rawClip[2][l], 0xff), _mm256_shuffle_epi32(rawClip[3][l], 0xff));
+              params[k][1][3 + m] = _mm256_blend_epi16(_mm256_shuffle_epi32(s0, 0x88), _mm256_shuffle_epi32(s1, 0x88), 0xf0);
+            }
+          }   // for l
+        }     // for k
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        mmClassIdxBsP = _mm256_set1_epi16( 0 );
+        mmClassIdxBsN = _mm256_set1_epi16( 0 );
+        if( useBounCondition )
+        {
+          mmClassIdxTmp   = _mm256_loadu_si256((const __m256i *) (pClassCodingInfo + j));
+          mmClassIdxBsP   = _mm256_srai_epi16(mmClassIdxTmp, 1);
+          mmClassIdxBsN   = _mm256_sub_epi16( mm01Vector, mmClassIdxBsP);
+        }
+        mmClassIdxResiP = _mm256_set1_epi16( 0 );
+        mmClassIdxResiN = _mm256_set1_epi16( 0 );
+        if( useResiCondition )
+        {
+          mmClassIdxTmp   = _mm256_loadu_si256((const __m256i *) (pClassCodingInfo + j));
+          mmClassIdxBsP   = _mm256_srai_epi16(mmClassIdxTmp, 1);
+          mmClassIdxResiP = _mm256_sub_epi16(mmClassIdxTmp, _mm256_add_epi16(mmClassIdxBsP, mmClassIdxBsP));
+          mmClassIdxResiN = _mm256_sub_epi16( mm01Vector, mmClassIdxResiP);
+        }
+#endif
+
+        const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6;
+        pImg0 = src + j;
+        pImg1 = pImg0 + srcStride;
+        pImg2 = pImg0 - srcStride;
+        pImg3 = pImg1 + srcStride;
+        pImg4 = pImg2 - srcStride;
+        pImg5 = pImg3 + srcStride;
+        pImg6 = pImg4 - srcStride;
+
+        __m256i cur    = _mm256_loadu_si256((const __m256i *) pImg0);
+        __m256i accumA = mmOffset;
+        __m256i accumB = mmOffset;
+
+        auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3)
+        {
+          const __m256i val00 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr0), cur);
+          const __m256i val10 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr2), cur);
+          const __m256i val01 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr1), cur);
+          const __m256i val11 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr3), cur);
+
+          __m256i val01A = _mm256_unpacklo_epi16(val00, val10);
+          __m256i val01B = _mm256_unpackhi_epi16(val00, val10);
+          __m256i val01C = _mm256_unpacklo_epi16(val01, val11);
+          __m256i val01D = _mm256_unpackhi_epi16(val01, val11);
+
+          __m256i limit01A = params[0][1][i];
+          __m256i limit01B = params[1][1][i];
+
+          val01A = _mm256_min_epi16(val01A, limit01A);
+          val01B = _mm256_min_epi16(val01B, limit01B);
+          val01C = _mm256_min_epi16(val01C, limit01A);
+          val01D = _mm256_min_epi16(val01D, limit01B);
+
+          limit01A = _mm256_sub_epi16(_mm256_setzero_si256(), limit01A);
+          limit01B = _mm256_sub_epi16(_mm256_setzero_si256(), limit01B);
+
+          val01A = _mm256_max_epi16(val01A, limit01A);
+          val01B = _mm256_max_epi16(val01B, limit01B);
+          val01C = _mm256_max_epi16(val01C, limit01A);
+          val01D = _mm256_max_epi16(val01D, limit01B);
+
+          val01A = _mm256_add_epi16(val01A, val01C);
+          val01B = _mm256_add_epi16(val01B, val01D);
+
+          const __m256i coeff01A = params[0][0][i];
+          const __m256i coeff01B = params[1][0][i];
+
+          accumA = _mm256_add_epi32(accumA, _mm256_madd_epi16(val01A, coeff01A));
+          accumB = _mm256_add_epi32(accumB, _mm256_madd_epi16(val01B, coeff01B));
+        };
+
+        process2coeffs(0, pImg6 - 0, pImg5 + 0, pImg4 - 1, pImg3 + 1);
+        process2coeffs(1, pImg4 - 0, pImg3 + 0, pImg4 + 1, pImg3 - 1);
+        process2coeffs(2, pImg2 - 2, pImg1 + 2, pImg2 - 1, pImg1 + 1);
+        process2coeffs(3, pImg2 - 0, pImg1 + 0, pImg2 + 1, pImg1 - 1);
+        process2coeffs(4, pImg2 + 2, pImg1 - 2, pImg0 - 3, pImg0 + 3);
+        process2coeffs(5, pImg0 - 2, pImg0 + 2, pImg0 - 1, pImg0 + 1);
+
+        accumA = _mm256_srai_epi32(accumA, shift);
+        accumB = _mm256_srai_epi32(accumB, shift);
+
+        accumA = _mm256_packs_epi32(accumA, accumB);
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        if ( useBounCondition )
+        {
+          accumA = _mm256_min_epi16( mmPOffsetClipVector, accumA);
+          accumA = _mm256_max_epi16( mmNOffsetClipVector, accumA);
+          // accumA is Ori Offset
+          mmOriOffset = accumA;
+          // Calc Sign
+          // P = 1, N = 0
+          mmSignOffsetP = _mm256_abs_epi16( _mm256_cmpgt_epi16(mmOriOffset, mmZeroVector) );
+          // P = 0, N = 1
+          mmSignOffsetN = _mm256_abs_epi16( _mm256_sub_epi16( mm01Vector, mmSignOffsetP));
+          // Calc Abs Offset
+          mmAbsOffset = _mm256_abs_epi16( mmOriOffset );
+          // BS based Adjustment
+          mmAdjOffset = _mm256_mullo_epi16(mmAbsOffset, _mm256_add_epi16( mm16Vector, mmBsFactor));
+          mmAdjOffset = _mm256_add_epi16(mmAdjOffset, mm08Vector);
+          mmAdjOffset = _mm256_srai_epi16(mmAdjOffset, 4);
+
+          __m256i mmTmpAdj = _mm256_mullo_epi16(mmClassIdxBsP, mmAdjOffset);
+          __m256i mmTmpOrg = _mm256_mullo_epi16(mmClassIdxBsN, mmAbsOffset);
+
+          __m256i mmTmpFin = _mm256_add_epi16(mmTmpAdj, mmTmpOrg);
+
+          __m256i mmTmpSignP = _mm256_mullo_epi16(mmSignOffsetP, mmTmpFin);
+          __m256i mmTmpSignN = _mm256_sub_epi16( mmZeroVector, _mm256_mullo_epi16(mmSignOffsetN, mmTmpFin) );
+
+          accumA = _mm256_add_epi16(mmTmpSignP, mmTmpSignN);
+        }
+
+        if ( useResiCondition )
+        {
+          accumA = _mm256_min_epi16( mmPOffsetClipVector, accumA);
+          accumA = _mm256_max_epi16( mmNOffsetClipVector, accumA);
+          // accumA is Ori Offset
+          mmOriOffset = accumA;
+          // Calc Sign
+          // P = 1, N = 0
+          mmSignOffsetP = _mm256_abs_epi16( _mm256_cmpgt_epi16(mmOriOffset, mmZeroVector));
+          // P = 0, N = 1
+          mmSignOffsetN = _mm256_abs_epi16( _mm256_sub_epi16( mm01Vector, mmSignOffsetP));
+          // Calc Abs Offset
+          mmAbsOffset = _mm256_abs_epi16(mmOriOffset);
+          // Resi based Adjustment
+          mmAdjOffset = _mm256_mullo_epi16(mmAbsOffset, _mm256_add_epi16( mm16Vector, mmResiFactor));
+          mmAdjOffset = _mm256_add_epi16(mmAdjOffset, mm08Vector);
+          mmAdjOffset = _mm256_srai_epi16(mmAdjOffset, 4);
+
+          __m256i mmTmpAdj = _mm256_mullo_epi16(mmClassIdxResiP, mmAdjOffset);
+          __m256i mmTmpOrg = _mm256_mullo_epi16(mmClassIdxResiN, mmAbsOffset);
+
+          __m256i mmTmpFin = _mm256_add_epi16(mmTmpAdj, mmTmpOrg);
+
+          __m256i mmTmpSignP = _mm256_mullo_epi16(mmSignOffsetP, mmTmpFin);
+          __m256i mmTmpSignN = _mm256_sub_epi16(mmZeroVector, _mm256_mullo_epi16(mmSignOffsetN, mmTmpFin));
+
+          accumA = _mm256_add_epi16(mmTmpSignP, mmTmpSignN);
+        }
+#endif
+        // Clip Offset
+        accumA = _mm256_min_epi16(accumA, offsetMax);
+        accumA = _mm256_max_epi16(accumA, offsetMin);
+
+        accumA = _mm256_add_epi16(accumA, cur);
+        accumA = _mm256_min_epi16(mmMax, _mm256_max_epi16(accumA, mmMin));
+
+        int curY = blkDst.y + i + padSizeGauss;
+        int curX = blkDst.x + j + padSizeGauss;
+
+        _mm256_storeu_si256((__m256i *) (gaussPic[storeIdx][curY] + curX), accumA);
+      }   // for j
+      src += srcStride * stepY;
+    }   // for i
+  }
+  else //use256BitSimd
+  {
+
+  const __m128i offsetMax = _mm_set1_epi16(diffTH);
+  const __m128i offsetMin = _mm_sub_epi16(_mm_setzero_si128(), offsetMax);
+  const __m128i mmOffset = _mm_set1_epi32(round);
+  const __m128i mmMin = _mm_set1_epi16(clpRng.min);
+  const __m128i mmMax = _mm_set1_epi16(clpRng.max);
+#endif //Use AVX2 SIMD
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  __m128i mmClassIdxBsP, mmClassIdxResiP, mmClassIdxBsN, mmClassIdxResiN, mmClassIdxTmp;
+  __m128i mmOriOffset;
+  __m128i mmSignOffsetP, mmSignOffsetN;
+  __m128i mmAbsOffset;
+  __m128i mmAdjOffset;
+  __m128i mmZeroVector = _mm_set1_epi16( 0 );
+  __m128i mm01Vector   = _mm_set1_epi16( 1 );
+  __m128i mm08Vector   = _mm_set1_epi16( 8 );
+  __m128i mm16Vector   = _mm_set1_epi16( 16 );
+  __m128i mmPOffsetClipVector = _mm_set1_epi16( +offsetClipValue );
+  __m128i mmNOffsetClipVector = _mm_set1_epi16( -offsetClipValue );
+  //Set Factor
+  __m128i mmBsFactor = isIntraSlice ? _mm_set1_epi16( 4 + 2 ) : _mm_set1_epi16( 3 + 2 );
+  __m128i mmResiFactor = isIntraSlice ? _mm_set1_epi16( 0 >> (!isSpsAdjust ? 1 : 0) ) : _mm_set1_epi16( 3 >> (!isSpsAdjust ? 1 : 0) );
+#endif
   for (int i = 0; i < height; i += stepY)
   {
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    AlfClassifier *pClassCodingInfo = nullptr;
+    if( useBounCondition || useResiCondition )
+    {
+      pClassCodingInfo = classifierCodingInfo[blkDst.y + i] + blkDst.x;
+    }
+#endif
     for (int j = 0; j < width; j += stepX)
     {
       __m128i params[2][2][6];
@@ -4697,6 +6497,25 @@ static void simdGaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelB
           }
         }//for l
       }//for k
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      mmClassIdxBsP = _mm_set1_epi16( 0 );
+      mmClassIdxBsN = _mm_set1_epi16( 0 );
+      if( useBounCondition )
+      {
+        mmClassIdxTmp   = _mm_loadu_si128( (const __m128i *) (pClassCodingInfo + j));
+        mmClassIdxBsP   = _mm_srai_epi16( mmClassIdxTmp, 1 );
+        mmClassIdxBsN   = _mm_sub_epi16( mm01Vector, mmClassIdxBsP );
+      }
+      mmClassIdxResiP = _mm_set1_epi16( 0 );
+      mmClassIdxResiN = _mm_set1_epi16( 0 );
+      if( useResiCondition )
+      {
+        mmClassIdxTmp   = _mm_loadu_si128( (const __m128i *) (pClassCodingInfo + j));
+        mmClassIdxBsP   = _mm_srai_epi16( mmClassIdxTmp, 1 );
+        mmClassIdxResiP = _mm_sub_epi16(  mmClassIdxTmp, _mm_add_epi16( mmClassIdxBsP, mmClassIdxBsP) );
+        mmClassIdxResiN = _mm_sub_epi16( mm01Vector, mmClassIdxResiP );
+      }
+#endif
 
       const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6;
       pImg0 = src + j;
@@ -4759,6 +6578,65 @@ static void simdGaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelB
       accumB = _mm_srai_epi32(accumB, shift);
 
       accumA = _mm_packs_epi32(accumA, accumB);
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      if( useBounCondition )
+      {
+        accumA = _mm_min_epi16( mmPOffsetClipVector, accumA );
+        accumA = _mm_max_epi16( mmNOffsetClipVector, accumA );
+        //accumA is Ori Offset
+        mmOriOffset = accumA;
+        //Calc Sign
+        //P = 1, N = 0
+        mmSignOffsetP = _mm_abs_epi16( _mm_cmpgt_epi16( mmOriOffset , mmZeroVector ));
+        //P = 0, N = 1
+        mmSignOffsetN = _mm_abs_epi16( _mm_sub_epi16( mm01Vector, mmSignOffsetP ));
+        //Calc Abs Offset
+        mmAbsOffset = _mm_abs_epi16( mmOriOffset );
+        //BS based Adjustment
+        mmAdjOffset = _mm_mullo_epi16( mmAbsOffset, _mm_add_epi16( mm16Vector, mmBsFactor ) );
+        mmAdjOffset = _mm_add_epi16( mmAdjOffset, mm08Vector );
+        mmAdjOffset = _mm_srai_epi16( mmAdjOffset, 4 );
+
+        __m128i mmTmpAdj = _mm_mullo_epi16( mmClassIdxBsP, mmAdjOffset );
+        __m128i mmTmpOrg = _mm_mullo_epi16( mmClassIdxBsN, mmAbsOffset );
+
+        __m128i mmTmpFin = _mm_add_epi16( mmTmpAdj, mmTmpOrg );
+
+        __m128i mmTmpSignP = _mm_mullo_epi16( mmSignOffsetP, mmTmpFin );
+        __m128i mmTmpSignN = _mm_sub_epi16( mmZeroVector, _mm_mullo_epi16( mmSignOffsetN, mmTmpFin ));
+
+        accumA = _mm_add_epi16( mmTmpSignP, mmTmpSignN );
+      }
+
+      if( useResiCondition )
+      {
+        accumA = _mm_min_epi16( mmPOffsetClipVector, accumA );
+        accumA = _mm_max_epi16( mmNOffsetClipVector, accumA );
+        //accumA is Ori Offset
+        mmOriOffset = accumA;
+        //Calc Sign
+        //P = 1, N = 0
+        mmSignOffsetP = _mm_abs_epi16( _mm_cmpgt_epi16( mmOriOffset , mmZeroVector ));
+        //P = 0, N = 1
+        mmSignOffsetN = _mm_abs_epi16( _mm_sub_epi16( mm01Vector, mmSignOffsetP));
+        //Calc Abs Offset
+        mmAbsOffset = _mm_abs_epi16( mmOriOffset );
+        //Resi based Adjustment
+        mmAdjOffset = _mm_mullo_epi16( mmAbsOffset, _mm_add_epi16( mm16Vector, mmResiFactor ) );
+        mmAdjOffset = _mm_add_epi16( mmAdjOffset, mm08Vector );
+        mmAdjOffset = _mm_srai_epi16( mmAdjOffset, 4 );
+
+        __m128i mmTmpAdj = _mm_mullo_epi16( mmClassIdxResiP, mmAdjOffset );
+        __m128i mmTmpOrg = _mm_mullo_epi16( mmClassIdxResiN, mmAbsOffset );
+
+        __m128i mmTmpFin = _mm_add_epi16( mmTmpAdj, mmTmpOrg );
+
+        __m128i mmTmpSignP = _mm_mullo_epi16( mmSignOffsetP, mmTmpFin );
+        __m128i mmTmpSignN = _mm_sub_epi16( mmZeroVector, _mm_mullo_epi16( mmSignOffsetN, mmTmpFin ));
+
+        accumA = _mm_add_epi16( mmTmpSignP, mmTmpSignN );
+      }
+#endif
 
       //Clip Offset
       accumA = _mm_min_epi16(accumA, offsetMax);
@@ -4774,6 +6652,9 @@ static void simdGaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelB
     }//for j
     src += srcStride * stepY;
   }//for i
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  }//use256BitSimd
+#endif
 }
 #endif
 
@@ -4893,7 +6774,11 @@ static void simdFixFilter13x13Db9Blk( AlfClassifier **classifier, const CPelBuf
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
   const CPelBuf &srcLumaBeforeDb,
 #endif
-  Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4] )
+  Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , bool applyCodingInfo, CodingStructure &cs, AlfClassifier** classifierCodingInfo
+#endif
+  )
 #else
 static void simdFilter13x13Blk( AlfClassifier **classifier, const CPelBuf &srcLuma, const Area& curBlk,
 #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
@@ -4925,6 +6810,15 @@ static void simdFilter13x13Blk( AlfClassifier **classifier, const CPelBuf &srcLu
   const __m128i mm11 = _mm_set1_epi8(1);
   const __m128i mm3 = _mm_set1_epi16(3);
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool isIntraSlice = cs.slice->isIntra();
+  const bool isSpsAdjust = cs.sps->getAlfLumaFixedFilterAdjust();
+  const bool useCodingInfo = true;
+
+  const bool useBounCondition = applyCodingInfo && !( !isSpsAdjust && isIntraSlice ) && useCodingInfo;
+  const bool useResiCondition = applyCodingInfo && (isSpsAdjust || !isSpsAdjust) && !isIntraSlice && useCodingInfo;
+  const int offsetClipValue = 1 << (clpRng.bd - 1);
+#endif
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
   const int srcBeforeDbStride = srcLumaBeforeDb.stride;
   const Pel *srcBeforeDb = srcLumaBeforeDb.buf + curBlk.y * srcBeforeDbStride + curBlk.x;
@@ -4946,6 +6840,22 @@ static void simdFilter13x13Blk( AlfClassifier **classifier, const CPelBuf &srcLu
     mmClippingValues256 = _mm256_insertf128_si256(mmClippingValues256, mmClippingValues, 1);
     const __m256i mm11 = _mm256_set1_epi8(1);
     const __m256i mm3 = _mm256_set1_epi16(3);
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    __m256i mmClassIdxBsP, mmClassIdxResiP, mmClassIdxBsN, mmClassIdxResiN, mmClassIdxTmp;
+    __m256i mmOriOffset;
+    __m256i mmSignOffsetP, mmSignOffsetN;
+    __m256i mmAbsOffset;
+    __m256i mmAdjOffset;
+    __m256i mmZeroVector = _mm256_set1_epi16( 0 );
+    __m256i mm01Vector   = _mm256_set1_epi16( 1 );
+    __m256i mm08Vector   = _mm256_set1_epi16( 8 );
+    __m256i mm16Vector   = _mm256_set1_epi16( 16 );
+    __m256i mmPOffsetClipVector = _mm256_set1_epi16( +offsetClipValue );
+    __m256i mmNOffsetClipVector = _mm256_set1_epi16( -offsetClipValue );
+    //Set Factor
+    __m256i mmBsFactor = isIntraSlice ? _mm256_set1_epi16( 4 ) : _mm256_set1_epi16( 3 );
+    __m256i mmResiFactor = isIntraSlice ? _mm256_set1_epi16( 0 >> (!isSpsAdjust ? 1 : 0) ) : _mm256_set1_epi16( 3 >> (!isSpsAdjust ? 1 : 0) );
+#endif
     for (int i = 0; i < height; i += stepY)
     {
 #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
@@ -4953,6 +6863,13 @@ static void simdFilter13x13Blk( AlfClassifier **classifier, const CPelBuf &srcLu
 #else
       const AlfClassifier *pClass = classifier[curBlk.y + i] + curBlk.x;
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      AlfClassifier *pClassCodingInfo = nullptr;
+      if( useBounCondition || useResiCondition )
+      {
+        pClassCodingInfo = classifierCodingInfo[blkDst.y + i] + blkDst.x;
+      }
+#endif
 
       for (int j = 0; j < width; j += stepX * 2)
       {
@@ -5125,6 +7042,25 @@ static void simdFilter13x13Blk( AlfClassifier **classifier, const CPelBuf &srcLu
         params[29] = _mm256_unpackhi_epi64(_mm256_unpacklo_epi32(rawCoef[0][8], rawCoef[1][8]), _mm256_unpacklo_epi32(rawCoef[2][8], rawCoef[3][8]));
         params[30] = _mm256_unpacklo_epi64(_mm256_unpackhi_epi32(rawCoef[0][8], rawCoef[1][8]), _mm256_unpackhi_epi32(rawCoef[2][8], rawCoef[3][8]));
         params[31] = _mm256_unpackhi_epi64(_mm256_unpackhi_epi32(rawCoef[0][8], rawCoef[1][8]), _mm256_unpackhi_epi32(rawCoef[2][8], rawCoef[3][8]));
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        mmClassIdxBsP = _mm256_set1_epi16( 0 );
+        mmClassIdxBsN = _mm256_set1_epi16( 0 );
+        if( useBounCondition )
+        {
+          mmClassIdxTmp   = _mm256_loadu_si256( (const __m256i *) (pClassCodingInfo + j));
+          mmClassIdxBsP   = _mm256_srai_epi16( mmClassIdxTmp, 1 );
+          mmClassIdxBsN   = _mm256_sub_epi16( mm01Vector, mmClassIdxBsP );
+        }
+        mmClassIdxResiP = _mm256_set1_epi16( 0 );
+        mmClassIdxResiN = _mm256_set1_epi16( 0 );
+        if( useResiCondition )
+        {
+          mmClassIdxTmp   = _mm256_loadu_si256( (const __m256i *) (pClassCodingInfo + j));
+          mmClassIdxBsP   = _mm256_srai_epi16( mmClassIdxTmp, 1 );
+          mmClassIdxResiP = _mm256_sub_epi16( mmClassIdxTmp, _mm256_add_epi16( mmClassIdxBsP, mmClassIdxBsP ) );
+          mmClassIdxResiN = _mm256_sub_epi16( mm01Vector, mmClassIdxResiP );
+        }
 #endif
         for (int ii = 0; ii < stepY; ii++)
         {
@@ -5269,6 +7205,65 @@ static void simdFilter13x13Blk( AlfClassifier **classifier, const CPelBuf &srcLu
           accumB = _mm256_srai_epi32(accumB, shift);
 
           accumA = _mm256_blend_epi16(accumA, _mm256_slli_si256(accumB, 2), 0xAA);
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+          if( useBounCondition )
+          {
+            accumA = _mm256_min_epi16( mmPOffsetClipVector, accumA );
+            accumA = _mm256_max_epi16( mmNOffsetClipVector, accumA );
+            //accumA is Ori Offset
+            mmOriOffset = accumA;
+            //Calc Sign
+            //P = 1, N = 0
+            mmSignOffsetP = _mm256_abs_epi16( _mm256_cmpgt_epi16( mmOriOffset , mmZeroVector ));
+            //P = 0, N = 1
+            mmSignOffsetN = _mm256_abs_epi16( _mm256_sub_epi16( mm01Vector, mmSignOffsetP ));
+            //Calc Abs Offset
+            mmAbsOffset = _mm256_abs_epi16( mmOriOffset );
+            //BS based Adjustment
+            mmAdjOffset = _mm256_mullo_epi16( mmAbsOffset, _mm256_add_epi16( mm16Vector, mmBsFactor ) );
+            mmAdjOffset = _mm256_add_epi16( mmAdjOffset, mm08Vector );
+            mmAdjOffset = _mm256_srai_epi16( mmAdjOffset, 4 );
+
+            __m256i mmTmpAdj = _mm256_mullo_epi16( mmClassIdxBsP, mmAdjOffset );
+            __m256i mmTmpOrg = _mm256_mullo_epi16( mmClassIdxBsN, mmAbsOffset );
+
+            __m256i mmTmpFin = _mm256_add_epi16( mmTmpAdj, mmTmpOrg );
+
+            __m256i mmTmpSignP = _mm256_mullo_epi16( mmSignOffsetP, mmTmpFin );
+            __m256i mmTmpSignN = _mm256_sub_epi16( mmZeroVector, _mm256_mullo_epi16( mmSignOffsetN, mmTmpFin ));
+
+            accumA = _mm256_add_epi16( mmTmpSignP, mmTmpSignN );
+          }
+
+          if( useResiCondition )
+          {
+            accumA = _mm256_min_epi16( mmPOffsetClipVector, accumA );
+            accumA = _mm256_max_epi16( mmNOffsetClipVector, accumA );
+            //accumA is Ori Offset
+            mmOriOffset = accumA;
+            //Calc Sign
+            //P = 1, N = 0
+            mmSignOffsetP = _mm256_abs_epi16( _mm256_cmpgt_epi16( mmOriOffset , mmZeroVector ));
+            //P = 0, N = 1
+            mmSignOffsetN = _mm256_abs_epi16( _mm256_sub_epi16( mm01Vector, mmSignOffsetP ));
+            //Calc Abs Offset
+            mmAbsOffset = _mm256_abs_epi16( mmOriOffset );
+            //Resi based Adjustment
+            mmAdjOffset = _mm256_mullo_epi16( mmAbsOffset, _mm256_add_epi16( mm16Vector, mmResiFactor ) );
+            mmAdjOffset = _mm256_add_epi16( mmAdjOffset, mm08Vector );
+            mmAdjOffset = _mm256_srai_epi16( mmAdjOffset, 4 );
+
+            __m256i mmTmpAdj = _mm256_mullo_epi16( mmClassIdxResiP, mmAdjOffset );
+            __m256i mmTmpOrg = _mm256_mullo_epi16( mmClassIdxResiN, mmAbsOffset );
+
+            __m256i mmTmpFin = _mm256_add_epi16( mmTmpAdj, mmTmpOrg );
+
+            __m256i mmTmpSignP = _mm256_mullo_epi16( mmSignOffsetP, mmTmpFin );
+            __m256i mmTmpSignN = _mm256_sub_epi16( mmZeroVector, _mm256_mullo_epi16( mmSignOffsetN, mmTmpFin ));
+
+            accumA = _mm256_add_epi16( mmTmpSignP, mmTmpSignN );
+          }
+#endif
           accumA = _mm256_add_epi16(accumA, cur);
           accumA = _mm256_min_epi16(mmMax, _mm256_max_epi16(accumA, mmMin));
 
@@ -5301,6 +7296,22 @@ static void simdFilter13x13Blk( AlfClassifier **classifier, const CPelBuf &srcLu
     const __m128i mmClippingValues = _mm_loadl_epi64((const __m128i *)clippingValues);
     const __m128i mm11 = _mm_set1_epi8(1);
     const __m128i mm3 = _mm_set1_epi16(3);
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    __m128i mmClassIdxBsP, mmClassIdxResiP, mmClassIdxBsN, mmClassIdxResiN, mmClassIdxTmp;
+    __m128i mmOriOffset;
+    __m128i mmSignOffsetP, mmSignOffsetN;
+    __m128i mmAbsOffset;
+    __m128i mmAdjOffset;
+    __m128i mmZeroVector = _mm_set1_epi16( 0 );
+    __m128i mm01Vector   = _mm_set1_epi16( 1 );
+    __m128i mm08Vector   = _mm_set1_epi16( 8 );
+    __m128i mm16Vector   = _mm_set1_epi16( 16 );
+    __m128i mmPOffsetClipVector = _mm_set1_epi16( +offsetClipValue );
+    __m128i mmNOffsetClipVector = _mm_set1_epi16( -offsetClipValue );
+    //Set Factor
+    __m128i mmBsFactor = isIntraSlice ? _mm_set1_epi16( 4 ) : _mm_set1_epi16( 3 );
+    __m128i mmResiFactor = isIntraSlice ? _mm_set1_epi16( 0 >> (!isSpsAdjust ? 1 : 0) ) : _mm_set1_epi16( 3 >> (!isSpsAdjust ? 1 : 0) );
 #endif
   for (int i = 0; i < height; i += stepY)
   {
@@ -5309,6 +7320,13 @@ static void simdFilter13x13Blk( AlfClassifier **classifier, const CPelBuf &srcLu
 #else
     const AlfClassifier *pClass = classifier[curBlk.y + i] + curBlk.x;
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    AlfClassifier *pClassCodingInfo = nullptr;
+    if( useBounCondition || useResiCondition )
+    {
+      pClassCodingInfo = classifierCodingInfo[blkDst.y + i] + blkDst.x;
+    }
+#endif
 
     for (int j = 0; j < width; j += stepX)
     {
@@ -5414,6 +7432,25 @@ static void simdFilter13x13Blk( AlfClassifier **classifier, const CPelBuf &srcLu
       params[30] = _mm_unpacklo_epi64(_mm_unpackhi_epi32(rawCoef[0][8], rawCoef[1][8]), _mm_unpackhi_epi32(rawCoef[2][8], rawCoef[3][8]));
       params[31] = _mm_unpackhi_epi64(_mm_unpackhi_epi32(rawCoef[0][8], rawCoef[1][8]), _mm_unpackhi_epi32(rawCoef[2][8], rawCoef[3][8]));
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      mmClassIdxBsP = _mm_set1_epi16( 0 ); // defaults used when useBounCondition is false
+      mmClassIdxBsN = _mm_set1_epi16( 0 );
+      if( useBounCondition )
+      {
+        mmClassIdxTmp = _mm_loadu_si128( (const __m128i *) ( pClassCodingInfo + j ) ); // 128-bit unaligned load, processed as 16-bit lanes below (assumes AlfClassifier is 16 bit wide - TODO confirm)
+        mmClassIdxBsP = _mm_srai_epi16( mmClassIdxTmp, 1 ); // class value >> 1
+        mmClassIdxBsN = _mm_sub_epi16( mm01Vector, mmClassIdxBsP ); // 1 - BsP
+      }
+      mmClassIdxResiP = _mm_set1_epi16( 0 ); // defaults used when useResiCondition is false
+      mmClassIdxResiN = _mm_set1_epi16( 0 );
+      if( useResiCondition )
+      {
+        mmClassIdxTmp = _mm_loadu_si128( (const __m128i *) ( pClassCodingInfo + j ) ); // same address as in the branch above
+        mmClassIdxBsP = _mm_srai_epi16( mmClassIdxTmp, 1 ); // recomputed so this branch does not rely on the one above; mmClassIdxBsN is not touched here
+        mmClassIdxResiP = _mm_sub_epi16( mmClassIdxTmp , _mm_add_epi16( mmClassIdxBsP, mmClassIdxBsP) ); // value - 2 * ( value >> 1 ), i.e. the lowest bit of the class value
+        mmClassIdxResiN = _mm_sub_epi16( mm01Vector, mmClassIdxResiP ); // 1 - ResiP
+      }
+#endif
 
       for (int ii = 0; ii < stepY; ii++)
       {
@@ -5558,6 +7595,65 @@ static void simdFilter13x13Blk( AlfClassifier **classifier, const CPelBuf &srcLu
         accumB = _mm_srai_epi32(accumB, shift);
 
         accumA = _mm_blend_epi16(accumA, _mm_slli_si128(accumB, 2), 0xAA);
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        if( useBounCondition )
+        {
+          accumA = _mm_min_epi16( mmPOffsetClipVector, accumA ); // clip the offset to at most +offsetClipValue
+          accumA = _mm_max_epi16( mmNOffsetClipVector, accumA ); // and to at least -offsetClipValue
+          // accumA holds the clipped filter offset; keep a copy before it is rewritten
+          mmOriOffset = accumA;
+          // Build per-lane 0/1 sign selectors (cmpgt yields all-ones, i.e. -1, where true; abs maps that to 1)
+          // mmSignOffsetP: 1 where offset > 0, else 0
+          mmSignOffsetP = _mm_abs_epi16( _mm_cmpgt_epi16( mmOriOffset , mmZeroVector ));
+          // mmSignOffsetN: 1 where offset <= 0, else 0
+          mmSignOffsetN = _mm_abs_epi16( _mm_sub_epi16( mm01Vector, mmSignOffsetP ));
+          // Magnitude of the offset
+          mmAbsOffset = _mm_abs_epi16( mmOriOffset );
+          // BS based adjustment: ( abs * ( 16 + mmBsFactor ) + 8 ) >> 4
+          mmAdjOffset = _mm_mullo_epi16( mmAbsOffset, _mm_add_epi16( mm16Vector, mmBsFactor ) );
+          mmAdjOffset = _mm_add_epi16( mmAdjOffset, mm08Vector );
+          mmAdjOffset = _mm_srai_epi16( mmAdjOffset, 4 );
+
+          __m128i mmTmpAdj = _mm_mullo_epi16( mmClassIdxBsP, mmAdjOffset ); // rescaled magnitude weighted by mmClassIdxBsP
+          __m128i mmTmpOrg = _mm_mullo_epi16( mmClassIdxBsN, mmAbsOffset ); // original magnitude weighted by mmClassIdxBsN ( = 1 - mmClassIdxBsP )
+
+          __m128i mmTmpFin = _mm_add_epi16( mmTmpAdj, mmTmpOrg ); // combined magnitude
+
+          __m128i mmTmpSignP = _mm_mullo_epi16( mmSignOffsetP, mmTmpFin ); // +magnitude in lanes whose offset was > 0
+          __m128i mmTmpSignN = _mm_sub_epi16( mmZeroVector, _mm_mullo_epi16( mmSignOffsetN, mmTmpFin )); // -magnitude in lanes whose offset was <= 0
+
+          accumA = _mm_add_epi16( mmTmpSignP, mmTmpSignN ); // signed, adjusted offset
+        }
+
+        if( useResiCondition )
+        {
+          accumA = _mm_min_epi16( mmPOffsetClipVector, accumA ); // clip again; accumA may have been rescaled by the stage above
+          accumA = _mm_max_epi16( mmNOffsetClipVector, accumA );
+          // accumA holds the clipped offset entering this stage; keep a copy before it is rewritten
+          mmOriOffset = accumA;
+          // Build per-lane 0/1 sign selectors (cmpgt yields all-ones, i.e. -1, where true; abs maps that to 1)
+          // mmSignOffsetP: 1 where offset > 0, else 0
+          mmSignOffsetP = _mm_abs_epi16( _mm_cmpgt_epi16( mmOriOffset , mmZeroVector ));
+          // mmSignOffsetN: 1 where offset <= 0, else 0
+          mmSignOffsetN = _mm_abs_epi16( _mm_sub_epi16( mm01Vector, mmSignOffsetP ));
+          // Magnitude of the offset
+          mmAbsOffset = _mm_abs_epi16( mmOriOffset );
+          // Resi based adjustment: ( abs * ( 16 + mmResiFactor ) + 8 ) >> 4
+          mmAdjOffset = _mm_mullo_epi16( mmAbsOffset, _mm_add_epi16( mm16Vector, mmResiFactor ) );
+          mmAdjOffset = _mm_add_epi16( mmAdjOffset, mm08Vector );
+          mmAdjOffset = _mm_srai_epi16( mmAdjOffset, 4 );
+
+          __m128i mmTmpAdj = _mm_mullo_epi16( mmClassIdxResiP, mmAdjOffset ); // rescaled magnitude weighted by mmClassIdxResiP
+          __m128i mmTmpOrg = _mm_mullo_epi16( mmClassIdxResiN, mmAbsOffset ); // original magnitude weighted by mmClassIdxResiN ( = 1 - mmClassIdxResiP )
+
+          __m128i mmTmpFin = _mm_add_epi16( mmTmpAdj, mmTmpOrg ); // combined magnitude
+
+          __m128i mmTmpSignP = _mm_mullo_epi16( mmSignOffsetP, mmTmpFin ); // +magnitude in lanes whose offset was > 0
+          __m128i mmTmpSignN = _mm_sub_epi16( mmZeroVector, _mm_mullo_epi16( mmSignOffsetN, mmTmpFin )); // -magnitude in lanes whose offset was <= 0
+
+          accumA = _mm_add_epi16( mmTmpSignP, mmTmpSignN ); // signed, adjusted offset
+        }
+#endif
         accumA = _mm_add_epi16(accumA, cur);
         accumA = _mm_min_epi16(mmMax, _mm_max_epi16(accumA, mmMin));
 
@@ -5588,7 +7684,11 @@ static void simdFilter13x13Blk( AlfClassifier **classifier, const CPelBuf &srcLu
 
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
 template<X86_VEXT vext>
-static void simdFixFilter9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &srcLuma, const Area& curBlk, const Area &blkDst, const CPelBuf &srcLumaBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4])
+static void simdFixFilter9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &srcLuma, const Area& curBlk, const Area &blkDst, const CPelBuf &srcLumaBeforeDb, Pel ***fixedFilterResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    , bool applyCodingInfo, CodingStructure &cs, AlfClassifier** classifierCodingInfo
+#endif
+  )
 {
   const int srcStride = srcLuma.stride;
   constexpr int shift = AdaptiveLoopFilter::m_NUM_BITS_FIXED_FILTER - 1;
@@ -5613,6 +7713,15 @@ static void simdFixFilter9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &sr
   const __m128i mm11 = _mm_set1_epi8(1);
   const __m128i mm3 = _mm_set1_epi16(3);
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool isIntraSlice = cs.slice->isIntra();
+  const bool isSpsAdjust = cs.sps->getAlfLumaFixedFilterAdjust();
+  const bool useCodingInfo = true; // always enabled in this function
+
+  const bool useBounCondition = applyCodingInfo && !( !isSpsAdjust && isIntraSlice ) && useCodingInfo; // off only for intra slices when the SPS adjust flag is not set
+  const bool useResiCondition = applyCodingInfo && (isSpsAdjust || !isSpsAdjust) && !isIntraSlice && useCodingInfo; // NOTE(review): ( isSpsAdjust || !isSpsAdjust ) is always true, so this is applyCodingInfo && !isIntraSlice && useCodingInfo
+  const int offsetClipValue = 1 << ( clpRng.bd - 1 ); // magnitude bound applied to the filter offset before adjustment
+#endif
 
   const int srcBeforeDbStride = srcLumaBeforeDb.stride;
   const Pel *srcBeforeDb = srcLumaBeforeDb.buf + curBlk.y * srcBeforeDbStride + curBlk.x;
@@ -5630,6 +7739,22 @@ static void simdFixFilter9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &sr
     mmClippingValues256 = _mm256_insertf128_si256(mmClippingValues256, mmClippingValues, 1);
     const __m256i mm11 = _mm256_set1_epi8(1);
     const __m256i mm3 = _mm256_set1_epi16(3);
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    __m256i mmClassIdxBsP, mmClassIdxResiP, mmClassIdxBsN, mmClassIdxResiN, mmClassIdxTmp; // per-lane weights derived from classifierCodingInfo (presumably 0 or 1 - TODO confirm)
+    __m256i mmOriOffset; // filter offset after clipping, before adjustment
+    __m256i mmSignOffsetP, mmSignOffsetN; // 1 where the offset is > 0 / 1 where the offset is <= 0
+    __m256i mmAbsOffset; // magnitude of the offset
+    __m256i mmAdjOffset; // magnitude rescaled as ( abs * ( 16 + factor ) + 8 ) >> 4
+    __m256i mmZeroVector = _mm256_set1_epi16( 0 );
+    __m256i mm01Vector   = _mm256_set1_epi16( 1 );
+    __m256i mm08Vector   = _mm256_set1_epi16( 8 ); // rounding term for the >> 4 in the adjustment
+    __m256i mm16Vector   = _mm256_set1_epi16( 16 ); // base weight; the factors below are added to it
+    __m256i mmPOffsetClipVector = _mm256_set1_epi16( +offsetClipValue ); // upper clip bound; NOTE(review): narrowed to 16 bit, assumes clpRng.bd <= 15
+    __m256i mmNOffsetClipVector = _mm256_set1_epi16( -offsetClipValue ); // lower clip bound
+    // Extra weights added to 16 before the >> 4 in the adjustment stages
+    __m256i mmBsFactor = isIntraSlice ? _mm256_set1_epi16( 4 ) : _mm256_set1_epi16( 3 ); // 4 for intra slices, 3 otherwise
+    __m256i mmResiFactor = isIntraSlice ? _mm256_set1_epi16( 0 >> (!isSpsAdjust ? 1 : 0) ) : _mm256_set1_epi16( 3 >> (!isSpsAdjust ? 1 : 0) ); // intra: always 0; otherwise 3 when isSpsAdjust, else 3 >> 1 = 1
+#endif
     for (int i = 0; i < height; i += stepY)
     {
 #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
@@ -5637,6 +7762,13 @@ static void simdFixFilter9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &sr
 #else
       const AlfClassifier *pClass = classifier[curBlk.y + i] + curBlk.x;
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      AlfClassifier *pClassCodingInfo = nullptr;
+      if( useBounCondition || useResiCondition )
+      {
+        pClassCodingInfo = classifierCodingInfo[blkDst.y + i] + blkDst.x;
+      }
+#endif
 
       for (int j = 0; j < width; j += stepX * 2)
       {
@@ -5735,7 +7867,25 @@ static void simdFixFilter9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &sr
         params[18] = _mm256_unpackhi_epi64(_mm256_unpacklo_epi32(rawCoef[0][5], rawCoef[1][5]), _mm256_unpacklo_epi32(rawCoef[2][5], rawCoef[3][5]));
         params[19] = _mm256_unpacklo_epi64(_mm256_unpackhi_epi32(rawCoef[0][5], rawCoef[1][5]), _mm256_unpackhi_epi32(rawCoef[2][5], rawCoef[3][5]));
         params[20] = _mm256_unpackhi_epi64(_mm256_unpackhi_epi32(rawCoef[0][5], rawCoef[1][5]), _mm256_unpackhi_epi32(rawCoef[2][5], rawCoef[3][5]));
-
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        mmClassIdxBsP = _mm256_set1_epi16( 0 ); // defaults used when useBounCondition is false
+        mmClassIdxBsN = _mm256_set1_epi16( 0 );
+        if( useBounCondition )
+        {
+          mmClassIdxTmp   = _mm256_loadu_si256( (const __m256i *) (pClassCodingInfo + j)); // 256-bit unaligned load, processed as 16-bit lanes below (assumes AlfClassifier is 16 bit wide - TODO confirm)
+          mmClassIdxBsP   = _mm256_srai_epi16( mmClassIdxTmp, 1 ); // class value >> 1
+          mmClassIdxBsN   = _mm256_sub_epi16( mm01Vector, mmClassIdxBsP ); // 1 - BsP
+        }
+        mmClassIdxResiP = _mm256_set1_epi16( 0 ); // defaults used when useResiCondition is false
+        mmClassIdxResiN = _mm256_set1_epi16( 0 );
+        if( useResiCondition )
+        {
+          mmClassIdxTmp   = _mm256_loadu_si256( (const __m256i *) (pClassCodingInfo + j)); // same address as in the branch above
+          mmClassIdxBsP   = _mm256_srai_epi16( mmClassIdxTmp, 1 ); // recomputed so this branch does not rely on the one above; mmClassIdxBsN is not touched here
+          mmClassIdxResiP = _mm256_sub_epi16( mmClassIdxTmp, _mm256_add_epi16( mmClassIdxBsP, mmClassIdxBsP ) ); // value - 2 * ( value >> 1 ), i.e. the lowest bit of the class value
+          mmClassIdxResiN = _mm256_sub_epi16( mm01Vector, mmClassIdxResiP ); // 1 - ResiP
+        }
+#endif
         for (int ii = 0; ii < stepY; ii++)
         {
           const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6, *pImg7, *pImg8;
@@ -5836,6 +7986,65 @@ static void simdFixFilter9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &sr
           accumB = _mm256_srai_epi32(accumB, shift);
 
           accumA = _mm256_blend_epi16(accumA, _mm256_slli_si256(accumB, 2), 0xAA);
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+          if( useBounCondition )
+          {
+            accumA = _mm256_min_epi16( mmPOffsetClipVector, accumA ); // clip the offset to at most +offsetClipValue
+            accumA = _mm256_max_epi16( mmNOffsetClipVector, accumA ); // and to at least -offsetClipValue
+            // accumA holds the clipped filter offset; keep a copy before it is rewritten
+            mmOriOffset = accumA;
+            // Build per-lane 0/1 sign selectors (cmpgt yields all-ones, i.e. -1, where true; abs maps that to 1)
+            // mmSignOffsetP: 1 where offset > 0, else 0
+            mmSignOffsetP = _mm256_abs_epi16( _mm256_cmpgt_epi16( mmOriOffset , mmZeroVector ));
+            // mmSignOffsetN: 1 where offset <= 0, else 0
+            mmSignOffsetN = _mm256_abs_epi16( _mm256_sub_epi16( mm01Vector,  mmSignOffsetP ));
+            // Magnitude of the offset
+            mmAbsOffset = _mm256_abs_epi16( mmOriOffset );
+            // BS based adjustment: ( abs * ( 16 + mmBsFactor ) + 8 ) >> 4
+            mmAdjOffset = _mm256_mullo_epi16( mmAbsOffset, _mm256_add_epi16( mm16Vector, mmBsFactor ) );
+            mmAdjOffset = _mm256_add_epi16( mmAdjOffset, mm08Vector );
+            mmAdjOffset = _mm256_srai_epi16( mmAdjOffset, 4 );
+
+            __m256i mmTmpAdj = _mm256_mullo_epi16( mmClassIdxBsP, mmAdjOffset ); // rescaled magnitude weighted by mmClassIdxBsP
+            __m256i mmTmpOrg = _mm256_mullo_epi16( mmClassIdxBsN, mmAbsOffset ); // original magnitude weighted by mmClassIdxBsN ( = 1 - mmClassIdxBsP )
+
+            __m256i mmTmpFin = _mm256_add_epi16( mmTmpAdj, mmTmpOrg ); // combined magnitude
+
+            __m256i mmTmpSignP = _mm256_mullo_epi16( mmSignOffsetP, mmTmpFin ); // +magnitude in lanes whose offset was > 0
+            __m256i mmTmpSignN = _mm256_sub_epi16( mmZeroVector, _mm256_mullo_epi16( mmSignOffsetN, mmTmpFin )); // -magnitude in lanes whose offset was <= 0
+
+            accumA = _mm256_add_epi16( mmTmpSignP, mmTmpSignN ); // signed, adjusted offset
+          }
+
+          if( useResiCondition )
+          {
+            accumA = _mm256_min_epi16( mmPOffsetClipVector, accumA ); // clip again; accumA may have been rescaled by the stage above
+            accumA = _mm256_max_epi16( mmNOffsetClipVector, accumA );
+            // accumA holds the clipped offset entering this stage; keep a copy before it is rewritten
+            mmOriOffset = accumA;
+            // Build per-lane 0/1 sign selectors (cmpgt yields all-ones, i.e. -1, where true; abs maps that to 1)
+            // mmSignOffsetP: 1 where offset > 0, else 0
+            mmSignOffsetP = _mm256_abs_epi16( _mm256_cmpgt_epi16( mmOriOffset , mmZeroVector ));
+            // mmSignOffsetN: 1 where offset <= 0, else 0
+            mmSignOffsetN = _mm256_abs_epi16( _mm256_sub_epi16( mm01Vector, mmSignOffsetP ));
+            // Magnitude of the offset
+            mmAbsOffset = _mm256_abs_epi16( mmOriOffset );
+            // Resi based adjustment: ( abs * ( 16 + mmResiFactor ) + 8 ) >> 4
+            mmAdjOffset = _mm256_mullo_epi16( mmAbsOffset, _mm256_add_epi16( mm16Vector, mmResiFactor ) );
+            mmAdjOffset = _mm256_add_epi16( mmAdjOffset, mm08Vector );
+            mmAdjOffset = _mm256_srai_epi16( mmAdjOffset, 4 );
+
+            __m256i mmTmpAdj = _mm256_mullo_epi16( mmClassIdxResiP, mmAdjOffset ); // rescaled magnitude weighted by mmClassIdxResiP
+            __m256i mmTmpOrg = _mm256_mullo_epi16( mmClassIdxResiN, mmAbsOffset ); // original magnitude weighted by mmClassIdxResiN ( = 1 - mmClassIdxResiP )
+
+            __m256i mmTmpFin = _mm256_add_epi16( mmTmpAdj, mmTmpOrg ); // combined magnitude
+
+            __m256i mmTmpSignP = _mm256_mullo_epi16( mmSignOffsetP, mmTmpFin ); // +magnitude in lanes whose offset was > 0
+            __m256i mmTmpSignN = _mm256_sub_epi16( mmZeroVector, _mm256_mullo_epi16( mmSignOffsetN, mmTmpFin )); // -magnitude in lanes whose offset was <= 0
+
+            accumA = _mm256_add_epi16( mmTmpSignP, mmTmpSignN ); // signed, adjusted offset
+          }
+#endif
           accumA = _mm256_add_epi16(accumA, cur);
           accumA = _mm256_min_epi16(mmMax, _mm256_max_epi16(accumA, mmMin));
 
@@ -5866,6 +8075,22 @@ static void simdFixFilter9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &sr
     const __m128i mmClippingValues = _mm_loadl_epi64((const __m128i *)clippingValues);
     const __m128i mm11 = _mm_set1_epi8(1);
     const __m128i mm3 = _mm_set1_epi16(3);
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    __m128i mmClassIdxBsP, mmClassIdxResiP, mmClassIdxBsN, mmClassIdxResiN, mmClassIdxTmp; // per-lane weights derived from classifierCodingInfo (presumably 0 or 1 - TODO confirm)
+    __m128i mmOriOffset; // filter offset after clipping, before adjustment
+    __m128i mmSignOffsetP, mmSignOffsetN; // 1 where the offset is > 0 / 1 where the offset is <= 0
+    __m128i mmAbsOffset; // magnitude of the offset
+    __m128i mmAdjOffset; // magnitude rescaled as ( abs * ( 16 + factor ) + 8 ) >> 4
+    __m128i mmZeroVector = _mm_set1_epi16( 0 );
+    __m128i mm01Vector   = _mm_set1_epi16( 1 );
+    __m128i mm08Vector   = _mm_set1_epi16( 8 ); // rounding term for the >> 4 in the adjustment
+    __m128i mm16Vector   = _mm_set1_epi16( 16 ); // base weight; the factors below are added to it
+    __m128i mmPOffsetClipVector = _mm_set1_epi16( +offsetClipValue ); // upper clip bound; NOTE(review): narrowed to 16 bit, assumes clpRng.bd <= 15
+    __m128i mmNOffsetClipVector = _mm_set1_epi16( -offsetClipValue ); // lower clip bound
+    // Extra weights added to 16 before the >> 4 in the adjustment stages
+    __m128i mmBsFactor = isIntraSlice ? _mm_set1_epi16( 4 ) : _mm_set1_epi16( 3 ); // 4 for intra slices, 3 otherwise
+    __m128i mmResiFactor = isIntraSlice ? _mm_set1_epi16( 0 >> (!isSpsAdjust ? 1 : 0) ) : _mm_set1_epi16( 3 >> (!isSpsAdjust ? 1 : 0) ); // intra: always 0; otherwise 3 when isSpsAdjust, else 3 >> 1 = 1
 #endif
     for (int i = 0; i < height; i += stepY)
     {
@@ -5874,6 +8099,13 @@ static void simdFixFilter9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &sr
 #else
       const AlfClassifier *pClass = classifier[curBlk.y + i] + curBlk.x;
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      AlfClassifier *pClassCodingInfo = nullptr;
+      if( useBounCondition || useResiCondition )
+      {
+        pClassCodingInfo = classifierCodingInfo[blkDst.y + i] + blkDst.x;
+      }
+#endif
 
       for (int j = 0; j < width; j += stepX)
       {
@@ -5936,6 +8168,25 @@ static void simdFixFilter9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &sr
         params[19] = _mm_unpacklo_epi64(_mm_unpackhi_epi32(rawCoef[0][5], rawCoef[1][5]), _mm_unpackhi_epi32(rawCoef[2][5], rawCoef[3][5]));
         params[20] = _mm_unpackhi_epi64(_mm_unpackhi_epi32(rawCoef[0][5], rawCoef[1][5]), _mm_unpackhi_epi32(rawCoef[2][5], rawCoef[3][5]));
 
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        mmClassIdxBsP = _mm_set1_epi16( 0 ); // defaults used when useBounCondition is false
+        mmClassIdxBsN = _mm_set1_epi16( 0 );
+        if( useBounCondition )
+        {
+          mmClassIdxTmp   = _mm_loadu_si128( (const __m128i *) (pClassCodingInfo + j)); // 128-bit unaligned load, processed as 16-bit lanes below (assumes AlfClassifier is 16 bit wide - TODO confirm)
+          mmClassIdxBsP   = _mm_srai_epi16( mmClassIdxTmp, 1 ); // class value >> 1
+          mmClassIdxBsN   = _mm_sub_epi16( mm01Vector, mmClassIdxBsP ); // 1 - BsP
+        }
+        mmClassIdxResiP = _mm_set1_epi16( 0 ); // defaults used when useResiCondition is false
+        mmClassIdxResiN = _mm_set1_epi16( 0 );
+        if( useResiCondition )
+        {
+          mmClassIdxTmp   = _mm_loadu_si128( (const __m128i *) (pClassCodingInfo + j)); // same address as in the branch above
+          mmClassIdxBsP   = _mm_srai_epi16( mmClassIdxTmp, 1 ); // recomputed so this branch does not rely on the one above; mmClassIdxBsN is not touched here
+          mmClassIdxResiP = _mm_sub_epi16(  mmClassIdxTmp, _mm_add_epi16( mmClassIdxBsP, mmClassIdxBsP) ); // value - 2 * ( value >> 1 ), i.e. the lowest bit of the class value
+          mmClassIdxResiN = _mm_sub_epi16( mm01Vector, mmClassIdxResiP ); // 1 - ResiP
+        }
+#endif
         for (int ii = 0; ii < stepY; ii++)
         {
           const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6, *pImg7, *pImg8;
@@ -6036,6 +8287,65 @@ static void simdFixFilter9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &sr
           accumB = _mm_srai_epi32(accumB, shift);
 
           accumA = _mm_blend_epi16(accumA, _mm_slli_si128(accumB, 2), 0xAA);
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+          if( useBounCondition )
+          {
+            accumA = _mm_min_epi16( mmPOffsetClipVector, accumA ); // clip the offset to at most +offsetClipValue
+            accumA = _mm_max_epi16( mmNOffsetClipVector, accumA ); // and to at least -offsetClipValue
+            // accumA holds the clipped filter offset; keep a copy before it is rewritten
+            mmOriOffset = accumA;
+            // Build per-lane 0/1 sign selectors (cmpgt yields all-ones, i.e. -1, where true; abs maps that to 1)
+            // mmSignOffsetP: 1 where offset > 0, else 0
+            mmSignOffsetP = _mm_abs_epi16( _mm_cmpgt_epi16( mmOriOffset , mmZeroVector ));
+            // mmSignOffsetN: 1 where offset <= 0, else 0
+            mmSignOffsetN = _mm_abs_epi16( _mm_sub_epi16( mm01Vector, mmSignOffsetP ));
+            // Magnitude of the offset
+            mmAbsOffset = _mm_abs_epi16( mmOriOffset );
+            // BS based adjustment: ( abs * ( 16 + mmBsFactor ) + 8 ) >> 4
+            mmAdjOffset = _mm_mullo_epi16( mmAbsOffset, _mm_add_epi16( mm16Vector, mmBsFactor ) );
+            mmAdjOffset = _mm_add_epi16( mmAdjOffset, mm08Vector );
+            mmAdjOffset = _mm_srai_epi16( mmAdjOffset, 4 );
+
+            __m128i mmTmpAdj = _mm_mullo_epi16( mmClassIdxBsP, mmAdjOffset ); // rescaled magnitude weighted by mmClassIdxBsP
+            __m128i mmTmpOrg = _mm_mullo_epi16( mmClassIdxBsN, mmAbsOffset ); // original magnitude weighted by mmClassIdxBsN ( = 1 - mmClassIdxBsP )
+
+            __m128i mmTmpFin = _mm_add_epi16( mmTmpAdj, mmTmpOrg ); // combined magnitude
+
+            __m128i mmTmpSignP = _mm_mullo_epi16( mmSignOffsetP, mmTmpFin ); // +magnitude in lanes whose offset was > 0
+            __m128i mmTmpSignN = _mm_sub_epi16( mmZeroVector, _mm_mullo_epi16( mmSignOffsetN, mmTmpFin )); // -magnitude in lanes whose offset was <= 0
+
+            accumA = _mm_add_epi16( mmTmpSignP, mmTmpSignN ); // signed, adjusted offset
+          }
+
+          if( useResiCondition )
+          {
+            accumA = _mm_min_epi16( mmPOffsetClipVector, accumA ); // clip again; accumA may have been rescaled by the stage above
+            accumA = _mm_max_epi16( mmNOffsetClipVector, accumA );
+            // accumA holds the clipped offset entering this stage; keep a copy before it is rewritten
+            mmOriOffset = accumA;
+            // Build per-lane 0/1 sign selectors (cmpgt yields all-ones, i.e. -1, where true; abs maps that to 1)
+            // mmSignOffsetP: 1 where offset > 0, else 0
+            mmSignOffsetP = _mm_abs_epi16( _mm_cmpgt_epi16( mmOriOffset , mmZeroVector ));
+            // mmSignOffsetN: 1 where offset <= 0, else 0
+            mmSignOffsetN = _mm_abs_epi16( _mm_sub_epi16( mm01Vector, mmSignOffsetP ));
+            // Magnitude of the offset
+            mmAbsOffset = _mm_abs_epi16( mmOriOffset );
+            // Resi based adjustment: ( abs * ( 16 + mmResiFactor ) + 8 ) >> 4
+            mmAdjOffset = _mm_mullo_epi16( mmAbsOffset, _mm_add_epi16( mm16Vector, mmResiFactor ) );
+            mmAdjOffset = _mm_add_epi16( mmAdjOffset, mm08Vector );
+            mmAdjOffset = _mm_srai_epi16( mmAdjOffset, 4 );
+
+            __m128i mmTmpAdj = _mm_mullo_epi16( mmClassIdxResiP, mmAdjOffset ); // rescaled magnitude weighted by mmClassIdxResiP
+            __m128i mmTmpOrg = _mm_mullo_epi16( mmClassIdxResiN, mmAbsOffset ); // original magnitude weighted by mmClassIdxResiN ( = 1 - mmClassIdxResiP )
+
+            __m128i mmTmpFin = _mm_add_epi16( mmTmpAdj, mmTmpOrg ); // combined magnitude
+
+            __m128i mmTmpSignP = _mm_mullo_epi16( mmSignOffsetP, mmTmpFin ); // +magnitude in lanes whose offset was > 0
+            __m128i mmTmpSignN = _mm_sub_epi16( mmZeroVector, _mm_mullo_epi16( mmSignOffsetN, mmTmpFin )); // -magnitude in lanes whose offset was <= 0
+
+            accumA = _mm_add_epi16( mmTmpSignP, mmTmpSignN ); // signed, adjusted offset
+          }
+#endif
           accumA = _mm_add_epi16(accumA, cur);
           accumA = _mm_min_epi16(mmMax, _mm_max_epi16(accumA, mmMin));
 
@@ -6370,126 +8680,456 @@ static void simdDeriveVariance(const CPelBuf &srcLuma, const Area &blkDst, const
         __m128i xx4 = _mm_alignr_epi8(xx8, xx0, 8);
         __m128i xx6 = _mm_alignr_epi8(xx8, xx0, 12);
 
-        x0 = _mm_add_epi32(x0, y0);
-        s0 = _mm_add_epi32(s0, s2);
+        x0 = _mm_add_epi32(x0, y0);
+        s0 = _mm_add_epi32(s0, s2);
+
+        __m128i x2 = _mm_alignr_epi8(s0, x0, 4);
+        __m128i x4 = _mm_alignr_epi8(s0, x0, 8);
+        __m128i x6 = _mm_alignr_epi8(s0, x0, 12);
+
+        yy0 = _mm_add_epi32(xx0, xx2);
+        xx0 = _mm_add_epi32(xx4, xx6);
+        yy0 = _mm_add_epi32(yy0, xx8);
+
+        y0 = _mm_add_epi32(x0, x2);
+        x4 = _mm_add_epi32(x4, x6);
+        y0 = _mm_add_epi32(y0, s0);
+
+        __m128i sum2 = _mm_add_epi32(yy0, xx0);
+        __m128i sum = _mm_add_epi32(y0, x4);
+
+        x0 = x8;
+        y0 = y8;
+
+        _mm_storeu_si128((__m128i *) &variance[2][iOffset][jOffset], sum);
+        _mm_storeu_si128((__m128i *) &variance[3][iOffset][jOffset], sum2);
+
+        if (i == 8)
+        {
+          x8 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 4][jOffset]);
+          y8 = _mm_loadu_si128((__m128i *)&variance[3][iOffset - 4][jOffset]);
+          x6 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 3][jOffset]);
+          __m128i y6 = _mm_loadu_si128((__m128i *)&variance[3][iOffset - 3][jOffset]);
+          x4 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 2][jOffset]);
+          __m128i y4 = _mm_loadu_si128((__m128i *)&variance[3][iOffset - 2][jOffset]);
+          x2 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 1][jOffset]);
+          __m128i y2 = _mm_loadu_si128((__m128i *)&variance[3][iOffset - 1][jOffset]);
+
+          x8 = _mm_add_epi32(sum, x8);
+          y8 = _mm_add_epi32(sum2, y8);
+
+          x4 = _mm_add_epi32(x6, x4);
+          y4 = _mm_add_epi32(y6, y4);
+
+          x2 = _mm_add_epi32(x8, x2);
+          y2 = _mm_add_epi32(y8, y2);
+
+          sum = _mm_add_epi32(x4, x2);
+          sum2 = _mm_add_epi32(y4, y2);
+          _mm_storeu_si128((__m128i *) &variance[0][iOffset - 4][jOffset], sum);
+          _mm_storeu_si128((__m128i *) &variance[1][iOffset - 4][jOffset], sum2);
+
+          sum2 = _mm_mullo_epi32(sum2, n);
+          sum = _mm_mullo_epi32(sum, sum);
+          sum2 = _mm_add_epi32(sum2, o);
+          sum2 = _mm_sub_epi32(sum2, sum);
+          sum2 = _mm_srli_epi32(sum2, 3);
+          sum2 = _mm_mullo_epi32(sum2, m13);
+          sum2 = _mm_srli_epi32(sum2, 14);
+          _mm_storeu_si128((__m128i *) &variance[VARIANCE][iOffset - 4][jOffset], sum2);
+        }
+        else if (i > 8)
+        {
+          x8 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 5][jOffset]);
+          y8 = _mm_loadu_si128((__m128i *)&variance[3][iOffset - 5][jOffset]);
+          x6 = _mm_loadu_si128((__m128i *)&variance[0][iOffset - 5][jOffset]);
+          __m128i y6 = _mm_loadu_si128((__m128i *)&variance[1][iOffset - 5][jOffset]);
+
+          x6 = _mm_sub_epi32(x6, x8);
+          y6 = _mm_sub_epi32(y6, y8);
+
+          sum = _mm_add_epi32(x6, sum);
+          sum2 = _mm_add_epi32(y6, sum2);
+          _mm_storeu_si128((__m128i *) &variance[0][iOffset - 4][jOffset], sum);
+          _mm_storeu_si128((__m128i *) &variance[1][iOffset - 4][jOffset], sum2);
+
+          sum2 = _mm_mullo_epi32(sum2, n);
+          sum = _mm_mullo_epi32(sum, sum);
+          sum2 = _mm_add_epi32(sum2, o);
+          sum2 = _mm_sub_epi32(sum2, sum);
+          sum2 = _mm_srli_epi32(sum2, 3);
+          sum2 = _mm_mullo_epi32(sum2, m13);
+          sum2 = _mm_srli_epi32(sum2, 14);
+          _mm_storeu_si128((__m128i *) &variance[VARIANCE][iOffset - 4][jOffset], sum2);
+        }
+      }
+
+    }
+#if USE_AVX2
+  }
+#endif
+}
+#endif
+
+#if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT
+#if JVET_AE0139_ALF_IMPROVED_FIXFILTER
+template<X86_VEXT vext>
+static void simdFilterResi9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &srcResiLuma, const Area &curBlk, const Area &blkDst, Pel ***fixedFilterResiResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4]
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  , bool applyCodingInfo, CodingStructure &cs, AlfClassifier** classifierCodingInfo
+#endif
+  )
+{
+  const int srcStride = srcResiLuma.stride;
+  constexpr int shift = AdaptiveLoopFilter::m_NUM_BITS_FIXED_FILTER - 1;
+  constexpr int round = 1 << (shift - 1);
+
+  const int width = curBlk.width;
+  const int height = curBlk.height;
+
+  constexpr int stepX = 8;
+  constexpr int stepY = 2;
+
+  const Pel *src = srcResiLuma.buf + curBlk.y * srcStride + curBlk.x;
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool isIntraSlice = cs.slice->isIntra();
+  const bool isSpsAdjust = cs.sps->getAlfLumaFixedFilterAdjust();
+  const bool useCodingInfo = isSpsAdjust ? true : false; // same value as isSpsAdjust: coding info is only used here when the SPS adjust flag is set
+  const bool useBounCondition = applyCodingInfo && !( !isSpsAdjust && isIntraSlice ) && useCodingInfo; // with useCodingInfo == isSpsAdjust this reduces to applyCodingInfo && isSpsAdjust
+  const bool useResiCondition = applyCodingInfo && (isSpsAdjust || !isSpsAdjust) && !isIntraSlice && useCodingInfo; // NOTE(review): ( isSpsAdjust || !isSpsAdjust ) is always true; reduces to applyCodingInfo && !isIntraSlice && isSpsAdjust
+  const int offsetClipValue = 1 << ( clpRng.bd - 1 ); // used below as the +/- bound loaded into the offset clip vectors
+#endif
+#if !( USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION )
+  const __m128i mmOffset = _mm_set1_epi32(round);
+#endif
+
+  const int     clpRngmin = -clpRng.max;
+  const int     clpRngmax = clpRng.max;
+#if !( USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION )
+  const __m128i mmMin = _mm_set1_epi16(clpRngmin);
+  const __m128i mmMax = _mm_set1_epi16(clpRngmax);
+
+  const __m128i mmClippingValues = _mm_loadl_epi64((const __m128i *) clippingValues);
+  const __m128i mm11 = _mm_set1_epi8(1);
+  const __m128i mm3 = _mm_set1_epi16(3);
+#endif
+  const std::array<std::array<short, FIX_FILTER_NUM_COEFF_DB_COMBINE_9_DB_9 + 1>, NUM_FIXED_FILTERS>& filterCoeffFixed = packedDataFixedFilters9Db9Combine[fixedFiltQpInd];
+  const Pel zeros[8] = { 0 };
+
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool use256BitSimd = vext >= AVX2 && blkDst.width % 16 == 0;
+
+  if( use256BitSimd )
+  {
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    __m256i mmClassIdxBsP, mmClassIdxResiP, mmClassIdxBsN, mmClassIdxResiN, mmClassIdxTmp; // per-lane values derived from classifierCodingInfo
+    __m256i mmOriOffset;
+    __m256i mmSignOffsetP, mmSignOffsetN;
+    __m256i mmAbsOffset;
+    __m256i mmAdjOffset;
+    __m256i mmZeroVector = _mm256_set1_epi16(0);
+    __m256i mm01Vector   = _mm256_set1_epi16(1);
+    __m256i mm08Vector   = _mm256_set1_epi16(8);
+    __m256i mm16Vector   = _mm256_set1_epi16(16);
+    __m256i mmPOffsetClipVector = _mm256_set1_epi16(+offsetClipValue); // +bound; NOTE(review): narrowed to 16 bit, assumes clpRng.bd <= 15
+    __m256i mmNOffsetClipVector = _mm256_set1_epi16(-offsetClipValue); // -bound
+    // Per-slice-type factors (presumably added to mm16Vector when rescaling the offset - TODO confirm against the code below)
+    __m256i mmBsFactor = isIntraSlice ? _mm256_set1_epi16( 4 ) : _mm256_set1_epi16( 3 ); // 4 for intra slices, 3 otherwise
+    __m256i mmResiFactor = isIntraSlice ? _mm256_set1_epi16( 0 >> (!isSpsAdjust ? 1 : 0)) : _mm256_set1_epi16( 3 >> (!isSpsAdjust ? 1 : 0) ); // intra: always 0; otherwise 3 when isSpsAdjust, else 3 >> 1 = 1
+#endif
+
+    const __m256i mmOffset = _mm256_set1_epi32(round);
+
+    const int     clpRngmin = -clpRng.max;
+    const int     clpRngmax = clpRng.max;
+    const __m256i mmMin     = _mm256_set1_epi16(clpRngmin);
+    const __m256i mmMax     = _mm256_set1_epi16(clpRngmax);
+
+    const __m128i mmClippingValues = _mm_loadl_epi64((const __m128i *) clippingValues);
+    __m256i mmClippingValues256 = _mm256_castsi128_si256(mmClippingValues);
+    mmClippingValues256               = _mm256_insertf128_si256(mmClippingValues256, mmClippingValues, 1);
+    const __m256i mm11             = _mm256_set1_epi8(1);
+    const __m256i mm3              = _mm256_set1_epi16(3);
+    const std::array<std::array<short, FIX_FILTER_NUM_COEFF_DB_COMBINE_9_DB_9 + 1>, NUM_FIXED_FILTERS>
+             &filterCoeffFixed = packedDataFixedFilters9Db9Combine[fixedFiltQpInd];
+    const Pel zeros[16]         = { 0 };
+
+    for (int i = 0; i < height; i += stepY)
+    {
+#if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
+      const AlfClassifier *pClass = classifier[blkDst.y + i] + blkDst.x;
+#else
+      const AlfClassifier *pClass = classifier[curBlk.y + i] + curBlk.x;
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      AlfClassifier *pClassCodingInfo = nullptr;
+      if (useBounCondition || useResiCondition)
+      {
+        pClassCodingInfo = classifierCodingInfo[blkDst.y + i] + blkDst.x;
+      }
+#endif
+
+      for (int j = 0; j < width; j += stepX * 2)
+      {
+        __m256i params[11];
+        __m256i rawCoef[4][3];
+        for (int m = 0; m < 4; m++)
+        {
+          int transposeIdx0 = pClass[j + 2 * m] & 0x3;
+          const int filterIdx0    = classIndFixed[pClass[j + 2 * m] >> 2];
+
+          __m128i rawCoef00 = _mm_loadu_si128((const __m128i *) (filterCoeffFixed[filterIdx0].data()));
+          __m128i rawCoef01 = _mm_loadu_si128((const __m128i *) (filterCoeffFixed[filterIdx0].data() + 8));
+          __m128i rawCoef02 = _mm_loadu_si128((const __m128i *) (filterCoeffFixed[filterIdx0].data() + 14));
+          // transpose0
+          if (transposeIdx0 != 0)
+          {
+            const __m128i s00 = _mm_loadu_si128((const __m128i *) shTab9Db9[transposeIdx0][0]);
+            const __m128i s01 = _mm_loadu_si128((const __m128i *) shTab9Db9[transposeIdx0][1]);
+            const __m128i s02 = _mm_loadu_si128((const __m128i *) shTab9Db9[transposeIdx0][2]);
+
+            rawCoef00 = _mm_shuffle_epi8(rawCoef00, s00);
+            rawCoef01 = _mm_shuffle_epi8(rawCoef01, s01);
+            rawCoef02 = _mm_shuffle_epi8(rawCoef02, s02);
+          }
+
+          int transposeIdx1 = pClass[j + 2 * m + 8] & 0x3;
+          const int filterIdx1    = classIndFixed[pClass[j + 2 * m + 8] >> 2];
+
+          __m128i rawCoef10 = _mm_loadu_si128((const __m128i *) (filterCoeffFixed[filterIdx1].data()));
+          __m128i rawCoef11 = _mm_loadu_si128((const __m128i *) (filterCoeffFixed[filterIdx1].data() + 8));
+          __m128i rawCoef12 = _mm_loadu_si128((const __m128i *) (filterCoeffFixed[filterIdx1].data() + 14));
+          // transpose1
+          if (transposeIdx1 != 0)
+          {
+            const __m128i s10 = _mm_loadu_si128((const __m128i *) shTab9Db9[transposeIdx1][0]);
+            const __m128i s11 = _mm_loadu_si128((const __m128i *) shTab9Db9[transposeIdx1][1]);
+            const __m128i s12 = _mm_loadu_si128((const __m128i *) shTab9Db9[transposeIdx1][2]);
+
+            rawCoef10 = _mm_shuffle_epi8(rawCoef10, s10);
+            rawCoef11 = _mm_shuffle_epi8(rawCoef11, s11);
+            rawCoef12 = _mm_shuffle_epi8(rawCoef12, s12);
+          }
+
+          rawCoef[m][0] = _mm256_castsi128_si256(rawCoef00);
+          rawCoef[m][0] = _mm256_insertf128_si256(rawCoef[m][0], rawCoef10, 1);
+          rawCoef[m][1] = _mm256_castsi128_si256(rawCoef01);
+          rawCoef[m][1] = _mm256_insertf128_si256(rawCoef[m][1], rawCoef11, 1);
+          rawCoef[m][2] = _mm256_castsi128_si256(rawCoef02);
+          rawCoef[m][2] = _mm256_insertf128_si256(rawCoef[m][2], rawCoef12, 1);
+        }   // for(m)
+
+        params[0] = _mm256_unpacklo_epi64(_mm256_unpacklo_epi32(rawCoef[0][0], rawCoef[1][0]), _mm256_unpacklo_epi32(rawCoef[2][0], rawCoef[3][0]));
+        params[1] = _mm256_unpackhi_epi64(_mm256_unpacklo_epi32(rawCoef[0][0], rawCoef[1][0]),  _mm256_unpacklo_epi32(rawCoef[2][0], rawCoef[3][0]));
+        params[2] = _mm256_unpacklo_epi64(_mm256_unpackhi_epi32(rawCoef[0][0], rawCoef[1][0]),  _mm256_unpackhi_epi32(rawCoef[2][0], rawCoef[3][0]));
+        params[3] = _mm256_unpackhi_epi64(_mm256_unpackhi_epi32(rawCoef[0][0], rawCoef[1][0]), _mm256_unpackhi_epi32(rawCoef[2][0], rawCoef[3][0]));
+
+        params[4] = _mm256_unpacklo_epi64(_mm256_unpacklo_epi32(rawCoef[0][1], rawCoef[1][1]), _mm256_unpacklo_epi32(rawCoef[2][1], rawCoef[3][1]));
+        params[5] = _mm256_unpackhi_epi64(_mm256_unpacklo_epi32(rawCoef[0][1], rawCoef[1][1]), _mm256_unpacklo_epi32(rawCoef[2][1], rawCoef[3][1]));
+        params[6] = _mm256_unpacklo_epi64(_mm256_unpackhi_epi32(rawCoef[0][1], rawCoef[1][1]), _mm256_unpackhi_epi32(rawCoef[2][1], rawCoef[3][1]));
+
+        params[7]  = _mm256_unpacklo_epi64(_mm256_unpacklo_epi32(rawCoef[0][2], rawCoef[1][2]), _mm256_unpacklo_epi32(rawCoef[2][2], rawCoef[3][2]));
+        params[8]  = _mm256_unpackhi_epi64(_mm256_unpacklo_epi32(rawCoef[0][2], rawCoef[1][2]), _mm256_unpacklo_epi32(rawCoef[2][2], rawCoef[3][2]));
+        params[9]  = _mm256_unpacklo_epi64(_mm256_unpackhi_epi32(rawCoef[0][2], rawCoef[1][2]), _mm256_unpackhi_epi32(rawCoef[2][2], rawCoef[3][2]));
+        params[10] = _mm256_unpackhi_epi64(_mm256_unpackhi_epi32(rawCoef[0][2], rawCoef[1][2]), _mm256_unpackhi_epi32(rawCoef[2][2], rawCoef[3][2]));
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        mmClassIdxBsP = _mm256_set1_epi16( 0 );   // multiplies the adjusted magnitude in the boundary step below
+        mmClassIdxBsN = _mm256_set1_epi16( 0 );   // multiplies the unadjusted magnitude in the boundary step below
+        if (useBounCondition)
+        {
+          mmClassIdxTmp = _mm256_loadu_si256((const __m256i *) (pClassCodingInfo + j));   // 256-bit load of the coding-info entries starting at column j
+          mmClassIdxBsP = _mm256_srai_epi16(mmClassIdxTmp, 1);   // idx >> 1 per 16-bit lane
+          mmClassIdxBsN = _mm256_sub_epi16(mm01Vector, mmClassIdxBsP);   // mm01Vector - (idx >> 1); presumably the complementary 0/1 flag, assuming idx is in [0, 3] - TODO confirm
+        }
+        mmClassIdxResiP = _mm256_set1_epi16( 0 );   // multiplies the adjusted magnitude in the residual step below
+        mmClassIdxResiN = _mm256_set1_epi16( 0 );   // multiplies the unadjusted magnitude in the residual step below
+        if (useResiCondition)
+        {
+          mmClassIdxTmp   = _mm256_loadu_si256((const __m256i *) (pClassCodingInfo + j));   // same load as above, repeated so this branch does not depend on useBounCondition
+          mmClassIdxBsP   = _mm256_srai_epi16(mmClassIdxTmp, 1);   // idx >> 1, needed for the subtraction on the next line
+          mmClassIdxResiP = _mm256_sub_epi16(mmClassIdxTmp, _mm256_add_epi16(mmClassIdxBsP, mmClassIdxBsP));   // idx - 2 * (idx >> 1), i.e. bit 0 of idx
+          mmClassIdxResiN = _mm256_sub_epi16(mm01Vector, mmClassIdxResiP);   // mm01Vector - bit 0; presumably the complementary flag
+        }
+#endif
+        for (int ii = 0; ii < stepY; ii++)
+        {
+          const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6, *pImg7, *pImg8;
+          pImg0 = src + j + ii * srcStride;
+          pImg1 = pImg0 + srcStride;
+          pImg2 = pImg0 - srcStride;
+          pImg3 = pImg1 + srcStride;
+          pImg4 = pImg2 - srcStride;
+          pImg5 = pImg3 + srcStride;
+          pImg6 = pImg4 - srcStride;
+          pImg7 = pImg5 + srcStride;
+          pImg8 = pImg6 - srcStride;
+
+          __m256i cur    = _mm256_loadu_si256((const __m256i *) pImg0);
+          __m256i accumA = mmOffset;
+          __m256i accumB = mmOffset;
 
-        __m128i x2 = _mm_alignr_epi8(s0, x0, 4);
-        __m128i x4 = _mm_alignr_epi8(s0, x0, 8);
-        __m128i x6 = _mm_alignr_epi8(s0, x0, 12);
+          auto process2coeffs = [&](const int i, const Pel *ptr0, const Pel *ptr1, const Pel *ptr2, const Pel *ptr3)
+          {
+            const __m256i val00 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr0), cur);
+            const __m256i val10 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr2), cur);
+            const __m256i val01 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr1), cur);
+            const __m256i val11 = _mm256_sub_epi16(_mm256_loadu_si256((const __m256i *) ptr3), cur);
 
-        yy0 = _mm_add_epi32(xx0, xx2);
-        xx0 = _mm_add_epi32(xx4, xx6);
-        yy0 = _mm_add_epi32(yy0, xx8);
+            __m256i val01A = _mm256_blend_epi16(val00, _mm256_slli_si256(val10, 2), 0xAA);
+            __m256i val01B = _mm256_blend_epi16(_mm256_srli_si256(val00, 2), val10, 0xAA);
+            __m256i val01C = _mm256_blend_epi16(val01, _mm256_slli_si256(val11, 2), 0xAA);
+            __m256i val01D = _mm256_blend_epi16(_mm256_srli_si256(val01, 2), val11, 0xAA);
 
-        y0 = _mm_add_epi32(x0, x2);
-        x4 = _mm_add_epi32(x4, x6);
-        y0 = _mm_add_epi32(y0, s0);
+            __m256i mmClippingFixed = _mm256_and_si256(params[i], mm3);
 
-        __m128i sum2 = _mm_add_epi32(yy0, xx0);
-        __m128i sum = _mm_add_epi32(y0, x4);
+            __m256i mmClippingFixed2 = _mm256_packs_epi16(mmClippingFixed, mmClippingFixed);
+            mmClippingFixed2         = _mm256_add_epi8(mmClippingFixed2, mmClippingFixed2);
+            __m256i xmm2             = _mm256_add_epi8(mmClippingFixed2, mm11);
+            __m256i xmmA             = _mm256_unpacklo_epi8(mmClippingFixed2, xmm2);
+            __m256i limit            = _mm256_shuffle_epi8(mmClippingValues256, xmmA);
 
-        x0 = x8;
-        y0 = y8;
+            val01A = _mm256_min_epi16(val01A, limit);
+            val01B = _mm256_min_epi16(val01B, limit);
+            val01C = _mm256_min_epi16(val01C, limit);
+            val01D = _mm256_min_epi16(val01D, limit);
 
-        _mm_storeu_si128((__m128i *) &variance[2][iOffset][jOffset], sum);
-        _mm_storeu_si128((__m128i *) &variance[3][iOffset][jOffset], sum2);
+            limit = _mm256_sub_epi16(_mm256_setzero_si256(), limit);
 
-        if (i == 8)
-        {
-          x8 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 4][jOffset]);
-          y8 = _mm_loadu_si128((__m128i *)&variance[3][iOffset - 4][jOffset]);
-          x6 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 3][jOffset]);
-          __m128i y6 = _mm_loadu_si128((__m128i *)&variance[3][iOffset - 3][jOffset]);
-          x4 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 2][jOffset]);
-          __m128i y4 = _mm_loadu_si128((__m128i *)&variance[3][iOffset - 2][jOffset]);
-          x2 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 1][jOffset]);
-          __m128i y2 = _mm_loadu_si128((__m128i *)&variance[3][iOffset - 1][jOffset]);
+            val01A = _mm256_max_epi16(val01A, limit);
+            val01B = _mm256_max_epi16(val01B, limit);
+            val01C = _mm256_max_epi16(val01C, limit);
+            val01D = _mm256_max_epi16(val01D, limit);
 
-          x8 = _mm_add_epi32(sum, x8);
-          y8 = _mm_add_epi32(sum2, y8);
+            val01A = _mm256_add_epi16(val01A, val01C);
+            val01B = _mm256_add_epi16(val01B, val01D);
 
-          x4 = _mm_add_epi32(x6, x4);
-          y4 = _mm_add_epi32(y6, y4);
+            const __m256i coeff = _mm256_srai_epi16(params[i], 2);
 
-          x2 = _mm_add_epi32(x8, x2);
-          y2 = _mm_add_epi32(y8, y2);
+            accumA = _mm256_add_epi32(accumA, _mm256_madd_epi16(val01A, coeff));
+            accumB = _mm256_add_epi32(accumB, _mm256_madd_epi16(val01B, coeff));
+          };
 
-          sum = _mm_add_epi32(x4, x2);
-          sum2 = _mm_add_epi32(y4, y2);
-          _mm_storeu_si128((__m128i *) &variance[0][iOffset - 4][jOffset], sum);
-          _mm_storeu_si128((__m128i *) &variance[1][iOffset - 4][jOffset], sum2);
+          process2coeffs(0, pImg8 + 0, pImg7 + 0, pImg6 - 1, pImg5 + 1);
+          process2coeffs(1, pImg4 - 2, pImg3 + 2, pImg2 - 3, pImg1 + 3);
+          process2coeffs(2, pImg0 - 4, pImg0 + 4, pImg6 + 1, pImg5 - 1);
+          process2coeffs(3, pImg4 + 2, pImg3 - 2, pImg2 + 3, pImg1 - 3);
 
-          sum2 = _mm_mullo_epi32(sum2, n);
-          sum = _mm_mullo_epi32(sum, sum);
-          sum2 = _mm_add_epi32(sum2, o);
-          sum2 = _mm_sub_epi32(sum2, sum);
-          sum2 = _mm_srli_epi32(sum2, 3);
-          sum2 = _mm_mullo_epi32(sum2, m13);
-          sum2 = _mm_srli_epi32(sum2, 14);
-          _mm_storeu_si128((__m128i *) &variance[VARIANCE][iOffset - 4][jOffset], sum2);
-        }
-        else if (i > 8)
-        {
-          x8 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 5][jOffset]);
-          y8 = _mm_loadu_si128((__m128i *)&variance[3][iOffset - 5][jOffset]);
-          x6 = _mm_loadu_si128((__m128i *)&variance[0][iOffset - 5][jOffset]);
-          __m128i y6 = _mm_loadu_si128((__m128i *)&variance[1][iOffset - 5][jOffset]);
+          process2coeffs(4, pImg6 + 0, pImg5 - 0, pImg4 - 1, pImg3 + 1);
+          process2coeffs(5, pImg2 - 2, pImg1 + 2, pImg0 - 3, pImg0 + 3);
+          process2coeffs(6, pImg4 + 1, pImg3 - 1, pImg2 + 2, pImg1 - 2);
 
-          x6 = _mm_sub_epi32(x6, x8);
-          y6 = _mm_sub_epi32(y6, y8);
+          process2coeffs(7, pImg4 + 0, pImg3 - 0, pImg2 - 1, pImg1 + 1);
+          process2coeffs(8, pImg0 - 2, pImg0 + 2, pImg2 + 1, pImg1 - 1);
+          process2coeffs(9, pImg2 + 0, pImg1 - 0, pImg0 - 1, pImg0 + 1);
+          process2coeffs(10, zeros, pImg0, pImg0, pImg0);
 
-          sum = _mm_add_epi32(x6, sum);
-          sum2 = _mm_add_epi32(y6, sum2);
-          _mm_storeu_si128((__m128i *) &variance[0][iOffset - 4][jOffset], sum);
-          _mm_storeu_si128((__m128i *) &variance[1][iOffset - 4][jOffset], sum2);
+          accumA = _mm256_srai_epi32(accumA, shift);
+          accumB = _mm256_srai_epi32(accumB, shift);
 
-          sum2 = _mm_mullo_epi32(sum2, n);
-          sum = _mm_mullo_epi32(sum, sum);
-          sum2 = _mm_add_epi32(sum2, o);
-          sum2 = _mm_sub_epi32(sum2, sum);
-          sum2 = _mm_srli_epi32(sum2, 3);
-          sum2 = _mm_mullo_epi32(sum2, m13);
-          sum2 = _mm_srli_epi32(sum2, 14);
-          _mm_storeu_si128((__m128i *) &variance[VARIANCE][iOffset - 4][jOffset], sum2);
-        }
-      }
+          accumA = _mm256_blend_epi16(accumA, _mm256_slli_si256(accumB, 2), 0xAA);
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+          if (useBounCondition)
+          {
+            accumA = _mm256_min_epi16(mmPOffsetClipVector, accumA);   // clamp the 16-bit offsets from above ...
+            accumA = _mm256_max_epi16(mmNOffsetClipVector, accumA);   // ... and from below
+            // accumA now holds the clipped filter offset; keep a copy as the "original" offset
+            mmOriOffset = accumA;
+            // Split the sign of the offset into two per-lane 0/1 masks
+            // SignP: 1 where offset > 0, else 0 (cmpgt yields -1, abs turns it into 1)
+            mmSignOffsetP = _mm256_abs_epi16(_mm256_cmpgt_epi16(mmOriOffset, mmZeroVector));
+            // SignN: mm01Vector - SignP, i.e. set where offset <= 0 (the outer abs does not change a 0/1 value)
+            mmSignOffsetN = _mm256_abs_epi16(_mm256_sub_epi16(mm01Vector, mmSignOffsetP));
+            // Magnitude of the offset
+            mmAbsOffset = _mm256_abs_epi16(mmOriOffset);
+            // Boundary-based adjustment: adj = (abs * (mm16Vector + mmBsFactor) + mm08Vector) >> 4
+            mmAdjOffset = _mm256_mullo_epi16(mmAbsOffset, _mm256_add_epi16(mm16Vector, mmBsFactor));   // low 16 bits only; assumes the clip keeps the product in range - TODO confirm
+            mmAdjOffset = _mm256_add_epi16(mmAdjOffset, mm08Vector);
+            mmAdjOffset = _mm256_srai_epi16(mmAdjOffset, 4);
+
+            __m256i mmTmpAdj = _mm256_mullo_epi16(mmClassIdxBsP, mmAdjOffset);   // adjusted magnitude, weighted by the per-sample BsP value
+            __m256i mmTmpOrg = _mm256_mullo_epi16(mmClassIdxBsN, mmAbsOffset);   // unadjusted magnitude, weighted by the per-sample BsN value
+
+            __m256i mmTmpFin = _mm256_add_epi16(mmTmpAdj, mmTmpOrg);   // selected magnitude per lane
+
+            __m256i mmTmpSignP = _mm256_mullo_epi16(mmSignOffsetP, mmTmpFin);   // +magnitude in lanes whose offset was positive
+            __m256i mmTmpSignN = _mm256_sub_epi16(mmZeroVector, _mm256_mullo_epi16(mmSignOffsetN, mmTmpFin));   // -magnitude in the remaining lanes
+
+            accumA = _mm256_add_epi16(mmTmpSignP, mmTmpSignN);   // signed, adjusted offset
+          }
 
-    }
-#if USE_AVX2
-  }
-#endif
-}
+          if (useResiCondition)
+          {
+            accumA = _mm256_min_epi16(mmPOffsetClipVector, accumA);   // clamp again; when useBounCondition is also set this acts on the boundary-adjusted offset
+            accumA = _mm256_max_epi16(mmNOffsetClipVector, accumA);
+            // accumA now holds the clipped offset that this step starts from
+            mmOriOffset = accumA;
+            // Split the sign of the offset into two per-lane 0/1 masks
+            // SignP: 1 where offset > 0, else 0 (cmpgt yields -1, abs turns it into 1)
+            mmSignOffsetP = _mm256_abs_epi16(_mm256_cmpgt_epi16(mmOriOffset, mmZeroVector));
+            // SignN: mm01Vector - SignP, i.e. set where offset <= 0
+            mmSignOffsetN = _mm256_abs_epi16(_mm256_sub_epi16(mm01Vector, mmSignOffsetP));
+            // Magnitude of the offset
+            mmAbsOffset = _mm256_abs_epi16(mmOriOffset);
+            // Residual-based adjustment: adj = (abs * (mm16Vector + mmResiFactor) + mm08Vector) >> 4
+            mmAdjOffset = _mm256_mullo_epi16(mmAbsOffset, _mm256_add_epi16(mm16Vector, mmResiFactor));   // low 16 bits only; assumes the clip keeps the product in range - TODO confirm
+            mmAdjOffset = _mm256_add_epi16(mmAdjOffset, mm08Vector);
+            mmAdjOffset = _mm256_srai_epi16(mmAdjOffset, 4);
+
+            __m256i mmTmpAdj = _mm256_mullo_epi16(mmClassIdxResiP, mmAdjOffset);   // adjusted magnitude, weighted by bit 0 of the coding-info index
+            __m256i mmTmpOrg = _mm256_mullo_epi16(mmClassIdxResiN, mmAbsOffset);   // unadjusted magnitude, weighted by the ResiN value
+
+            __m256i mmTmpFin = _mm256_add_epi16(mmTmpAdj, mmTmpOrg);   // selected magnitude per lane
+
+            __m256i mmTmpSignP = _mm256_mullo_epi16(mmSignOffsetP, mmTmpFin);   // +magnitude in lanes whose offset was positive
+            __m256i mmTmpSignN = _mm256_sub_epi16(mmZeroVector, _mm256_mullo_epi16(mmSignOffsetN, mmTmpFin));   // -magnitude in the remaining lanes
+
+            accumA = _mm256_add_epi16(mmTmpSignP, mmTmpSignN);   // signed, adjusted offset
+          }
 #endif
+          accumA = _mm256_add_epi16(accumA, cur);
+          accumA = _mm256_min_epi16(mmMax, _mm256_max_epi16(accumA, mmMin));
 
-#if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT
-#if JVET_AE0139_ALF_IMPROVED_FIXFILTER
-template<X86_VEXT vext>
-static void simdFilterResi9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &srcResiLuma, const Area &curBlk, const Area &blkDst, Pel ***fixedFilterResiResults, int picWidth, const int fixedFiltInd, const short classIndFixed[NUM_CLASSES_FIX], int fixedFiltQpInd, int dirWindSize, const ClpRng &clpRng, const Pel clippingValues[4])
-{
-  const int srcStride = srcResiLuma.stride;
-  constexpr int shift = AdaptiveLoopFilter::m_NUM_BITS_FIXED_FILTER - 1;
-  constexpr int round = 1 << (shift - 1);
-
-  const int width = curBlk.width;
-  const int height = curBlk.height;
-
-  constexpr int stepX = 8;
-  constexpr int stepY = 2;
+#if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
+          _mm256_storeu_si256((__m256i *) (&(fixedFilterResiResults[fixedFiltInd][blkDst.y + i + ii][blkDst.x + j])), accumA);
+#else
+          _mm256_storeu_si256((__m256i *) (&(fixedFilterResiResults[fixedFiltInd][curBlk.y + i + ii][curBlk.x + j])), accumA);
+#endif
+        }   // for (size_t ii = 0; ii < stepY; ii++)
+      }     // for (size_t j = 0; j < width; j += stepX)
+      src += srcStride * stepY;
+    }
 
-  const Pel *src = srcResiLuma.buf + curBlk.y * srcStride + curBlk.x;
+  }
+  else
+  {
 
   const __m128i mmOffset = _mm_set1_epi32(round);
 
-  const int     clpRngmin = -clpRng.max;
-  const int     clpRngmax = clpRng.max;
   const __m128i mmMin = _mm_set1_epi16(clpRngmin);
   const __m128i mmMax = _mm_set1_epi16(clpRngmax);
 
   const __m128i mmClippingValues = _mm_loadl_epi64((const __m128i *) clippingValues);
   const __m128i mm11 = _mm_set1_epi8(1);
   const __m128i mm3 = _mm_set1_epi16(3);
-  const std::array<std::array<short, FIX_FILTER_NUM_COEFF_DB_COMBINE_9_DB_9 + 1>, NUM_FIXED_FILTERS>& filterCoeffFixed = packedDataFixedFilters9Db9Combine[fixedFiltQpInd];
-  const Pel zeros[8] = { 0 };
+#endif //Use Avx2 Simd
+
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  __m128i mmClassIdxBsP, mmClassIdxResiP, mmClassIdxBsN, mmClassIdxResiN, mmClassIdxTmp;   // per-sample selectors decoded from classifierCodingInfo inside the j loop
+  __m128i mmOriOffset;   // clipped filter offset before adjustment
+  __m128i mmSignOffsetP, mmSignOffsetN;   // 0/1 masks: offset > 0 / offset <= 0
+  __m128i mmAbsOffset;   // |offset|
+  __m128i mmAdjOffset;   // |offset| scaled by (16 + factor) / 16 with rounding
+  __m128i mmZeroVector = _mm_set1_epi16( 0 );
+  __m128i mm01Vector   = _mm_set1_epi16( 1 );
+  __m128i mm08Vector   = _mm_set1_epi16( 8 );    // rounding term added before the >> 4
+  __m128i mm16Vector   = _mm_set1_epi16( 16 );   // unity in 1/16 units; the factor is added on top
+  __m128i mmPOffsetClipVector = _mm_set1_epi16( +offsetClipValue );   // upper clamp for the offset
+  __m128i mmNOffsetClipVector = _mm_set1_epi16( -offsetClipValue );   // lower clamp for the offset
+  // Adjustment factors in 1/16 units: boundary factor is 4 for intra slices and 3 otherwise; residual factor is 0 for intra slices, otherwise 3 when isSpsAdjust is set and 1 (3 >> 1) when it is not
+  __m128i mmBsFactor = isIntraSlice ? _mm_set1_epi16( 4 ) : _mm_set1_epi16( 3 );
+  __m128i mmResiFactor = isIntraSlice ? _mm_set1_epi16( 0 >> (!isSpsAdjust ? 1 : 0) ) : _mm_set1_epi16( 3 >> (!isSpsAdjust ? 1 : 0) );
+#endif
+
   for (int i = 0; i < height; i += stepY)
   {
 #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
@@ -6497,6 +9137,13 @@ static void simdFilterResi9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &s
 #else
     const AlfClassifier *pClass = classifier[curBlk.y + i] + curBlk.x;
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+    AlfClassifier *pClassCodingInfo = nullptr;
+    if( useBounCondition || useResiCondition )
+    {
+      pClassCodingInfo = classifierCodingInfo[blkDst.y + i] + blkDst.x;
+    }
+#endif
 
     for (int j = 0; j < width; j += stepX)
     {
@@ -6536,6 +9183,25 @@ static void simdFilterResi9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &s
       params[8] = _mm_unpackhi_epi64(_mm_unpacklo_epi32(rawCoef[0][2], rawCoef[1][2]), _mm_unpacklo_epi32(rawCoef[2][2], rawCoef[3][2]));
       params[9] = _mm_unpacklo_epi64(_mm_unpackhi_epi32(rawCoef[0][2], rawCoef[1][2]), _mm_unpackhi_epi32(rawCoef[2][2], rawCoef[3][2]));
       params[10] = _mm_unpackhi_epi64(_mm_unpackhi_epi32(rawCoef[0][2], rawCoef[1][2]), _mm_unpackhi_epi32(rawCoef[2][2], rawCoef[3][2]));
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      mmClassIdxBsP = _mm_set1_epi16( 0 );   // multiplies the adjusted magnitude in the boundary step below
+      mmClassIdxBsN = _mm_set1_epi16( 0 );   // multiplies the unadjusted magnitude in the boundary step below
+      if( useBounCondition )
+      {
+        mmClassIdxTmp   = _mm_loadu_si128( (const __m128i *) (pClassCodingInfo + j));   // 128-bit load of the coding-info entries starting at column j
+        mmClassIdxBsP   = _mm_srai_epi16( mmClassIdxTmp, 1 );   // idx >> 1 per 16-bit lane
+        mmClassIdxBsN   = _mm_sub_epi16( mm01Vector, mmClassIdxBsP );   // 1 - (idx >> 1); a 0/1 complement only if idx is in [0, 3] - TODO confirm
+      }
+      mmClassIdxResiP = _mm_set1_epi16( 0 );   // multiplies the adjusted magnitude in the residual step below
+      mmClassIdxResiN = _mm_set1_epi16( 0 );   // multiplies the unadjusted magnitude in the residual step below
+      if( useResiCondition )
+      {
+        mmClassIdxTmp   = _mm_loadu_si128( (const __m128i *) (pClassCodingInfo + j));   // same load as above, repeated so this branch does not depend on useBounCondition
+        mmClassIdxBsP   = _mm_srai_epi16( mmClassIdxTmp, 1 );   // idx >> 1, needed for the subtraction on the next line
+        mmClassIdxResiP = _mm_sub_epi16(  mmClassIdxTmp, _mm_add_epi16( mmClassIdxBsP, mmClassIdxBsP) );   // idx - 2 * (idx >> 1), i.e. bit 0 of idx
+        mmClassIdxResiN = _mm_sub_epi16( mm01Vector, mmClassIdxResiP );   // 1 - bit 0
+      }
+#endif
       for (int ii = 0; ii < stepY; ii++)
       {
         const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6, *pImg7, *pImg8;
@@ -6612,6 +9278,65 @@ static void simdFilterResi9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &s
         accumB = _mm_srai_epi32(accumB, shift);
 
         accumA = _mm_blend_epi16(accumA, _mm_slli_si128(accumB, 2), 0xAA);
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        if( useBounCondition )
+        {
+          accumA = _mm_min_epi16( mmPOffsetClipVector, accumA );   // clamp the 16-bit offsets to +offsetClipValue ...
+          accumA = _mm_max_epi16( mmNOffsetClipVector, accumA );   // ... and to -offsetClipValue
+          // accumA now holds the clipped filter offset; keep a copy as the "original" offset
+          mmOriOffset = accumA;
+          // Split the sign of the offset into two per-lane 0/1 masks
+          // SignP: 1 where offset > 0, else 0 (cmpgt yields -1, abs turns it into 1)
+          mmSignOffsetP = _mm_abs_epi16( _mm_cmpgt_epi16( mmOriOffset , mmZeroVector ));
+          // SignN: 1 - SignP, i.e. 1 where offset <= 0 (the outer abs does not change a 0/1 value)
+          mmSignOffsetN = _mm_abs_epi16( _mm_sub_epi16( mm01Vector, mmSignOffsetP ));
+          // Magnitude of the offset
+          mmAbsOffset = _mm_abs_epi16( mmOriOffset );
+          // Boundary-based adjustment: adj = (abs * (16 + mmBsFactor) + 8) >> 4, a rounded scale by (16 + factor) / 16
+          mmAdjOffset = _mm_mullo_epi16( mmAbsOffset, _mm_add_epi16( mm16Vector, mmBsFactor ) );   // low 16 bits only; assumes offsetClipValue keeps the product in range - TODO confirm
+          mmAdjOffset = _mm_add_epi16( mmAdjOffset, mm08Vector );
+          mmAdjOffset = _mm_srai_epi16( mmAdjOffset, 4 );
+
+          __m128i mmTmpAdj = _mm_mullo_epi16( mmClassIdxBsP, mmAdjOffset );   // adjusted magnitude, weighted by the per-sample BsP value
+          __m128i mmTmpOrg = _mm_mullo_epi16( mmClassIdxBsN, mmAbsOffset );   // unadjusted magnitude, weighted by the per-sample BsN value
+
+          __m128i mmTmpFin = _mm_add_epi16( mmTmpAdj, mmTmpOrg );   // selected magnitude per lane
+
+          __m128i mmTmpSignP = _mm_mullo_epi16( mmSignOffsetP, mmTmpFin );   // +magnitude in lanes whose offset was positive
+          __m128i mmTmpSignN = _mm_sub_epi16( mmZeroVector, _mm_mullo_epi16( mmSignOffsetN, mmTmpFin ));   // -magnitude in the remaining lanes
+
+          accumA = _mm_add_epi16( mmTmpSignP, mmTmpSignN );   // signed, adjusted offset
+        }
+
+        if( useResiCondition )
+        {
+          accumA = _mm_min_epi16( mmPOffsetClipVector, accumA );   // clamp again; when useBounCondition is also set this acts on the boundary-adjusted offset
+          accumA = _mm_max_epi16( mmNOffsetClipVector, accumA );
+          // accumA now holds the clipped offset that this step starts from
+          mmOriOffset = accumA;
+          // Split the sign of the offset into two per-lane 0/1 masks
+          // SignP: 1 where offset > 0, else 0 (cmpgt yields -1, abs turns it into 1)
+          mmSignOffsetP = _mm_abs_epi16( _mm_cmpgt_epi16( mmOriOffset , mmZeroVector ));
+          // SignN: 1 - SignP, i.e. 1 where offset <= 0
+          mmSignOffsetN = _mm_abs_epi16( _mm_sub_epi16( mm01Vector, mmSignOffsetP ));
+          // Magnitude of the offset
+          mmAbsOffset = _mm_abs_epi16( mmOriOffset );
+          // Residual-based adjustment: adj = (abs * (16 + mmResiFactor) + 8) >> 4; a factor of 0 leaves abs unchanged
+          mmAdjOffset = _mm_mullo_epi16( mmAbsOffset, _mm_add_epi16( mm16Vector, mmResiFactor ) );   // low 16 bits only; assumes offsetClipValue keeps the product in range - TODO confirm
+          mmAdjOffset = _mm_add_epi16( mmAdjOffset, mm08Vector);
+          mmAdjOffset = _mm_srai_epi16( mmAdjOffset, 4 );
+
+          __m128i mmTmpAdj = _mm_mullo_epi16( mmClassIdxResiP, mmAdjOffset );   // adjusted magnitude, weighted by bit 0 of the coding-info index
+          __m128i mmTmpOrg = _mm_mullo_epi16( mmClassIdxResiN, mmAbsOffset );   // unadjusted magnitude, weighted by 1 - bit 0
+
+          __m128i mmTmpFin = _mm_add_epi16( mmTmpAdj, mmTmpOrg );   // selected magnitude per lane
+
+          __m128i mmTmpSignP = _mm_mullo_epi16( mmSignOffsetP, mmTmpFin );   // +magnitude in lanes whose offset was positive
+          __m128i mmTmpSignN = _mm_sub_epi16( mmZeroVector, _mm_mullo_epi16( mmSignOffsetN, mmTmpFin ));   // -magnitude in the remaining lanes
+
+          accumA = _mm_add_epi16( mmTmpSignP, mmTmpSignN );   // signed, adjusted offset
+        }
+#endif
         accumA = _mm_add_epi16(accumA, cur);
         accumA = _mm_min_epi16(mmMax, _mm_max_epi16(accumA, mmMin));
 
@@ -6624,6 +9349,9 @@ static void simdFilterResi9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &s
     }     // for (size_t j = 0; j < width; j += stepX)
     src += srcStride * stepY;
   }
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  }//Use 256 Bit Simd
+#endif
 }
 #else
 template<X86_VEXT vext>
@@ -7103,6 +9831,243 @@ static void simdCalcClass0(AlfClassifier **classifier, const Area &blkDst, const
 #endif
   const __m128i shift = _mm_cvtsi32_si128(9 + bitDepth - 4);
   const int multTab[] = { 5628, 1407, 624, 351, 225, 156 };
+
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool use256BitSimd = vext >= AVX2 && blkDst.width % 16 == 0 ? true : false;   // NOTE(review): the 256-bit loop below steps j over curBlk.width in units of 16, and simdCalcClass1 gates on curBlk.width - confirm blkDst.width is the intended width to test here
+
+  if( use256BitSimd)
+  {
+#if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
+    const __m256i mult = _mm256_set1_epi32(multTab[dirWindSize % 10]);
+#else
+    const __m256i mult = _mm256_set1_epi32(multTab[dirWindSize]);
+#endif
+    const __m256i dirOff = _mm256_set1_epi32(noDir * (noDir + 1));
+    const __m256i ones   = _mm256_set1_epi32(1);
+    const __m256i zeros  = _mm256_setzero_si256();
+    const __m256i scale  = _mm256_set1_epi32(192);
+
+    int lapOffset = (dirWindSize == 1) ? 2 : 0;
+    for (int i = 0; i < curBlk.height; i += 2)
+    {
+      int iOffset = (i >> 1) + lapOffset;
+      for (int j = 0; j < curBlk.width; j += 16)
+      {
+        int jOffset = (j >> 1) + lapOffset;
+#if JVET_AE0139_ALF_IMPROVED_FIXFILTER
+        int iOffsetV = i >> 1;
+        int jOffsetV = j >> 1;
+#endif
+        __m256i sumV  = _mm256_loadu_si256((const __m256i *) &laplacian[VER][iOffset][jOffset]);   // 8 32-bit values
+        __m256i sumH  = _mm256_loadu_si256((const __m256i *) &laplacian[HOR][iOffset][jOffset]);
+        __m256i sumD0 = _mm256_loadu_si256((const __m256i *) &laplacian[DIAG0][iOffset][jOffset]);
+        __m256i sumD1 = _mm256_loadu_si256((const __m256i *) &laplacian[DIAG1][iOffset][jOffset]);
+
+        // sum += sumV + sumH;
+        __m256i tempAct  = _mm256_add_epi32(sumV, sumH);
+        __m256i activity = _mm256_mullo_epi32(tempAct, mult);
+        activity         = _mm256_srl_epi32(activity, shift);
+        activity         = _mm256_min_epi32(activity, scale);
+
+        __m256i xmm2  = activity;
+        __m256i xmm0  = _mm256_setzero_si256();
+        __m256i xmm15 = _mm256_cmpeq_epi32(xmm0, xmm0);
+        __m256i xmm1  = _mm256_srli_epi32(xmm15, 31);
+        __m256i xmm7  = _mm256_srli_epi32(xmm15, 29);
+#if JVET_AE0139_ALF_IMPROVED_FIXFILTER
+        __m256i xmm8 = _mm256_srli_epi32(xmm15, 28);
+#endif
+        __m256i xmm9 = _mm256_add_epi32(_mm256_slli_epi32(xmm7, 2), xmm1);
+
+        __m256i LUT192 = _mm256_set_epi32(0x0C020A00, 0x0E040608, 0x0E040608, 0x0C020A00, 0x0C020A00, 0x0E040608, 0x0E040608, 0x0C020A00);
+
+        xmm2 = _mm256_or_si256(xmm2, _mm256_srli_epi32(xmm2, 1));
+        xmm2 = _mm256_or_si256(xmm2, _mm256_srli_epi32(xmm2, 2));
+        xmm2 = _mm256_or_si256(xmm2, _mm256_srli_epi32(xmm2, 4));
+        xmm2 = _mm256_mullo_epi16(xmm2, xmm9);
+        xmm2 = _mm256_and_si256(_mm256_srli_epi32(xmm2, 5), xmm7);
+        xmm2 = _mm256_shuffle_epi8(LUT192, xmm2);
+
+        __m256i xmm4 = _mm256_xor_si256(activity, _mm256_srli_epi32(activity, 1));   // activity ^ (activity >> 1)
+// The next two statements emulate xmm4 = _mm256_cmplt_epi32(xmm4, activity); AVX2 offers no 32-bit less-than compare intrinsic
+        xmm4         = _mm256_cmpgt_epi32(xmm4, _mm256_sub_epi32( activity, _mm256_set1_epi32(1) ) );   // -1 where xmm4 >= activity, 0 where xmm4 < activity
+        xmm4         = _mm256_add_epi32( _mm256_abs_epi32(xmm4), _mm256_set1_epi32( -1 ) );   // |mask| - 1: 0 where xmm4 >= activity, -1 where xmm4 < activity. NOTE(review): _mm256_cmpgt_epi32(activity, xmm4) looks equivalent in one instruction - confirm
+
+        xmm4         = _mm256_or_si256(_mm256_cmpeq_epi32(activity, xmm1), xmm4);   // also set the lanes where activity == 1 (xmm1 holds 1 in every lane)
+        xmm4         = _mm256_and_si256(xmm4, xmm1);   // reduce the all-ones mask to a single low bit
+
+        activity = _mm256_or_si256(xmm2, xmm4);   // merge that bit into the LUT192 lookup result
+
+        __m256i hv1 = _mm256_max_epi32(sumV, sumH);
+        __m256i hv0 = _mm256_min_epi32(sumV, sumH);
+
+        __m256i d1 = _mm256_max_epi32(sumD0, sumD1);
+        __m256i d0 = _mm256_min_epi32(sumD0, sumD1);
+
+        // edgeStrengthHV, to optimize
+        __m256i hv0Two   = _mm256_slli_epi32(hv0, 1);
+        __m256i hv0Eight = _mm256_slli_epi32(hv0, 3);
+        __m256i hv1Two   = _mm256_slli_epi32(hv1, 1);
+        __m256i strength = _mm256_cmpgt_epi32(_mm256_slli_epi32(hv1, 2), _mm256_add_epi32(hv0, _mm256_slli_epi32(hv0, 2)));   // 4, 5
+        __m256i edgeStrengthHV = _mm256_and_si256(strength, ones);
+
+        strength       = _mm256_cmpgt_epi32(hv1Two, _mm256_add_epi32(hv0, hv0Two));   // 2, 3
+        edgeStrengthHV = _mm256_add_epi32(edgeStrengthHV, _mm256_and_si256(strength, ones));
+
+        strength       = _mm256_cmpgt_epi32(hv1, hv0Two);   // 1, 2
+        edgeStrengthHV = _mm256_add_epi32(edgeStrengthHV, _mm256_and_si256(strength, ones));
+
+        strength       = _mm256_cmpgt_epi32(hv1, _mm256_add_epi32(hv0, hv0Two));   // 1, 3
+        edgeStrengthHV = _mm256_add_epi32(edgeStrengthHV, _mm256_and_si256(strength, ones));
+
+        strength       = _mm256_cmpgt_epi32(hv1Two, _mm256_add_epi32(hv0, hv0Eight));   // 2, 9
+        edgeStrengthHV = _mm256_add_epi32(edgeStrengthHV, _mm256_and_si256(strength, ones));
+
+        strength       = _mm256_cmpgt_epi32(hv1, hv0Eight);   // 1, 8
+        edgeStrengthHV = _mm256_add_epi32(edgeStrengthHV, _mm256_and_si256(strength, ones));
+
+        // edgeStrengthD, to optimize
+        __m256i d0Two   = _mm256_slli_epi32(d0, 1);
+        __m256i d0Eight = _mm256_slli_epi32(d0, 3);
+        __m256i d1Two   = _mm256_slli_epi32(d1, 1);
+        strength        = _mm256_cmpgt_epi32(_mm256_slli_epi32(d1, 2), _mm256_add_epi32(d0, _mm256_slli_epi32(d0, 2)));   // 4, 5
+        __m256i edgeStrengthD = _mm256_and_si256(strength, ones);
+
+        strength      = _mm256_cmpgt_epi32(d1Two, _mm256_add_epi32(d0, d0Two));   // 2, 3
+        edgeStrengthD = _mm256_add_epi32(edgeStrengthD, _mm256_and_si256(strength, ones));
+
+        strength      = _mm256_cmpgt_epi32(d1, d0Two);   // 1, 2
+        edgeStrengthD = _mm256_add_epi32(edgeStrengthD, _mm256_and_si256(strength, ones));
+
+        strength      = _mm256_cmpgt_epi32(d1, _mm256_add_epi32(d0, d0Two));   // 1, 3
+        edgeStrengthD = _mm256_add_epi32(edgeStrengthD, _mm256_and_si256(strength, ones));
+
+        strength      = _mm256_cmpgt_epi32(d1Two, _mm256_add_epi32(d0, d0Eight));   // 2, 9
+        edgeStrengthD = _mm256_add_epi32(edgeStrengthD, _mm256_and_si256(strength, ones));
+
+        strength      = _mm256_cmpgt_epi32(d1, d0Eight);   // 1, 8
+        edgeStrengthD = _mm256_add_epi32(edgeStrengthD, _mm256_and_si256(strength, ones));
+
+        const __m256i hv1Xd0e = _mm256_mul_epi32(hv1, d0);
+        const __m256i hv0Xd1e = _mm256_mul_epi32(hv0, d1);
+        const __m256i hv1Xd0o = _mm256_mul_epi32(_mm256_srli_si256(hv1, 4), _mm256_srli_si256(d0, 4));
+        const __m256i hv0Xd1o = _mm256_mul_epi32(_mm256_srli_si256(hv0, 4), _mm256_srli_si256(d1, 4));
+
+        const __m256i xmme = _mm256_sub_epi64(hv0Xd1e, hv1Xd0e);
+        const __m256i xmmo = _mm256_sub_epi64(hv0Xd1o, hv1Xd0o);
+
+        __m256i dirCondition = _mm256_srai_epi32(_mm256_blend_epi16(_mm256_srli_si256(xmme, 4), xmmo, 0xCC), 31);
+
+        __m256i cx        = _mm256_blendv_epi8(edgeStrengthHV, edgeStrengthD, dirCondition);   // x
+        __m256i cy        = _mm256_blendv_epi8(edgeStrengthD, edgeStrengthHV, dirCondition);   // y
+        __m256i dirOffset = _mm256_blendv_epi8(_mm256_set1_epi32(28), zeros, dirCondition);
+        // direction = (y*(y+1))/2 + x
+        __m256i direction = _mm256_mullo_epi32(cy, cy);
+        direction         = _mm256_add_epi32(direction, cy);
+        direction         = _mm256_srli_epi32(direction, 1);
+        direction         = _mm256_add_epi32(direction, cx);
+        direction         = _mm256_andnot_si256(_mm256_cmpgt_epi32(cx, cy), direction);
+        direction         = _mm256_add_epi32(direction, dirOffset);
+#if JVET_AE0139_ALF_IMPROVED_FIXFILTER
+        __m256i sum2     = _mm256_loadu_si256((const __m256i *) &laplacian[VARIANCE][iOffsetV][jOffsetV]);
+        __m256i shiftLut = _mm256_set_m128i( _mm_loadu_si128( (const __m128i *) divShift2),  _mm_loadu_si128( (const __m128i *) divShift2) );
+        __m256i shiftVal = _mm256_shuffle_epi8(shiftLut, activity);
+        shiftVal         = _mm256_add_epi32(shiftVal, xmm1);
+        shiftVal         = _mm256_add_epi32(shiftVal, xmm1);
+        if (vext >= AVX2)   // always true here: the enclosing branch runs only when use256BitSimd, which already requires vext >= AVX2
+        {
+          sum2 = _mm256_srlv_epi32(sum2, shiftVal);   // per-lane logical right shift of sum2 by shiftVal
+        }
+        else   // NOTE(review): unreachable inside the use256BitSimd branch; scalar per-lane shift
+        {
+          __m128i sum2Tmp0 = _mm256_extracti128_si256(sum2, 0);
+          __m128i sum2Tmp1 = _mm256_extracti128_si256(sum2, 1);
+          __m128i shiftValTmp0 = _mm256_extracti128_si256(shiftVal, 0);
+          __m128i shiftValTmp1 = _mm256_extracti128_si256(shiftVal, 1);
+
+          uint64_t tmpVal0[4];   // 32 bytes: the four values of the low lane, then their four shift counts
+          int32_t *pVal0 = (int32_t *) tmpVal0;   // viewed as 8 x int32: [0..3] values, [4..7] shift counts
+          _mm_storeu_si128((__m128i *) tmpVal0, sum2Tmp0);
+          _mm_storeu_si128((__m128i *) (tmpVal0 + 2), shiftValTmp0);
+          pVal0[0] >>= pVal0[4];   // NOTE(review): >> on int32_t, whereas the branch above uses a logical shift - the two agree only for non-negative values
+          pVal0[1] >>= pVal0[5];
+          pVal0[2] >>= pVal0[6];
+          pVal0[3] >>= pVal0[7];
+
+          uint64_t tmpVal1[4];   // same layout for the high lane
+          int32_t *pVal1 = (int32_t *) tmpVal1;
+          _mm_storeu_si128((__m128i *) tmpVal1, sum2Tmp1);
+          _mm_storeu_si128((__m128i *) (tmpVal1 + 2), shiftValTmp1);
+          pVal1[0] >>= pVal1[4];
+          pVal1[1] >>= pVal1[5];
+          pVal1[2] >>= pVal1[6];
+          pVal1[3] >>= pVal1[7];
+
+          sum2 = _mm256_set_m128i(  _mm_loadu_si128((const __m128i *) pVal1 ), _mm_loadu_si128((const __m128i *) pVal0) );   // reassemble: pVal1 becomes the high lane, pVal0 the low lane
+        }
+
+        __m256i LUT0 = _mm256_set_m128i( _mm_loadu_si128((const __m128i *) sqrtSum), _mm_loadu_si128((const __m128i *) sqrtSum) );
+        __m256i LUT1  = _mm256_set_m128i(_mm_loadu_si128((const __m128i *) &sqrtSum[16]), _mm_loadu_si128((const __m128i *) &sqrtSum[16]) );
+        __m256i xmm16 = _mm256_set_epi32(16, 16, 16, 16, 16, 16, 16, 16);
+        __m256i xmm35 = _mm256_set_epi32(35, 35, 35, 35, 35, 35, 35, 35);
+        __m256i xmm48 = _mm256_set_epi32(48, 48, 48, 48, 48, 48, 48, 48);
+
+        __m256i use1 = _mm256_cmpgt_epi32(sum2, xmm8);
+
+        __m256i idx0 = _mm256_and_si256(sum2, xmm8);
+        __m256i idx1 = _mm256_sub_epi32(sum2, xmm16);
+        idx1         = _mm256_min_epi32(idx1, xmm8);
+
+        idx0 = _mm256_shuffle_epi8(LUT0, idx0);
+        idx1 = _mm256_shuffle_epi8(LUT1, idx1);
+
+        idx1 = _mm256_add_epi32(idx1, _mm256_slli_epi32(xmm1, 2));
+
+        idx0 = _mm256_andnot_si256(use1, idx0);
+        idx1 = _mm256_and_si256(use1, idx1);
+        idx0 = _mm256_add_epi32(idx0, idx1);
+
+        xmm35 = _mm256_cmpgt_epi32(sum2, xmm35);
+        xmm48 = _mm256_cmpgt_epi32(sum2, xmm48);
+
+        xmm35 = _mm256_and_si256(xmm35, xmm1);
+        xmm48 = _mm256_and_si256(xmm48, xmm1);
+
+        xmm35 = _mm256_add_epi32(xmm35, xmm48);
+
+        xmm2     = _mm256_add_epi32(idx0, xmm35);
+        xmm2     = _mm256_slli_epi32(xmm2, 4);
+        activity = _mm256_add_epi32(activity, xmm2);
+#endif
+        __m256i classIdx = _mm256_mullo_epi32(dirOff, activity);
+        classIdx         = _mm256_add_epi32(classIdx, direction);
+
+        // transpose
+        __m256i dirTempHVMinus1 = _mm256_cmpgt_epi32(sumV, sumH);
+        __m256i dirTempDMinus1  = _mm256_cmpgt_epi32(sumD0, sumD1);
+        __m256i transposeIdx    = _mm256_set1_epi32(3);
+        transposeIdx            = _mm256_add_epi32(transposeIdx, dirTempHVMinus1);
+        transposeIdx            = _mm256_add_epi32(transposeIdx, dirTempDMinus1);
+        transposeIdx            = _mm256_add_epi32(transposeIdx, dirTempDMinus1);
+
+        classIdx = _mm256_slli_epi16(classIdx, 2);
+        classIdx = _mm256_add_epi16(classIdx, transposeIdx);
+        classIdx = _mm256_shuffle_epi8(classIdx, _mm256_setr_epi8(0, 1, 0, 1, 4, 5, 4, 5, 8, 9, 8, 9, 12, 13, 12, 13, 0, 1, 0, 1, 4, 5, 4, 5, 8, 9, 8, 9, 12, 13, 12, 13));
+#if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
+        _mm256_storeu_si256((__m256i *) &classifier[blkDst.pos().y + i][blkDst.pos().x + j], classIdx);
+        _mm256_storeu_si256((__m256i *) &classifier[blkDst.pos().y + i + 1][blkDst.pos().x + j], classIdx);
+#else
+        _mm256_storeu_si256((__m256i *) &classifier[curBlk.pos().y + i][curBlk.pos().x + j], classIdx);
+        _mm256_storeu_si256((__m256i *) &classifier[curBlk.pos().y + i + 1][curBlk.pos().x + j], classIdx);
+#endif
+
+      }   // for (int j = 0; j < curBlk.width; j += 16)
+    }     // for (int i = 0; i < curBlk.height; i += 2)
+
+  }
+  else
+  {
+#endif
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
   const __m128i mult = _mm_set1_epi32(multTab[dirWindSize % 10]);
 #else  
@@ -7312,8 +10277,14 @@ static void simdCalcClass0(AlfClassifier **classifier, const Area &blkDst, const
 
     }//for (int j = 0; j < curBlk.width; j += 8)
   }//for (int i = 0; i < curBlk.height; i += 2)
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  }// end of the else branch (128-bit SIMD fallback) that follows the 256-bit SIMD block
+ #endif
 }
 
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+template <X86_VEXT vext>
+#endif
 static void simdCalcClass1(AlfClassifier **classifier, const Area &blkDst, const Area &curBlk, int dirWindSize, int classDir, int noDir, int noAct, int bitDepth, int subBlkSize, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS])
 {
   const __m128i shift = _mm_cvtsi32_si128(9 + bitDepth);
@@ -7322,6 +10293,82 @@ static void simdCalcClass1(AlfClassifier **classifier, const Area &blkDst, const
 #else
   const int multTab[] = { 5628, 1407, 624, 351, 225, 156 };
 #endif
+
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  const bool use256BitSimd = vext >= AVX2 && curBlk.width % 16 == 0;   // 256-bit path handles 16 columns per step, so the width must be a multiple of 16
+  if( use256BitSimd )
+  {
+    const __m256i mult  = _mm256_set1_epi32(multTab[dirWindSize]);   // activity multiplier selected by dirWindSize
+    const __m256i scale = _mm256_set1_epi32(15);                     // activity is clamped to 15, the last index of the 16-byte lookup table below
+
+    for (int i = 0; i < curBlk.height; i += 2)   // two picture rows share one laplacian row
+    {
+      int iOffset = i >> 1;
+      for (int j = 0; j < curBlk.width; j += 16)   // 16 columns <-> 8 laplacian entries per iteration
+      {
+        int     jOffset = j >> 1;
+        __m256i sumV    = _mm256_loadu_si256((const __m256i *) &laplacian[VER][iOffset][jOffset]);   // 8 32-bit values
+        __m256i sumH    = _mm256_loadu_si256((const __m256i *) &laplacian[HOR][iOffset][jOffset]);
+        __m256i sumD0   = _mm256_loadu_si256((const __m256i *) &laplacian[DIAG0][iOffset][jOffset]);
+        __m256i sumD1   = _mm256_loadu_si256((const __m256i *) &laplacian[DIAG1][iOffset][jOffset]);
+
+        // activity = min(15, ((sumV + sumH) * mult) >> shift), then mapped to 0..4 through a byte table and multiplied by 5
+        __m256i tempAct  = _mm256_add_epi32(sumV, sumH);
+        __m256i activity = _mm256_mullo_epi32(tempAct, mult);
+        activity         = _mm256_srl_epi32(activity, shift);
+        activity         = _mm256_min_epi32(activity, scale);
+        __m256i classIdx = _mm256_shuffle_epi8(_mm256_setr_epi8(0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4), activity);
+        classIdx         = _mm256_add_epi32(classIdx, _mm256_slli_epi32(classIdx, 2));   // activity * 5
+
+        __m256i hv1 = _mm256_max_epi32(sumV, sumH);   // larger of the vertical / horizontal sums
+        __m256i hv0 = _mm256_min_epi32(sumV, sumH);   // smaller of the vertical / horizontal sums
+
+        __m256i d1 = _mm256_max_epi32(sumD0, sumD1);   // larger of the two diagonal sums
+        __m256i d0 = _mm256_min_epi32(sumD0, sumD1);   // smaller of the two diagonal sums
+
+        const __m256i hv1Xd0e = _mm256_mul_epi32(hv1, d0);   // 64-bit products of the even 32-bit lanes
+        const __m256i hv0Xd1e = _mm256_mul_epi32(hv0, d1);
+        const __m256i hv1Xd0o = _mm256_mul_epi32(_mm256_srli_si256(hv1, 4), _mm256_srli_si256(d0, 4));   // odd lanes, shifted into even position first
+        const __m256i hv0Xd1o = _mm256_mul_epi32(_mm256_srli_si256(hv0, 4), _mm256_srli_si256(d1, 4));
+
+        const __m256i xmme = _mm256_sub_epi64(hv1Xd0e, hv0Xd1e);   // hv1 * d0 - hv0 * d1, even lanes
+        const __m256i xmmo = _mm256_sub_epi64(hv1Xd0o, hv0Xd1o);   // same difference, odd lanes
+
+        __m256i dirCondition = _mm256_srai_epi32(_mm256_blend_epi16(_mm256_srli_si256(xmme, 4), xmmo, 0xCC), 31);   // all-ones in lanes where the 64-bit difference is negative
+
+        __m256i hvd1      = _mm256_blendv_epi8(hv1, d1, dirCondition);   // diagonal pair where dirCondition is set, otherwise the h/v pair
+        __m256i hvd0      = _mm256_blendv_epi8(hv0, d0, dirCondition);
+        __m256i strength1 = _mm256_cmpgt_epi32(hvd1, _mm256_add_epi32(hvd0, hvd0));   // hvd1 > 2 * hvd0
+        __m256i strength2 = _mm256_cmpgt_epi32(_mm256_add_epi32(hvd1, hvd1), _mm256_add_epi32(hvd0, _mm256_slli_epi32(hvd0, 3)));   // 2 * hvd1 > 9 * hvd0
+        __m256i offset    = _mm256_and_si256(strength1, _mm256_set1_epi32(1));
+        __m256i direction = _mm256_add_epi32(offset, _mm256_and_si256(strength2, _mm256_set1_epi32(1)));
+        direction         = _mm256_add_epi32(direction, _mm256_andnot_si256(dirCondition, _mm256_set1_epi32(2)));   // +2 when the h/v pair was selected
+        direction         = _mm256_and_si256(direction, strength1);   // direction is 0 unless strength1 holds
+        classIdx          = _mm256_add_epi32(direction, classIdx);
+
+        // transpose index = 3 - (sumV > sumH) - 2 * (sumD0 > sumD1); each compare yields -1 in lanes where it is true
+        __m256i dirTempHVMinus1 = _mm256_cmpgt_epi32(sumV, sumH);
+        __m256i dirTempDMinus1  = _mm256_cmpgt_epi32(sumD0, sumD1);
+        __m256i transposeIdx    = _mm256_set1_epi32(3);
+        transposeIdx            = _mm256_add_epi32(transposeIdx, dirTempHVMinus1);
+        transposeIdx            = _mm256_add_epi32(transposeIdx, dirTempDMinus1);
+        transposeIdx            = _mm256_add_epi32(transposeIdx, dirTempDMinus1);
+        classIdx                = _mm256_slli_epi16(classIdx, 2);   // class index sits above the 2-bit transpose index
+        classIdx                = _mm256_add_epi16(classIdx, transposeIdx);
+        classIdx = _mm256_shuffle_epi8(classIdx, _mm256_setr_epi8(0, 1, 0, 1, 4, 5, 4, 5, 8, 9, 8, 9, 12, 13, 12, 13, 0, 1, 0, 1, 4, 5, 4, 5, 8, 9, 8, 9, 12, 13, 12, 13));   // duplicate the low 16 bits of every 32-bit lane (presumably one 16-bit AlfClassifier per column - TODO confirm)
+#if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
+        _mm256_storeu_si256((__m256i *) &classifier[blkDst.pos().y + i][blkDst.pos().x + j], classIdx);
+        _mm256_storeu_si256((__m256i *) &classifier[blkDst.pos().y + i + 1][blkDst.pos().x + j], classIdx);   // second row of the pair gets the same values
+#else
+        _mm256_storeu_si256((__m256i *) &classifier[curBlk.pos().y + i][curBlk.pos().x + j], classIdx);
+        _mm256_storeu_si256((__m256i *) &classifier[curBlk.pos().y + i + 1][curBlk.pos().x + j], classIdx);   // second row of the pair gets the same values
+#endif
+      }   // for (int j = 0; j < curBlk.width; j += 16)
+    }     // for (int i = 0; i < curBlk.height; i += 2)
+  }
+  else
+  {
+#endif
   const __m128i mult = _mm_set1_epi32(multTab[dirWindSize]);
   const __m128i scale = _mm_set1_epi32(15);
 
@@ -7389,6 +10436,9 @@ static void simdCalcClass1(AlfClassifier **classifier, const Area &blkDst, const
 #endif
     }//for (int j = 0; j < curBlk.width; j += 8)
   }//for (int i = 0; i < curBlk.height; i += 2)
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  }// end of the else branch: 128-bit SIMD fallback taken when use256BitSimd is false
+#endif
 }
 #endif
 
@@ -7438,7 +10488,11 @@ void AdaptiveLoopFilter::_initAdaptiveLoopFilterX86()
 #else
   m_calcClass0 = simdCalcClass0;
 #endif
+#if USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  m_calcClass1 = simdCalcClass1<vext>;
+#else
   m_calcClass1 = simdCalcClass1;
+#endif
 
   for( int i = 0; i < NUM_SETS_FIXED_FILTERS; i++ )
   {
diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp
index 82223c1df013107d5a3b10cd4e7d70537795293e..955a7d0b25d7df026bbceda2439ca720cd3ccdfe 100644
--- a/source/Lib/DecoderLib/DecLib.cpp
+++ b/source/Lib/DecoderLib/DecLib.cpp
@@ -815,6 +815,9 @@ void DecLib::executeLoopFilters()
     m_cALF.copyResiData(cs);
   }
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  m_cALF.callCodingInfoBuf( cs ).fill( 0 );
+#endif
 
   // deblocking filter
 #if DB_PARAM_TID
@@ -842,8 +845,12 @@ void DecLib::executeLoopFilters()
   }  
 #endif
 
-
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  PelUnitBuf codingInfoBuf = m_cALF.callCodingInfoBuf( cs );
+  m_cLoopFilter.loopFilterPic( cs, codingInfoBuf, true );
+#else
   m_cLoopFilter.loopFilterPic( cs );
+#endif
 
 #if !MULTI_PASS_DMVR
   CS::setRefinedMotionField(cs);
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index f974db44538cd4898d9ee7e5f2a9a49e3424a7e4..5e9c09f5826f1e7e90d226605facac4490df15aa 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -2182,6 +2182,16 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   {
     pcSPS->setAlfPrecisionFlag( false );
   }
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  // sps_alf_luma_fixed_filter_adjust is only present when ALF is enabled; otherwise the SPS value is set to false.
+  bool alfLumaFixedFilterAdjust = false;
+  if( pcSPS->getALFEnabledFlag() )
+  {
+    READ_FLAG( uiCode, "sps_alf_luma_fixed_filter_adjust" );
+    alfLumaFixedFilterAdjust = ( uiCode != 0 );
+  }
+  pcSPS->setAlfLumaFixedFilterAdjust( alfLumaFixedFilterAdjust );
 #endif
   if (pcSPS->getALFEnabledFlag() && pcSPS->getChromaFormatIdc() != CHROMA_400)
   {
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
index 944ae6daf30388244b9151cea55754bd3cabd6ec..38e77f597248ba1a5d4820adb075179d5200580d 100644
--- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
+++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
@@ -1421,6 +1421,9 @@ void EncAdaptiveLoopFilter::ALFProcess( CodingStructure& cs, const double *lambd
 #endif
   const CPelBuf &resiLuma = resiYuv.get(COMPONENT_Y);
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  PelUnitBuf recYuvCodingInfo = m_tempBufCodingInfo.getBuf( cs.area );
+#endif
 #if JVET_AI0166_CCALF_CHROMA_SAO_INPUT
   m_tempBufSAO.copyFrom(cs.getRecoBuf());
   PelUnitBuf recYuvSAO = m_tempBufSAO.getBuf(cs.area);
@@ -1483,7 +1486,23 @@ void EncAdaptiveLoopFilter::ALFProcess( CodingStructure& cs, const double *lambd
             buf.extendBorderPel( MAX_ALF_PADDING_SIZE );
 #endif
             buf = buf.subBuf( UnitArea( cs.area.chromaFormat, Area( clipL ? 0 : MAX_ALF_PADDING_SIZE, clipT ? 0 : MAX_ALF_PADDING_SIZE, w, h ) ) );
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+            PelUnitBuf bufCodingInfo = m_tempBufCodingInfo2.subBuf( UnitArea( CHROMA_400, Area( 0, 0, wBuf, hBuf ) ) );
+            bufCodingInfo.copyFrom( recYuvCodingInfo.subBuf( UnitArea( CHROMA_400, Area( xStart - ( clipL ? 0 : MAX_ALF_PADDING_SIZE ), yStart - ( clipT ? 0 : MAX_ALF_PADDING_SIZE ), wBuf, hBuf ) ) ) );
+            // pad top-left unavailable samples for raster slice
+            if( xStart == xPos && yStart == yPos && ( rasterSliceAlfPad & 1 ) )
+            {
+              bufCodingInfo.padBorderPel( MAX_ALF_PADDING_SIZE, 1 );
+            }
 
+            // pad bottom-right unavailable samples for raster slice
+            if( xEnd == xPos + width && yEnd == yPos + height && ( rasterSliceAlfPad & 2 ) )
+            {
+              bufCodingInfo.padBorderPel( MAX_ALF_PADDING_SIZE, 2 );
+            }
+            mirroredPaddingForAlf(cs, bufCodingInfo, MAX_ALF_PADDING_SIZE, true, false);
+            bufCodingInfo = bufCodingInfo.subBuf( UnitArea( CHROMA_400, Area( clipL ? 0 : MAX_ALF_PADDING_SIZE, clipT ? 0 : MAX_ALF_PADDING_SIZE, w, h ) ) );
+#endif
 #if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT
             PelUnitBuf bufResi = m_tempBufResi2.subBuf(UnitArea(cs.area.chromaFormat, Area(0, 0, wBuf, hBuf)));
             bufResi.copyFrom(resiYuv.subBuf(
@@ -1543,6 +1562,9 @@ void EncAdaptiveLoopFilter::ALFProcess( CodingStructure& cs, const double *lambd
             const Area blkDstChroma( xStart >> scaleX, yStart >> scaleY, w >> scaleX, h >> scaleY );
             deriveFixedFilterChroma( m_classifier, buf, bufDb, blkDstChroma, blkSrcChroma, cs, -1, MAX_NUM_COMPONENT );
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+            calcAlfLumaCodingInfoBlk(cs, m_classifierCodingInfo[0], blkDst, blkSrc, buf.get(COMPONENT_Y), 2, 2, m_inputBitDepth[CHANNEL_TYPE_LUMA], bufResi.get(COMPONENT_Y), m_laplacian[0], bufCodingInfo.get(COMPONENT_Y) );
+#endif
 #if JVET_X0071_ALF_BAND_CLASSIFIER
             deriveClassification( m_classifier, buf.get( COMPONENT_Y ),
 #if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT
@@ -1577,6 +1599,9 @@ void EncAdaptiveLoopFilter::ALFProcess( CodingStructure& cs, const double *lambd
         Area blkChroma(xPos >> scaleX, yPos >> scaleY, width >> scaleX, height >> scaleY);
         deriveFixedFilterChroma( m_classifier, recYuv, recYuvBeforeDb, blkChroma, blkChroma, cs, -1, MAX_NUM_COMPONENT );
 #endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+        calcAlfLumaCodingInfoBlk(cs, m_classifierCodingInfo[0], blk, blk, recYuv.get(COMPONENT_Y), 2, 2, m_inputBitDepth[CHANNEL_TYPE_LUMA], resiYuv.get(COMPONENT_Y), m_laplacian[0], recYuvCodingInfo.get(COMPONENT_Y) );
+#endif
 #if JVET_X0071_ALF_BAND_CLASSIFIER
         deriveClassification( m_classifier, recLuma,
 #if JVET_AC0162_ALF_RESIDUAL_SAMPLES_INPUT
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index 616369020a22ab7bf9eddaa436d81378558df2ae..3d709252d5ae1ee795e3e449c0a1688c51c48457 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -712,6 +712,9 @@ protected:
 #endif
 #if JVET_AH0057_CCALF_COEFF_PRECISION
   bool      m_ccalfPrecision;
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  bool      m_alfLumaFixedFilterAdjust;
 #endif
   bool      m_bTestSAODisableAtPictureLevel;
   double    m_saoEncodingRate;       // When non-0 SAO early picture termination is enabled for luma and chroma
@@ -2389,6 +2392,10 @@ public:
 #if JVET_AH0057_CCALF_COEFF_PRECISION
   void      setUseCCALFPrecision(bool b)                             { m_ccalfPrecision = b; }
   bool      getUseCCALFPrecision()                             const { return m_ccalfPrecision; }
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  void      setAlfLumaFixedFilterAdjust(bool b)                      { m_alfLumaFixedFilterAdjust = b; }   // stores the "AlfLumaFixedFilterAdjust" encoder option (passed in by EncApp::xInitLibCfg)
+  bool      getAlfLumaFixedFilterAdjust()                      const { return m_alfLumaFixedFilterAdjust; }   // returns the value stored by setAlfLumaFixedFilterAdjust()
 #endif
   void  setTestSAODisableAtPictureLevel (bool bVal)                  { m_bTestSAODisableAtPictureLevel = bVal; }
   bool  getTestSAODisableAtPictureLevel ( ) const                    { return m_bTestSAODisableAtPictureLevel; }
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index 4b27948fbd737786ae9a2356c6f0043142b9d13e..4f9d962143d3015177a148924f8d24406a4fe4ee 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -23992,13 +23992,23 @@ void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner, bool cal
     if ( leftEdgeAvai )
     {
       m_pcLoopFilter->resetFilterLengths();
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      //No Impact on OBMC Buffer
+      m_pcLoopFilter->xDeblockCU( *cu, EDGE_VER, false, m_tempWoOBMCBuffer );
+#else
       m_pcLoopFilter->xDeblockCU( *cu, EDGE_VER );
+#endif
     }
 
     if (topEdgeAvai)
     {
       m_pcLoopFilter->resetFilterLengths();
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      //No Impact on OBMC Buffer
+      m_pcLoopFilter->xDeblockCU( *cu, EDGE_HOR, false, m_tempWoOBMCBuffer );
+#else
       m_pcLoopFilter->xDeblockCU( *cu, EDGE_HOR );
+#endif
     }
 
     //update current CU SSE
diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp
index 1d8c0cd314828a9799ab8705417a21dcea192484..f23b366bd9612471502010ea8b55c086d0615f5c 100644
--- a/source/Lib/EncoderLib/EncGOP.cpp
+++ b/source/Lib/EncoderLib/EncGOP.cpp
@@ -3771,7 +3771,9 @@ void EncGOP::compressGOP(int iPOCLast, int iNumPicRcvd, PicList &rcListPic, std:
         m_pcALF->copyResiData(cs);
       }
 #endif
-
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      m_pcALF->callCodingInfoBuf( cs ).fill( 0 );
+#endif
       // create SAO object based on the picture size
       if( pcSlice->getSPS()->getSAOEnabledFlag()
 #if JVET_W0066_CCSAO
@@ -3862,7 +3864,12 @@ void EncGOP::compressGOP(int iPOCLast, int iNumPicRcvd, PicList &rcListPic, std:
       }
 #endif
 
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+      PelUnitBuf codingInfoBuf = m_pcALF->callCodingInfoBuf( cs );
+      m_pcLoopFilter->loopFilterPic( cs, codingInfoBuf, true );
+#else
       m_pcLoopFilter->loopFilterPic( cs );
+#endif
 
 #if !MULTI_PASS_DMVR
       CS::setRefinedMotionField(cs);
@@ -5128,7 +5135,12 @@ uint64_t EncGOP::preLoopFilterPicAndCalcDist( Picture* pcPic )
   } 
 #endif
 
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  PelUnitBuf codingInfoBuf = m_pcALF->callCodingInfoBuf( cs );
+  m_pcLoopFilter->loopFilterPic( cs, codingInfoBuf, false );
+#else
   m_pcLoopFilter->loopFilterPic( cs );
+#endif
 
   const CPelUnitBuf picOrg = pcPic->getRecoBuf();
   const CPelUnitBuf picRec = cs.getRecoBuf();
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index d3209d48c5d0393c47f9c28c089ee38f79578fb2..5a1cad6daf4be6e33ccbe0b45db5e89383cbad44 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -2225,6 +2225,9 @@ void EncLib::xInitSPS( SPS& sps )
 #endif
 #if JVET_AH0057_CCALF_COEFF_PRECISION
   sps.setCCALFPrecisionFlag( m_ccalfPrecision );
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  sps.setAlfLumaFixedFilterAdjust( m_alfLumaFixedFilterAdjust && m_intraPeriod >= 0 );   // honour the AlfLumaFixedFilterAdjust option; still forced off when m_intraPeriod < 0
 #endif
   sps.setJointCbCrEnabledFlag( m_JointCbCrMode );
   sps.setMaxTLayers( m_maxTempLayer );
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index bdb2d71d23654628a9d1001ee3451c316293773d..11bb3fbeb9a5785d121f84e6cb6e2a8d8faf6b51 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -1344,6 +1344,12 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
     }
 #endif
   }
+#endif
+#if JVET_AJ0188_CODING_INFO_CLASSIFICATION
+  if( pcSPS->getALFEnabledFlag() )   // flag is written only when ALF is enabled; HLSyntaxReader::parseSPS reads it under the same condition
+  {
+    WRITE_FLAG( pcSPS->getAlfLumaFixedFilterAdjust(),                                 "sps_alf_luma_fixed_filter_adjust" );
+  }
 #endif
   if (pcSPS->getALFEnabledFlag() && pcSPS->getChromaFormatIdc() != CHROMA_400)
   {