diff --git a/cfg/encoder_intra_vtm.cfg b/cfg/encoder_intra_vtm.cfg
index aec0126b24df4e2ce43ef34f33b95d097d846aa8..12a31a22940fb9d31224d058afcf6b9d181ba124 100644
--- a/cfg/encoder_intra_vtm.cfg
+++ b/cfg/encoder_intra_vtm.cfg
@@ -100,6 +100,7 @@ MRL                          : 1
 MIP                          : 1
 JointCbCr                    : 1      # joint coding of chroma residuals (if available): 0: disable, 1: enable
 ChromaTS                     : 1
+CCSAO                        : 1      # Cross-component sample adaptive offset: 0: disable, 1: enable
 # Fast tools
 PBIntraFast                  : 1
diff --git a/cfg/encoder_lowdelay_P_vtm.cfg b/cfg/encoder_lowdelay_P_vtm.cfg
index 9152b7877f65b00dc1c7c4966d3fb4c12890da32..f2f8c9217dd40d3f956b29f39f64de32cc707c36 100644
--- a/cfg/encoder_lowdelay_P_vtm.cfg
+++ b/cfg/encoder_lowdelay_P_vtm.cfg
@@ -128,6 +128,7 @@ ChromaTS                     : 1
 AffineMMVD                   : 1
 AdditionalInterHyps          : 0
 BIF                          : 1      # Bilateral filter: 0: disable, 1: enable
+CCSAO                        : 1      # Cross-component sample adaptive offset: 0: disable, 1: enable
 # Fast tools
 PBIntraFast                  : 1
diff --git a/cfg/encoder_lowdelay_vtm.cfg b/cfg/encoder_lowdelay_vtm.cfg
index 5a2784aa4183f7de899090f29823da08fcda9c65..f22d99fc2c914898be63ad9a36f9ef2b4538b79f 100644
--- a/cfg/encoder_lowdelay_vtm.cfg
+++ b/cfg/encoder_lowdelay_vtm.cfg
@@ -132,6 +132,7 @@ ChromaTS                     : 1
 AffineMMVD                   : 1
 AdditionalInterHyps          : 2
 BIF                          : 1      # Bilateral filter: 0: disable, 1: enable
+CCSAO                        : 1      # Cross-component sample adaptive offset: 0: disable, 1: enable
 # Fast tools
 PBIntraFast                  : 1
diff --git a/cfg/encoder_randomaccess_vtm.cfg b/cfg/encoder_randomaccess_vtm.cfg
index a732648effd0a4316c2f852f2d53fb6980c4ac6d..ffc4d09115e6486bd7110da3cc58b4f73892a7c5 100644
--- a/cfg/encoder_randomaccess_vtm.cfg
+++ b/cfg/encoder_randomaccess_vtm.cfg
@@ -162,6 +162,7 @@ PROF                         : 1
 AffineMMVD                   : 1
 AdditionalInterHyps          : 2
 BIF                          : 1      # Bilateral filter: 0: disable, 1: enable
+CCSAO                        : 1      # Cross-component sample adaptive offset: 0: disable, 1: enable
 # Fast tools
 PBIntraFast                  : 1
diff --git a/cfg/encoder_randomaccess_vtm_gop16.cfg b/cfg/encoder_randomaccess_vtm_gop16.cfg
index 68f3d7214907d5c0ea6bf5229a9dbf527105bd01..076459ff3c577c39ae8ccfe62d5556a94fa2b257 100644
--- a/cfg/encoder_randomaccess_vtm_gop16.cfg
+++ b/cfg/encoder_randomaccess_vtm_gop16.cfg
@@ -146,7 +146,7 @@ PROF                         : 1
 AffineMMVD                   : 1
 AdditionalInterHyps          : 2
 BIF                          : 1      # Bilateral filter: 0: disable, 1: enable
+CCSAO                        : 1      # Cross-component sample adaptive offset: 0: disable, 1: enable
 # Fast tools
 PBIntraFast                  : 1
diff --git a/source/App/EncoderApp/EncApp.cpp b/source/App/EncoderApp/EncApp.cpp
index 34cd83ca8e9b0858bd7ec57dd6b35c6ef4f9a5f4..a8b96db85b4114c3e1df21fee7d71cf7b3b994a9 100644
--- a/source/App/EncoderApp/EncApp.cpp
+++ b/source/App/EncoderApp/EncApp.cpp
@@ -346,6 +346,11 @@ void EncApp::xInitLibCfg()
     CHECK(m_noSaoConstraintFlag && m_bUseSAO, "SAO shall be deactivated when m_bNoSaoConstraintFlag is equal to 1");
+#if JVET_W0066_CCSAO
+    m_cEncLib.setNoCCSaoConstraintFlag(m_noCCSaoConstraintFlag);
+    CHECK(m_noCCSaoConstraintFlag && m_CCSAO, "CCSAO shall be deactivated when m_noCCSaoConstraintFlag is equal to 1");
     CHECK(m_noAlfConstraintFlag && m_alf, "ALF shall be deactivated when m_bNoAlfConstraintFlag is equal to 1");
@@ -516,6 +521,9 @@ void EncApp::xInitLibCfg()
+#if JVET_W0066_CCSAO
+    m_cEncLib.setNoCCSaoConstraintFlag(false);
 #if JVET_S0058_GCI
@@ -919,6 +927,9 @@ void EncApp::xInitLibCfg()
   //====== Sub-picture and Slices ========
   m_cEncLib.setSingleSlicePerSubPicFlagFlag                      ( m_singleSlicePerSubPicFlag );
   m_cEncLib.setUseSAO                                            ( m_bUseSAO );
+#if JVET_W0066_CCSAO
+  m_cEncLib.setUseCCSAO                                          ( m_CCSAO );
   m_cEncLib.setTestSAODisableAtPictureLevel                      ( m_bTestSAODisableAtPictureLevel );
   m_cEncLib.setSaoEncodingRate                                   ( m_saoEncodingRate );
   m_cEncLib.setSaoEncodingRateChroma                             ( m_saoEncodingRateChroma );
diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index f684965eb91ce3b440f30ff157fb102e75212873..79ce1652c9dcad9698d241e4d88c353157a7577e 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -914,6 +914,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("NoSignDataHidingConstraintFlag",                  m_noSignDataHidingConstraintFlag,                false, "Indicate that SDH is deactivated")
   ("NoQpDeltaConstraintFlag",                         m_noQpDeltaConstraintFlag,                       false, "Indicate that QPdelta is deactivated")
   ("NoSaoConstraintFlag",                             m_noSaoConstraintFlag,                           false, "Indicate that SAO is deactivated")
+#if JVET_W0066_CCSAO
+  ("NoCCSaoConstraintFlag",                           m_noCCSaoConstraintFlag,                         false, "Indicate that CCSAO is deactivated")
   ("NoAlfConstraintFlag",                             m_noAlfConstraintFlag,                           false, "Indicate that ALF is deactivated")
   ("NoCCAlfConstraintFlag",                           m_noCCAlfConstraintFlag,                          false, "Indicate that CCALF is deactivated")
   ("NoLmcsConstraintFlag",                            m_noLmcsConstraintFlag,                           false, "Indicate that LMCS is deactivated")
@@ -1240,6 +1243,9 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   ("GolombRiceParameterAdaptation",                   m_persistentRiceAdaptationEnabledFlag,            false, "Enable the adaptation of the Golomb-Rice parameter over the course of each slice")
   ("AlignCABACBeforeBypass",                          m_cabacBypassAlignmentEnabledFlag,                false, "Align the CABAC engine to a defined fraction of a bit prior to coding bypass data. Must be 1 in high bit rate profile, 0 otherwise")
   ("SAO",                                             m_bUseSAO,                                         true, "Enable Sample Adaptive Offset")
+#if JVET_W0066_CCSAO
+  ("CCSAO",                                           m_CCSAO,                                           true, "Cross-component Sample Adaptive Offset" )
   ("TestSAODisableAtPictureLevel",                    m_bTestSAODisableAtPictureLevel,                  false, "Enables the testing of disabling SAO at the picture level after having analysed all blocks")
   ("SaoEncodingRate",                                 m_saoEncodingRate,                                 0.75, "When >0 SAO early picture termination is enabled for luma and chroma")
   ("SaoEncodingRateChroma",                           m_saoEncodingRateChroma,                            0.5, "The SAO early picture termination rate to use for chroma (when m_SaoEncodingRate is >0). If <=0, use results for luma")
@@ -4233,6 +4239,9 @@ void EncAppCfg::xPrintParameter()
   msg( VERBOSE, "Slices: %d ", m_numSlicesInPic);
   msg( VERBOSE, "MCTS:%d ", m_MCTSEncConstraint );
   msg( VERBOSE, "SAO:%d ", (m_bUseSAO)?(1):(0));
+#if JVET_W0066_CCSAO
+  msg( VERBOSE, "CCSAO:%d ", m_CCSAO ? 1 : 0 );
   msg( VERBOSE, "ALF:%d ", m_alf ? 1 : 0 );
   msg( VERBOSE, "CCALF:%d ", m_ccalf ? 1 : 0 );
diff --git a/source/App/EncoderApp/EncAppCfg.h b/source/App/EncoderApp/EncAppCfg.h
index 3a6a3d1b4ddef2838bf9177945a8060638beb078..c07a0dc8749b37b4d251bc8501c476facf844a21 100644
--- a/source/App/EncoderApp/EncAppCfg.h
+++ b/source/App/EncoderApp/EncAppCfg.h
@@ -167,6 +167,9 @@ protected:
   bool      m_noPartitionConstraintsOverrideConstraintFlag;
   bool      m_noSaoConstraintFlag;
+#if JVET_W0066_CCSAO
+  bool      m_noCCSaoConstraintFlag;
   bool      m_noAlfConstraintFlag;
   bool      m_noCCAlfConstraintFlag;
 #if JVET_S0058_GCI
@@ -490,6 +493,9 @@ protected:
   // coding tool (SAO)
   bool      m_bUseSAO;
+#if JVET_W0066_CCSAO
+  bool      m_CCSAO;
   bool      m_bTestSAODisableAtPictureLevel;
   double    m_saoEncodingRate;                                ///< When >0 SAO early picture termination is enabled for luma and chroma
   double    m_saoEncodingRateChroma;                          ///< The SAO early picture termination rate to use for chroma (when m_SaoEncodingRate is >0). If <=0, use results for luma.
diff --git a/source/Lib/CommonLib/AlfParameters.h b/source/Lib/CommonLib/AlfParameters.h
index 3c1e7141fa9e7ff418ab348ff307dcd748444fc4..cb18d7991dc380605eb6a6179fe0041fce240940 100644
--- a/source/Lib/CommonLib/AlfParameters.h
+++ b/source/Lib/CommonLib/AlfParameters.h
@@ -424,6 +424,50 @@ struct CcAlfFilterParam
     return *this;
+#if JVET_W0066_CCSAO
+struct CcSaoComParam // CCSAO (cross-component sample adaptive offset) parameters, stored per colour component
+  bool     enabled   [MAX_NUM_COMPONENT]; // one flag per component
+  uint8_t  setNum    [MAX_NUM_COMPONENT]; // one value per component; presumably the number of sets in use -- confirm
+  bool     setEnabled[MAX_NUM_COMPONENT][MAX_CCSAO_SET_NUM]; // one flag per (component, set index)
+  CcSaoComParam() // a new object starts in the fully cleared state
+  {
+    reset();
+  }
+  void reset() // clear every array, for all components
+  {
+    std::memset( enabled,    false, sizeof( enabled    ) );
+    std::memset( setNum,         0, sizeof( setNum     ) );
+    std::memset( setEnabled, false, sizeof( setEnabled ) );
+    std::memset( candPos,        0, sizeof( candPos    ) ); // NOTE(review): candPos, bandNum and offset are not declared in the lines visible here -- confirm they are members
+    std::memset( bandNum,        0, sizeof( bandNum    ) );
+    std::memset( offset,         0, sizeof( offset     ) );
+  }
+  void reset(ComponentID compID) // clear only the entries that belong to compID
+  {
+    enabled[compID] = false;
+    setNum [compID] = 0;
+    std::memset( setEnabled[compID], false, sizeof( setEnabled[compID]) );
+    std::memset( candPos   [compID],     0, sizeof( candPos   [compID]) );
+    std::memset( bandNum   [compID],     0, sizeof( bandNum   [compID]) );
+    std::memset( offset    [compID],     0, sizeof( offset    [compID]) );
+  }
+  const CcSaoComParam& operator = ( const CcSaoComParam& src ) // array-by-array byte copy from src; returns *this as a const reference
+  {
+    std::memcpy( enabled,    src.enabled,    sizeof( enabled    ) );
+    std::memcpy( setNum,     src.setNum,     sizeof( setNum     ) );
+    std::memcpy( setEnabled, src.setEnabled, sizeof( setEnabled ) );
+    std::memcpy( candPos,    src.candPos,    sizeof( candPos    ) );
+    std::memcpy( bandNum,    src.bandNum,    sizeof( bandNum    ) );
+    std::memcpy( offset,     src.offset,     sizeof( offset     ) );
+    return *this;
+  }
 //! \}
 #endif  // end of #ifndef  __ALFPARAMETERS__
diff --git a/source/Lib/CommonLib/BilateralFilter.cpp b/source/Lib/CommonLib/BilateralFilter.cpp
index e9d485661c408c68db36bc0b18d1f73231f82155..f654e028325e48b5e88297c2e135488aefc33616 100755
--- a/source/Lib/CommonLib/BilateralFilter.cpp
+++ b/source/Lib/CommonLib/BilateralFilter.cpp
@@ -51,6 +51,7 @@
   m_bilateralFilterDiamond5x5 = blockBilateralFilterDiamond5x5;
+  m_bilateralFilterDiamond5x5NoClip = blockBilateralFilterDiamond5x5NoClip;
 #ifdef TARGET_SIMD_X86
@@ -117,6 +118,241 @@ const char* BilateralFilter::getFilterLutParameters( const int size, const PredM
   return m_wBIF[sqp - 17];
+#if JVET_W0066_CCSAO
+void BilateralFilter::blockBilateralFilterDiamond5x5NoClip(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bif_round_add, int bif_round_shift, bool isRDO, const char* LUTrowPtr) // 5x5 diamond bilateral filter; adds the filter delta to recPtr without clipping unless isRDO is set
+  int pad = 2; // offset of the first block sample inside block[] and blkFilt[], in rows and in columns
+  int padwidth = iWidthExtSIMD; // row stride of block[]
+  int downbuffer[64]; // signed weight towards the sample below; the next row reuses it negated as its "above" term
+  int downleftbuffer[65]; // same idea for the below-left neighbour (read back as "above-right")
+  int downrightbuffer[2][65]; // same idea for the below-right neighbour; the two rows alternate via y % 2
+  int Shift, sg0, v0, idx, w0;
+  Shift = sizeof(int) * 8 - 1; // diff >> Shift serves as a sign mask: 0 for diff >= 0, -1 otherwise (relies on arithmetic right shift)
+  downbuffer[0] = 0;
+  for (int x = 0; x < uiWidth; x++) // prime the three buffers from the row directly above the block
+  {
+    int pixel = block[(-1 + pad) * padwidth + x + pad];
+    int below = block[(-1 + pad + 1) * padwidth + x + pad];
+    int diff = below - pixel;
+    sg0 = diff >> Shift; // sign mask of diff
+    v0 = (diff + sg0) ^ sg0; // |diff|
+    v0 = (v0 + 4) >> 3; // divide by 8 with rounding
+    idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift)); // min(v0, 15): clamp to the last LUT entry
+    w0 = LUTrowPtr[idx];
+    int mod = (w0 + sg0) ^ sg0; // LUT weight carrying the sign of diff
+    downbuffer[x] = mod;
+    int belowright = block[(-1 + pad + 1) * padwidth + x + pad + 1];
+    diff = belowright - pixel;
+    sg0 = diff >> Shift;
+    v0 = (diff + sg0) ^ sg0;
+    v0 = (v0 + 4) >> 3;
+    idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+    w0 = LUTrowPtr[idx] >> 1; // diagonal neighbours use half the LUT weight
+    mod = (w0 + sg0) ^ sg0;
+    downrightbuffer[1][x + 1] = mod;
+    int belowleft = block[(-1 + pad + 1) * padwidth + x + pad - 1];
+    diff = belowleft - pixel;
+    sg0 = diff >> Shift;
+    v0 = (diff + sg0) ^ sg0;
+    v0 = (v0 + 4) >> 3;
+    idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+    w0 = LUTrowPtr[idx] >> 1;
+    mod = (w0 + sg0) ^ sg0;
+    downleftbuffer[x] = mod;
+  }
+  int width = uiWidth;
+  for (int y = 0; y < uiHeight; y++) // filter the block row by row
+  {
+    int diff;
+    int16_t* rowStart = &block[(y + pad) * padwidth + pad]; // first block sample of row y
+    int pixel = rowStart[-1];
+    int right = rowStart[0];
+    diff = right - pixel;
+    sg0 = diff >> Shift;
+    v0 = (diff + sg0) ^ sg0;
+    v0 = (v0 + 4) >> 3;
+    idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+    w0 = LUTrowPtr[idx];
+    int mod = (w0 + sg0) ^ sg0;
+    int rightmod = mod; // seed for x = 0: weight from the left border sample towards the first block sample
+    pixel = rowStart[-padwidth - 1];
+    int belowright = right;
+    diff = belowright - pixel;
+    sg0 = diff >> Shift;
+    v0 = (diff + sg0) ^ sg0;
+    v0 = (v0 + 4) >> 3;
+    idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+    w0 = LUTrowPtr[idx] >> 1;
+    mod = (w0 + sg0) ^ sg0;
+    downrightbuffer[(y + 1) % 2][0] = mod; // seed the above-left term used at x = 0
+    pixel = rowStart[-padwidth + width];
+    int belowleft = rowStart[width - 1];
+    diff = belowleft - pixel;
+    sg0 = diff >> Shift;
+    v0 = (diff + sg0) ^ sg0;
+    v0 = (v0 + 4) >> 3;
+    idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+    w0 = LUTrowPtr[idx] >> 1;
+    mod = (w0 + sg0) ^ sg0;
+    downleftbuffer[width] = mod; // seed the above-right term used at x = width - 1
+    for (int x = 0; x < uiWidth; x++)
+    {
+      pixel = rowStart[x];
+      int modsum = 0; // accumulates the signed weights of all 12 diamond neighbours
+      int abovemod = -downbuffer[x]; // previous row's "below" weight, negated
+      modsum += abovemod;
+      int leftmod = -rightmod; // previous sample's "right" weight, negated
+      modsum += leftmod;
+      right = rowStart[x + 1];
+      diff = right - pixel;
+      sg0 = diff >> Shift;
+      v0 = (diff + sg0) ^ sg0;
+      v0 = (v0 + 4) >> 3;
+      idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+      w0 = LUTrowPtr[idx];
+      mod = (w0 + sg0) ^ sg0;
+      modsum += mod;
+      rightmod = mod;
+      int below = rowStart[x + padwidth];
+      diff = below - pixel;
+      sg0 = diff >> Shift;
+      v0 = (diff + sg0) ^ sg0;
+      v0 = (v0 + 4) >> 3;
+      idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+      w0 = LUTrowPtr[idx];
+      mod = (w0 + sg0) ^ sg0;
+      modsum += mod;
+      downbuffer[x] = mod;
+      int aboverightmod = -downleftbuffer[x + 1];
+      // modsum += ((int16_t)((uint16_t)((aboverightmod) >> 1)));
+      modsum += aboverightmod;
+      int aboveleftmod = -downrightbuffer[(y + 1) % 2][x];
+      // modsum += ((int16_t)((uint16_t)((aboveleftmod) >> 1)));
+      modsum += aboveleftmod;
+      int belowleft = rowStart[x + padwidth - 1];
+      diff = belowleft - pixel;
+      sg0 = diff >> Shift;
+      v0 = (diff + sg0) ^ sg0;
+      v0 = (v0 + 4) >> 3;
+      idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+      w0 = LUTrowPtr[idx] >> 1;
+      mod = (w0 + sg0) ^ sg0;
+      // modsum += ((int16_t)((uint16_t)((mod) >> 1)));
+      modsum += mod;
+      downleftbuffer[x] = mod;
+      int belowright = rowStart[x + padwidth + 1];
+      diff = belowright - pixel;
+      sg0 = diff >> Shift;
+      v0 = (diff + sg0) ^ sg0;
+      v0 = (v0 + 4) >> 3;
+      idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+      w0 = LUTrowPtr[idx] >> 1;
+      mod = (w0 + sg0) ^ sg0;
+      //modsum += ((int16_t)((uint16_t)((mod) >> 1)));
+      modsum += mod;
+      downrightbuffer[y % 2][x + 1] = mod;
+      // For samples two pixels out, we do not reuse previously calculated
+      // values even though that is possible. Doing so would likely increase
+      // speed when SIMD is turned off.
+      int above = rowStart[x - 2 * padwidth];
+      diff = above - pixel;
+      sg0 = diff >> Shift;
+      v0 = (diff + sg0) ^ sg0;
+      v0 = (v0 + 4) >> 3;
+      idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+      w0 = LUTrowPtr[idx] >> 1; // samples two away also use half the LUT weight
+      mod = (w0 + sg0) ^ sg0;
+      modsum += mod;
+      below = rowStart[x + 2 * padwidth];
+      diff = below - pixel;
+      sg0 = diff >> Shift;
+      v0 = (diff + sg0) ^ sg0;
+      v0 = (v0 + 4) >> 3;
+      idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+      w0 = LUTrowPtr[idx] >> 1;
+      mod = (w0 + sg0) ^ sg0;
+      modsum += mod;
+      int left = rowStart[x - 2];
+      diff = left - pixel;
+      sg0 = diff >> Shift;
+      v0 = (diff + sg0) ^ sg0;
+      v0 = (v0 + 4) >> 3;
+      idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+      w0 = LUTrowPtr[idx] >> 1;
+      mod = (w0 + sg0) ^ sg0;
+      modsum += mod;
+      right = rowStart[x + 2];
+      diff = right - pixel;
+      sg0 = diff >> Shift;
+      v0 = (diff + sg0) ^ sg0;
+      v0 = (v0 + 4) >> 3;
+      idx = 15 + ((v0 - 15) & ((v0 - 15) >> Shift));
+      w0 = LUTrowPtr[idx] >> 1;
+      mod = (w0 + sg0) ^ sg0;
+      modsum += mod;
+      blkFilt[(y + pad) * (padwidth + 4) + x + pad] = ((int16_t)((uint16_t)((modsum * bfac + bif_round_add) >> bif_round_shift))); // scale, round and shift; blkFilt[] has row stride padwidth + 4
+    }
+  }
+  // Copy back
+  Pel* tempBlockPtr = (short*)blkFilt + (((padwidth + 4) << 1) + 2); // first block sample in blkFilt[]: skip 2 rows and 2 columns
+  int tempBlockStride = padwidth + 4;
+  if (isRDO)
+  {
+    Pel* srcBlockPtr = (short*)block + (((padwidth) << 1) + 2); // first block sample in block[]
+    int srcBlockStride = padwidth;
+    for (uint32_t yy = 0; yy < uiHeight; yy++)
+    {
+      for (uint32_t xx = 0; xx < uiWidth; xx++)
+      {
+        recPtr[xx] = ClipPel(srcBlockPtr[xx] + tempBlockPtr[xx], clpRng); // RDO path: unfiltered input + delta, clipped
+      }
+      recPtr += recStride;
+      tempBlockPtr += tempBlockStride;
+      srcBlockPtr += srcBlockStride;
+    }
+  }
+  else
+  {
+    for (uint32_t yy = 0; yy < uiHeight; yy++)
+    {
+      for (uint32_t xx = 0; xx < uiWidth; xx++)
+      {
+        // new result = old result (which is SAO-treated already) + diff due to bilateral filtering
+        //recPtr[xx] = ClipPel<int>(recPtr[xx] + tempBlockPtr[xx], clpRng);
+        recPtr[xx] = recPtr[xx] + tempBlockPtr[xx]; // clipping is done jointly for SAO/BIF/CCSAO
+      }
+      recPtr += recStride;
+      tempBlockPtr += tempBlockStride;
+    }
+  }
 void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bif_round_add, int bif_round_shift, bool isRDO, const char* LUTrowPtr )
   int pad = 2;
@@ -559,6 +795,234 @@ void BilateralFilter::bilateralFilterRDOdiamond5x5(PelBuf& resiBuf, const CPelBu
+#if JVET_W0066_CCSAO
+void BilateralFilter::bilateralFilterDiamond5x5NoClip(const CPelUnitBuf& src, PelUnitBuf& rec, int32_t qp, const ClpRng& clpRng, TransformUnit& currTU) // copies the luma TU plus a 2-sample border from src into tempblock, then runs the no-clip 5x5 diamond filter, which updates rec
+  CompArea& compArea = currTU.block(COMPONENT_Y);
+  const unsigned uiWidth = compArea.width;
+  const unsigned uiHeight = compArea.height;
+  bool topAltAvailable;
+  bool leftAltAvailable;
+  int srcStride = src.get(COMPONENT_Y).stride;
+  const Pel* srcPtr = src.get(COMPONENT_Y).bufAt(compArea);
+  const Pel* srcPtrTemp = srcPtr; // remembers the TU origin so srcPtr can be restored after the row copy
+  int recStride = rec.get(COMPONENT_Y).stride;
+  Pel* recPtr = rec.get(COMPONENT_Y).bufAt(compArea);
+  int bfac = 1; // passed by reference to getFilterLutParameters, which may change it
+  const char* LUTrowPtr = getFilterLutParameters(std::min(uiWidth, uiHeight), currTU.cu->predMode, qp + currTU.cs->pps->getBIFQPOffset(), bfac);
+  int bif_round_add = (BIF_ROUND_ADD) >> (currTU.cs->pps->getBIFStrength());
+  int bif_round_shift = (BIF_ROUND_SHIFT)-(currTU.cs->pps->getBIFStrength());
+  const CompArea& myArea = currTU.blocks[COMPONENT_Y];
+  topAltAvailable = myArea.y - 2 >= 0; // is there room for the 2-sample border above the TU?
+  leftAltAvailable = myArea.x - 2 >= 0; // ... and to its left?
+  bool bottomAltAvailable = myArea.y + myArea.height + 1 < currTU.cu->slice->getSPS()->getMaxPicHeightInLumaSamples(); // two rows below are inside the SPS maximum picture height
+  bool rightAltAvailable = myArea.x + myArea.width + 1 < currTU.cu->slice->getSPS()->getMaxPicWidthInLumaSamples(); // two columns to the right are inside the SPS maximum picture width
+  uint32_t   uiWidthExt = uiWidth + (NUMBER_PADDED_SAMPLES << 1);
+  uint32_t   uiHeightExt = uiHeight + (NUMBER_PADDED_SAMPLES << 1);
+  int iWidthExtSIMD = uiWidthExt; // row stride of tempblock
+  if (uiWidth < 8)
+  {
+    iWidthExtSIMD = 8 + (NUMBER_PADDED_SAMPLES << 1); // narrow TUs still use rows sized for 8 samples plus padding
+  }
+  Pel* tempBlockPtr;
+  bool allAvail = topAltAvailable && bottomAltAvailable && leftAltAvailable && rightAltAvailable;
+  memset(tempblock, 0, iWidthExtSIMD * uiHeightExt * sizeof(short)); // clear the padded working buffer
+  if (allAvail)
+  {
+    // set pointer two rows up and two pixels to the left from the start of the block
+    tempBlockPtr = tempblock;
+    // same with image data
+    srcPtr = srcPtr - 2 * srcStride - 2;
+    //// Move block to temporary block
+    // Check if the block is the top block of a CTU.
+    bool isCTUboundary = myArea.y % currTU.cs->slice->getSPS()->getCTUSize() == 0;
+    if (isCTUboundary)
+    {
+      // The samples two lines up are out of bounds. (One line above the CTU is OK, since SAO uses that line.)
+      // Hence the top line of tempblock is unavailable if the block is the top block of a CTU.
+      // Therefore, copy samples from one line up instead of from two lines up by updating srcPtr *before* copy.
+      srcPtr += srcStride;
+      std::memcpy(tempBlockPtr, srcPtr, (uiWidthExt) * sizeof(Pel));
+    }
+    else
+    {
+      std::memcpy(tempBlockPtr, srcPtr, (uiWidthExt) * sizeof(Pel));
+      srcPtr += srcStride;
+    }
+    tempBlockPtr += iWidthExtSIMD;
+    // Copy samples that are not out of bounds.
+    for (uint32_t uiY = 1; uiY < uiHeightExt - 1; ++uiY)
+    {
+      std::memcpy(tempBlockPtr, srcPtr, (uiWidthExt) * sizeof(Pel));
+      srcPtr += srcStride;
+      tempBlockPtr += iWidthExtSIMD;
+    }
+    // Check if the block is a bottom block of a CTU.
+    isCTUboundary = (myArea.y + uiHeight) % currTU.cs->slice->getSPS()->getCTUSize() == 0;
+    if (isCTUboundary)
+    {
+      // The samples two lines down are out of bounds. (One line below the CTU is OK, since SAO uses that line.)
+      // Hence the bottom line of tempblock is unavailable if the block is at the bottom of a CTU.
+      // Therefore, copy samples from the second to last line instead of the last line by subtracting srcPtr before copy.
+      srcPtr -= srcStride;
+      std::memcpy(tempBlockPtr, srcPtr, (uiWidthExt) * sizeof(Pel));
+    }
+    else
+    {
+      std::memcpy(tempBlockPtr, srcPtr, (uiWidthExt) * sizeof(Pel));
+    }
+    return m_bilateralFilterDiamond5x5NoClip(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bif_round_add, bif_round_shift, false, LUTrowPtr); // isRDO = false: the delta is added to rec in place
+  }
+  else
+  {
+    tempBlockPtr = tempblock + (NUMBER_PADDED_SAMPLES)*iWidthExtSIMD + NUMBER_PADDED_SAMPLES; // first block sample inside the padded buffer
+    //// Move block to temporary block
+    for (uint32_t uiY = 0; uiY < uiHeight; ++uiY)
+    {
+      std::memcpy(tempBlockPtr, srcPtr, uiWidth * sizeof(Pel));
+      srcPtr += srcStride;
+      tempBlockPtr += iWidthExtSIMD;
+    }
+    srcPtr = srcPtrTemp; // back to the TU origin for the border copies below
+    if (topAltAvailable)
+    {
+      std::copy(srcPtr - 2 * srcStride, srcPtr - 2 * srcStride + uiWidth, tempblock + 2);
+      std::copy(srcPtr - srcStride, srcPtr - srcStride + uiWidth, tempblock + iWidthExtSIMD + 2);
+    }
+    if (bottomAltAvailable)
+    {
+      std::copy(srcPtr + (uiHeight + 1) * srcStride, srcPtr + (uiHeight + 1) * srcStride + uiWidth, tempblock + (uiHeightExt - 1) * iWidthExtSIMD + 2);
+      std::copy(srcPtr + uiHeight * srcStride, srcPtr + uiHeight * srcStride + uiWidth, tempblock + (uiHeightExt - 2) * iWidthExtSIMD + 2);
+    }
+    if (leftAltAvailable)
+    {
+      for (int yy = 0; yy < uiHeight; yy++)
+      {
+        tempblock[(iWidthExtSIMD << 1) + yy * iWidthExtSIMD + 0] = *(srcPtr + yy * srcStride - 2);
+        tempblock[(iWidthExtSIMD << 1) + yy * iWidthExtSIMD + 1] = *(srcPtr + yy * srcStride - 1);
+      }
+    }
+    if (rightAltAvailable)
+    {
+      for (int yy = 0; yy < uiHeight; yy++)
+      {
+        tempblock[(iWidthExtSIMD << 1) + uiWidthExt - 1 + yy * iWidthExtSIMD] = *(srcPtr + uiWidth + yy * srcStride + 1);
+        tempblock[(iWidthExtSIMD << 1) + uiWidthExt - 2 + yy * iWidthExtSIMD] = *(srcPtr + uiWidth + yy * srcStride);
+      }
+    }
+    // if not all available, copy from inside tempbuffer
+    if (!topAltAvailable)
+    {
+      std::copy(tempblock + iWidthExtSIMD * 2 + 2, tempblock + iWidthExtSIMD * 2 + 2 + uiWidth, tempblock + 2);
+      std::copy(tempblock + iWidthExtSIMD * 2 + 2, tempblock + iWidthExtSIMD * 2 + 2 + uiWidth, tempblock + iWidthExtSIMD + 2);
+    }
+    if (!bottomAltAvailable)
+    {
+      std::copy(tempblock + (uiHeightExt - 3) * iWidthExtSIMD + 2, tempblock + (uiHeightExt - 3) * iWidthExtSIMD + 2 + uiWidth, tempblock + (uiHeightExt - 2) * iWidthExtSIMD + 2);
+      std::copy(tempblock + (uiHeightExt - 3) * iWidthExtSIMD + 2, tempblock + (uiHeightExt - 3) * iWidthExtSIMD + 2 + uiWidth, tempblock + (uiHeightExt - 1) * iWidthExtSIMD + 2);
+    }
+    if (!leftAltAvailable)
+    {
+      for (int yy = 0; yy < uiHeight; yy++)
+      {
+        tempblock[(iWidthExtSIMD << 1) + yy * iWidthExtSIMD + 0] = tempblock[(iWidthExtSIMD << 1) + yy * iWidthExtSIMD + 2];
+        tempblock[(iWidthExtSIMD << 1) + yy * iWidthExtSIMD + 1] = tempblock[(iWidthExtSIMD << 1) + yy * iWidthExtSIMD + 2];
+      }
+    }
+    if (!rightAltAvailable)
+    {
+      for (int yy = 0; yy < uiHeight; yy++)
+      {
+        tempblock[(iWidthExtSIMD << 1) + uiWidthExt - 2 + yy * iWidthExtSIMD] = tempblock[(iWidthExtSIMD << 1) + uiWidthExt - 2 + yy * iWidthExtSIMD - 1];
+        tempblock[(iWidthExtSIMD << 1) + uiWidthExt - 1 + yy * iWidthExtSIMD] = tempblock[(iWidthExtSIMD << 1) + uiWidthExt - 2 + yy * iWidthExtSIMD - 1];
+      }
+    }
+    // Corners: copy from the picture when both adjacent sides are available, otherwise extend the nearest block sample.
+    if (topAltAvailable && leftAltAvailable)
+    {
+      tempblock[0] = *(srcPtr - 2 * srcStride - 2);                                               // a     top left corner
+      tempblock[1] = *(srcPtr - 2 * srcStride - 1);                                               // b     a b|x x
+      tempblock[iWidthExtSIMD + 0] = *(srcPtr - srcStride - 2);                                   // c     c d|x x
+      tempblock[iWidthExtSIMD + 1] = *(srcPtr - srcStride - 1);                                   // d     -------
+    }
+    else
+    {
+      tempblock[0] = tempblock[iWidthExtSIMD * 2 + 2];                                            // extend top left
+      tempblock[1] = tempblock[iWidthExtSIMD * 2 + 2];                                            // extend top left
+      tempblock[iWidthExtSIMD + 0] = tempblock[iWidthExtSIMD * 2 + 2];                            // extend top left
+      tempblock[iWidthExtSIMD + 1] = tempblock[iWidthExtSIMD * 2 + 2];                            // extend top left
+    }
+    if (topAltAvailable && rightAltAvailable)
+    {
+      tempblock[iWidthExtSIMD - 2] = *(srcPtr - 2 * srcStride + uiWidth);                         // a  NOTE(review): row 0 indexes with iWidthExtSIMD, row 1 below with uiWidthExt; these differ when uiWidth < 8 -- confirm intended
+      tempblock[iWidthExtSIMD - 1] = *(srcPtr - 2 * srcStride + uiWidth + 1);                     // b
+      tempblock[iWidthExtSIMD + uiWidthExt - 2] = *(srcPtr - srcStride + uiWidth);                // c
+      tempblock[iWidthExtSIMD + uiWidthExt - 1] = *(srcPtr - srcStride + uiWidth + 1);            // d
+    }
+    else
+    {
+      tempblock[iWidthExtSIMD - 2] = tempblock[iWidthExtSIMD * 2 + uiWidthExt - 3];               // extend top right
+      tempblock[iWidthExtSIMD - 1] = tempblock[iWidthExtSIMD * 2 + uiWidthExt - 3];               // extend top right
+      tempblock[iWidthExtSIMD + uiWidthExt - 2] = tempblock[iWidthExtSIMD * 2 + uiWidthExt - 3];  // extend top right
+      tempblock[iWidthExtSIMD + uiWidthExt - 1] = tempblock[iWidthExtSIMD * 2 + uiWidthExt - 3];  // extend top right
+    }
+    if (bottomAltAvailable && leftAltAvailable)
+    {
+      tempblock[iWidthExtSIMD * (uiHeightExt - 2) + 0] = *(srcPtr + uiHeight * srcStride - 2);          // a
+      tempblock[iWidthExtSIMD * (uiHeightExt - 2) + 1] = *(srcPtr + uiHeight * srcStride - 1);          // b
+      tempblock[iWidthExtSIMD * (uiHeightExt - 1) + 0] = *(srcPtr + (uiHeight + 1) * srcStride - 2);    // c
+      tempblock[iWidthExtSIMD * (uiHeightExt - 1) + 1] = *(srcPtr + (uiHeight + 1) * srcStride - 1);    // d
+    }
+    else
+    {
+      tempblock[iWidthExtSIMD * (uiHeightExt - 2) + 0] = tempblock[iWidthExtSIMD * (uiHeightExt - 3) + 2];  // extend bottom left
+      tempblock[iWidthExtSIMD * (uiHeightExt - 2) + 1] = tempblock[iWidthExtSIMD * (uiHeightExt - 3) + 2];  // extend bottom left
+      tempblock[iWidthExtSIMD * (uiHeightExt - 1) + 0] = tempblock[iWidthExtSIMD * (uiHeightExt - 3) + 2];  // extend bottom left
+      tempblock[iWidthExtSIMD * (uiHeightExt - 1) + 1] = tempblock[iWidthExtSIMD * (uiHeightExt - 3) + 2];  // extend bottom left
+    }
+    if (bottomAltAvailable && rightAltAvailable)
+    {
+      tempblock[iWidthExtSIMD * (uiHeightExt - 2) + uiWidthExt - 2] = *(srcPtr + uiHeight * srcStride + uiWidth);                // a
+      tempblock[iWidthExtSIMD * (uiHeightExt - 2) + uiWidthExt - 1] = *(srcPtr + uiHeight * srcStride + uiWidth + 1);            // b
+      tempblock[iWidthExtSIMD * (uiHeightExt - 1) + uiWidthExt - 2] = *(srcPtr + (uiHeight + 1) * srcStride + uiWidth);          // c
+      tempblock[iWidthExtSIMD * (uiHeightExt - 1) + uiWidthExt - 1] = *(srcPtr + (uiHeight + 1) * srcStride + uiWidth + 1);      // d
+    }
+    else
+    {
+      tempblock[iWidthExtSIMD * (uiHeightExt - 2) + uiWidthExt - 2] = tempblock[iWidthExtSIMD * (uiHeightExt - 3) + uiWidthExt - 3]; // extend bottom right
+      tempblock[iWidthExtSIMD * (uiHeightExt - 2) + uiWidthExt - 1] = tempblock[iWidthExtSIMD * (uiHeightExt - 3) + uiWidthExt - 3]; // extend bottom right
+      tempblock[iWidthExtSIMD * (uiHeightExt - 1) + uiWidthExt - 2] = tempblock[iWidthExtSIMD * (uiHeightExt - 3) + uiWidthExt - 3]; // extend bottom right
+      tempblock[iWidthExtSIMD * (uiHeightExt - 1) + uiWidthExt - 1] = tempblock[iWidthExtSIMD * (uiHeightExt - 3) + uiWidthExt - 3]; // extend bottom right
+    }
+  }
+  m_bilateralFilterDiamond5x5NoClip(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bif_round_add, bif_round_shift, false, LUTrowPtr); // isRDO = false: the delta is added to rec in place
 void BilateralFilter::bilateralFilterDiamond5x5(const CPelUnitBuf& src, PelUnitBuf& rec, int32_t qp, const ClpRng& clpRng, TransformUnit & currTU)
   CompArea &compArea = currTU.block(COMPONENT_Y);
diff --git a/source/Lib/CommonLib/BilateralFilter.h b/source/Lib/CommonLib/BilateralFilter.h
index 4cbd41be7cb19150bafba652de4e71334c4ec209..a77d6d62e9d085d5a39aa5514e1dbcad69d358c4 100755
--- a/source/Lib/CommonLib/BilateralFilter.h
+++ b/source/Lib/CommonLib/BilateralFilter.h
@@ -65,7 +65,13 @@ private:
   short *tempblockFiltered = &tempblockFilteredTemp[-2];
   void (*m_bilateralFilterDiamond5x5)( uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bif_round_add, int bif_round_shift, bool isRDO, const char* LUTrowPtr);
+#if JVET_W0066_CCSAO
+  void (*m_bilateralFilterDiamond5x5NoClip)(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bif_round_add, int bif_round_shift, bool isRDO, const char* LUTrowPtr);
   static void blockBilateralFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bif_round_add, int bif_round_shift, bool isRDO, const char* LUTrowPtr );
+#if JVET_W0066_CCSAO
+  static void blockBilateralFilterDiamond5x5NoClip(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bif_round_add, int bif_round_shift, bool isRDO, const char* LUTrowPtr);
   char m_wBIF[26][16] = {
   {  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, },
@@ -108,6 +114,9 @@ public:
   void bilateralFilterPicRDOperCTU(CodingStructure& cs, PelUnitBuf& src,BIFCabacEst* BifCABACEstimator);
   void bilateralFilterDiamond5x5(const CPelUnitBuf& src, PelUnitBuf& rec, int32_t qp, const ClpRng& clpRng, TransformUnit & currTU);
+#if JVET_W0066_CCSAO
+  void bilateralFilterDiamond5x5NoClip(const CPelUnitBuf& src, PelUnitBuf& rec, int32_t qp, const ClpRng& clpRng, TransformUnit& currTU);
   void clipNotBilaterallyFilteredBlocks(const CPelUnitBuf& src, PelUnitBuf& rec, const ClpRng& clpRng, TransformUnit & currTU);
   const char* getFilterLutParameters( const int size, const PredMode predMode, const int qp, int& bfac );
@@ -116,6 +125,10 @@ public:
 #ifdef TARGET_SIMD_X86
   template<X86_VEXT vext>
   static void simdFilterDiamond5x5( uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bif_round_add, int bif_round_shift, bool isRDO, const char* LUTrowPtr );
+#if JVET_W0066_CCSAO
+  template<X86_VEXT vext>
+  static void simdFilterDiamond5x5NoClip(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bif_round_add, int bif_round_shift, bool isRDO, const char* LUTrowPtr);
   void    initBilateralFilterX86();
   template <X86_VEXT vext>
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index 75e0312e21f6bf7c7f8b94c320bb9337c8e7ffc7..d76bf13ea7dc8f5706e3c92a9e8ee6541774c357 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -207,6 +207,21 @@ static const int MIP_MAX_WIDTH =                                   MAX_TB_SIZEY;
 static const int MIP_MAX_HEIGHT =                                  MAX_TB_SIZEY;
+#if JVET_W0066_CCSAO
+#define          MAX_CCSAO_SET_NUM                                  4 // NOTE(review): presumably the max number of CCSAO parameter sets per component (set index 1..N, 0 = off) - confirm
+static const int MAX_CCSAO_CAND_POS_Y        =                      9; // number of candidate luma classifier positions; second dimension of g_ccSaoCandPosX/Y
+static const int MAX_CCSAO_CAND_POS_Y_BITS   =                      4; // NOTE(review): presumably the bit count used to signal a candidate position index - confirm
+static const int MAX_CCSAO_BAND_NUM_Y        =                     16; // NOTE(review): presumably the upper limit of bandNumY - confirm
+static const int MAX_CCSAO_BAND_NUM_Y_BITS   =                      4; // NOTE(review): presumably the bit count used to signal bandNumY - confirm
+static const int MAX_CCSAO_BAND_NUM_U        =                      4; // NOTE(review): presumably the upper limit of bandNumU - confirm
+static const int MAX_CCSAO_BAND_NUM_U_BITS   =                      2; // NOTE(review): presumably the bit count used to signal bandNumU - confirm
+static const int MAX_CCSAO_BAND_NUM_V        =                      4; // NOTE(review): presumably the upper limit of bandNumV - confirm
+static const int MAX_CCSAO_BAND_NUM_V_BITS   =                      2; // NOTE(review): presumably the bit count used to signal bandNumV - confirm
+static const int MAX_CCSAO_CLASS_NUM         =                     64; // NOTE(review): presumably the upper limit of the class index (bandY/bandU/bandV combination) - confirm
+static const int MAX_CCSAO_OFFSET_THR        =                     15; // NOTE(review): presumably the largest allowed offset magnitude - confirm
+static const int MAX_CCSAO_FILTER_LENGTH     =                      3; // (MAX_CCSAO_FILTER_LENGTH >> 1) is used as margin and border-extension size of the CCSAO source buffer
 static const int MAX_NUM_ALF_ALTERNATIVES_CHROMA =                  8;
 static const int MAX_NUM_ALF_CLASSES         =                     25;
diff --git a/source/Lib/CommonLib/Contexts.cpp b/source/Lib/CommonLib/Contexts.cpp
index ba7ebd86a7001e609c06728ae006a014ae70ff1f..6f4111ae4af9a1c114d6255bb92fb8e73dfef1e6 100644
--- a/source/Lib/CommonLib/Contexts.cpp
+++ b/source/Lib/CommonLib/Contexts.cpp
@@ -1515,6 +1515,16 @@ const CtxSet ContextSetCfg::BifCtrlFlags = ContextSetCfg::addCtxSet
+#if JVET_W0066_CCSAO
+const CtxSet ContextSetCfg::CcSaoControlIdc = ContextSetCfg::addCtxSet
 const CtxSet ContextSetCfg::LFNSTIdx = ContextSetCfg::addCtxSet
@@ -2567,6 +2577,16 @@ const CtxSet ContextSetCfg::BifCtrlFlags = ContextSetCfg::addCtxSet
+#if JVET_W0066_CCSAO
+const CtxSet ContextSetCfg::CcSaoControlIdc = ContextSetCfg::addCtxSet
 const CtxSet ContextSetCfg::LFNSTIdx = ContextSetCfg::addCtxSet
diff --git a/source/Lib/CommonLib/Contexts.h b/source/Lib/CommonLib/Contexts.h
index 4f49ae343125ef4139c5b641f35622375a1ac2b2..0b7f343b3a1f562fafede39eff05afeedc741a47 100644
--- a/source/Lib/CommonLib/Contexts.h
+++ b/source/Lib/CommonLib/Contexts.h
@@ -300,6 +300,9 @@ public:
   static const CtxSet   SaoTypeIdx;
   static const CtxSet   BifCtrlFlags;
+#if JVET_W0066_CCSAO
+  static const CtxSet   CcSaoControlIdc;
   static const CtxSet   TransformSkipFlag;
   static const CtxSet   MTSIdx;
diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp
index c7351e229cb679571f6a6b3f777f2a1ec47445c9..69a74632d372d20a80ccaec8546401869cc06573 100644
--- a/source/Lib/CommonLib/Rom.cpp
+++ b/source/Lib/CommonLib/Rom.cpp
@@ -4831,4 +4831,8 @@ uint8_t g_geoTmShape[2][GEO_NUM_ANGLES] = {
                                             GEO_TM_SHAPE_L,  0,               0,               GEO_TM_SHAPE_L,
                                             GEO_TM_SHAPE_AL, GEO_TM_SHAPE_AL, GEO_TM_SHAPE_AL, 0, } };
+#if JVET_W0066_CCSAO
+const int8_t g_ccSaoCandPosX[MAX_NUM_LUMA_COMP][MAX_CCSAO_CAND_POS_Y] = { {-1,  0,  1, -1,  0,  1, -1,  0,  1} }; // horizontal displacement of each candidate luma classifier position
+const int8_t g_ccSaoCandPosY[MAX_NUM_LUMA_COMP][MAX_CCSAO_CAND_POS_Y] = { {-1, -1, -1,  0,  0,  0,  1,  1,  1} }; // vertical displacement; together with the X table the 9 entries enumerate a 3x3 neighbourhood row by row
 //! \}
diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h
index 763a3b24cb76cc12b154919954af64adcb161946..4eae2a15c273a1bf6de5f56ecf5ad3a07857c1ec 100644
--- a/source/Lib/CommonLib/Rom.h
+++ b/source/Lib/CommonLib/Rom.h
@@ -324,5 +324,9 @@ extern uint8_t g_geoTmShape[2][GEO_NUM_ANGLES];
 extern const int g_addHypWeight[MULTI_HYP_PRED_NUM_WEIGHTS];
+#if JVET_W0066_CCSAO
+extern const int8_t g_ccSaoCandPosX[MAX_NUM_LUMA_COMP][MAX_CCSAO_CAND_POS_Y];
+extern const int8_t g_ccSaoCandPosY[MAX_NUM_LUMA_COMP][MAX_CCSAO_CAND_POS_Y];
 #endif  //__TCOMROM__
diff --git a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp
index 0c27da182295adb09e461343865b5b6b26844256..97e03f888da00e9d7bae35a98fffd500838850c1 100644
--- a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp
+++ b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp
@@ -138,11 +138,56 @@ void SampleAdaptiveOffset::create( int picWidth, int picHeight, ChromaFormat for
     m_offsetStepLog2  [compIdx] = isLuma(ComponentID(compIdx))? lumaBitShift : chromaBitShift;
   m_numberOfComponents = getNumberValidComponents(format);
+#if JVET_W0066_CCSAO
+  if (m_created)
+  {
+    return;
+  }
+  m_created = true;
+  m_ccSaoBuf.destroy();
+  m_ccSaoBuf.create(format, Area(0, 0, picWidth, picHeight), maxCUWidth, MAX_CCSAO_FILTER_LENGTH >> 1, 0, false);
+  m_picWidth = picWidth;
+  m_picHeight = picHeight;
+  m_maxCUWidth = maxCUWidth;
+  m_maxCUHeight = maxCUHeight;
+  m_numCTUsInWidth = ( m_picWidth / m_maxCUWidth ) + ( ( m_picWidth % m_maxCUWidth ) ? 1 : 0 );
+  m_numCTUsInHeight = ( m_picHeight / m_maxCUHeight ) + ( ( m_picHeight % m_maxCUHeight ) ? 1 : 0 );
+  m_numCTUsInPic = m_numCTUsInHeight * m_numCTUsInWidth;
+  for (int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++)
+  {
+    m_ccSaoControl[compIdx] = new uint8_t[m_numCTUsInPic];
+    ::memset(m_ccSaoControl[compIdx], 0, sizeof(uint8_t) * m_numCTUsInPic);
+  }
 void SampleAdaptiveOffset::destroy()
+#if JVET_W0066_CCSAO
+  if (!m_created)
+  {
+    return;
+  }
+  m_created = false;
+  m_ccSaoBuf.destroy();
+  for (int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++)
+  {
+    if (m_ccSaoControl[compIdx]) 
+    { 
+      delete [] m_ccSaoControl[compIdx];
+      m_ccSaoControl[compIdx] = nullptr;
+    }
+  }
 void SampleAdaptiveOffset::invertQuantOffsets(ComponentID compIdx, int typeIdc, int typeAuxInfo, int* dstOffsets, int* srcOffsets)
@@ -864,12 +909,14 @@ void SampleAdaptiveOffset::offsetCTUnoClip( const UnitArea& area, const CPelUnit
+#if !JVET_W0066_CCSAO
   int numHorVirBndry = 0, numVerVirBndry = 0;
   int horVirBndryPos[] = { -1,-1,-1 };
   int verVirBndryPos[] = { -1,-1,-1 };
   int horVirBndryPosComp[] = { -1,-1,-1 };
   int verVirBndryPosComp[] = { -1,-1,-1 };
   bool isCtuCrossedByVirtualBoundaries = isCrossedByVirtualBoundaries(area.Y().x, area.Y().y, area.Y().width, area.Y().height, numHorVirBndry, numVerVirBndry, horVirBndryPos, verVirBndryPos, cs.picHeader);
   for(int compIdx = 0; compIdx < numberOfComponents; compIdx++)
     const ComponentID compID = ComponentID(compIdx);
@@ -882,6 +929,7 @@ void SampleAdaptiveOffset::offsetCTUnoClip( const UnitArea& area, const CPelUnit
       const Pel* srcBlk = src.get(compID).bufAt(compArea);
       int  resStride    = res.get(compID).stride;
       Pel* resBlk       = res.get(compID).bufAt(compArea);
+#if !JVET_W0066_CCSAO
       for (int i = 0; i < numHorVirBndry; i++)
         horVirBndryPosComp[i] = (horVirBndryPos[i] >> ::getComponentScaleY(compID, area.chromaFormat)) - compArea.y;
@@ -890,7 +938,21 @@ void SampleAdaptiveOffset::offsetCTUnoClip( const UnitArea& area, const CPelUnit
         verVirBndryPosComp[i] = (verVirBndryPos[i] >> ::getComponentScaleX(compID, area.chromaFormat)) - compArea.x;
+#if JVET_W0066_CCSAO    
+      // Do not clip the final output for both luma and chroma. Clipping is done jointly for SAO/BIF/CCSAO.
+	    offsetBlockNoClip(cs.sps->getBitDepth(toChannelType(compID)),
+		    cs.slice->clpRng(compID),
+		    ctbOffset.typeIdc, ctbOffset.offset
+		    , srcBlk, resBlk, srcStride, resStride, compArea.width, compArea.height
+		    , isLeftAvail, isRightAvail
+		    , isAboveAvail, isBelowAvail
+		    , isAboveLeftAvail, isAboveRightAvail
+		    , isBelowLeftAvail, isBelowRightAvail
+		  //                 , isCtuCrossedByVirtualBoundaries, horVirBndryPosComp, verVirBndryPosComp, numHorVirBndry, numVerVirBndry
+	    );    
       if(compID == COMPONENT_Y)
         // If it is luma we should not clip, since we will clip
@@ -924,6 +986,7 @@ void SampleAdaptiveOffset::offsetCTUnoClip( const UnitArea& area, const CPelUnit
   } //compIdx
@@ -994,6 +1057,27 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP
       const uint32_t width  = (xPos + pcv.maxCUWidth  > pcv.lumaWidth)  ? (pcv.lumaWidth - xPos)  : pcv.maxCUWidth;
       const uint32_t height = (yPos + pcv.maxCUHeight > pcv.lumaHeight) ? (pcv.lumaHeight - yPos) : pcv.maxCUHeight;
       const UnitArea area( cs.area.chromaFormat, Area(xPos , yPos, width, height) );
+#if JVET_W0066_CCSAO
+      // Always do non-clipped version for SAO/BIF, the clipping is done jointly after CCSAO is also applied
+      if (!bAllDisabled)
+        offsetCTUnoClip(area, m_tempBuf, rec, cs.picture->getSAO()[ctuRsAddr], cs);
+      BifParams& bifParams = cs.picture->getBifParam();
+      // And now we traverse the CTU to do BIF
+      for (auto& currCU : cs.traverseCUs(CS::getArea(cs, area, CH_L), CH_L))
+      {
+        for (auto& currTU : CU::traverseTUs(currCU))
+        {
+          bool isInter = (currCU.predMode == MODE_INTER) ? true : false;
+          if (bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))))
+          {
+            m_bilateralFilter.bilateralFilterDiamond5x5NoClip(m_tempBuf, rec, currTU.cu->qp, cs.slice->clpRng(COMPONENT_Y), currTU);
+          }
+        }
+      }
@@ -1038,6 +1122,7 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP
       offsetCTU( area, m_tempBuf, rec, cs.picture->getSAO()[ctuRsAddr], cs);
@@ -1053,6 +1138,405 @@ void SampleAdaptiveOffset::SAOProcess( CodingStructure& cs, SAOBlkParam* saoBlkP
+#if JVET_W0066_CCSAO
+void SampleAdaptiveOffset::CCSAOProcess(CodingStructure& cs)
+  const uint32_t numberOfComponents = getNumberValidComponents(cs.area.chromaFormat);
+  bool bAllDisabled = true;
+  for (uint32_t compIdx = 0; compIdx < numberOfComponents; compIdx++)
+  {
+    if (m_ccSaoComParam.enabled[compIdx])
+    {
+      bAllDisabled = false;
+    }
+  }
+  if (bAllDisabled)
+  {
+    return;
+  }
+  const PreCalcValues& pcv = *cs.pcv;
+  PelUnitBuf dstYuv = cs.getRecoBuf();
+  PelUnitBuf srcYuv = m_ccSaoBuf.getBuf( cs.area );
+  srcYuv.extendBorderPel( MAX_CCSAO_FILTER_LENGTH >> 1 );
+  applyCcSao(cs, pcv, srcYuv, dstYuv);
+void SampleAdaptiveOffset::applyCcSao(CodingStructure &cs, const PreCalcValues& pcv, const CPelUnitBuf& srcYuv, PelUnitBuf& dstYuv)
+  int ctuRsAddr = 0;
+  for (uint32_t yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight)
+  {
+    for (uint32_t xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth)
+    {
+      const uint32_t width  = (xPos + pcv.maxCUWidth  > pcv.lumaWidth ) ? (pcv.lumaWidth  - xPos) : pcv.maxCUWidth;
+      const uint32_t height = (yPos + pcv.maxCUHeight > pcv.lumaHeight) ? (pcv.lumaHeight - yPos) : pcv.maxCUHeight;
+      const UnitArea area(cs.area.chromaFormat, Area(xPos, yPos, width, height));
+      offsetCTUCcSaoNoClip(cs, area, srcYuv, dstYuv, ctuRsAddr);
+      ctuRsAddr++;
+    }
+  }
+// Final clipping pass shared by SAO, BIF and CCSAO (which all write unclipped samples).
+// For every CTU and component:
+//  - if SAO is not off, or CCSAO is enabled with a non-zero set index, the whole CTU component is clipped;
+//  - otherwise, for luma with BIF enabled, clipNotBilaterallyFilteredBlocks() is called for
+//    each TU that satisfies the BIF application condition.
+// Does nothing when SAO, BIF and CCSAO are all disabled.
+void SampleAdaptiveOffset::jointClipSaoBifCcSao(CodingStructure& cs)
+  if (!cs.sps->getSAOEnabledFlag() && !cs.pps->getUseBIF() && !cs.sps->getCCSAOEnabledFlag())
+  {
+    return;
+  }
+  const PreCalcValues& pcv = *cs.pcv;
+  PelUnitBuf dstYuv = cs.getRecoBuf();
+  // Picture-level invariants, hoisted out of the CTU loops.
+  const uint32_t numberOfComponents = getNumberValidComponents(cs.area.chromaFormat);
+  const bool     useBif             = cs.pps->getUseBIF();
+  // Iterate all CTUs and check if any of the filters is on for a given component
+  int ctuRsAddr = 0;
+  for (uint32_t yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight)
+  {
+    for (uint32_t xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth, ctuRsAddr++)
+    {
+      const uint32_t width  = (xPos + pcv.maxCUWidth  > pcv.lumaWidth ) ? (pcv.lumaWidth  - xPos) : pcv.maxCUWidth;
+      const uint32_t height = (yPos + pcv.maxCUHeight > pcv.lumaHeight) ? (pcv.lumaHeight - yPos) : pcv.maxCUHeight;
+      const UnitArea area(cs.area.chromaFormat, Area(xPos, yPos, width, height));
+      // Only modeIdc is read, so refer to the picture's SAO parameters instead of copying them per CTU.
+      SAOBlkParam& ctuSaoParam = cs.picture->getSAO()[ctuRsAddr];
+      for (uint32_t compIdx = 0; compIdx < numberOfComponents; compIdx++)
+      {
+        const int  setIdc  = m_ccSaoControl[compIdx][ctuRsAddr];
+        const bool ccSaoOn = m_ccSaoComParam.enabled[compIdx] && setIdc != 0;
+        const bool saoOn   = ctuSaoParam[compIdx].modeIdc != SAO_MODE_OFF;
+        if (ccSaoOn || saoOn)
+        {
+          // We definitely need to clip if either SAO or CCSAO is on for the given component of the CTU
+          clipCTU(cs, dstYuv, area, ComponentID(compIdx));
+          continue;
+        }
+        // When BIF is on, the luma component might need to be clipped
+        if (!useBif || compIdx != COMPONENT_Y)
+        {
+          continue;
+        }
+        BifParams& bifParams = cs.picture->getBifParam();
+        if (!bifParams.ctuOn[ctuRsAddr])
+        {
+          // BIF is off for this CTU: no TU can qualify, so skip the CU/TU traversal altogether.
+          continue;
+        }
+        // And now we traverse the CTU to do clipping
+        for (auto& currCU : cs.traverseCUs(CS::getArea(cs, area, CH_L), CH_L))
+        {
+          const bool isIntra = (currCU.predMode != MODE_INTER);
+          for (auto& currTU : CU::traverseTUs(currCU))
+          {
+            // Same TU condition as the one used when applying BIF in SAOProcess().
+            if ((TU::getCbf(currTU, COMPONENT_Y) || isIntra)
+                && (currTU.cu->qp > 17)
+                && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height))
+                && (isIntra || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height))))
+            {
+              m_bilateralFilter.clipNotBilaterallyFilteredBlocks(m_tempBuf, dstYuv, cs.slice->clpRng(COMPONENT_Y), currTU);
+            }
+          }
+        }
+      }
+    }
+  }
+// Clips, in place, every sample of component compID inside the given CTU area of dstYuv
+// to the slice's clipping range for that component.
+void SampleAdaptiveOffset::clipCTU(CodingStructure& cs, PelUnitBuf& dstYuv, const UnitArea& area, const ComponentID compID)
+  const CompArea &compArea = area.block(compID);
+  const uint32_t height = compArea.height;
+  const uint32_t width = compArea.width;
+  // The clipping range depends only on the component: fetch it once instead of once per sample.
+  const ClpRng& clpRng = cs.slice->clpRng(compID);
+  Pel *dst = dstYuv.get(compID).bufAt(compArea);
+  const int dstStride = dstYuv.get(compID).stride;
+  for (uint32_t y = 0; y < height; y++)
+  {
+    for (uint32_t x = 0; x < width; x++)
+    {
+      // new result = old result (which is SAO-treated already) + clipping
+      dst[x] = ClipPel<int>(dst[x], clpRng);
+    }
+    dst += dstStride;
+  }
+void SampleAdaptiveOffset::offsetCTUCcSaoNoClip(CodingStructure& cs, const UnitArea& area, const CPelUnitBuf& srcYuv, PelUnitBuf& dstYuv, const int ctuRsAddr)
+  const uint32_t numberOfComponents = getNumberValidComponents(area.chromaFormat);
+  bool bAllOff = true;
+  for (uint32_t compIdx = 0; compIdx < numberOfComponents; compIdx++)
+  {
+    if (m_ccSaoComParam.enabled[compIdx])
+    {
+      bAllOff = false;
+    }
+  }
+  if (bAllOff)
+  {
+    return;
+  }
+  bool isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail, isBelowLeftAvail, isBelowRightAvail;
+  deriveLoopFilterBoundaryAvailibility(cs, area.Y(), isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail, isBelowLeftAvail, isBelowRightAvail);
+  for (int compIdx = 0; compIdx < numberOfComponents; compIdx++)
+  {
+    if (m_ccSaoComParam.enabled[compIdx])
+    {
+      const int setIdc = m_ccSaoControl[compIdx][ctuRsAddr];
+      if (setIdc != 0)
+      {
+        const ComponentID compID     = ComponentID(compIdx);
+        const CompArea   &compArea   = area.block(compID);
+        const int         srcStrideY = srcYuv.get(COMPONENT_Y ).stride;
+        const int         srcStrideU = srcYuv.get(COMPONENT_Cb).stride;
+        const int         srcStrideV = srcYuv.get(COMPONENT_Cr).stride;
+        const int         dstStride  = dstYuv.get(compID      ).stride;
+        const Pel        *srcBlkY    = srcYuv.get(COMPONENT_Y ).bufAt(area.block(COMPONENT_Y ));
+        const Pel        *srcBlkU    = srcYuv.get(COMPONENT_Cb).bufAt(area.block(COMPONENT_Cb));
+        const Pel        *srcBlkV    = srcYuv.get(COMPONENT_Cr).bufAt(area.block(COMPONENT_Cr));
+              Pel        *dstBlk     = dstYuv.get(compID      ).bufAt(compArea);
+        const uint16_t    candPosY   = m_ccSaoComParam.candPos[compIdx][setIdc - 1][COMPONENT_Y ];
+        const uint16_t    bandNumY   = m_ccSaoComParam.bandNum[compIdx][setIdc - 1][COMPONENT_Y ];
+        const uint16_t    bandNumU   = m_ccSaoComParam.bandNum[compIdx][setIdc - 1][COMPONENT_Cb];
+        const uint16_t    bandNumV   = m_ccSaoComParam.bandNum[compIdx][setIdc - 1][COMPONENT_Cr];
+        const short      *offset     = m_ccSaoComParam.offset [compIdx][setIdc - 1];
+        offsetBlockCcSaoNoClip(compID, cs.sps->getBitDepth(toChannelType(compID)), cs.slice->clpRng(compID)
+                             , candPosY, bandNumY, bandNumU, bandNumV
+                             , offset
+                             , srcBlkY, srcBlkU, srcBlkV, dstBlk
+                             , srcStrideY, srcStrideU, srcStrideV, dstStride
+                             , compArea.width, compArea.height
+                             , isLeftAvail, isRightAvail
+                             , isAboveAvail, isBelowAvail
+                             , isAboveLeftAvail, isAboveRightAvail
+                             , isBelowLeftAvail, isBelowRightAvail
+                              );
+      }
+    }
+  }
+void SampleAdaptiveOffset::offsetCTUCcSao(CodingStructure& cs, const UnitArea& area, const CPelUnitBuf& srcYuv, PelUnitBuf& dstYuv, const int ctuRsAddr)
+  const uint32_t numberOfComponents = getNumberValidComponents( area.chromaFormat );
+  bool bAllOff = true;
+  for( uint32_t compIdx = 0; compIdx < numberOfComponents; compIdx++)
+  {
+    if (m_ccSaoComParam.enabled[compIdx])
+    {
+      bAllOff = false;
+    }
+  }
+  if (bAllOff)
+  {
+    return;
+  }
+  bool isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail, isBelowLeftAvail, isBelowRightAvail;
+  deriveLoopFilterBoundaryAvailibility(cs, area.Y(), isLeftAvail,isRightAvail,isAboveAvail,isBelowAvail,isAboveLeftAvail,isAboveRightAvail,isBelowLeftAvail,isBelowRightAvail);
+  for(int compIdx = 0; compIdx < numberOfComponents; compIdx++)
+  {
+    if(m_ccSaoComParam.enabled[compIdx])
+    {
+      const int setIdc = m_ccSaoControl[compIdx][ctuRsAddr];
+      if (setIdc != 0)
+      {
+        const ComponentID compID     = ComponentID(compIdx);
+        const CompArea   &compArea   = area.block(compID);
+        const int         srcStrideY = srcYuv.get(COMPONENT_Y ).stride;
+        const int         srcStrideU = srcYuv.get(COMPONENT_Cb).stride;
+        const int         srcStrideV = srcYuv.get(COMPONENT_Cr).stride;
+        const int         dstStride  = dstYuv.get(compID      ).stride;
+        const Pel        *srcBlkY    = srcYuv.get(COMPONENT_Y ).bufAt(area.block(COMPONENT_Y ));
+        const Pel        *srcBlkU    = srcYuv.get(COMPONENT_Cb).bufAt(area.block(COMPONENT_Cb));
+        const Pel        *srcBlkV    = srcYuv.get(COMPONENT_Cr).bufAt(area.block(COMPONENT_Cr));
+              Pel        *dstBlk     = dstYuv.get(compID      ).bufAt(compArea);
+        const uint16_t    candPosY   = m_ccSaoComParam.candPos[compIdx][setIdc - 1][COMPONENT_Y ];
+        const uint16_t    bandNumY   = m_ccSaoComParam.bandNum[compIdx][setIdc - 1][COMPONENT_Y ];
+        const uint16_t    bandNumU   = m_ccSaoComParam.bandNum[compIdx][setIdc - 1][COMPONENT_Cb];
+        const uint16_t    bandNumV   = m_ccSaoComParam.bandNum[compIdx][setIdc - 1][COMPONENT_Cr];
+        const short      *offset     = m_ccSaoComParam.offset [compIdx][setIdc - 1];
+        offsetBlockCcSao( compID, cs.sps->getBitDepth(toChannelType(compID)), cs.slice->clpRng(compID)
+                        , candPosY, bandNumY, bandNumU, bandNumV
+                        , offset
+                        , srcBlkY, srcBlkU, srcBlkV, dstBlk
+                        , srcStrideY, srcStrideU, srcStrideV, dstStride
+                        , compArea.width, compArea.height
+                        , isLeftAvail, isRightAvail
+                        , isAboveAvail, isBelowAvail
+                        , isAboveLeftAvail, isAboveRightAvail
+                        , isBelowLeftAvail, isBelowRightAvail
+                        );
+      }
+    }
+  }
+// Adds a CCSAO offset to every sample of one block, leaving the result unclipped.
+// Each sample is classified from three source samples: the luma sample at the position selected
+// by candPosY (looked up in g_ccSaoCandPosX/Y) and the Cb and Cr samples. Each value is mapped
+// to a band with (value * bandNum) >> bitDepth, and the three band indices are combined into the
+// index of the offset table. The luma/chroma addressing uses a factor of two in both directions.
+// clpRng and the availability flags are not used by this routine.
+void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, const int bitDepth, const ClpRng& clpRng
+                                                , const uint16_t candPosY
+                                                , const uint16_t bandNumY, const uint16_t bandNumU, const uint16_t bandNumV
+                                                , const short* offset
+                                                , const Pel* srcY, const Pel* srcU, const Pel* srcV, Pel* dst
+                                                , const int srcStrideY, const int srcStrideU, const int srcStrideV, const int dstStride
+                                                , const int width, const int height
+                                                , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isBelowLeftAvail, bool isBelowRightAvail
+                                                 )
+  // Displacement, in samples, from the collocated luma sample to the selected classifier position.
+  const int lumaTapOffset = srcStrideY * g_ccSaoCandPosY[COMPONENT_Y][candPosY] + g_ccSaoCandPosX[COMPONENT_Y][candPosY];
+  // Weight of the luma band inside the combined class index.
+  const int bandWeightY   = bandNumU * bandNumV;
+  if (compID == COMPONENT_Y)
+  {
+    for (int row = 0; row < height; row++)
+    {
+      for (int col = 0; col < width; col++)
+      {
+        const int bandY = (srcY[col + lumaTapOffset] * bandNumY) >> bitDepth;
+        const int bandU = (srcU[col >> 1]            * bandNumU) >> bitDepth;
+        const int bandV = (srcV[col >> 1]            * bandNumV) >> bitDepth;
+        // No clipping here: it is left to a later joint clipping step.
+        dst[col] += offset[bandY * bandWeightY + bandU * bandNumV + bandV];
+      }
+      srcY += srcStrideY;
+      // The chroma row advances only after every second (odd) luma row.
+      if (row & 0x1)
+      {
+        srcU += srcStrideU;
+        srcV += srcStrideV;
+      }
+      dst += dstStride;
+    }
+  }
+  else if (compID == COMPONENT_Cb || compID == COMPONENT_Cr)
+  {
+    for (int row = 0; row < height; row++)
+    {
+      for (int col = 0; col < width; col++)
+      {
+        const int bandY = (srcY[(col << 1) + lumaTapOffset] * bandNumY) >> bitDepth;
+        const int bandU = (srcU[col]                        * bandNumU) >> bitDepth;
+        const int bandV = (srcV[col]                        * bandNumV) >> bitDepth;
+        // No clipping here: it is left to a later joint clipping step.
+        dst[col] += offset[bandY * bandWeightY + bandU * bandNumV + bandV];
+      }
+      // Two luma rows per chroma row.
+      srcY += srcStrideY << 1;
+      srcU += srcStrideU;
+      srcV += srcStrideV;
+      dst  += dstStride;
+    }
+  }
+  else
+  {
+    THROW("Not a supported CCSAO compID\n");
+  }
+// Adds a CCSAO offset to every sample of one block and clips the result to clpRng.
+// Each sample is classified from three source samples: the luma sample at the position selected
+// by candPosY (looked up in g_ccSaoCandPosX/Y) and the Cb and Cr samples. Each value is mapped
+// to a band with (value * bandNum) >> bitDepth, and the three band indices are combined into the
+// index of the offset table. The luma/chroma addressing uses a factor of two in both directions.
+// The availability flags are not used by this routine.
+void SampleAdaptiveOffset::offsetBlockCcSao(const ComponentID compID, const int bitDepth, const ClpRng& clpRng
+                                          , const uint16_t candPosY
+                                          , const uint16_t bandNumY, const uint16_t bandNumU, const uint16_t bandNumV
+                                          , const short* offset
+                                          , const Pel* srcY, const Pel* srcU, const Pel* srcV, Pel* dst
+                                          , const int srcStrideY, const int srcStrideU, const int srcStrideV, const int dstStride
+                                          , const int width, const int height
+                                          , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isBelowLeftAvail, bool isBelowRightAvail
+                                           )
+  // Displacement, in samples, from the collocated luma sample to the selected classifier position.
+  const int lumaTapOffset = srcStrideY * g_ccSaoCandPosY[COMPONENT_Y][candPosY] + g_ccSaoCandPosX[COMPONENT_Y][candPosY];
+  // Weight of the luma band inside the combined class index.
+  const int bandWeightY   = bandNumU * bandNumV;
+  if (compID == COMPONENT_Y)
+  {
+    for (int row = 0; row < height; row++)
+    {
+      for (int col = 0; col < width; col++)
+      {
+        const int bandY    = (srcY[col + lumaTapOffset] * bandNumY) >> bitDepth;
+        const int bandU    = (srcU[col >> 1]            * bandNumU) >> bitDepth;
+        const int bandV    = (srcV[col >> 1]            * bandNumV) >> bitDepth;
+        const int classIdx = bandY * bandWeightY + bandU * bandNumV + bandV;
+        dst[col] = ClipPel<int>(dst[col] + offset[classIdx], clpRng);
+      }
+      srcY += srcStrideY;
+      // The chroma row advances only after every second (odd) luma row.
+      if (row & 0x1)
+      {
+        srcU += srcStrideU;
+        srcV += srcStrideV;
+      }
+      dst += dstStride;
+    }
+  }
+  else if (compID == COMPONENT_Cb || compID == COMPONENT_Cr)
+  {
+    for (int row = 0; row < height; row++)
+    {
+      for (int col = 0; col < width; col++)
+      {
+        const int bandY    = (srcY[(col << 1) + lumaTapOffset] * bandNumY) >> bitDepth;
+        const int bandU    = (srcU[col]                        * bandNumU) >> bitDepth;
+        const int bandV    = (srcV[col]                        * bandNumV) >> bitDepth;
+        const int classIdx = bandY * bandWeightY + bandU * bandNumV + bandV;
+        dst[col] = ClipPel<int>(dst[col] + offset[classIdx], clpRng);
+      }
+      // Two luma rows per chroma row.
+      srcY += srcStrideY << 1;
+      srcU += srcStrideU;
+      srcV += srcStrideV;
+      dst  += dstStride;
+    }
+  }
+  else
+  {
+    THROW("Not a supported CCSAO compID\n");
+  }
 void SampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure& cs, const Position &pos,
   bool& isLeftAvail,
diff --git a/source/Lib/CommonLib/SampleAdaptiveOffset.h b/source/Lib/CommonLib/SampleAdaptiveOffset.h
index 9fe135edb403f5e506420eedfd12311e8af17edb..1a0970122d487ccce978c33a34556e73e954e153 100644
--- a/source/Lib/CommonLib/SampleAdaptiveOffset.h
+++ b/source/Lib/CommonLib/SampleAdaptiveOffset.h
@@ -77,6 +77,13 @@ public:
   BilateralFilter m_bilateralFilter;
+#if JVET_W0066_CCSAO
+  void CCSAOProcess(CodingStructure& cs); // picture-level CCSAO: adds unclipped offsets to the reconstruction buffer when any component is enabled
+  CcSaoComParam& getCcSaoComParam() { return m_ccSaoComParam; } // mutable access to the CCSAO parameters held by this object
+  uint8_t* getCcSaoControlIdc(const ComponentID compID) { return m_ccSaoControl[compID]; } // per-CTU set-index array of compID (0 = CCSAO off for that CTU)
+  PelStorage& getCcSaoBuf() { return m_ccSaoBuf; } // buffer CCSAOProcess() reads its classifier samples from
+  void jointClipSaoBifCcSao(CodingStructure& cs); // final clipping pass for the CTUs / TUs touched by SAO, BIF or CCSAO
   void deriveLoopFilterBoundaryAvailibility(CodingStructure& cs, const Position &pos,
     bool& isLeftAvail,
@@ -106,6 +113,9 @@ protected:
   void offsetCTU(const UnitArea& area, const CPelUnitBuf& src, PelUnitBuf& res, SAOBlkParam& saoblkParam, CodingStructure& cs);
   void offsetCTUnoClip( const UnitArea& area, const CPelUnitBuf& src, PelUnitBuf& res, SAOBlkParam& saoblkParam, CodingStructure& cs);
+#if JVET_W0066_CCSAO
+  void clipCTU(CodingStructure& cs, PelUnitBuf& dstYuv, const UnitArea& area, const ComponentID compID);
   void offsetCTUonlySAO(const UnitArea& area, const CPelUnitBuf& src, PelUnitBuf& res, SAOBlkParam& saoblkParam, CodingStructure& cs);
   void xReconstructBlkSAOParams(CodingStructure& cs, SAOBlkParam* saoBlkParams);
@@ -132,6 +142,29 @@ protected:
     return bDisabledFlag;
   Reshape* m_pcReshape;
+#if JVET_W0066_CCSAO
+  void applyCcSao(CodingStructure &cs, const PreCalcValues& pcv, const CPelUnitBuf& srcYuv, PelUnitBuf& dstYuv);
+  void offsetCTUCcSao(CodingStructure& cs, const UnitArea& area, const CPelUnitBuf& srcYuv, PelUnitBuf& dstYuv, const int ctuRsAddr);
+  void offsetCTUCcSaoNoClip(CodingStructure& cs, const UnitArea& area, const CPelUnitBuf& srcYuv, PelUnitBuf& dstYuv, const int ctuRsAddr);
+  void offsetBlockCcSao(const ComponentID compID, const int bitDepth, const ClpRng& clpRng
+                      , const uint16_t candPosY
+                      , const uint16_t bandNumY, const uint16_t bandNumU, const uint16_t bandNumV
+                      , const short* offset
+                      , const Pel* srcY, const Pel* srcU, const Pel* srcV, Pel* dst
+                      , const int srcStrideY, const int srcStrideU, const int srcStrideV, const int dstStride
+                      , const int width, const int height
+                      , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isBelowLeftAvail, bool isBelowRightAvail
+                       );
+  void offsetBlockCcSaoNoClip(const ComponentID compID, const int bitDepth, const ClpRng& clpRng
+                            , const uint16_t candPosY
+                            , const uint16_t bandNumY, const uint16_t bandNumU, const uint16_t bandNumV
+                            , const short* offset
+                            , const Pel* srcY, const Pel* srcU, const Pel* srcV, Pel* dst
+                            , const int srcStrideY, const int srcStrideU, const int srcStrideV, const int dstStride
+                            , const int width, const int height
+                            , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail, bool isBelowLeftAvail, bool isBelowRightAvail
+                             );
   uint32_t m_offsetStepLog2[MAX_NUM_COMPONENT]; //offset step
   PelStorage m_tempBuf;
@@ -139,6 +172,20 @@ protected:
   std::vector<int8_t> m_signLineBuf1;
   std::vector<int8_t> m_signLineBuf2;
+#if JVET_W0066_CCSAO
+  // CCSAO state of the SAO filter object.
+  // Every scalar and pointer member carries a default member initializer (as m_created already did),
+  // so an object that has not been set up yet never exposes indeterminate dimensions or dangling pointers.
+  bool                m_created = false;
+  PelStorage          m_ccSaoBuf;
+  int                 m_picWidth = 0;
+  int                 m_picHeight = 0;
+  int                 m_maxCUWidth = 0;
+  int                 m_maxCUHeight = 0;
+  int                 m_numCTUsInWidth = 0;
+  int                 m_numCTUsInHeight = 0;
+  int                 m_numCTUsInPic = 0;
+  CcSaoComParam       m_ccSaoComParam;
+  uint8_t*            m_ccSaoControl[MAX_NUM_COMPONENT] = {};  // one control buffer pointer per component; null until assigned
   bool m_picSAOEnabled[MAX_NUM_COMPONENT];
diff --git a/source/Lib/CommonLib/Slice.cpp b/source/Lib/CommonLib/Slice.cpp
index 69b158ee2b1f1c9bfb4a591a7f993469a0257f13..3981c07929ce7159b1014885f0a6ad83b0b7bea4 100644
--- a/source/Lib/CommonLib/Slice.cpp
+++ b/source/Lib/CommonLib/Slice.cpp
@@ -153,6 +153,10 @@ Slice::Slice()
     m_saoEnabledFlag[ch] = false;
+#if JVET_W0066_CCSAO
+  m_ccSaoComParam.reset();
+  resetCcSaoEnabledFlag();
   memset(m_alfApss, 0, sizeof(m_alfApss));
@@ -209,6 +213,10 @@ void Slice::initSlice()
   m_useLTforDRAP         = false;
   m_isDRAP               = false;
   m_latestDRAPPOC        = MAX_INT;
+#if JVET_W0066_CCSAO
+  m_ccSaoComParam.reset();
+  resetCcSaoEnabledFlag();
   m_tileGroupCcAlfCbEnabledFlag = 0;
@@ -264,6 +272,14 @@ void Slice::inheritFromPicHeader( PicHeader *picHeader, const PPS *pps, const SP
   setSaoEnabledFlag(CHANNEL_TYPE_LUMA,     picHeader->getSaoEnabledFlag(CHANNEL_TYPE_LUMA));
   setSaoEnabledFlag(CHANNEL_TYPE_CHROMA,   picHeader->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA));
+#if JVET_W0066_CCSAO
+  setCcSaoEnabledFlag(COMPONENT_Y,  picHeader->getCcSaoEnabledFlag(COMPONENT_Y));
+  setCcSaoEnabledFlag(COMPONENT_Cb, picHeader->getCcSaoEnabledFlag(COMPONENT_Cb));
+  setCcSaoEnabledFlag(COMPONENT_Cr, picHeader->getCcSaoEnabledFlag(COMPONENT_Cr));
+  m_ccSaoComParam.enabled[COMPONENT_Y ] = picHeader->getCcSaoEnabledFlag(COMPONENT_Y);
+  m_ccSaoComParam.enabled[COMPONENT_Cb] = picHeader->getCcSaoEnabledFlag(COMPONENT_Cb);
+  m_ccSaoComParam.enabled[COMPONENT_Cr] = picHeader->getCcSaoEnabledFlag(COMPONENT_Cr);
   setTileGroupAlfEnabledFlag(COMPONENT_Y,  picHeader->getAlfEnabledFlag(COMPONENT_Y));
   setTileGroupAlfEnabledFlag(COMPONENT_Cb, picHeader->getAlfEnabledFlag(COMPONENT_Cb));
@@ -1219,6 +1235,15 @@ void Slice::copySliceInfo(Slice *pSrc, bool cpyAlmostAll)
     m_saoEnabledFlag[ch] = pSrc->m_saoEnabledFlag[ch];
+#if JVET_W0066_CCSAO
+  m_ccSaoComParam                  = pSrc->m_ccSaoComParam;
+  m_ccSaoControl    [COMPONENT_Y ] = pSrc->m_ccSaoControl    [COMPONENT_Y ];
+  m_ccSaoControl    [COMPONENT_Cb] = pSrc->m_ccSaoControl    [COMPONENT_Cb];
+  m_ccSaoControl    [COMPONENT_Cr] = pSrc->m_ccSaoControl    [COMPONENT_Cr];
+  m_ccSaoEnabledFlag[COMPONENT_Y ] = pSrc->m_ccSaoEnabledFlag[COMPONENT_Y ];
+  m_ccSaoEnabledFlag[COMPONENT_Cb] = pSrc->m_ccSaoEnabledFlag[COMPONENT_Cb];
+  m_ccSaoEnabledFlag[COMPONENT_Cr] = pSrc->m_ccSaoEnabledFlag[COMPONENT_Cr];
   m_cabacInitFlag                 = pSrc->m_cabacInitFlag;
   memcpy(m_alfApss, pSrc->m_alfApss, sizeof(m_alfApss)); // this might be quite unsafe
@@ -4760,6 +4785,9 @@ bool             operator == (const ConstraintInfo& op1, const ConstraintInfo& o
   if( op1.m_noQtbttDualTreeIntraConstraintFlag           != op2.m_noQtbttDualTreeIntraConstraintFlag             ) return false;
   if( op1.m_noPartitionConstraintsOverrideConstraintFlag != op2.m_noPartitionConstraintsOverrideConstraintFlag   ) return false;
   if( op1.m_noSaoConstraintFlag                          != op2.m_noSaoConstraintFlag                            ) return false;
+#if JVET_W0066_CCSAO
+  if( op1.m_noCCSaoConstraintFlag                        != op2.m_noCCSaoConstraintFlag                          ) return false;
   if( op1.m_noAlfConstraintFlag                          != op2.m_noAlfConstraintFlag                            ) return false;
   if( op1.m_noCCAlfConstraintFlag                        != op2.m_noCCAlfConstraintFlag                          ) return false;
 #if JVET_S0058_GCI
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index 43eb1c0c2189a9f302607f2eb9e4d32048ae0bb2..246c7cfab59512afdb3caa79d739dc73b89a31cc 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -299,6 +299,9 @@ class ConstraintInfo
   bool              m_noPartitionConstraintsOverrideConstraintFlag;
   bool              m_noSaoConstraintFlag;
+#if JVET_W0066_CCSAO
+  bool              m_noCCSaoConstraintFlag;
   bool              m_noAlfConstraintFlag;
   bool              m_noCCAlfConstraintFlag;
 #if JVET_S0058_GCI
@@ -417,6 +420,9 @@ public:
     , m_noPartitionConstraintsOverrideConstraintFlag(false)
     , m_noSaoConstraintFlag      (false)
+#if JVET_W0066_CCSAO
+    , m_noCCSaoConstraintFlag      (false)
     , m_noAlfConstraintFlag      (false)
     , m_noCCAlfConstraintFlag      (false)
 #if JVET_S0058_GCI
@@ -583,6 +589,10 @@ public:
   void          setNoPartitionConstraintsOverrideConstraintFlag(bool bVal) { m_noPartitionConstraintsOverrideConstraintFlag = bVal; }
   bool          getNoSaoConstraintFlag() const { return m_noSaoConstraintFlag; }
   void          setNoSaoConstraintFlag(bool bVal) { m_noSaoConstraintFlag = bVal; }
+#if JVET_W0066_CCSAO
+  bool          getNoCCSaoConstraintFlag() const { return m_noCCSaoConstraintFlag; } // value parsed from gci_no_ccsao_constraint_flag
+  void          setNoCCSaoConstraintFlag(bool val) { m_noCCSaoConstraintFlag = val; } // called by the GCI parser with gci_no_ccsao_constraint_flag
   bool          getNoAlfConstraintFlag() const { return m_noAlfConstraintFlag; }
   void          setNoAlfConstraintFlag(bool bVal) { m_noAlfConstraintFlag = bVal; }
   bool          getNoCCAlfConstraintFlag() const { return m_noCCAlfConstraintFlag; }
@@ -1604,6 +1614,9 @@ private:
   bool              m_useWeightedBiPred;                 //!< Use of Weighting Bi-Prediction (B_SLICE)
   bool              m_saoEnabledFlag;
+#if JVET_W0066_CCSAO
+  bool              m_ccSaoEnabledFlag;
   bool              m_bTemporalIdNestingFlag; // temporal_id_nesting_flag
@@ -1923,6 +1936,10 @@ public:
   void                    setSAOEnabledFlag(bool bVal)                                                    { m_saoEnabledFlag = bVal;                                                    }
   bool                    getSAOEnabledFlag() const                                                       { return m_saoEnabledFlag;                                                    }
+#if JVET_W0066_CCSAO
+  bool                    getCCSAOEnabledFlag() const                                                     { return m_ccSaoEnabledFlag; } // sequence-level CCSAO switch (sps_ccsao_enabled_flag)
+  void                    setCCSAOEnabledFlag( bool b )                                                   { m_ccSaoEnabledFlag = b;    } // set by the SPS parser from sps_ccsao_enabled_flag
   bool                    getALFEnabledFlag() const                                                       { return m_alfEnabledFlag; }
   void                    setALFEnabledFlag( bool b )                                                     { m_alfEnabledFlag = b; }
@@ -2670,6 +2687,9 @@ private:
   int                         m_qpDelta;                                                //!< value of Qp delta
   bool                        m_saoEnabledFlag[MAX_NUM_CHANNEL_TYPE];                   //!< sao enabled flags for each channel
+#if JVET_W0066_CCSAO
+  bool                        m_ccSaoEnabledFlag[MAX_NUM_COMPONENT];
   int                         m_alfFixedFilterSetIdx;
@@ -2804,6 +2824,10 @@ public:
   int                         getQpDelta() const                                        { return m_qpDelta;                                                                            }
   void                        setSaoEnabledFlag(ChannelType chType, bool b)             { m_saoEnabledFlag[chType] = b;                                                                }
   bool                        getSaoEnabledFlag(ChannelType chType) const               { return m_saoEnabledFlag[chType];                                                             }
+#if JVET_W0066_CCSAO
+  void                        setCcSaoEnabledFlag(ComponentID compId, bool b)           { m_ccSaoEnabledFlag[compId] = b;                                                              } // the PH parser presets this to the SPS CCSAO flag and overrides it with ph_cc_sao_{y,cb,cr}_enabled_flag when signalled
+  bool                        getCcSaoEnabledFlag(ComponentID compId) const             { return m_ccSaoEnabledFlag[compId];                                                           } // per-component picture-level CCSAO enable flag
   void                        setAlfFixedFilterSetIdx(int i)                            { m_alfFixedFilterSetIdx = i;                                                                  }
   int                         getAlfFixedFilterSetIdx() const                           { return m_alfFixedFilterSetIdx;                                                               }
@@ -2916,6 +2940,9 @@ class Slice
   //  Bitstream writing
   bool                       m_saoEnabledFlag[MAX_NUM_CHANNEL_TYPE];
+#if JVET_W0066_CCSAO
+  bool                       m_ccSaoEnabledFlag[MAX_NUM_COMPONENT];
   int                        m_iPOC;
   int                        m_iLastIDR;
   int                        m_prevGDRInSameLayerPOC;  //< the previous GDR in the same layer
@@ -3078,6 +3105,11 @@ public:
   APS**                       getAlfAPSs()                                           { return m_alfApss;                                             }
   void                        setSaoEnabledFlag(ChannelType chType, bool s)          {m_saoEnabledFlag[chType] =s;                                   }
   bool                        getSaoEnabledFlag(ChannelType chType) const            { return m_saoEnabledFlag[chType];                              }
+#if JVET_W0066_CCSAO
+  //! Per-component CCSAO enable flags of this slice (set from slice_ccsao_{y,cb,cr}_enabled_flag or inherited from the picture header).
+  //! The getter is const, like getSaoEnabledFlag(), so it can also be called through a const Slice.
+  void                        resetCcSaoEnabledFlag()                                { memset(m_ccSaoEnabledFlag, 0, sizeof(m_ccSaoEnabledFlag));    }
+  void                        setCcSaoEnabledFlag(ComponentID compID, bool b)        { m_ccSaoEnabledFlag[compID] = b;                               }
+  bool                        getCcSaoEnabledFlag(ComponentID compID) const          { return m_ccSaoEnabledFlag[compID];                            }
   void                        setRPL0(const ReferencePictureList *pcRPL)             { m_pRPL0 = pcRPL;                                             }
   void                        setRPL1(const ReferencePictureList *pcRPL)             { m_pRPL1 = pcRPL;                                             }
   const ReferencePictureList* getRPL0()                                              { return m_pRPL0;                                              }
@@ -3389,6 +3421,10 @@ public:
   bool                        isLastSliceInSubpic();
+#if JVET_W0066_CCSAO
+  CcSaoComParam               m_ccSaoComParam;
+  uint8_t*                    m_ccSaoControl[MAX_NUM_COMPONENT];
   CcAlfFilterParam            m_ccAlfFilterParam;
   uint8_t*                    m_ccAlfFilterControl[2];
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index 947606520e257054a436f11c83e7caee40715a8f..202a7b51e06f61c3d6de3bdbdb3e7c71e0d61295 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -146,6 +146,7 @@
 #define ALF_IMPROVEMENT                                   1 // ALF improvement
 #define EMBEDDED_APS                                      1 // Embed APS into picture header
 #define JVET_V0094_BILATERAL_FILTER                       1 // Bilateral filter
+#define JVET_W0066_CCSAO                                  1 // JVET-W0066: Cross-component sample adaptive offset
 // SIMD optimizations
 #if IF_12TAP
@@ -671,6 +672,9 @@ enum ModeType
 enum ComponentID
   COMPONENT_Y         = 0,
+#if JVET_W0066_CCSAO
   COMPONENT_Cb        = 1,
   COMPONENT_Cr        = 2,
diff --git a/source/Lib/CommonLib/x86/BilateralFilterX86.h b/source/Lib/CommonLib/x86/BilateralFilterX86.h
index 7b429788c893020b664b40609c7fdec0893f5ba6..7af1e6828bc63391de20274f0ecda1513d01549f 100644
--- a/source/Lib/CommonLib/x86/BilateralFilterX86.h
+++ b/source/Lib/CommonLib/x86/BilateralFilterX86.h
@@ -44,6 +44,267 @@
+#if JVET_W0066_CCSAO
+/** SIMD (128-bit) diamond 5x5 bilateral filter that leaves the non-RDO result unclipped.
+ *
+ *  For eight horizontally adjacent samples at a time, the differences between the centre sample and
+ *  twelve neighbours (4 direct, 4 diagonal, 4 at distance two) are mapped through a 16-entry LUT,
+ *  summed, scaled by bfac, rounded and shifted. The resulting delta is added to the input sample.
+ *
+ *  \param uiWidth, uiHeight  size of the block to filter; widths below 4 are delegated to
+ *                            blockBilateralFilterDiamond5x5NoClip()
+ *  \param block              padded input samples, row stride iWidthExtSIMD, 2-sample border
+ *  \param blkFilt            scratch output, row stride iWidthExtSIMD + 4, 2-sample border
+ *  \param clpRng             clipping range, applied only when isRDO is true
+ *  \param recPtr, recStride  reconstruction buffer; source of the input samples when isRDO is false,
+ *                            and destination of the final copy in both modes
+ *  \param bfac               delta scale; only 2 and 3 are handled, any other value leaves the sum unscaled
+ *  \param bif_round_add, bif_round_shift  rounding offset and arithmetic right shift applied to the sum
+ *  \param isRDO              true: add delta to the sample from block and clip to clpRng;
+ *                            false: add delta to the sample from recPtr and store it unclipped
+ *                            (per the original author's note, clipping then happens later in CCSAO)
+ *  \param LUTrowPtr          16 signed 8-bit weights, indexed by min((|diff| + 4) >> 3, 15)
+ *
+ *  NOTE: each vector access covers 8 samples even when uiWidth is not a multiple of 8, so block,
+ *  blkFilt and (for !isRDO) recPtr must tolerate accesses past uiWidth within a row.
+ */
+template<X86_VEXT vext>
+void BilateralFilter::simdFilterDiamond5x5NoClip(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bif_round_add, int bif_round_shift, bool isRDO, const char* LUTrowPtr)
+{
+  if (uiWidth < 4)
+  {
+    return blockBilateralFilterDiamond5x5NoClip(uiWidth, uiHeight, block, blkFilt, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bif_round_add, bif_round_shift, isRDO, LUTrowPtr);
+  }
+
+  const int pad      = 2;
+  const int padwidth = iWidthExtSIMD;
+  const int width    = static_cast<int>(uiWidth);
+  const int height   = static_cast<int>(uiHeight);
+
+  const __m128i four      = _mm_set1_epi16(4);
+  const __m128i fifteen   = _mm_set1_epi16(15);
+  const __m128i round_add = _mm_set1_epi16(bif_round_add);
+  const __m128i clipmin   = _mm_set1_epi16(clpRng.min);
+  const __m128i clipmax   = _mm_set1_epi16(clpRng.max);
+  const __m128i lut       = _mm_loadu_si128((const __m128i*)(LUTrowPtr));
+
+  // Signed LUT weight of one neighbour vector relative to the centre vector:
+  //   diff   = neighbour - centre
+  //   weight = lut[min((|diff| + 4) >> 3, 15)], halved (>> 1) when 'halve' is set, carrying the sign of diff.
+  const auto neighbourWeight = [&lut, &four, &fifteen](const int16_t* neighbour, const __m128i& centre, const bool halve) -> __m128i
+  {
+    const __m128i diff = _mm_sub_epi16(_mm_loadu_si128((const __m128i*)(neighbour)), centre);
+    __m128i w = _mm_abs_epi16(diff);      // |diff|
+    w = _mm_add_epi16(w, four);           // + 4
+    w = _mm_srai_epi16(w, 3);             // >> 3
+    w = _mm_min_epi16(w, fifteen);        // min(x, 15)
+    w = _mm_packus_epi16(w, w);           // 16-bit -> 8-bit LUT indices
+    w = _mm_shuffle_epi8(lut, w);         // LUT lookup
+    w = _mm_cvtepi8_epi16(w);             // back to 16-bit (sign-extended)
+    if (halve)
+    {
+      w = _mm_srai_epi16(w, 1);
+    }
+    return _mm_sign_epi16(w, diff);       // apply the sign of the difference
+  };
+
+  for (int col = 0; col < width; col += 8)
+  {
+    for (int row = 0; row < height; row++)
+    {
+      const int16_t* point  = &block[(row + pad) * padwidth + pad + col];
+      const __m128i  center = _mm_loadu_si128((const __m128i*)(point));
+
+      __m128i acc = _mm_setzero_si128();
+      // Direct neighbours (left, right, up, down): full weight.
+      acc = _mm_add_epi16(acc, neighbourWeight(point - 1, center, false));
+      acc = _mm_add_epi16(acc, neighbourWeight(point + 1, center, false));
+      acc = _mm_add_epi16(acc, neighbourWeight(point - padwidth, center, false));
+      acc = _mm_add_epi16(acc, neighbourWeight(point + padwidth, center, false));
+      // Diagonal neighbours: half weight.
+      acc = _mm_add_epi16(acc, neighbourWeight(point - 1 - padwidth, center, true));
+      acc = _mm_add_epi16(acc, neighbourWeight(point - 1 + padwidth, center, true));
+      acc = _mm_add_epi16(acc, neighbourWeight(point + 1 - padwidth, center, true));
+      acc = _mm_add_epi16(acc, neighbourWeight(point + 1 + padwidth, center, true));
+      // Neighbours at distance two (left, right, up, down): half weight.
+      acc = _mm_add_epi16(acc, neighbourWeight(point - 2, center, true));
+      acc = _mm_add_epi16(acc, neighbourWeight(point + 2, center, true));
+      acc = _mm_add_epi16(acc, neighbourWeight(point - 2 * padwidth, center, true));
+      acc = _mm_add_epi16(acc, neighbourWeight(point + 2 * padwidth, center, true));
+
+      if (bfac == 2)
+      {
+        acc = _mm_slli_epi16(acc, 1);                       // 2 * acc
+      }
+      else if (bfac == 3)
+      {
+        acc = _mm_add_epi16(acc, _mm_slli_epi16(acc, 1));   // acc + 2 * acc = 3 * acc
+      }
+
+      // Round and scale the delta.
+      acc = _mm_add_epi16(acc, round_add);
+      acc = _mm_srai_epi16(acc, bif_round_shift);
+
+      if (isRDO)
+      {
+        // RDO path: delta is applied to the padded input sample and the result is clipped here.
+        acc = _mm_add_epi16(acc, center);
+        acc = _mm_max_epi16(acc, clipmin);
+        acc = _mm_min_epi16(acc, clipmax);
+      }
+      else
+      {
+        // Non-RDO path: delta is applied to the reconstruction sample and deliberately left unclipped.
+        const __m128i inputVals = _mm_loadu_si128((const __m128i*)(&recPtr[row * recStride + col]));
+        acc = _mm_add_epi16(acc, inputVals);
+      }
+
+      // The destination is offset by 'pad' samples and uses a caller-defined row stride, so it is not
+      // guaranteed to be 16-byte aligned: use an unaligned store, matching the unaligned loads above.
+      _mm_storeu_si128((__m128i*)(blkFilt + (row + pad) * (padwidth + 4) + col + pad), acc);
+    }
+  }
+
+  // Copy the filtered samples (skipping the 2-sample border of blkFilt) back to the reconstruction buffer.
+  Pel*         tempBlockPtr    = (short*)blkFilt + (((padwidth + 4) << 1) + 2);
+  const int    tempBlockStride = padwidth + 4;
+  const size_t onerow          = uiWidth * sizeof(Pel);
+  for (uint32_t yy = 0; yy < uiHeight; yy++)
+  {
+    std::memcpy(recPtr, tempBlockPtr, onerow);
+    recPtr += recStride;
+    tempBlockPtr += tempBlockStride;
+  }
+}
 template<X86_VEXT vext>
 void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bif_round_add, int bif_round_shift, bool isRDO, const char* LUTrowPtr )
@@ -304,6 +565,9 @@ void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight,
 template <X86_VEXT vext>
 void BilateralFilter::_initBilateralFilterX86()
+#if JVET_W0066_CCSAO   
+  m_bilateralFilterDiamond5x5NoClip = simdFilterDiamond5x5NoClip<vext>;
   m_bilateralFilterDiamond5x5 = simdFilterDiamond5x5<vext>;  
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index 77e09ae8cfcc6d2ef1d8e28bb6802a0fcf8d4211..b689abdc7d6764fca5ec7785670d9987fb4a4f7e 100644
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -145,6 +145,24 @@ void CABACReader::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i
   sao( cs, ctuRsAddr );
+#if JVET_W0066_CCSAO
+  if (cs.sps->getCCSAOEnabledFlag())
+  {
+    // Parse one CCSAO control index for every colour component of this CTU that has CCSAO enabled.
+    const CcSaoComParam& ccSaoParam = cs.slice->m_ccSaoComParam;
+    const int            numComp    = getNumberValidComponents( cs.pcv->chrFormat );
+    for ( int comp = 0; comp < numComp; comp++ )
+    {
+      if (!ccSaoParam.enabled[comp])
+      {
+        continue;
+      }
+      // Top-left luma sample of the CTU, derived from its raster-scan address.
+      const int      numSets = ccSaoParam.setNum[comp];
+      const int      ctuRow  = ctuRsAddr / cs.pcv->widthInCtus;
+      const int      ctuCol  = ctuRsAddr % cs.pcv->widthInCtus;
+      const Position ctuLumaPos(ctuCol * cs.pcv->maxCUWidth, ctuRow * cs.pcv->maxCUHeight);
+      ccSaoControlIdc(cs, ComponentID(comp), ctuRsAddr, cs.slice->m_ccSaoControl[comp], ctuLumaPos, numSets);
+    }
+  }
   if (cs.sps->getALFEnabledFlag() && (cs.slice->getTileGroupAlfEnabledFlag(COMPONENT_Y)))
     const PreCalcValues& pcv = *cs.pcv;
@@ -562,6 +580,46 @@ void CABACReader::bif(CodingStructure& cs, unsigned ctuRsAddr)
+#if JVET_W0066_CCSAO
+/** Parses the CCSAO control index of one CTU for one colour component and stores it in controlIdc[curIdx].
+ *
+ *  The first bin is context coded; the context counts how many of the available left/above CTUs have a
+ *  non-zero control index (0..2) and adds a per-component offset (Y: 0, Cb: 3, Cr: 6). If that bin is set,
+ *  bypass bins extend the value in unary fashion, stopping at a zero bin or once setNum is reached.
+ *
+ *  \param cs          coding structure providing slice, PPS and CTU geometry
+ *  \param compID      colour component being parsed
+ *  \param curIdx      raster-scan CTU address, used as index into controlIdc
+ *  \param controlIdc  per-CTU control index buffer of this component (read for neighbours, written for curIdx)
+ *  \param lumaPos     top-left luma position of the current CTU
+ *  \param setNum      number of CCSAO sets of this component, i.e. the largest value that can be parsed
+ */
+void CABACReader::ccSaoControlIdc(CodingStructure &cs, const ComponentID compID, const int curIdx,
+                                  uint8_t *controlIdc, Position lumaPos, int setNum)
+{
+  // The caller places CTUs on a maxCUWidth x maxCUHeight grid, so the above neighbour lies one CTU
+  // *height* up (the previous code used maxCUWidth for the vertical step as well).
+  Position       leftLumaPos    = lumaPos.offset(-(int)cs.pcv->maxCUWidth, 0);
+  Position       aboveLumaPos   = lumaPos.offset(0, -(int)cs.pcv->maxCUHeight);
+  const uint32_t curSliceIdx    = cs.slice->getIndependentSliceIdx();
+  const uint32_t curTileIdx     = cs.pps->getTileIdx( lumaPos );
+  bool           leftAvail      = cs.getCURestricted( leftLumaPos,  lumaPos, curSliceIdx, curTileIdx, CH_L ) ? true : false;
+  bool           aboveAvail     = cs.getCURestricted( aboveLumaPos, lumaPos, curSliceIdx, curTileIdx, CH_L ) ? true : false;
+  int            ctxt           = 0;
+
+  // Neighbour contribution: +1 for each available neighbour CTU whose control index is non-zero.
+  if (leftAvail)
+  {
+    ctxt += ( controlIdc[curIdx - 1] ) ? 1 : 0;
+  }
+  if (aboveAvail)
+  {
+    ctxt += ( controlIdc[curIdx - cs.pcv->widthInCtus] ) ? 1 : 0;
+  }
+  // Component contribution: each component uses its own group of three contexts.
+  ctxt += ( compID == COMPONENT_Y  ) ? 0 
+        : ( compID == COMPONENT_Cb ) ? 3 : 6;
+
+  int idcVal  = m_BinDecoder.decodeBin( Ctx::CcSaoControlIdc( ctxt ) );
+  if ( idcVal )
+  {
+    // Unary extension with bypass bins, capped at setNum.
+    while ( ( idcVal != setNum ) && m_BinDecoder.decodeBinEP() )
+    {
+      idcVal++;
+    }
+  }
+  controlIdc[curIdx] = idcVal;
+
+  DTRACE(g_trace_ctx, D_SYNTAX, "ccSaoControlIdc() compID=%d pos=(%d,%d) ctxt=%d, setNum=%d, idcVal=%d\n",
+         compID, lumaPos.x, lumaPos.y, ctxt, setNum, idcVal);
+}
 //  clause
diff --git a/source/Lib/DecoderLib/CABACReader.h b/source/Lib/DecoderLib/CABACReader.h
index 5d0e3329a7da3950f28179e76ac6bf89b09a03f5..7fe2facef5db65f2b0847a4dc38cbff880e91eb2 100644
--- a/source/Lib/DecoderLib/CABACReader.h
+++ b/source/Lib/DecoderLib/CABACReader.h
@@ -73,6 +73,10 @@ public:
   void        bif                      (CodingStructure&              cs, unsigned ctuRsAddr);
+#if JVET_W0066_CCSAO
+  void        ccSaoControlIdc           ( CodingStructure &cs, const ComponentID compID, const int curIdx, uint8_t *controlIdc, Position lumaPos, int setNum );
   void        readAlfCtuFilterIndex(CodingStructure&              cs, unsigned        ctuRsAddr);
   void ccAlfFilterControlIdc(CodingStructure &cs, const ComponentID compID, const int curIdx, uint8_t *filterControlIdc,
diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp
index ec27df33991409c01a52574c12d9c264bff541b0..351ec69ba9a10b2d37668c8f22184a48dfc9b785 100644
--- a/source/Lib/DecoderLib/DecLib.cpp
+++ b/source/Lib/DecoderLib/DecLib.cpp
@@ -203,6 +203,18 @@ bool tryDecodePicture( Picture* pcEncPic, const int expectedPoc, const std::stri
                   pcEncPic->copySAO( *pic, 0 );
+#if JVET_W0066_CCSAO
+                if ( pic->cs->sps->getCCSAOEnabledFlag() )
+                {
+                  // Mirror the per-component CCSAO enable flags of every decoded slice onto the matching encoder-side slice.
+                  const size_t numSlices = pic->slices.size();
+                  for (size_t sliceIdx = 0; sliceIdx < numSlices; sliceIdx++)
+                  {
+                    for (int compIdx = COMPONENT_Y; compIdx <= COMPONENT_Cr; compIdx++)
+                    {
+                      const ComponentID compID = ComponentID(compIdx);
+                      pcEncPic->slices[sliceIdx]->setCcSaoEnabledFlag(compID, pic->slices[sliceIdx]->getCcSaoEnabledFlag(compID));
+                    }
+                  }
+                }
                 if( pic->cs->sps->getALFEnabledFlag() )
                   std::copy(pic->getAlfCtbFilterIndexVec().begin(), pic->getAlfCtbFilterIndexVec().end(), pcEncPic->getAlfCtbFilterIndexVec().begin());
@@ -681,6 +693,13 @@ void DecLib::executeLoopFilters()
+#if JVET_W0066_CCSAO
+  if (cs.sps->getCCSAOEnabledFlag())
+  {
+    m_cSAO.getCcSaoBuf().copyFrom( cs.getRecoBuf() );
+  }
   if( cs.sps->getSAOEnabledFlag() || cs.pps->getUseBIF())
@@ -690,6 +709,15 @@ void DecLib::executeLoopFilters()
     m_cSAO.SAOProcess( cs, cs.picture->getSAO() );
+#if JVET_W0066_CCSAO
+  if (cs.sps->getCCSAOEnabledFlag())
+  {
+    m_cSAO.getCcSaoComParam() = cs.slice->m_ccSaoComParam;
+    m_cSAO.CCSAOProcess( cs );
+  }
+  m_cSAO.jointClipSaoBifCcSao( cs );
   if( cs.sps->getALFEnabledFlag() )
     m_cALF.getCcAlfFilterParam() = cs.slice->m_ccAlfFilterParam;
@@ -1641,6 +1669,11 @@ void DecLib::xActivateParameterSets( const InputNALUnit nalu )
                    sps->getMaxCUWidth(), sps->getMaxCUHeight(),
                    log2SaoOffsetScaleLuma, log2SaoOffsetScaleChroma );
+#if JVET_W0066_CCSAO
+    pSlice->m_ccSaoControl[COMPONENT_Y ] = m_cSAO.getCcSaoControlIdc(COMPONENT_Y);
+    pSlice->m_ccSaoControl[COMPONENT_Cb] = m_cSAO.getCcSaoControlIdc(COMPONENT_Cb);
+    pSlice->m_ccSaoControl[COMPONENT_Cr] = m_cSAO.getCcSaoControlIdc(COMPONENT_Cr);
     m_cIntraPred.init( sps->getChromaFormatIdc(), sps->getBitDepth( CHANNEL_TYPE_LUMA ) );
@@ -2131,6 +2164,9 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
   m_HLSReader.setBitstream( &nalu.getBitstream() );
+#if JVET_W0066_CCSAO
+  m_apcSlicePilot->m_ccSaoComParam = m_cSAO.getCcSaoComParam();
   m_apcSlicePilot->m_ccAlfFilterParam = m_cALF.getCcAlfFilterParam();
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index 90002f86004ac2cf251621915f2061884bba74bc..59cc7257929a175cbac23316a573b991c79c5478 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -2040,6 +2040,9 @@ void HLSyntaxReader::parseSPS(SPS* pcSPS)
   READ_FLAG( uiCode, "sps_sao_enabled_flag" );                      pcSPS->setSAOEnabledFlag ( uiCode ? true : false );
+#if JVET_W0066_CCSAO
+  READ_FLAG( uiCode, "sps_ccsao_enabled_flag" );                    pcSPS->setCCSAOEnabledFlag ( uiCode ? true : false );
   READ_FLAG( uiCode, "sps_alf_enabled_flag" );                      pcSPS->setALFEnabledFlag ( uiCode ? true : false );
   if (pcSPS->getALFEnabledFlag() && pcSPS->getChromaFormatIdc() != CHROMA_400)
@@ -3713,7 +3716,18 @@ void HLSyntaxReader::parsePictureHeader( PicHeader* picHeader, ParameterSetManag
     picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, false);
+#if JVET_W0066_CCSAO
+  picHeader->setCcSaoEnabledFlag(COMPONENT_Y,  sps->getCCSAOEnabledFlag());
+  picHeader->setCcSaoEnabledFlag(COMPONENT_Cb, sps->getCCSAOEnabledFlag());
+  picHeader->setCcSaoEnabledFlag(COMPONENT_Cr, sps->getCCSAOEnabledFlag());
+  if (sps->getCCSAOEnabledFlag() && pps->getSaoInfoInPhFlag())
+  {
+    READ_FLAG(uiCode, "ph_cc_sao_y_enabled_flag");  picHeader->setCcSaoEnabledFlag(COMPONENT_Y,  uiCode != 0);
+    READ_FLAG(uiCode, "ph_cc_sao_cb_enabled_flag"); picHeader->setCcSaoEnabledFlag(COMPONENT_Cb, uiCode != 0);
+    READ_FLAG(uiCode, "ph_cc_sao_cr_enabled_flag"); picHeader->setCcSaoEnabledFlag(COMPONENT_Cr, uiCode != 0);
+  }
   // deblocking filter controls
   if (pps->getDeblockingFilterControlPresentFlag())
@@ -4662,6 +4676,9 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
+#if JVET_W0066_CCSAO
+    parseCcSao(pcSlice, picHeader, sps, pcSlice->m_ccSaoComParam);
     if (pps->getDeblockingFilterControlPresentFlag())
@@ -5065,6 +5082,9 @@ void HLSyntaxReader::parseConstraintInfo(ConstraintInfo *cinfo)
     /* loop filter */
     READ_FLAG(symbol, "gci_no_sao_constraint_flag");                     cinfo->setNoSaoConstraintFlag(symbol > 0 ? true : false);
+#if JVET_W0066_CCSAO
+    READ_FLAG(symbol, "gci_no_ccsao_constraint_flag");                   cinfo->setNoCCSaoConstraintFlag(symbol > 0 ? true : false); // CCSAO constraint flag, read between the SAO and ALF constraint flags
     READ_FLAG(symbol, "gci_no_alf_constraint_flag");                     cinfo->setNoAlfConstraintFlag(symbol > 0 ? true : false);
     READ_FLAG(symbol, "gci_no_ccalf_constraint_flag");                   cinfo->setNoCCAlfConstraintFlag(symbol > 0 ? true : false);
     READ_FLAG(symbol, "gci_no_lmcs_constraint_flag");                    cinfo->setNoLmcsConstraintFlag(symbol > 0 ? true : false);
@@ -5878,6 +5898,65 @@ bool HLSyntaxReader::xMoreRbspData()
   return (cnt>0);
+#if JVET_W0066_CCSAO
+//! Parse the CCSAO syntax into ccSaoParam.
+//! When the SPS enables CCSAO, one enable flag per component is read and mirrored into both
+//! the slice and ccSaoParam; otherwise all three components are marked disabled.
+//! For every enabled component the number of offset sets is read, and per set: the luma
+//! candidate position, the Y/U/V band counts, and one signed offset per class, where the
+//! class count is the product of the three band counts.
+//! Values that bound array writes are validated, because they come from the bitstream.
+//! \param pcSlice     slice whose per-component CCSAO enable flags are set
+//! \param picHeader   not referenced by this function
+//! \param sps         active SPS; only its CCSAO enable flag is consulted
+//! \param ccSaoParam  output structure; reset before parsing
+void HLSyntaxReader::parseCcSao( Slice* pcSlice, PicHeader* picHeader, const SPS* sps, CcSaoComParam& ccSaoParam )
+  ccSaoParam.reset();
+  uint32_t  uiCode;
+  if (sps->getCCSAOEnabledFlag())
+  {
+    READ_FLAG(uiCode, "slice_ccsao_y_enabled_flag" ); pcSlice->setCcSaoEnabledFlag(COMPONENT_Y,  uiCode); ccSaoParam.enabled[COMPONENT_Y ] = uiCode;
+    READ_FLAG(uiCode, "slice_ccsao_cb_enabled_flag"); pcSlice->setCcSaoEnabledFlag(COMPONENT_Cb, uiCode); ccSaoParam.enabled[COMPONENT_Cb] = uiCode;
+    READ_FLAG(uiCode, "slice_ccsao_cr_enabled_flag"); pcSlice->setCcSaoEnabledFlag(COMPONENT_Cr, uiCode); ccSaoParam.enabled[COMPONENT_Cr] = uiCode;
+  }
+  else
+  {
+    ccSaoParam.enabled[COMPONENT_Y ] = false;
+    ccSaoParam.enabled[COMPONENT_Cb] = false;
+    ccSaoParam.enabled[COMPONENT_Cr] = false;
+  }
+  for (int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++)
+  {
+    if (ccSaoParam.enabled[compIdx])
+    {
+      READ_UVLC(uiCode, "ccsao_set_num");
+      // ccsao_set_num is an unbounded UVLC value taken from the bitstream and is used below as
+      // the loop bound for per-set array writes, so reject out-of-range values before indexing.
+      // (Assumes the per-set tables of CcSaoComParam hold MAX_CCSAO_SET_NUM entries, which is
+      // the largest set count the encoder search produces - confirm against the struct.)
+      CHECK(uiCode >= MAX_CCSAO_SET_NUM, "ccsao_set_num exceeds the maximum number of CCSAO sets");
+      ccSaoParam.setNum[compIdx] = uiCode + 1;
+      for (int setIdx = 0; setIdx < ccSaoParam.setNum[compIdx]; setIdx++)
+      {
+        ccSaoParam.setEnabled[compIdx][setIdx] = true;
+        READ_CODE(MAX_CCSAO_CAND_POS_Y_BITS, uiCode, "ccsao_cand_pos_y"); ccSaoParam.candPos[compIdx][setIdx][COMPONENT_Y ] = uiCode;
+        READ_CODE(MAX_CCSAO_BAND_NUM_Y_BITS, uiCode, "ccsao_band_num_y"); ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Y ] = uiCode + 1;
+        READ_CODE(MAX_CCSAO_BAND_NUM_U_BITS, uiCode, "ccsao_band_num_u"); ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Cb] = uiCode + 1;
+        READ_CODE(MAX_CCSAO_BAND_NUM_V_BITS, uiCode, "ccsao_band_num_v"); ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Cr] = uiCode + 1;
+        short *offset   = ccSaoParam.offset [compIdx][setIdx];
+        int    classNum = ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Y ]
+                        * ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Cb]
+                        * ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Cr];
+        // classNum bounds the writes into offset[]; guard it in case the *_BITS widths ever
+        // allow a product larger than the table (assumed to hold MAX_CCSAO_CLASS_NUM entries).
+        CHECK(classNum > MAX_CCSAO_CLASS_NUM, "CCSAO class number exceeds MAX_CCSAO_CLASS_NUM");
+        for (int i = 0; i < classNum; i++)
+        {
+          // magnitude first; the sign flag is only present for non-zero magnitudes
+          // NOTE(review): the UVLC magnitude is stored into a short without a range check
+          READ_UVLC(uiCode, "ccsao_offset_abs"); offset[i] = uiCode;
+          if(offset[i] != 0 )
+          {
+            READ_FLAG(uiCode, "ccsao_offset_sign"); offset[i] = uiCode ? -offset[i] : offset[i];
+          }
+        }
+        DTRACE(g_trace_ctx, D_SYNTAX, "offset setIdx %d: ", setIdx);
+        for (int i = 0; i < classNum; i++)
+        {
+          DTRACE(g_trace_ctx, D_SYNTAX, "%d ", offset[i]);
+        }
+        DTRACE(g_trace_ctx, D_SYNTAX, "\n");
+      }
+    }
+  }
 int HLSyntaxReader::alfGolombDecode(const int k, const bool signed_val)
diff --git a/source/Lib/DecoderLib/VLCReader.h b/source/Lib/DecoderLib/VLCReader.h
index 9d413bad11cb544c2e706929ee3895b37873dd0b..5e055709a3fa5910142adb22666362eb775de727 100644
--- a/source/Lib/DecoderLib/VLCReader.h
+++ b/source/Lib/DecoderLib/VLCReader.h
@@ -201,6 +201,9 @@ public:
   void  decodeScalingList   ( ScalingList *scalingList, uint32_t scalingListId, bool isPredictor);
   void parseReshaper        ( SliceReshapeInfo& sliceReshaperInfo, const SPS* pcSPS, const bool isIntra );
+#if JVET_W0066_CCSAO
+  void parseCcSao           ( Slice* pcSlice, PicHeader* picHeader, const SPS* sps, CcSaoComParam& ccSaoParam ); // parses the CCSAO enable flags and per-set parameters into ccSaoParam
   int  alfGolombDecode( const int k, const bool signed_val = true );
   void alfFilter( AlfParam& alfParam, const bool isChroma, const int altIdx, int order0, int order1 );
diff --git a/source/Lib/EncoderLib/CABACWriter.cpp b/source/Lib/EncoderLib/CABACWriter.cpp
index 9e98305fc06b56f66ab07ee291ed3a7cef0633f1..2d8f20f5e7daeabefcf8d7e01f3b3582317d38d9 100644
--- a/source/Lib/EncoderLib/CABACWriter.cpp
+++ b/source/Lib/EncoderLib/CABACWriter.cpp
@@ -198,6 +198,26 @@ void CABACWriter::coding_tree_unit( CodingStructure& cs, const UnitArea& area, i
     sao( *cs.slice, ctuRsAddr );
+#if JVET_W0066_CCSAO
+  if ( !skipSao ) // CCSAO control indices are gated by the skipSao flag
+  {
+    for ( int compIdx = 0; compIdx < getNumberValidComponents( cs.pcv->chrFormat ); compIdx++ )
+    {
+      if (cs.slice->m_ccSaoComParam.enabled[compIdx]) // nothing is coded for a component whose CCSAO is off in this slice
+      {
+        const int setNum = cs.slice->m_ccSaoComParam.setNum[compIdx];
+        const int      ry = ctuRsAddr / cs.pcv->widthInCtus; // CTU row from the raster-scan address
+        const int      rx = ctuRsAddr % cs.pcv->widthInCtus; // CTU column from the raster-scan address
+        const Position lumaPos(rx * cs.pcv->maxCUWidth, ry * cs.pcv->maxCUHeight); // top-left luma sample of this CTU
+        codeCcSaoControlIdc(cs.slice->m_ccSaoControl[compIdx][ctuRsAddr], cs, ComponentID(compIdx),
+                            ctuRsAddr, cs.slice->m_ccSaoControl[compIdx], lumaPos, setNum); // passes this CTU's value plus the whole per-CTU array (needed for neighbour lookups)
+      }
+    }
+  }
   if (!skipAlf)
     for (int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++)
@@ -427,7 +447,49 @@ void CABACWriter::sao_offset_pars( const SAOOffset& ctbPars, ComponentID compID,
+#if JVET_W0066_CCSAO
+//! Code the CCSAO control index of one CTU for one colour component.
+//! The first bin (index zero / non-zero) is context coded; the context is selected from how
+//! many of the available left/above CTUs have a non-zero index, plus a per-component base
+//! (Y: 0, Cb: 3, Cr: 6). A non-zero index is then written as bypass bins in truncated-unary
+//! form: (idcVal - 1) ones, followed by a terminating zero unless idcVal equals setNum.
+//! \param idcVal      value to code, 0..setNum
+//! \param cs          coding structure used for the neighbour availability checks
+//! \param compID      colour component the index belongs to
+//! \param curIdx      raster-scan CTU address of the current CTU inside controlIdc
+//! \param controlIdc  per-CTU control indices of this component
+//! \param lumaPos     top-left luma position of the current CTU
+//! \param setNum      number of CCSAO sets signalled for this component
+void CABACWriter::codeCcSaoControlIdc(uint8_t idcVal, CodingStructure &cs, const ComponentID compID,
+                                      const int curIdx, const uint8_t *controlIdc, Position lumaPos,
+                                      const int setNum)
+  CHECK(idcVal > setNum, "Set index is too large");
+  const uint32_t curSliceIdx    = cs.slice->getIndependentSliceIdx();
+  const uint32_t curTileIdx     = cs.pps->getTileIdx( lumaPos );
+  Position       leftLumaPos    = lumaPos.offset(-(int)cs.pcv->maxCUWidth, 0);
+  // The vertical step to the CTU above is the CTU height (the original used maxCUWidth here).
+  // NOTE(review): the reader-side context derivation must look at the same neighbour; the two
+  // forms are identical whenever maxCUWidth == maxCUHeight.
+  Position       aboveLumaPos   = lumaPos.offset(0, -(int)cs.pcv->maxCUHeight);
+  bool           leftAvail      = cs.getCURestricted( leftLumaPos,  lumaPos, curSliceIdx, curTileIdx, CH_L ) ? true : false;
+  bool           aboveAvail     = cs.getCURestricted( aboveLumaPos, lumaPos, curSliceIdx, curTileIdx, CH_L ) ? true : false;
+  int            ctxt           = 0;
+  if (leftAvail)
+  {
+    ctxt += ( controlIdc[curIdx - 1]) ? 1 : 0;
+  }
+  if (aboveAvail)
+  {
+    ctxt += (controlIdc[curIdx - cs.pcv->widthInCtus]) ? 1 : 0;
+  }
+  ctxt += ( compID == COMPONENT_Y  ) ? 0 
+        : ( compID == COMPONENT_Cb ) ? 3 : 6;
+  m_BinEncoder.encodeBin( ( idcVal == 0 ) ? 0 : 1, Ctx::CcSaoControlIdc( ctxt ) ); // ON/OFF flag is context coded
+  if ( idcVal > 0 )
+  {
+    int val = (idcVal - 1);
+    while ( val )
+    {
+      m_BinEncoder.encodeBinEP( 1 );
+      val--;
+    }
+    if ( idcVal < setNum )
+    {
+      m_BinEncoder.encodeBinEP( 0 );
+    }
+  }
+  DTRACE( g_trace_ctx, D_SYNTAX, "ccSaoControlIdc() compID=%d pos=(%d,%d) ctxt=%d, setNum=%d, idcVal=%d\n", compID, lumaPos.x, lumaPos.y, ctxt, setNum, idcVal );
 //  clause
diff --git a/source/Lib/EncoderLib/CABACWriter.h b/source/Lib/EncoderLib/CABACWriter.h
index 0b55a2d36badfc6df380e1141818d84fc389b7e6..2f3113d30b29b75541c64bfe082534fff0fa06be 100644
--- a/source/Lib/EncoderLib/CABACWriter.h
+++ b/source/Lib/EncoderLib/CABACWriter.h
@@ -83,6 +83,10 @@ public:
   void        bif                      (const Slice&                   slice, const BifParams& BifParams);
   void        bif                      (const Slice& slice, const BifParams& BifParams, unsigned ctuRsAddr);
+#if JVET_W0066_CCSAO
+  void        codeCcSaoControlIdc       ( uint8_t idcVal, CodingStructure &cs, const ComponentID compID, const int curIdx,
+                                          const uint8_t *controlIdc, Position lumaPos, const int setNum ); // codes one CTU's CCSAO control index (0..setNum) for component compID
   // coding (quad)tree (clause
   void        coding_tree               ( const CodingStructure&        cs,       Partitioner&      pm,         CUCtx& cuCtx, Partitioner* pPartitionerChroma = nullptr, CUCtx* pCuCtxChroma = nullptr);
diff --git a/source/Lib/EncoderLib/EncCfg.h b/source/Lib/EncoderLib/EncCfg.h
index 84929d8a066564622400a086a33adf84a04c27a1..22bdb7b64a9c6284b0872d2ca7f248c6a4a1ea8b 100644
--- a/source/Lib/EncoderLib/EncCfg.h
+++ b/source/Lib/EncoderLib/EncCfg.h
@@ -212,6 +212,9 @@ protected:
   bool      m_noPartitionConstraintsOverrideConstraintFlag;
   bool      m_noSaoConstraintFlag;
+#if JVET_W0066_CCSAO
+  bool      m_noCCSaoConstraintFlag; // CCSAO counterpart of m_noSaoConstraintFlag; handed to ConstraintInfo in EncLib::xInitSPS
   bool      m_noAlfConstraintFlag;
   bool      m_noCCAlfConstraintFlag;
 #if JVET_S0058_GCI
@@ -481,6 +484,9 @@ protected:
   bool      m_DeblockingFilterMetric;
   bool      m_bUseSAO;
+#if JVET_W0066_CCSAO
+  bool      m_CCSAO; // CCSAO tool switch; copied into the SPS CCSAO enable flag in EncLib::xInitSPS
   bool      m_bTestSAODisableAtPictureLevel;
   double    m_saoEncodingRate;       // When non-0 SAO early picture termination is enabled for luma and chroma
   double    m_saoEncodingRateChroma; // The SAO early picture termination rate to use for chroma (when m_SaoEncodingRate is >0). If <=0, use results for luma.
@@ -949,6 +955,10 @@ public:
   void      setNoPartitionConstraintsOverrideConstraintFlag(bool val) { m_noPartitionConstraintsOverrideConstraintFlag = val; }
   bool      getNoSaoConstraintFlag() const { return m_noSaoConstraintFlag; }
   void      setNoSaoConstraintFlag(bool val) { m_noSaoConstraintFlag = val; }
+#if JVET_W0066_CCSAO
+  bool      getNoCCSaoConstraintFlag() const { return m_noCCSaoConstraintFlag; } // plain accessor for m_noCCSaoConstraintFlag
+  void      setNoCCSaoConstraintFlag(bool val) { m_noCCSaoConstraintFlag = val; } // plain mutator for m_noCCSaoConstraintFlag
   bool      getNoAlfConstraintFlag() const { return m_noAlfConstraintFlag; }
   void      setNoAlfConstraintFlag(bool val) { m_noAlfConstraintFlag = val; }
   bool      getNoCCAlfConstraintFlag() const { return m_noCCAlfConstraintFlag; }
@@ -1670,6 +1680,10 @@ public:
   bool      getSingleSlicePerSubPicFlagFlag( )                       { return m_singleSlicePerSubPicFlag;    }
   void      setUseSAO                  (bool bVal)                   { m_bUseSAO = bVal; }
   bool      getUseSAO                  ()                            { return m_bUseSAO; }
+#if JVET_W0066_CCSAO
+  void      setUseCCSAO( bool b )                                    { m_CCSAO = b; } // plain mutator for m_CCSAO
+  bool      getUseCCSAO()                                      const { return m_CCSAO; } // plain accessor for m_CCSAO
   void  setTestSAODisableAtPictureLevel (bool bVal)                  { m_bTestSAODisableAtPictureLevel = bVal; }
   bool  getTestSAODisableAtPictureLevel ( ) const                    { return m_bTestSAODisableAtPictureLevel; }
diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp
index a60c1a4e81c53c3ad124db717f36e968a79c00f3..6aab82f7dd2d6dbf5c2014b1d81a35f51af3baf3 100644
--- a/source/Lib/EncoderLib/EncGOP.cpp
+++ b/source/Lib/EncoderLib/EncGOP.cpp
@@ -3100,6 +3100,13 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
+#if JVET_W0066_CCSAO
+      if ( cs.sps->getCCSAOEnabledFlag() )
+      {
+        m_pcSAO->getCcSaoBuf().copyFrom( cs.getRecoBuf() ); // snapshot the current reconstruction into the CCSAO buffer before the SAO/BIF step below
+      }
       // We need to do this step if at least one of BIF or SAO are enabled.
       if( pcSlice->getSPS()->getSAOEnabledFlag() || pcSlice->getPPS()->getUseBIF())
@@ -3149,6 +3156,24 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
+#if JVET_W0066_CCSAO
+      if ( pcSlice->getSPS()->getCCSAOEnabledFlag() )
+      {
+        m_pcSAO->initCABACEstimator( m_pcEncLib->getCABACEncoder(), m_pcEncLib->getCtxCache(), pcSlice );
+        m_pcSAO->CCSAOProcess( cs, pcSlice->getLambdas(), m_pcCfg->getIntraPeriod() ); // derives CCSAO parameters per component and applies the filter
+        //assign CCSAO slice header
+        for (int s = 0; s < uiNumSliceSegments; s++) // every slice of the picture receives the same derived parameters and control arrays
+        {
+          pcPic->slices[s]->m_ccSaoComParam              = m_pcSAO->getCcSaoComParam();
+          pcPic->slices[s]->m_ccSaoControl[COMPONENT_Y]  = m_pcSAO->getCcSaoControlIdc(COMPONENT_Y);
+          pcPic->slices[s]->m_ccSaoControl[COMPONENT_Cb] = m_pcSAO->getCcSaoControlIdc(COMPONENT_Cb);
+          pcPic->slices[s]->m_ccSaoControl[COMPONENT_Cr] = m_pcSAO->getCcSaoControlIdc(COMPONENT_Cr);
+        }
+      }
+      m_pcSAO->jointClipSaoBifCcSao( cs ); // NOTE(review): sits outside the getCCSAOEnabledFlag() branch above — confirm it is meant to run when CCSAO is disabled
       if( pcSlice->getSPS()->getALFEnabledFlag() )
         for (int s = 0; s < uiNumSliceSegments; s++)
@@ -3522,6 +3547,11 @@ void EncGOP::compressGOP( int iPOCLast, int iNumPicRcvd, PicList& rcListPic,
             picHeader->setSaoEnabledFlag(CHANNEL_TYPE_LUMA,   pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_LUMA  ));
             picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, pcSlice->getSaoEnabledFlag(CHANNEL_TYPE_CHROMA));
+#if JVET_W0066_CCSAO
+            picHeader->setCcSaoEnabledFlag(COMPONENT_Y,  pcSlice->getCcSaoEnabledFlag(COMPONENT_Y)); // mirror the slice's per-component CCSAO flags into the picture header, as done for SAO just above
+            picHeader->setCcSaoEnabledFlag(COMPONENT_Cb, pcSlice->getCcSaoEnabledFlag(COMPONENT_Cb));
+            picHeader->setCcSaoEnabledFlag(COMPONENT_Cr, pcSlice->getCcSaoEnabledFlag(COMPONENT_Cr));
           // code ALF parameters in picture header or slice headers
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index 9850b3561a696f075d7214ba5e57896daea171e2..f912f35904f74f2091c0c1a4094e38b367b21e21 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -169,9 +169,17 @@ void EncLib::create( const int layerId )
     m_cEncALF.create(this, m_iSourceWidth, m_iSourceHeight, m_chromaFormatIDC, m_maxCUWidth, m_maxCUHeight, floorLog2(m_maxCUWidth) - m_log2MinCUSize, m_bitDepth, m_inputBitDepth);
+#if JVET_W0066_CCSAO
+  if (m_bUseSAO || m_BIF || m_CCSAO) // CCSAO-aware form of the condition on the following context line
   if (m_bUseSAO || m_BIF)
+#if JVET_W0066_CCSAO
+  if (m_bUseSAO || m_CCSAO) // CCSAO-aware form of the condition on the following context line
   if (m_bUseSAO)
     const uint32_t widthInCtus = (m_iSourceWidth + m_maxCUWidth - 1) / m_maxCUWidth;
@@ -1268,6 +1276,9 @@ void EncLib::xInitSPS( SPS& sps )
+#if JVET_W0066_CCSAO
+  cinfo->setNoCCSaoConstraintFlag(m_noCCSaoConstraintFlag); // forward the configured CCSAO constraint flag to the constraint info
 #if JVET_S0058_GCI
@@ -1474,6 +1485,9 @@ void EncLib::xInitSPS( SPS& sps )
   sps.setUseWPBiPred( m_useWeightedBiPred );
   sps.setSAOEnabledFlag( m_bUseSAO );
+#if JVET_W0066_CCSAO
+  sps.setCCSAOEnabledFlag( m_CCSAO ); // SPS CCSAO enable flag follows the encoder configuration switch
   sps.setJointCbCrEnabledFlag( m_JointCbCrMode );
   sps.setMaxTLayers( m_maxTempLayer );
   sps.setTemporalIdNestingFlag( ( m_maxTempLayer == 1 ) ? true : false );
diff --git a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
index 5458432cb74637912522e3babd3aca7c7ecb2458..ab9961388cbd0c1536a5e91c08f417ed9b1b0f73 100644
--- a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
+++ b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
@@ -56,6 +56,25 @@
 #define SAOCtx(c) SubCtx( Ctx::Sao, c )
+#if JVET_W0066_CCSAO
+#include <algorithm>
+struct SetIdxCount // pairs a set index with a count; ordered by compareSetIdxCount
+  uint8_t  setIdx;
+  uint16_t count;
+struct CtbCost // pairs a CTB position with a cost; ordered by compareCtbCost
+  int16_t pos;
+  double  cost;
+bool compareSetIdxCount(SetIdxCount a, SetIdxCount b) { return a.count > b.count; } // sort predicate: larger count first
+bool compareCtbCost(CtbCost a, CtbCost b) { return a.cost < b.cost; } // sort predicate: smaller cost first
 //! rounding with IBDI
 inline double xRoundIbdi2(int bitDepth, double x)
@@ -175,6 +194,28 @@ void EncSampleAdaptiveOffset::createEncData(bool isPreDBFSamplesUsed, uint32_t n
+#if JVET_W0066_CCSAO
+  if (m_createdEnc) // CCSAO encoder buffers already exist: do not allocate them a second time
+  {
+    return;
+  }
+  m_createdEnc = true;
+  for (int i = 0; i < MAX_CCSAO_SET_NUM; i++)
+  {
+    m_ccSaoStatData[i] = new CcSaoStatData[m_numCTUsInPic]; // one statistics entry per CTU for each set index
+  }
+  m_bestCcSaoControl = new uint8_t[m_numCTUsInPic]; // per-CTU control indices: best found so far
+  m_tempCcSaoControl = new uint8_t[m_numCTUsInPic]; // per-CTU control indices: candidate under test
+  m_initCcSaoControl = new uint8_t[m_numCTUsInPic]; // per-CTU control indices: starting point for a set count
+  for (int i = 0; i < MAX_CCSAO_SET_NUM; i++)
+  {
+    m_trainingDistortion[i] = new int64_t[m_numCTUsInPic]; // one distortion value per CTU for each set index
+  }
 void EncSampleAdaptiveOffset::destroyEncData()
@@ -199,6 +240,28 @@ void EncSampleAdaptiveOffset::destroyEncData()
     delete[] m_preDBFstatData[i];
+#if JVET_W0066_CCSAO
+  if (!m_createdEnc) // nothing was allocated by createEncData (or it was already released)
+  {
+    return;
+  }
+  m_createdEnc = false;
+  for (int i = 0; i < MAX_CCSAO_SET_NUM; i++)
+  {
+    if (m_ccSaoStatData[i]) { delete[] m_ccSaoStatData[i]; m_ccSaoStatData[i] = nullptr; } // release and null so a repeated call is harmless
+  }
+  if (m_bestCcSaoControl) { delete[] m_bestCcSaoControl; m_bestCcSaoControl = nullptr; }
+  if (m_tempCcSaoControl) { delete[] m_tempCcSaoControl; m_tempCcSaoControl = nullptr; }
+  if (m_initCcSaoControl) { delete[] m_initCcSaoControl; m_initCcSaoControl = nullptr; }
+  for (int i = 0; i < MAX_CCSAO_SET_NUM; i++)
+  {
+    if (m_trainingDistortion[i]) { delete[] m_trainingDistortion[i]; m_trainingDistortion[i] = nullptr; }
+  }
 void EncSampleAdaptiveOffset::initCABACEstimator( CABACEncoder* cabacEncoder, CtxCache* ctxCache, Slice* pcSlice )
@@ -1163,6 +1226,27 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
+#if JVET_W0066_CCSAO
+      offsetCTUnoClip(area, srcYuv, resYuv, reconParams[ctuRsAddr], cs); // CCSAO build: no-clip variant of the offsetCTU call on the context line further down
+      if (cs.pps->getUseBIF())
+      {
+        BifParams& bifParams = cs.picture->getBifParam();
+        for (auto& currCU : cs.traverseCUs(CS::getArea(cs, area, CH_L), CH_L)) // visit every luma CU of this CTU area
+        {
+          for (auto& currTU : CU::traverseTUs(currCU))
+          {
+            bool isInter = (currCU.predMode == MODE_INTER) ? true : false;
+            if (bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height)))) // BIF on for this CTU, (luma CBF set or non-inter CU), QP > 17, longer luma side < 128, and (non-inter or shorter luma side < 32)
+            {
+              bilateralFilter.bilateralFilterDiamond5x5NoClip(srcYuv, resYuv, currTU.cu->qp, cs.slice->clpRng(COMPONENT_Y), currTU); // no-clip bilateral filter on this TU
+            }
+          }
+        }
+      }
@@ -1207,6 +1291,8 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
       offsetCTU(area, srcYuv, resYuv, reconParams[ctuRsAddr], cs);
@@ -1251,6 +1337,27 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
+#if JVET_W0066_CCSAO
+        offsetCTUnoClip(area, srcYuv, resYuv, reconParams[ctuRsAddr], cs); // same no-clip SAO + BIF sequence as the earlier hunk, one nesting level deeper
+        if (cs.pps->getUseBIF())
+        {
+          BifParams& bifParams = cs.picture->getBifParam();
+          for (auto& currCU : cs.traverseCUs(CS::getArea(cs, area, CH_L), CH_L))
+          {
+            for (auto& currTU : CU::traverseTUs(currCU))
+            {
+              bool isInter = (currCU.predMode == MODE_INTER) ? true : false;
+              if (bifParams.ctuOn[ctuRsAddr] && ((TU::getCbf(currTU, COMPONENT_Y) || isInter == false) && (currTU.cu->qp > 17)) && (128 > std::max(currTU.lumaSize().width, currTU.lumaSize().height)) && ((isInter == false) || (32 > std::min(currTU.lumaSize().width, currTU.lumaSize().height)))) // identical TU eligibility test to the earlier hunk
+              {
+                bilateralFilter.bilateralFilterDiamond5x5NoClip(srcYuv, resYuv, currTU.cu->qp, cs.slice->clpRng(COMPONENT_Y), currTU);
+              }
+            }
+          }
+        }
@@ -1295,6 +1402,7 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
         offsetCTU(area, srcYuv, resYuv, reconParams[ctuRsAddr], cs);
@@ -1881,6 +1989,720 @@ void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int c
+#if JVET_W0066_CCSAO
+void EncSampleAdaptiveOffset::CCSAOProcess(CodingStructure& cs, const double* lambdas, const int intraPeriod) // picture-level CCSAO entry point: derive parameters for Y, Cb, Cr, then apply the filter
+  PelUnitBuf orgYuv = cs.getOrgBuf(); // original picture
+  PelUnitBuf dstYuv = cs.getRecoBuf(); // current reconstruction
+  PelUnitBuf srcYuv = m_ccSaoBuf.getBuf( cs.area ); // CCSAO buffer (the caller copies the reconstruction into it earlier in compressGOP)
+  srcYuv.extendBorderPel( MAX_CCSAO_FILTER_LENGTH >> 1 ); // pad the source by half the filter length
+  m_intraPeriod = intraPeriod; // read by deriveCcSao to choose the number of search stages
+  setupCcSaoLambdas(cs, lambdas);
+  if (cs.slice->getSPS()->getCCSAOEnabledFlag())
+  {
+    const TempCtx ctxStartCcSao(m_CtxCache, SubCtx(Ctx::CcSaoControlIdc, m_CABACEstimator->getCtx())); // saved copy of the CcSaoControlIdc contexts
+    m_CABACEstimator->getCtx() = SubCtx(Ctx::CcSaoControlIdc, ctxStartCcSao); deriveCcSao(cs, COMPONENT_Y,  orgYuv, srcYuv, dstYuv); // contexts are restored from the saved copy before each component's derivation
+    m_CABACEstimator->getCtx() = SubCtx(Ctx::CcSaoControlIdc, ctxStartCcSao); deriveCcSao(cs, COMPONENT_Cb, orgYuv, srcYuv, dstYuv);
+    m_CABACEstimator->getCtx() = SubCtx(Ctx::CcSaoControlIdc, ctxStartCcSao); deriveCcSao(cs, COMPONENT_Cr, orgYuv, srcYuv, dstYuv);
+    applyCcSao(cs, *cs.pcv, srcYuv, dstYuv);
+  }
+void EncSampleAdaptiveOffset::setupCcSaoLambdas(CodingStructure& cs, const double* lambdas) // copies the per-component lambdas into m_lambda; cs is not referenced
+  m_lambda[COMPONENT_Y ] = m_picWidth * m_picHeight <= 416 * 240 // pictures of at most 416x240 samples get a 4x luma lambda
+                         ? lambdas[COMPONENT_Y ] * 4.0 
+                         : lambdas[COMPONENT_Y ];
+  m_lambda[COMPONENT_Cb] = lambdas[COMPONENT_Cb]; // chroma lambdas are taken over unchanged
+  m_lambda[COMPONENT_Cr] = lambdas[COMPONENT_Cr];
+//! Derive the CCSAO parameters and per-CTU control indices for one colour component.
+//! The number of offset sets is grown from 1 to MAX_CCSAO_SET_NUM. For each set count the
+//! candidate (bandNumY, bandNumU, bandNumV, candPosY) combinations are scanned in stages of
+//! increasing class count, and every candidate is handed to deriveCcSaoRDO() together with
+//! the running best parameters and cost. The stage loop and the set loop both stop as soon
+//! as the best cost no longer decreases. If at least one CTU ends up filtered, the result is
+//! re-indexed through mapIdxToIdc and published in m_ccSaoComParam / m_ccSaoControl[compID].
+//! \param cs      coding structure of the picture
+//! \param compID  component to derive parameters for
+//! \param orgYuv  original picture
+//! \param srcYuv  CCSAO source buffer
+//! \param dstYuv  current reconstruction
+void EncSampleAdaptiveOffset::deriveCcSao(CodingStructure& cs, const ComponentID compID, const CPelUnitBuf& orgYuv, const CPelUnitBuf& srcYuv, const CPelUnitBuf& dstYuv)
+  double bestCost = 0;
+  double tempCost = 0;
+  double bestCostS[MAX_CCSAO_SET_NUM + 1] = { 0 };
+  // The stage tables are indexed 0..stageNum and stageNum is at most MAX_CCSAO_CLASS_NUM / 4,
+  // so size them from that constant instead of a literal 17 that is only right for 64 classes.
+  double bestCostG[MAX_CCSAO_CLASS_NUM / 4 + 1] = { 0 };
+  int    classNumG[MAX_CCSAO_CLASS_NUM / 4 + 1] = { 0 };
+  int    stageNum = m_intraPeriod == 1 ? MAX_CCSAO_CLASS_NUM / 4 : MAX_CCSAO_CLASS_NUM / 16;
+  // classNumG[stage] is the upper class-count limit of a stage; classNumG[0] stays 0
+  for (int stage = 1; stage <= stageNum; stage++)
+    classNumG[stage] = stage * (MAX_CCSAO_CLASS_NUM / stageNum);
+  m_bestCcSaoParam.reset();
+  memset(m_bestCcSaoControl, 0, sizeof(uint8_t) * m_numCTUsInPic);
+  for (int setNum = 1; setNum <= MAX_CCSAO_SET_NUM; setNum++)
+  {
+    if (setNum > 1)
+    {
+      // refresh the block statistics for the best parameters found with the previous set count
+      getCcSaoStatistics(cs, compID, orgYuv, srcYuv, dstYuv, m_ccSaoStatData, m_bestCcSaoParam);
+    }
+    setupInitCcSaoParam(cs, compID, setNum, m_trainingDistortion, m_ccSaoStatData, m_ccSaoStatFrame,
+                        m_initCcSaoParam, m_bestCcSaoParam, m_initCcSaoControl, m_bestCcSaoControl);
+    for (int stage = 1; stage <= stageNum; stage++)
+    {
+      // the candPosY loop condition also requires bandNumY > 1, so bandNumY == 1 is never tested
+      for (int bandNumY = 1; bandNumY <= MAX_CCSAO_BAND_NUM_Y; bandNumY++)
+      for (int bandNumU = 1; bandNumU <= MAX_CCSAO_BAND_NUM_U; bandNumU++)
+      for (int bandNumV = 1; bandNumV <= MAX_CCSAO_BAND_NUM_V; bandNumV++)
+      for (int candPosY = 0; candPosY <  MAX_CCSAO_CAND_POS_Y && bandNumY > 1; candPosY++)
+      {
+        if (bandNumY < bandNumU || bandNumY < bandNumV)
+          continue;
+        int classNum = bandNumY * bandNumU * bandNumV;
+        if (classNum > MAX_CCSAO_CLASS_NUM)
+          continue;
+        // only candidates whose class count falls inside the current stage's window
+        if (classNum <= classNumG[stage - 1] || classNum > classNumG[stage])
+          continue;
+        setupTempCcSaoParam(cs, compID, setNum, candPosY, bandNumY, bandNumU, bandNumV, m_tempCcSaoParam, m_initCcSaoParam, m_tempCcSaoControl, m_initCcSaoControl);
+        getCcSaoStatistics(cs, compID, orgYuv, srcYuv, dstYuv, m_ccSaoStatData, m_tempCcSaoParam);
+        deriveCcSaoRDO(cs, compID, m_trainingDistortion, m_ccSaoStatData, m_ccSaoStatFrame,
+                       m_bestCcSaoParam, m_tempCcSaoParam, m_bestCcSaoControl, m_tempCcSaoControl, bestCost, tempCost);
+      }
+      bestCostG[stage] = bestCost;
+      // stop adding stages once a stage brings no cost reduction
+      if (bestCostG[stage] >= bestCostG[stage - 1])
+        break;
+    }
+    bestCostS[setNum] = bestCost;
+    // stop adding sets once an extra set brings no cost reduction
+    if (bestCostS[setNum] >= bestCostS[setNum - 1])
+      break;
+  }
+  bool oneBlockFiltered = false;
+  for (int ctbIdx = 0; m_bestCcSaoParam.setNum > 0 && ctbIdx < m_numCTUsInPic; ctbIdx++)
+  {
+    if (m_bestCcSaoControl[ctbIdx])
+    {
+      oneBlockFiltered = true;
+      break;
+    }
+  }
+  m_ccSaoComParam.reset(compID);
+  memset(m_ccSaoControl[compID], 0, sizeof(uint8_t) * m_numCTUsInPic);
+  // the component is signalled as enabled only if at least one CTU uses a set
+  m_ccSaoComParam.enabled[compID] = oneBlockFiltered;
+  if (oneBlockFiltered)
+  {
+    // Re-index the sets: work from untouched copies of the parameters and control array,
+    // because the destinations (m_bestCcSaoParam / m_bestCcSaoControl) are rewritten in place.
+    CcSaoEncParam storedBestCcSaoParam = m_bestCcSaoParam;
+    memcpy(m_tempCcSaoControl, m_bestCcSaoControl, sizeof(uint8_t) * m_numCTUsInPic);
+    int setNum = 0;
+    for (int setIdx = 0; setIdx < MAX_CCSAO_SET_NUM; setIdx++)
+    {
+      uint8_t setIdc = m_bestCcSaoParam.mapIdxToIdc[setIdx];
+      if (m_bestCcSaoParam.setEnabled[setIdx])
+      {
+        // NOTE(review): setIdc - 1 is used as an index below, so an enabled set is assumed to
+        // have a non-zero mapIdxToIdc entry - confirm against deriveCcSaoRDO
+        for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++)
+        {
+          if (m_tempCcSaoControl[ctbIdx] == (setIdx + 1) )
+          {
+            m_bestCcSaoControl[ctbIdx] = setIdc;
+          }
+        }
+        m_bestCcSaoParam.candPos[setIdc - 1][COMPONENT_Y ] = storedBestCcSaoParam.candPos[setIdx][COMPONENT_Y ];
+        m_bestCcSaoParam.bandNum[setIdc - 1][COMPONENT_Y ] = storedBestCcSaoParam.bandNum[setIdx][COMPONENT_Y ];
+        m_bestCcSaoParam.bandNum[setIdc - 1][COMPONENT_Cb] = storedBestCcSaoParam.bandNum[setIdx][COMPONENT_Cb];
+        m_bestCcSaoParam.bandNum[setIdc - 1][COMPONENT_Cr] = storedBestCcSaoParam.bandNum[setIdx][COMPONENT_Cr];
+        memcpy(m_bestCcSaoParam.offset[setIdc - 1], storedBestCcSaoParam.offset[setIdx], sizeof(storedBestCcSaoParam.offset[setIdx]));
+        setNum++;
+      }
+      // after re-indexing, exactly the first setNum slots are the enabled ones
+      m_bestCcSaoParam.setEnabled[setIdx] = setIdx < m_bestCcSaoParam.setNum ? true : false;
+    }
+    CHECK(setNum != m_bestCcSaoParam.setNum, "Number of sets enabled != setNum");
+    m_ccSaoComParam.setNum [compID] = m_bestCcSaoParam.setNum;
+    for ( int setIdx = 0; setIdx < m_bestCcSaoParam.setNum; setIdx++ )
+    {
+      m_ccSaoComParam.setEnabled[compID][setIdx]               = m_bestCcSaoParam.setEnabled[setIdx];
+      m_ccSaoComParam.candPos   [compID][setIdx][COMPONENT_Y ] = m_bestCcSaoParam.candPos   [setIdx][COMPONENT_Y ];
+      m_ccSaoComParam.bandNum   [compID][setIdx][COMPONENT_Y ] = m_bestCcSaoParam.bandNum   [setIdx][COMPONENT_Y ];
+      m_ccSaoComParam.bandNum   [compID][setIdx][COMPONENT_Cb] = m_bestCcSaoParam.bandNum   [setIdx][COMPONENT_Cb];
+      m_ccSaoComParam.bandNum   [compID][setIdx][COMPONENT_Cr] = m_bestCcSaoParam.bandNum   [setIdx][COMPONENT_Cr];
+      memcpy(m_ccSaoComParam.offset[compID][setIdx], m_bestCcSaoParam.offset[setIdx], sizeof(m_bestCcSaoParam.offset[setIdx]));
+    }
+    memcpy(m_ccSaoControl[compID], m_bestCcSaoControl, sizeof(uint8_t) * m_numCTUsInPic);
+  }
+void EncSampleAdaptiveOffset::setupInitCcSaoParam(CodingStructure& cs, const ComponentID compID, const int setNum, int64_t* trainingDistortion[MAX_CCSAO_SET_NUM]
+                                                , CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM], CcSaoStatData frameStats[MAX_CCSAO_SET_NUM]
+                                                , CcSaoEncParam& initCcSaoParam, CcSaoEncParam& bestCcSaoParam
+                                                , uint8_t* initCcSaoControl, uint8_t* bestCcSaoControl) // builds the starting parameters and per-CTU control map for a search with setNum sets
+  initCcSaoParam.reset();
+  memset(initCcSaoControl, 0, sizeof(uint8_t) * m_numCTUsInPic);
+  if (setNum == 1)
+  {
+    std::fill_n(initCcSaoControl, m_numCTUsInPic, 1); // first set: every CTU starts on set 1
+    return;
+  }
+  for (int setIdx = 0; setIdx < MAX_CCSAO_SET_NUM; setIdx++)
+  {
+    if (bestCcSaoParam.setEnabled[setIdx])
+    {
+      getCcSaoFrameStats(compID, setIdx, bestCcSaoControl, blkStats, frameStats);
+      getCcSaoDistortion(compID, setIdx, blkStats, bestCcSaoParam.offset, trainingDistortion); // supplies trainingDistortion[setIdx][ctb], read below
+    }
+  }
+  initCcSaoParam = bestCcSaoParam; // start from the best parameters found so far
+  int ctbCntOn = 0;
+  CtbCost *ctbCost = new CtbCost[m_numCTUsInPic]; // scratch array, released at the end of the function
+  for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++)
+  {
+    int64_t dist = 0; // CTUs that are currently off keep a cost of 0
+    if (bestCcSaoControl[ctbIdx])
+    {
+      int setIdx = bestCcSaoControl[ctbIdx] - 1; // control values are 1-based set indices
+      dist = trainingDistortion[setIdx][ctbIdx];
+      ctbCntOn++;
+    }
+    ctbCost[ctbIdx].pos = ctbIdx;
+    ctbCost[ctbIdx].cost = (double)dist;
+  }
+  std::stable_sort(ctbCost, ctbCost + m_numCTUsInPic, compareCtbCost); // ascending cost; ties keep raster order
+  for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++)
+  {
+    int ctbPos = ctbCost[ctbIdx].pos;
+    if (ctbIdx < ctbCntOn) // the first ctbCntOn entries of the sorted list stay on
+    {
+      if (ctbIdx * 2 > ctbCntOn) // sorted ranks above ctbCntOn / 2 are moved to the new set
+      {
+        initCcSaoControl[ctbPos] = setNum;
+      }
+      else
+      {
+        initCcSaoControl[ctbPos] = bestCcSaoControl[ctbPos]; // lower ranks keep their current set
+      }
+    }
+    else
+    {
+      initCcSaoControl[ctbPos] = 0; // everything else starts off
+    }
+  }
+  delete[] ctbCost;
+  ctbCost = nullptr;
+//! Build the candidate parameter set and control map that is tested next.
+//! Starts from the initial parameters / control map, then enables set number setNum
+//! (stored at index setNum - 1) with the given luma candidate position and band counts,
+//! and sets mapIdxToIdc to the identity mapping (index i -> i + 1) for the first setNum
+//! entries, followed by a 0 entry at index setNum.
+//! \param cs, compID                     not referenced by this function
+//! \param setNum                         number of sets in the candidate (1-based)
+//! \param candPosY                       luma candidate position for the new set
+//! \param bandNumY, bandNumU, bandNumV   band counts for the new set
+//! \param tempCcSaoParam, tempCcSaoControl   outputs
+//! \param initCcSaoParam, initCcSaoControl   starting point that is copied
+void EncSampleAdaptiveOffset::setupTempCcSaoParam(CodingStructure& cs, const ComponentID compID, const int setNum
+                                                , const int candPosY, const int bandNumY, const int bandNumU, const int bandNumV
+                                                , CcSaoEncParam& tempCcSaoParam, CcSaoEncParam& initCcSaoParam
+                                                , uint8_t* tempCcSaoControl, uint8_t* initCcSaoControl)
+  tempCcSaoParam.reset();
+  memset(tempCcSaoControl, 0, sizeof(uint8_t) * m_numCTUsInPic);
+  tempCcSaoParam = initCcSaoParam;
+  memcpy(tempCcSaoControl, initCcSaoControl, sizeof(uint8_t) * m_numCTUsInPic);
+  tempCcSaoParam.setNum = setNum;
+  tempCcSaoParam.setEnabled[setNum - 1] = true;
+  tempCcSaoParam.candPos   [setNum - 1][COMPONENT_Y ] = candPosY;
+  tempCcSaoParam.bandNum   [setNum - 1][COMPONENT_Y ] = bandNumY;
+  tempCcSaoParam.bandNum   [setNum - 1][COMPONENT_Cb] = bandNumU;
+  tempCcSaoParam.bandNum   [setNum - 1][COMPONENT_Cr] = bandNumV;
+  // NOTE(review): the loop writes index setNum as well, so mapIdxToIdc needs at least
+  // MAX_CCSAO_SET_NUM + 1 entries - confirm against the CcSaoEncParam declaration
+  for (int setIdx = 0; setIdx <= setNum; setIdx++)
+  {
+    tempCcSaoParam.mapIdxToIdc[setIdx] = setIdx < setNum ? setIdx + 1 : 0;
+  }
+void EncSampleAdaptiveOffset::getCcSaoStatistics(CodingStructure& cs, const ComponentID compID
+                                               , const CPelUnitBuf& orgYuv, const CPelUnitBuf& srcYuv, const CPelUnitBuf& dstYuv
+                                               , CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM], const CcSaoEncParam& ccSaoParam) // collects per-CTU statistics of component compID for every enabled set of ccSaoParam
+  bool isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail;
+  const PreCalcValues& pcv = *cs.pcv;
+  int ctuRsAddr = 0; // raster-scan CTU address, advanced once per inner-loop iteration
+  for( uint32_t yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUHeight )
+  {
+    for( uint32_t xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUWidth )
+    {
+      const uint32_t width  = (xPos + pcv.maxCUWidth  > pcv.lumaWidth)  ? (pcv.lumaWidth - xPos)  : pcv.maxCUWidth; // CTUs on the right picture edge are clipped to the picture
+      const uint32_t height = (yPos + pcv.maxCUHeight > pcv.lumaHeight) ? (pcv.lumaHeight - yPos) : pcv.maxCUHeight; // CTUs on the bottom picture edge are clipped to the picture
+      const UnitArea area( cs.area.chromaFormat, Area(xPos , yPos, width, height) );
+      deriveLoopFilterBoundaryAvailibility(cs, area.Y(), isLeftAvail, isAboveAvail, isAboveLeftAvail );
+      //NOTE: The number of skipped lines during gathering CTU statistics depends on the slice boundary availabilities.
+      //For simplicity, here only picture boundaries are considered.
+      isRightAvail      = (xPos + pcv.maxCUWidth  < pcv.lumaWidth );
+      isBelowAvail      = (yPos + pcv.maxCUHeight < pcv.lumaHeight);
+      isAboveRightAvail = ((yPos > 0) && (isRightAvail));
+      for (int setIdx = 0; setIdx < MAX_CCSAO_SET_NUM; setIdx++)
+      {
+        blkStats[setIdx][ctuRsAddr].reset(); // statistics are cleared for every set, including disabled ones
+        if (!ccSaoParam.setEnabled[setIdx])
+          continue;
+        const CompArea   &compArea   = area.block(compID);
+        const int         srcStrideY = srcYuv.get(COMPONENT_Y ).stride;
+        const int         srcStrideU = srcYuv.get(COMPONENT_Cb).stride;
+        const int         srcStrideV = srcYuv.get(COMPONENT_Cr).stride;
+        const int         dstStride  = dstYuv.get(compID      ).stride;
+        const int         orgStride  = orgYuv.get(compID      ).stride;
+        const Pel        *srcBlkY    = srcYuv.get(COMPONENT_Y ).bufAt(area.block(COMPONENT_Y )); // all three source planes are passed on, whichever component is being processed
+        const Pel        *srcBlkU    = srcYuv.get(COMPONENT_Cb).bufAt(area.block(COMPONENT_Cb));
+        const Pel        *srcBlkV    = srcYuv.get(COMPONENT_Cr).bufAt(area.block(COMPONENT_Cr));
+        const Pel        *dstBlk     = dstYuv.get(compID      ).bufAt(compArea);
+        const Pel        *orgBlk     = orgYuv.get(compID      ).bufAt(compArea);
+        const uint16_t    candPosY   = ccSaoParam.candPos[setIdx][COMPONENT_Y ];
+        const uint16_t    bandNumY   = ccSaoParam.bandNum[setIdx][COMPONENT_Y ];
+        const uint16_t    bandNumU   = ccSaoParam.bandNum[setIdx][COMPONENT_Cb];
+        const uint16_t    bandNumV   = ccSaoParam.bandNum[setIdx][COMPONENT_Cr];
+        getCcSaoBlkStats(compID, cs.area.chromaFormat, cs.sps->getBitDepth(toChannelType(compID))
+                       , setIdx, blkStats, ctuRsAddr
+                       , candPosY, bandNumY, bandNumU, bandNumV
+                       , srcBlkY, srcBlkU, srcBlkV, orgBlk, dstBlk
+                       , srcStrideY, srcStrideU, srcStrideV, orgStride, dstStride, compArea.width, compArea.height
+                       , isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail
+                       );
+      }
+      ctuRsAddr++;
+    }
+  }
+void EncSampleAdaptiveOffset::getCcSaoBlkStats(const ComponentID compID, const ChromaFormat chromaFormat, const int bitDepth // Adds, per class, the sum of (org - dst) and the sample count of one block into blkStats[setIdx][ctuRsAddr]
+                                             , const int setIdx, CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM], const int ctuRsAddr // setIdx / ctuRsAddr select the CcSaoStatData entry that is updated
+                                             , const uint16_t candPosY // index into g_ccSaoCandPosX / g_ccSaoCandPosY for the luma sample used in classification
+                                             , const uint16_t bandNumY, const uint16_t bandNumU, const uint16_t bandNumV // number of bands each of the Y/U/V samples is quantised into
+                                             , const Pel* srcY, const Pel* srcU, const Pel* srcV, const Pel* org, const Pel* dst // srcY/srcU/srcV drive the classification; org and dst supply the accumulated difference
+                                             , const int srcStrideY, const int srcStrideU, const int srcStrideV, const int orgStride, const int dstStride
+                                             , const int width, const int height // loop extent, counted in samples of the org/dst plane
+                                             , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail // NOTE(review): none of these flags, nor chromaFormat, is referenced in the body below
+                                             )
+  const int candPosYX = g_ccSaoCandPosX[COMPONENT_Y][candPosY]; // horizontal displacement of the luma candidate position
+  const int candPosYY = g_ccSaoCandPosY[COMPONENT_Y][candPosY]; // vertical displacement of the luma candidate position
+  switch(compID)
+  {
+  case COMPONENT_Y: // luma target: width/height count luma samples; chroma is addressed at half resolution in both directions
+    {
+      for (int y = 0; y < height; y++)
+      {
+        for (int x = 0; x < width; x++)
+        {
+          const Pel *colY = srcY +  x  + srcStrideY * candPosYY + candPosYX; // luma sample at the displaced candidate position
+          const Pel *colU = srcU + (x >> 1); // two luma columns share one chroma column
+          const Pel *colV = srcV + (x >> 1);
+          const int bandY    = (*colY * bandNumY) >> bitDepth; // band index = (sample * bandNum) >> bitDepth
+          const int bandU    = (*colU * bandNumU) >> bitDepth;
+          const int bandV    = (*colV * bandNumV) >> bitDepth;
+          const int bandIdx  = bandY * bandNumU * bandNumV // the three band indices are combined into one index: Y-major, then U, then V
+                             + bandU * bandNumV
+                             + bandV;
+          const int classIdx = bandIdx;
+          blkStats[setIdx][ctuRsAddr].diff [classIdx] += org[x] - dst[x]; // accumulate the org-minus-dst difference for this class
+          blkStats[setIdx][ctuRsAddr].count[classIdx]++;
+        }
+        srcY += srcStrideY;
+        srcU += srcStrideU * (y & 0x1); // chroma row pointer advances only after an odd luma row, i.e. once per two luma rows
+        srcV += srcStrideV * (y & 0x1);
+        org  += orgStride;
+        dst  += dstStride;
+      }
+    }
+    break;
+  case COMPONENT_Cb: // chroma targets: width/height count chroma samples; luma is addressed at double resolution in both directions
+  case COMPONENT_Cr:
+    {
+      for (int y = 0; y < height; y++)
+      {
+        for (int x = 0; x < width; x++)
+        {
+          const Pel *colY = srcY + (x << 1) + srcStrideY * candPosYY + candPosYX; // chroma column x maps to luma column 2x, then the candidate displacement is applied
+          const Pel *colU = srcU + x;
+          const Pel *colV = srcV + x;
+          const int bandY    = (*colY * bandNumY) >> bitDepth; // same band / class derivation as in the luma case
+          const int bandU    = (*colU * bandNumU) >> bitDepth;
+          const int bandV    = (*colV * bandNumV) >> bitDepth;
+          const int bandIdx  = bandY * bandNumU * bandNumV
+                             + bandU * bandNumV
+                             + bandV;
+          const int classIdx = bandIdx;
+          blkStats[setIdx][ctuRsAddr].diff [classIdx] += org[x] - dst[x];
+          blkStats[setIdx][ctuRsAddr].count[classIdx]++;
+        }
+        srcY += srcStrideY << 1; // two luma rows per chroma row
+        srcU += srcStrideU;
+        srcV += srcStrideV;
+        org  += orgStride;
+        dst  += dstStride;
+      }
+    }
+    break;
+  default:
+    {
+      THROW("Not a supported CCSAO compID\n"); // any other component identifier is rejected
+    }
+  }
+void EncSampleAdaptiveOffset::getCcSaoFrameStats(const ComponentID compID, const int setIdx, const uint8_t* ccSaoControl
+                                               , CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM], CcSaoStatData frameStats[MAX_CCSAO_SET_NUM])
+  frameStats[setIdx].reset();
+  int setIdc = setIdx + 1;
+  for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++)
+  {
+    if (ccSaoControl[ctbIdx] == setIdc)
+    {
+      frameStats[setIdx] += blkStats[setIdx][ctbIdx];
+    }
+  }
+inline int EncSampleAdaptiveOffset::estCcSaoIterOffset(const double lambda, const int offsetInput, const int64_t count, const int64_t diffSum, const int shift, const int bitIncrease, int64_t& bestDist, double& bestCost, const int offsetTh)
+  int iterOffset, tempOffset;
+  int64_t tempDist, tempRate;
+  double tempCost, tempMinCost;
+  int offsetOutput = 0;
+  iterOffset = offsetInput;
+  // Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here.
+  tempMinCost = lambda;
+  while (iterOffset != 0)
+  {
+    // Calculate the bits required for signaling the offset
+    tempRate = lengthUvlc(abs(iterOffset)) + (iterOffset == 0 ? 0 : 1);
+    // Do the dequantization before distortion calculation
+    tempOffset = iterOffset << bitIncrease;
+    tempDist = estSaoDist(count, tempOffset, diffSum, shift);
+    tempCost = ((double)tempDist + lambda * (double)tempRate);
+    if (tempCost < tempMinCost)
+    {
+      tempMinCost = tempCost;
+      offsetOutput = iterOffset;
+      bestDist = tempDist;
+      bestCost = tempCost;
+    }
+    iterOffset = (iterOffset > 0) ? (iterOffset - 1) : (iterOffset + 1);
+  }
+  return offsetOutput;
+void EncSampleAdaptiveOffset::deriveCcSaoOffsets(const ComponentID compID, const int bitDepth, const int setIdx
+                                               , CcSaoStatData frameStats[MAX_CCSAO_SET_NUM]
+                                               , short offset[MAX_CCSAO_SET_NUM][MAX_CCSAO_CLASS_NUM])
+  int quantOffsets[MAX_CCSAO_CLASS_NUM] = { 0 };
+  for(int k = 0; k < MAX_CCSAO_CLASS_NUM; k++)
+  {
+    if(frameStats[setIdx].count[k] == 0)
+      continue;
+    quantOffsets[k] =
+      (int) xRoundIbdi(bitDepth, (double)(frameStats[setIdx].diff [k] << DISTORTION_PRECISION_ADJUSTMENT(bitDepth))
+                               / (double)(frameStats[setIdx].count[k]));
+    quantOffsets[k] = Clip3(-MAX_CCSAO_OFFSET_THR, MAX_CCSAO_OFFSET_THR, quantOffsets[k]);
+  }
+  int64_t dist[MAX_CCSAO_CLASS_NUM] = { 0 };
+  double  cost[MAX_CCSAO_CLASS_NUM] = { 0 };
+  for (int k = 0; k < MAX_CCSAO_CLASS_NUM; k++)
+  {
+    cost[k] = m_lambda[compID];
+    if (quantOffsets[k] != 0)
+    {
+      quantOffsets[k] = estCcSaoIterOffset(m_lambda[compID], quantOffsets[k], frameStats[setIdx].count[k], frameStats[setIdx].diff[k], 0, 0, dist[k], cost[k], MAX_CCSAO_OFFSET_THR);
+    }
+  }
+  for (int k = 0; k < MAX_CCSAO_CLASS_NUM; k++)
+  {
+    CHECK(quantOffsets[k] < -MAX_CCSAO_OFFSET_THR || quantOffsets[k] > MAX_CCSAO_OFFSET_THR, "Exceeded valid range for CCSAO offset");
+    offset[setIdx][k] = quantOffsets[k];
+  }
+void EncSampleAdaptiveOffset::getCcSaoDistortion(const ComponentID compID, const int setIdx, CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM]
+                                               , short offset[MAX_CCSAO_SET_NUM][MAX_CCSAO_CLASS_NUM]
+                                               , int64_t* trainingDistortion[MAX_CCSAO_SET_NUM])
+  ::memset(trainingDistortion[setIdx], 0, sizeof(int64_t) * m_numCTUsInPic);
+  for (int ctbIdx = 0; ctbIdx < m_numCTUsInPic; ctbIdx++)
+  {
+    for (int k = 0; k < MAX_CCSAO_CLASS_NUM; k++)
+    {
+      trainingDistortion[setIdx][ctbIdx]
+        += estSaoDist(blkStats[setIdx][ctbIdx].count[k], offset[setIdx][k], blkStats[setIdx][ctbIdx].diff[k], 0);
+    }
+  }
+void EncSampleAdaptiveOffset::determineCcSaoControlIdc(CodingStructure& cs, const ComponentID compID  // Picks, per CTU, the CCSAO set (or none) with the lowest rate-distortion cost, then renumbers the used sets and re-estimates the control signalling rate
+                                                     , const int ctuWidthC, const int ctuHeightC, const int picWidthC, const int picHeightC // CTU step and picture extent used by the CTU raster loops
+                                                     , CcSaoEncParam& ccSaoParam, uint8_t* ccSaoControl // in/out: setEnabled, setNum and mapIdxToIdc are rewritten; ccSaoControl[ctb] receives 0 or setIdx + 1
+                                                     , int64_t* trainingDistorsion[MAX_CCSAO_SET_NUM] // [setIdx][ctbIdx] distortion term used when that set is chosen for that CTU
+                                                     , int64_t& curTotalDist, double& curTotalRate) // in/out: curTotalDist grows by the chosen distortions; curTotalRate ends as its entry value plus the rate measured in the second pass
+  bool setEnabled[MAX_CCSAO_SET_NUM]; // sets that win at least one CTU in the first pass
+  std::fill_n(setEnabled, MAX_CCSAO_SET_NUM, false);
+  SetIdxCount setIdxCount[MAX_CCSAO_SET_NUM]; // per-set usage counters, sorted further down
+  for (int i = 0; i < MAX_CCSAO_SET_NUM; i++)
+  {
+    setIdxCount[i].setIdx = i;
+    setIdxCount[i].count  = 0;
+  }
+  double prevRate = curTotalRate; // entry value; the per-CTU rates added in the first pass are discarded later
+  TempCtx ctxInitial(m_CtxCache);
+  TempCtx ctxBest(m_CtxCache);
+  TempCtx ctxStart(m_CtxCache);
+  ctxInitial = SubCtx(Ctx::CcSaoControlIdc, m_CABACEstimator->getCtx()); // snapshot of the control-idc contexts on entry
+  ctxBest    = SubCtx(Ctx::CcSaoControlIdc, m_CABACEstimator->getCtx());
+  int ctbIdx = 0;
+  for (int yCtb = 0; yCtb < picHeightC; yCtb += ctuHeightC) // first pass: choose a control value for every CTU in raster order
+  {
+    for (int xCtb = 0; xCtb < picWidthC; xCtb += ctuWidthC)
+    {
+      int64_t  bestDist   = MAX_INT;
+      double   bestRate   = MAX_DOUBLE;
+      double   bestCost   = MAX_DOUBLE;
+      uint8_t  bestSetIdc = 0;
+      uint8_t  bestSetIdx = 0;
+      m_CABACEstimator->getCtx() = ctxBest; // continue from the contexts left by the previous CTU's winning choice
+      ctxStart                   = SubCtx(Ctx::CcSaoControlIdc, m_CABACEstimator->getCtx());
+      for (int setIdx = 0; setIdx <= MAX_CCSAO_SET_NUM; setIdx++) // setIdx == MAX_CCSAO_SET_NUM is the extra candidate with zero distortion and control value 0
+      {
+        if (setIdx < MAX_CCSAO_SET_NUM && !ccSaoParam.setEnabled[setIdx]) // disabled sets are not candidates
+          continue;
+        uint8_t setIdc = ccSaoParam.mapIdxToIdc[setIdx]; // value that is coded for this candidate; NOTE(review): entry [MAX_CCSAO_SET_NUM] is read here but never written in this function - presumably 0, otherwise bestSetIdx below could index past setEnabled / setIdxCount; confirm
+        m_CABACEstimator->getCtx() = ctxStart; // every candidate is estimated from the same starting contexts
+        m_CABACEstimator->resetBits();
+        const Position lumaPos = Position({ xCtb << getComponentScaleX(compID, cs.pcv->chrFormat),
+                                            yCtb << getComponentScaleY(compID, cs.pcv->chrFormat) });
+        m_CABACEstimator->codeCcSaoControlIdc(setIdc, cs, compID, ctbIdx, ccSaoControl, lumaPos, ccSaoParam.setNum);
+        int64_t dist = setIdx == MAX_CCSAO_SET_NUM ? 0 : trainingDistorsion[setIdx][ctbIdx];
+        double  rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
+        double  cost = rate * m_lambda[compID] + dist;
+        if (cost < bestCost)
+        {
+          bestCost   = cost;
+          bestRate   = rate;
+          bestDist   = dist;
+          bestSetIdc = setIdc;
+          bestSetIdx = setIdx;
+          ctxBest = SubCtx(Ctx::CcSaoControlIdc, m_CABACEstimator->getCtx()); // keep the contexts that result from coding the current winner
+          ccSaoControl[ctbIdx] = setIdx == MAX_CCSAO_SET_NUM ? 0 : setIdx + 1; // stored as set index plus one; 0 for the extra candidate
+        }
+      }
+      if (bestSetIdc != 0) // usage is recorded only when a non-zero idc won
+      {
+        setEnabled [bestSetIdx] = true;
+        setIdxCount[bestSetIdx].count++;
+      }
+      curTotalRate += bestRate;
+      curTotalDist += bestDist;
+      ctbIdx++;
+    }
+  }
+  std::copy_n(setEnabled, MAX_CCSAO_SET_NUM, ccSaoParam.setEnabled); // sets that no CTU selected become disabled
+  std::stable_sort(setIdxCount, setIdxCount + MAX_CCSAO_SET_NUM, compareSetIdxCount); // ordering is defined by compareSetIdxCount (not visible here); ties keep their original order
+  int setIdc = 1; // idc values are handed out from 1 upwards, in sorted order, to enabled sets only
+  ccSaoParam.setNum = 0;
+  for (SetIdxCount &s : setIdxCount)
+  {
+    int setIdx = s.setIdx;
+    if (ccSaoParam.setEnabled[setIdx])
+    {
+      ccSaoParam.mapIdxToIdc[setIdx] = setIdc;
+      ccSaoParam.setNum++;
+      setIdc++;
+    }
+  }
+  curTotalRate = prevRate; // drop the first-pass rate estimates; they were made with the old idc mapping and set count
+  m_CABACEstimator->getCtx() = ctxInitial; // second pass starts again from the entry contexts
+  m_CABACEstimator->resetBits();
+  ctbIdx = 0;
+  for (int yCtb = 0; yCtb < picHeightC; yCtb += ctuHeightC) // second pass: code every chosen control value with the final mapping
+  {
+    for (int xCtb = 0; xCtb < picWidthC; xCtb += ctuWidthC)
+    {
+      const int setIdxPlus1 = ccSaoControl[ctbIdx];
+      const Position lumaPos = Position({ xCtb << getComponentScaleX(compID, cs.pcv->chrFormat),  // CTU position scaled by the component's subsampling shifts
+                                          yCtb << getComponentScaleY(compID, cs.pcv->chrFormat) });
+      m_CABACEstimator->codeCcSaoControlIdc(setIdxPlus1 == 0 ? 0 : ccSaoParam.mapIdxToIdc[setIdxPlus1 - 1], // stored setIdx + 1 is translated to the newly assigned idc; 0 stays 0
+                                            cs, compID, ctbIdx, ccSaoControl, lumaPos, ccSaoParam.setNum);
+      ctbIdx++;
+    }
+  }
+  curTotalRate += FRAC_BITS_SCALE*m_CABACEstimator->getEstFracBits(); // rate of all control values accumulated since the resetBits() above
+  // restore for next iteration
+  m_CABACEstimator->getCtx() = ctxInitial;
+// Returns the number of bits of the unsigned Exp-Golomb code for uiCode,
+// i.e. 2 * floor(log2(uiCode + 1)) + 1.
+// uiCode must be non-negative. A negative value fails the CHECK; previously any value
+// below -1 made the loop below spin forever, because an arithmetic right shift of a
+// negative number never reaches 1.
+int EncSampleAdaptiveOffset::lengthUvlc(int uiCode)
+  CHECK(uiCode < 0, "lengthUvlc: code value must be non-negative");
+  // Do the "+1" in unsigned arithmetic: INT_MAX + 1 would be signed overflow, while the
+  // unsigned sum is at most 2^31 and therefore never wraps to zero.
+  uint32_t uiTemp = static_cast<uint32_t>(uiCode) + 1u;
+  int uiLength = 1;
+  while (1 != uiTemp)
+  {
+    uiTemp >>= 1;
+    uiLength += 2;
+  }
+  // Take care of cases where uiLength > 32
+  return (uiLength >> 1) + ((uiLength + 1) >> 1);
+int EncSampleAdaptiveOffset::getCcSaoParamRate(const ComponentID compID, const CcSaoEncParam& ccSaoParam)
+  int bits = 0;
+  if (ccSaoParam.setNum > 0 )
+  {
+    bits += lengthUvlc(ccSaoParam.setNum - 1);
+    int signaledSetNum = 0;
+    for (int setIdx = 0; setIdx < MAX_CCSAO_SET_NUM; setIdx++)
+    {
+      if (ccSaoParam.setEnabled[setIdx])
+      {
+        bits += MAX_CCSAO_CAND_POS_Y_BITS;
+        bits += MAX_CCSAO_BAND_NUM_Y_BITS;
+        bits += MAX_CCSAO_BAND_NUM_U_BITS;
+        bits += MAX_CCSAO_BAND_NUM_V_BITS;
+        int classNum = ccSaoParam.bandNum[setIdx][COMPONENT_Y ]
+                     * ccSaoParam.bandNum[setIdx][COMPONENT_Cb]
+                     * ccSaoParam.bandNum[setIdx][COMPONENT_Cr];
+        for (int i = 0; i < classNum; i++)
+        {
+          bits += lengthUvlc(abs(ccSaoParam.offset[setIdx][i])) + (ccSaoParam.offset[setIdx][i] == 0 ? 0 : 1);
+        }
+        signaledSetNum++;
+      }
+    }
+    CHECK(signaledSetNum != ccSaoParam.setNum, "Number of sets signaled not the same as indicated");
+  }
+  return bits;
+void EncSampleAdaptiveOffset::deriveCcSaoRDO(CodingStructure& cs, const ComponentID compID, int64_t* trainingDistortion[MAX_CCSAO_SET_NUM] // Alternates between re-deriving offsets and re-selecting the per-CTU set for one component, keeping the cheapest result in the best* outputs
+                                           , CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM], CcSaoStatData frameStats[MAX_CCSAO_SET_NUM] // blkStats: per-CTU statistics read here; frameStats: scratch that is rebuilt every iteration
+                                           , CcSaoEncParam& bestCcSaoParam, CcSaoEncParam& tempCcSaoParam // temp*: working candidate, modified in place; best*: overwritten whenever the candidate cost drops below bestCost
+                                           , uint8_t* bestCcSaoControl, uint8_t* tempCcSaoControl
+                                           , double& bestCost, double& tempCost) // in/out: bestCost is lowered on improvement; tempCost holds the cost of the last evaluated candidate
+  const int scaleX          = getComponentScaleX(compID, cs.pcv->chrFormat);
+  const int scaleY          = getComponentScaleY(compID, cs.pcv->chrFormat);
+  const int ctuWidthC       = cs.pcv->maxCUWidth  >> scaleX; // CTU and picture dimensions scaled to the component's plane
+  const int ctuHeightC      = cs.pcv->maxCUHeight >> scaleY;
+  const int picWidthC       = cs.pcv->lumaWidth   >> scaleX;
+  const int picHeightC      = cs.pcv->lumaHeight  >> scaleY;
+  const int maxTrainingIter = 15; // the loop stops once trainingIter exceeds this value
+  const TempCtx ctxStartCcSaoControlFlag  ( m_CtxCache, SubCtx( Ctx::CcSaoControlIdc, m_CABACEstimator->getCtx() ) ); // context snapshot restored before every control-idc decision
+  int    trainingIter = 0;
+  bool   keepTraining = true;
+  bool   improved = false;
+  double prevCost = MAX_DOUBLE; // lowest candidate cost seen so far in this call
+  while (keepTraining)
+  {
+    improved = false;
+    for (int setIdx = 0; setIdx < MAX_CCSAO_SET_NUM; setIdx++)
+    {
+      if (tempCcSaoParam.setEnabled[setIdx]) // only sets that are currently enabled are retrained
+      {
+        getCcSaoFrameStats(compID, setIdx, tempCcSaoControl, blkStats, frameStats); // statistics of the CTUs currently assigned to this set
+        deriveCcSaoOffsets(compID, cs.sps->getBitDepth(toChannelType(compID)), setIdx, frameStats, tempCcSaoParam.offset); // new offsets from those statistics
+        getCcSaoDistortion(compID, setIdx, blkStats, tempCcSaoParam.offset, trainingDistortion); // per-CTU distortion of the new offsets, for all CTUs
+      }
+    }
+    m_CABACEstimator->getCtx() = ctxStartCcSaoControlFlag;
+    int64_t curTotalDist = 0;
+    double  curTotalRate = 0;
+    determineCcSaoControlIdc(cs, compID, ctuWidthC, ctuHeightC, picWidthC, picHeightC, // re-selects the set per CTU; may disable sets and updates setNum
+                             tempCcSaoParam, tempCcSaoControl, trainingDistortion,
+                             curTotalDist, curTotalRate);
+    if (tempCcSaoParam.setNum > 0) // a candidate with no set in use is neither costed nor stored
+    {
+      curTotalRate += getCcSaoParamRate(compID, tempCcSaoParam); // add the parameter signalling bits to the control rate
+      tempCost = curTotalRate * m_lambda[compID] + curTotalDist;
+      if (tempCost < prevCost)
+      {
+        prevCost = tempCost;
+        improved = true;
+      }
+      if (tempCost < bestCost)
+      {
+        bestCost = tempCost;
+        bestCcSaoParam = tempCcSaoParam;
+        memcpy(bestCcSaoControl, tempCcSaoControl, sizeof(uint8_t) * m_numCTUsInPic);
+      }
+    }
+    trainingIter++;
+    if (!improved || trainingIter > maxTrainingIter) // stop on the first iteration without improvement, or when the iteration cap is passed
+    {
+      keepTraining = false;
+    }
+  }
 void EncSampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure& cs, const Position &pos, bool& isLeftAvail, bool& isAboveAvail, bool& isAboveLeftAvail) const
   bool isLoopFiltAcrossSlicePPS = cs.pps->getLoopFilterAcrossSlicesEnabledFlag();
diff --git a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h
index 686592bf8797a71d55802fd5894cd8b6c7ba0c2a..9f6e04c8e08f30ebb0f2afaf8c61f41126583a22 100644
--- a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h
+++ b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h
@@ -79,6 +79,69 @@ struct SAOStatData //data structure for SAO statistics
+#if JVET_W0066_CCSAO
+// Per-class CCSAO statistics: an accumulated difference and a sample count for each class.
+struct CcSaoStatData
+  int64_t  diff [MAX_CCSAO_CLASS_NUM];   // accumulated difference per class
+  uint32_t count[MAX_CCSAO_CLASS_NUM];   // number of accumulated samples per class
+  CcSaoStatData(){}
+  ~CcSaoStatData(){}
+  // Sets every accumulator and counter to zero.
+  void reset()
+  {
+    ::memset(count, 0, sizeof(count));
+    ::memset(diff,  0, sizeof(diff));
+  }
+  // Copies all accumulators and counters from src.
+  const CcSaoStatData& operator=(const CcSaoStatData& src)
+  {
+    for (int cls = 0; cls < MAX_CCSAO_CLASS_NUM; ++cls)
+    {
+      diff [cls] = src.diff [cls];
+      count[cls] = src.count[cls];
+    }
+    return *this;
+  }
+  // Adds the accumulators and counters of src class by class.
+  const CcSaoStatData& operator+= (const CcSaoStatData& src)
+  {
+    for (int cls = 0; cls != MAX_CCSAO_CLASS_NUM; ++cls)
+    {
+      count[cls] += src.count[cls];
+      diff [cls] += src.diff [cls];
+    }
+    return *this;
+  }
+// Encoder-side CCSAO parameters used during the parameter search.
+struct CcSaoEncParam
+  uint8_t  setNum;                                            // number of sets in use
+  bool     setEnabled [MAX_CCSAO_SET_NUM];                    // per-set enable flag
+  uint16_t candPos    [MAX_CCSAO_SET_NUM][MAX_NUM_LUMA_COMP]; // per-set candidate position index
+  uint16_t bandNum    [MAX_CCSAO_SET_NUM][MAX_NUM_COMPONENT]; // per-set number of bands for each component
+  short    offset     [MAX_CCSAO_SET_NUM][MAX_CCSAO_CLASS_NUM]; // per-set offset of each class
+  uint8_t  mapIdxToIdc[MAX_CCSAO_SET_NUM + 1];                // set index -> coded idc value, with one extra trailing entry
+  CcSaoEncParam() {}
+  ~CcSaoEncParam() {}
+  // Clears every field to zero / false.
+  void reset()
+  {
+    ::memset(mapIdxToIdc, 0, sizeof(mapIdxToIdc));
+    ::memset(offset,      0, sizeof(offset));
+    ::memset(bandNum,     0, sizeof(bandNum));
+    ::memset(candPos,     0, sizeof(candPos));
+    ::memset(setEnabled,  0, sizeof(setEnabled));
+    setNum = 0;
+  }
+  // Copies every field from src.
+  const CcSaoEncParam& operator= (const CcSaoEncParam& src)
+  {
+    ::memcpy(mapIdxToIdc, src.mapIdxToIdc, sizeof(mapIdxToIdc));
+    ::memcpy(offset,      src.offset,      sizeof(offset));
+    ::memcpy(bandNum,     src.bandNum,     sizeof(bandNum));
+    ::memcpy(candPos,     src.candPos,     sizeof(candPos));
+    ::memcpy(setEnabled,  src.setEnabled,  sizeof(setEnabled));
+    setNum = src.setNum;
+    return *this;
+  }
 class EncSampleAdaptiveOffset : public SampleAdaptiveOffset
@@ -98,7 +161,9 @@ public:
                                           ,BIFCabacEst* BifCABACEstimator
+#if JVET_W0066_CCSAO
+  void CCSAOProcess(CodingStructure& cs, const double* lambdas, const int intraPeriod);
   void disabledRate( CodingStructure& cs, SAOBlkParam* reconParams, const double saoEncodingRate, const double saoEncodingRateChroma );
   void getPreDBFStatistics(CodingStructure& cs);
 private: //methods
@@ -129,6 +194,46 @@ private: //methods
   inline int64_t estSaoDist(int64_t count, int64_t offset, int64_t diffSum, int shift);
   inline int estIterOffset(int typeIdx, double lambda, int offsetInput, int64_t count, int64_t diffSum, int shift, int bitIncrease, int64_t& bestDist, double& bestCost, int offsetTh );
   void addPreDBFStatistics(std::vector<SAOStatData**>& blkStats);
+#if JVET_W0066_CCSAO
+  void setupCcSaoLambdas(CodingStructure& cs, const double* lambdas);
+  void deriveCcSao(CodingStructure& cs, const ComponentID compID, const CPelUnitBuf& orgYuv, const CPelUnitBuf& srcYuv, const CPelUnitBuf& dstYuv);
+  void setupInitCcSaoParam(CodingStructure& cs, const ComponentID compID, const int setNum, int64_t* trainingDistortion[MAX_CCSAO_SET_NUM]
+                         , CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM], CcSaoStatData frameStats[MAX_CCSAO_SET_NUM]
+                         , CcSaoEncParam& initCcSaoParam, CcSaoEncParam& bestCcSaoParam
+                         , uint8_t* initCcSaoControl, uint8_t* bestCcSaoControl);
+  void setupTempCcSaoParam(CodingStructure& cs, const ComponentID compID, const int setNum
+                         , const int candPosY, const int bandNumY, const int bandNumU, const int bandNumV
+                         , CcSaoEncParam& tempCcSaoParam, CcSaoEncParam& initCcSaoParam
+                         , uint8_t* tempCcSaoControl, uint8_t* initCcSaoControl);
+  void getCcSaoStatistics(CodingStructure& cs, const ComponentID compID, const CPelUnitBuf& orgYuv, const CPelUnitBuf& srcYuv, const CPelUnitBuf& dstYuv
+                        , CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM], const CcSaoEncParam& ccSaoParam);
+  void getCcSaoBlkStats(const ComponentID compID, const ChromaFormat chromaFormat, const int bitDepth
+                      , const int setIdx, CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM], const int ctuRsAddr
+                      , const uint16_t candPosY
+                      , const uint16_t bandNumY, const uint16_t bandNumU, const uint16_t bandNumV
+                      , const Pel* srcY, const Pel* srcU, const Pel* srcV, const Pel* org, const Pel* dst
+                      , const int srcStrideY, const int srcStrideU, const int srcStrideV, const int orgStride, const int dstStride, const int width, const int height
+                      , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail);
+  void getCcSaoFrameStats(const ComponentID compID, const int setIdx, const uint8_t* ccSaoControl
+                        , CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM], CcSaoStatData frameStats[MAX_CCSAO_SET_NUM]);
+  void deriveCcSaoOffsets(const ComponentID compID, const int bitDepth, const int setIdx
+                        , CcSaoStatData frameStats[MAX_CCSAO_SET_NUM]
+                        , short offset[MAX_CCSAO_SET_NUM][MAX_CCSAO_CLASS_NUM]);
+  inline int estCcSaoIterOffset(const double lambda, const int offsetInput, const int64_t count, const int64_t diffSum, const int shift, const int bitIncrease, int64_t& bestDist, double& bestCost, const int offsetTh);
+  void getCcSaoDistortion(const ComponentID compID, const int setIdx, CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM]
+                        , short offset[MAX_CCSAO_SET_NUM][MAX_CCSAO_CLASS_NUM], int64_t* trainingDistortion[MAX_CCSAO_SET_NUM]);
+  void deriveCcSaoRDO(CodingStructure& cs, const ComponentID compID, int64_t* trainingDistortion[MAX_CCSAO_SET_NUM]
+                    , CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM], CcSaoStatData frameStats[MAX_CCSAO_SET_NUM]
+                    , CcSaoEncParam& bestCcSaoParam, CcSaoEncParam& tempCcSaoParam
+                    , uint8_t* bestCcSaoControl, uint8_t* tempCcSaoControl
+                    , double& bestCost, double& tempCost);
+  void determineCcSaoControlIdc(CodingStructure& cs, const ComponentID compID, 
+                                const int ctuWidthC, const int ctuHeightC, const int picWidthC, const int picHeightC,
+                                CcSaoEncParam& ccSaoParam, uint8_t* ccSaoControl, int64_t* trainingDistorsion[MAX_CCSAO_SET_NUM],
+                                int64_t& curTotalDist, double& curTotalRate);
+  int  getCcSaoParamRate(const ComponentID compID, const CcSaoEncParam& ccSaoParam);
+  int  lengthUvlc(int uiCode);
 private: //members
   //for RDO
   CABACWriter*           m_CABACEstimator;
@@ -141,6 +246,20 @@ private: //members
   double                 m_saoDisabledRate[MAX_NUM_COMPONENT][MAX_TLAYER];
   int                    m_skipLinesR[MAX_NUM_COMPONENT][NUM_SAO_NEW_TYPES];
   int                    m_skipLinesB[MAX_NUM_COMPONENT][NUM_SAO_NEW_TYPES];
+#if JVET_W0066_CCSAO
+  bool                   m_createdEnc = false;
+  int                    m_intraPeriod;
+  CcSaoStatData*         m_ccSaoStatData [MAX_CCSAO_SET_NUM];
+  CcSaoStatData          m_ccSaoStatFrame[MAX_CCSAO_SET_NUM];
+  CcSaoEncParam          m_bestCcSaoParam;
+  CcSaoEncParam          m_tempCcSaoParam;
+  CcSaoEncParam          m_initCcSaoParam;
+  uint8_t*               m_bestCcSaoControl;
+  uint8_t*               m_tempCcSaoControl;
+  uint8_t*               m_initCcSaoControl;
+  int64_t*               m_trainingDistortion[MAX_CCSAO_SET_NUM];
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index 18a94142937889a37aa2579125b51f696c4aff72..6e88da5e13b01d53de280176e78837d13e3a9945 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -1233,6 +1233,9 @@ void HLSWriter::codeSPS( const SPS* pcSPS )
   WRITE_FLAG( pcSPS->getSAOEnabledFlag(),                                            "sps_sao_enabled_flag");
+#if JVET_W0066_CCSAO
+  WRITE_FLAG( pcSPS->getCCSAOEnabledFlag(),                                          "sps_ccsao_enabled_flag" );
   WRITE_FLAG( pcSPS->getALFEnabledFlag(),                                            "sps_alf_enabled_flag" );
   if (pcSPS->getALFEnabledFlag() && pcSPS->getChromaFormatIdc() != CHROMA_400)
@@ -2365,7 +2368,29 @@ void HLSWriter::codePictureHeader( PicHeader* picHeader, bool writeRbspTrailingB
     picHeader->setSaoEnabledFlag(CHANNEL_TYPE_CHROMA, false);
+#if JVET_W0066_CCSAO
+  if(sps->getCCSAOEnabledFlag())
+  {
+    if (pps->getSaoInfoInPhFlag())
+    {
+      WRITE_FLAG(picHeader->getCcSaoEnabledFlag(COMPONENT_Y),  "ph_cc_sao_y_enabled_flag");
+      WRITE_FLAG(picHeader->getCcSaoEnabledFlag(COMPONENT_Cb), "ph_cc_sao_cb_enabled_flag");
+      WRITE_FLAG(picHeader->getCcSaoEnabledFlag(COMPONENT_Cr), "ph_cc_sao_cr_enabled_flag");
+    }
+    else
+    {
+      picHeader->setCcSaoEnabledFlag(COMPONENT_Y,  true);
+      picHeader->setCcSaoEnabledFlag(COMPONENT_Cb, true);
+      picHeader->setCcSaoEnabledFlag(COMPONENT_Cr, true);
+    }
+  }
+  else
+  {
+    picHeader->setCcSaoEnabledFlag(COMPONENT_Y,  false);
+    picHeader->setCcSaoEnabledFlag(COMPONENT_Cb, false);
+    picHeader->setCcSaoEnabledFlag(COMPONENT_Cr, false);
+  }
   // deblocking filter controls
   if (pps->getDeblockingFilterControlPresentFlag())
@@ -2822,6 +2847,9 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
+#if JVET_W0066_CCSAO
+    codeCcSao(pcSlice, picHeader, pcSlice->getSPS(), pcSlice->m_ccSaoComParam);
     if (pcSlice->getPPS()->getDeblockingFilterControlPresentFlag())
@@ -3033,6 +3061,9 @@ void  HLSWriter::codeConstraintInfo  ( const ConstraintInfo* cinfo )
     /* loop filter */
     WRITE_FLAG(cinfo->getNoSaoConstraintFlag() ? 1 : 0, "gci_no_sao_constraint_flag");
+#if JVET_W0066_CCSAO
+    WRITE_FLAG(cinfo->getNoCCSaoConstraintFlag() ? 1 : 0, "gci_no_ccsao_constraint_flag");
     WRITE_FLAG(cinfo->getNoAlfConstraintFlag() ? 1 : 0, "gci_no_alf_constraint_flag");
     WRITE_FLAG(cinfo->getNoCCAlfConstraintFlag() ? 1 : 0, "gci_no_ccalf_constraint_flag");
     WRITE_FLAG(cinfo->getNoLmcsConstraintFlag() ? 1 : 0, "gci_no_lmcs_constraint_flag");
@@ -3566,6 +3597,65 @@ bool HLSWriter::xFindMatchingLTRP(Slice* pcSlice, uint32_t *ltrpsIndex, int ltrp
   return false;
+#if JVET_W0066_CCSAO
+void HLSWriter::codeCcSao(Slice* pcSlice, PicHeader* picHeader, const SPS* sps, const CcSaoComParam& ccSaoParam)
+  if (pcSlice->getSPS()->getCCSAOEnabledFlag())
+  {
+    WRITE_FLAG(ccSaoParam.enabled[COMPONENT_Y ] ? 1 : 0, "slice_ccsao_y_enabled_flag");
+    WRITE_FLAG(ccSaoParam.enabled[COMPONENT_Cb] ? 1 : 0, "slice_ccsao_cb_enabled_flag");
+    WRITE_FLAG(ccSaoParam.enabled[COMPONENT_Cr] ? 1 : 0, "slice_ccsao_cr_enabled_flag");
+    for (int compIdx = 0; compIdx < MAX_NUM_COMPONENT; compIdx++)
+    {
+      if (ccSaoParam.enabled[compIdx])
+      {
+        CHECK(ccSaoParam.setNum[compIdx] == 0 
+           || ccSaoParam.setNum[compIdx] >  MAX_CCSAO_SET_NUM, "CCSAO setNum out of range");
+        WRITE_UVLC(ccSaoParam.setNum[compIdx] - 1, "ccsao_set_num");
+        for (int setIdx = 0; setIdx < ccSaoParam.setNum[compIdx]; setIdx++)
+        {
+          CHECK(ccSaoParam.candPos[compIdx][setIdx][COMPONENT_Y ] >= MAX_CCSAO_CAND_POS_Y, "CCSAO candPosY out of range");
+          CHECK(ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Y ] == 0 
+             || ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Y ] >  MAX_CCSAO_BAND_NUM_Y, "CCSAO bandNumY out of range");
+          CHECK(ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Cb] == 0 
+             || ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Cb] >  MAX_CCSAO_BAND_NUM_U, "CCSAO bandNumU out of range");
+          CHECK(ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Cr] == 0
+             || ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Cr] >  MAX_CCSAO_BAND_NUM_V, "CCSAO bandNumV out of range");
+          WRITE_CODE(ccSaoParam.candPos[compIdx][setIdx][COMPONENT_Y ],     MAX_CCSAO_CAND_POS_Y_BITS, "ccsao_cand_pos_y");
+          WRITE_CODE(ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Y ] - 1, MAX_CCSAO_BAND_NUM_Y_BITS, "ccsao_band_num_y");
+          WRITE_CODE(ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Cb] - 1, MAX_CCSAO_BAND_NUM_U_BITS, "ccsao_band_num_u");
+          WRITE_CODE(ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Cr] - 1, MAX_CCSAO_BAND_NUM_V_BITS, "ccsao_band_num_v");
+          const short *offset   = ccSaoParam.offset [compIdx][setIdx];
+          const int    classNum = ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Y ]
+                                * ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Cb]
+                                * ccSaoParam.bandNum[compIdx][setIdx][COMPONENT_Cr];
+          for (int i = 0; i < classNum; i++)
+          {
+            CHECK((offset[i] > MAX_CCSAO_OFFSET_THR || offset[i] < -MAX_CCSAO_OFFSET_THR), "CCSAO offset out of range");
+            WRITE_UVLC(abs(offset[i]), "ccsao_offset_abs");
+            if (abs(offset[i]) != 0)
+            {
+              WRITE_FLAG((offset[i] < 0) ? 1 : 0, "ccsao_offset_sign");
+            }
+          }
+          DTRACE(g_trace_ctx, D_SYNTAX, "offset setIdx %d: ", setIdx);
+          for (int i = 0; i < classNum; i++)
+          {
+            DTRACE(g_trace_ctx, D_SYNTAX, "%d ", offset[i]);
+          }
+          DTRACE(g_trace_ctx, D_SYNTAX, "\n");
+        }
+      }
+    }
+  }
 void HLSWriter::alfGolombEncode(int coeff, int k, const bool signed_coeff)
diff --git a/source/Lib/EncoderLib/VLCWriter.h b/source/Lib/EncoderLib/VLCWriter.h
index 982262d9dab50081992e09238b30cf4213751f1b..037897b325492a8ac317bb58a845b7e979c4de4a 100644
--- a/source/Lib/EncoderLib/VLCWriter.h
+++ b/source/Lib/EncoderLib/VLCWriter.h
@@ -160,6 +160,9 @@ public:
   void  codeScalingList         ( const ScalingList &scalingList );
+#if JVET_W0066_CCSAO
+  void codeCcSao                ( Slice* pcSlice, PicHeader* picHeader, const SPS* sps, const CcSaoComParam& ccSaoParam );
   void alfFilter( const AlfParam& alfParam, const bool isChroma, const int altIdx, int order0, int order1 );
   void alfGolombEncode(const int coeff, const int k, const bool signed_coeff = true);