From c0f0d7b6993e9be41d18e289538e85a985f016e2 Mon Sep 17 00:00:00 2001
From: Ruoyang Yu <ruoyyu@qti.qualcomm.com>
Date: Fri, 15 Nov 2024 17:37:12 +0000
Subject: [PATCH] JVET-AJ0237: 12-bit internal bit depth modifications for ECM

---
 source/App/EncoderApp/EncAppCfg.cpp           |   7 +
 source/Lib/CommonLib/AdaptiveLoopFilter.cpp   |  81 +++++++
 source/Lib/CommonLib/AdaptiveLoopFilter.h     |   8 +
 source/Lib/CommonLib/AlfParameters.h          |  15 +-
 source/Lib/CommonLib/BilateralFilter.cpp      | 107 ++++++++-
 source/Lib/CommonLib/BilateralFilter.h        |  17 ++
 source/Lib/CommonLib/CommonDef.h              |  11 +
 source/Lib/CommonLib/InterPrediction.cpp      | 107 +++++++++
 source/Lib/CommonLib/InterPrediction.h        |  12 +
 source/Lib/CommonLib/InterpolationFilter.cpp  |  28 +++
 source/Lib/CommonLib/InterpolationFilter.h    |   4 +
 source/Lib/CommonLib/IntraPrediction.cpp      | 222 +++++++++++++++++-
 source/Lib/CommonLib/IntraPrediction.h        |  28 +++
 source/Lib/CommonLib/Picture.cpp              |   3 +
 source/Lib/CommonLib/Rom.cpp                  |  12 +
 source/Lib/CommonLib/Rom.h                    |  12 +
 source/Lib/CommonLib/SampleAdaptiveOffset.cpp | 173 ++++++++++++++
 source/Lib/CommonLib/Slice.h                  |  14 ++
 source/Lib/CommonLib/TrQuant.cpp              |  60 +++++
 source/Lib/CommonLib/TypeDef.h                |   1 +
 .../Lib/CommonLib/x86/AdaptiveLoopFilterX86.h | 152 +++++++++++-
 source/Lib/CommonLib/x86/BilateralFilterX86.h | 107 +++++++++
 source/Lib/CommonLib/x86/BufferX86.h          |   4 +
 .../CommonLib/x86/InterpolationFilterX86.h    | 160 +++++++++++++
 source/Lib/CommonLib/x86/IntraX86.h           |  56 +++++
 source/Lib/CommonLib/x86/RdCostX86.h          |  40 ++++
 source/Lib/DecoderLib/DecLib.cpp              |  23 ++
 source/Lib/DecoderLib/VLCReader.cpp           |  15 ++
 .../Lib/EncoderLib/EncAdaptiveLoopFilter.cpp  |  12 +
 source/Lib/EncoderLib/EncCu.cpp               |   3 +
 source/Lib/EncoderLib/EncGOP.cpp              |  27 +++
 source/Lib/EncoderLib/EncGOP.h                |   8 +
 source/Lib/EncoderLib/EncLib.cpp              |   9 +
 .../EncoderLib/EncSampleAdaptiveOffset.cpp    | 128 ++++++++++
 .../Lib/EncoderLib/EncSampleAdaptiveOffset.h  |   9 +
 source/Lib/EncoderLib/EncTemporalFilter.cpp   |  67 ++++++
 source/Lib/EncoderLib/EncTemporalFilter.h     |  16 ++
 source/Lib/EncoderLib/InterSearch.cpp         |   8 +
 source/Lib/EncoderLib/IntraSearch.cpp         |  24 ++
 source/Lib/EncoderLib/IntraSearch.h           |   4 +
 source/Lib/EncoderLib/VLCWriter.cpp           |  11 +
 41 files changed, 1797 insertions(+), 8 deletions(-)

diff --git a/source/App/EncoderApp/EncAppCfg.cpp b/source/App/EncoderApp/EncAppCfg.cpp
index 75c962989..64ff0e13e 100644
--- a/source/App/EncoderApp/EncAppCfg.cpp
+++ b/source/App/EncoderApp/EncAppCfg.cpp
@@ -3588,6 +3588,13 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
   }
 #endif
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  if ((m_internalBitDepth[CHANNEL_TYPE_LUMA] > 10) && m_CCSAO && (m_iQP >= 37) && (m_sourceWidth * m_sourceHeight > 1920 * 1080))
+  {
+    m_CCSAO = false; // encoder-side override: CCSAO is switched off, without any message, when luma internal bit depth > 10, QP >= 37 and the picture area exceeds 1920x1080
+  }
+#endif
+
   // check validity of input parameters
   if( xCheckParameter() )
   {
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
index ceaa4d3d1..875310240 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
@@ -2544,7 +2544,11 @@ void  AdaptiveLoopFilter::alfAddCorrect( AlfClassifier** classifier, const PelUn
   int dstStride2 = dstStride * clsSizeY;
   int srcStride2 = srcStride * clsSizeY;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  const Pel currBase = 1 << (clpRng.bd - 1); // derived from clpRng.bd; equals the legacy constant 512 when clpRng.bd is 10
+#else
   const Pel currBase = 512;
+#endif
   const int adjustOffCorr = (1 << (shiftCorr + shiftPrecis - 1));
   const int offsetN = adjustOffCorr;
   const int offsetP = offsetN - 1;
@@ -3182,7 +3186,11 @@ double AdaptiveLoopFilter::getScaleCorrDouble( const int s )
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
 void AdaptiveLoopFilter::deriveFixFilterResultsBlkChroma(AlfClassifier ***classifier, Pel ***fixedFilterResults, const CPelBuf &src, const CPelBuf &srcBeforeDb, const Area &blkDst, const Area &blk, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS])
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  m_deriveVariance(src, blkDst, blk, laplacian, bits);
+#else
   m_deriveVariance(src, blkDst, blk, laplacian);
+#endif
   m_deriveClassificationLaplacian(src, blkDst, blk, laplacian, ALF_CLASSIFIER_FL_CHROMA);
   m_calcClass0(classifier[ALF_NUM_CLASSIFIER + 1], blkDst, blk, ALF_CLASSIFIER_FL_CHROMA + 10, 1, NUM_DIR_FIX, NUM_ACT_FIX, bits, 2, mappingDir, laplacian);
 
@@ -5054,8 +5062,12 @@ void AdaptiveLoopFilter::deriveClassificationAndFixFilterResultsBlk( AlfClassifi
 #endif
 {
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
+#if JVET_AJ0237_INTERNAL_12BIT
+  m_deriveVariance(srcLuma, blkDst, blk, laplacian, bits);
+#else
   m_deriveVariance(srcLuma, blkDst, blk, laplacian);
 #endif
+#endif
 #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
   m_deriveClassificationLaplacian(srcLuma, blkDst, blk, laplacian, ALF_CLASSIFIER_FL);
@@ -5622,7 +5634,11 @@ void AdaptiveLoopFilter::filterBlk(AlfClassifier **classifier, const PelUnitBuf
     adjustShift -= shiftPrecis; // add more precision
   }
   const int shift     = adjustShift;
+#if JVET_AJ0237_INTERNAL_12BIT
+  const Pel currBase = 1 << (clpRng.bd - 1); // derived from clpRng.bd; equals the legacy constant 512 when clpRng.bd is 10
+#else
   const Pel currBase  = 512;  // 10-bits
+#endif
 #else
 #if JVET_AG0158_ALF_LUMA_COEFF_PRECISION
   const int shift = coeffBits - 1;
@@ -7055,8 +7071,16 @@ void AdaptiveLoopFilter::paddingFixedFilterResultsCtu(Pel*** fixedFilterResultsP
 }
 
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
+#if JVET_AJ0237_INTERNAL_12BIT
+void AdaptiveLoopFilter::deriveVariance(const CPelBuf& srcLuma, const Area& blkDst, const Area& blk, uint32_t*** laplacian, int bits)
+#else
 void AdaptiveLoopFilter::deriveVariance(const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, uint32_t ***laplacian)
+#endif
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  int64_t tempData[4][(m_CLASSIFICATION_BLK_SIZE + 10) >> 1][((m_CLASSIFICATION_BLK_SIZE + 16) >> 1) + 8] = { { { 0 } } }; // 64-bit scratch planes: [0] = sum of four samples, [1] = sum of their squares, [2]/[3] = 5-entry running sums of [0]/[1] along j; [0]/[1] are then overwritten at (iOffsetM4, jOffsetM4) with 5-row accumulations of [2]/[3]. NOTE(review): local array, zero-filled on every call; its size scales with m_CLASSIFICATION_BLK_SIZE - confirm it fits the stack budget
+#endif
+
   int fl = DIST_CLASS;
   int stride = srcLuma.stride;
   int stride2 = 2 * stride;
@@ -7072,12 +7096,43 @@ void AdaptiveLoopFilter::deriveVariance(const CPelBuf &srcLuma, const Area &blkD
     for (int j = 0; j < blk.width + fl2; j += 2)
     {
       int jOffset = j >> 1;
+
+#if JVET_AJ0237_INTERNAL_12BIT
+      tempData[0][iOffset][jOffset] = src[j] + src[j + 1] + src1[j] + src1[j + 1];
+      tempData[1][iOffset][jOffset] = src[j] * src[j] + src[j + 1] * src[j + 1] + src1[j] * src1[j] + src1[j + 1] * src1[j + 1];
+#else
       laplacian[0][iOffset][jOffset] = src[j] + src[j + 1] + src1[j] + src1[j + 1];
       laplacian[1][iOffset][jOffset] = src[j] * src[j] + src[j + 1] * src[j + 1] + src1[j] * src1[j] + src1[j + 1] * src1[j + 1];
+#endif
 
       int iOffsetM4 = iOffset - 4;
       int jOffsetM4 = jOffset - 4;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+      if (jOffsetM4 == 0)
+      {
+        tempData[2][iOffset][jOffsetM4] = tempData[0][iOffset][jOffset - 4] + tempData[0][iOffset][jOffset - 3] + tempData[0][iOffset][jOffset - 2] + tempData[0][iOffset][jOffset - 1] + tempData[0][iOffset][jOffset];
+        tempData[3][iOffset][jOffsetM4] = tempData[1][iOffset][jOffset - 4] + tempData[1][iOffset][jOffset - 3] + tempData[1][iOffset][jOffset - 2] + tempData[1][iOffset][jOffset - 1] + tempData[1][iOffset][jOffset];
+      }
+      else if (jOffsetM4 > 0)
+      {
+        tempData[2][iOffset][jOffsetM4] = tempData[2][iOffset][jOffset - 5] - tempData[0][iOffset][jOffset - 5] + tempData[0][iOffset][jOffset];
+        tempData[3][iOffset][jOffsetM4] = tempData[3][iOffset][jOffset - 5] - tempData[1][iOffset][jOffset - 5] + tempData[1][iOffset][jOffset];
+      }
+
+      if ((iOffsetM4 >= 0) && (jOffsetM4 >= 0))
+      {
+        if (iOffsetM4 == 0)
+        {
+          tempData[0][iOffsetM4][jOffsetM4] = tempData[2][iOffsetM4][jOffsetM4] + tempData[2][iOffset - 3][jOffsetM4] + tempData[2][iOffset - 2][jOffsetM4] + tempData[2][iOffset - 1][jOffsetM4] + tempData[2][iOffset][jOffsetM4];
+          tempData[1][iOffsetM4][jOffsetM4] = tempData[3][iOffsetM4][jOffsetM4] + tempData[3][iOffset - 3][jOffsetM4] + tempData[3][iOffset - 2][jOffsetM4] + tempData[3][iOffset - 1][jOffsetM4] + tempData[3][iOffset][jOffsetM4];
+        }
+        else
+        {
+          tempData[0][iOffsetM4][jOffsetM4] = tempData[0][iOffsetM4 - 1][jOffsetM4] - tempData[2][iOffsetM4 - 1][jOffsetM4] + tempData[2][iOffset][jOffsetM4];
+          tempData[1][iOffsetM4][jOffsetM4] = tempData[1][iOffsetM4 - 1][jOffsetM4] - tempData[3][iOffsetM4 - 1][jOffsetM4] + tempData[3][iOffset][jOffsetM4];
+        }
+#else
       if (jOffsetM4 == 0)
       {
         laplacian[2][iOffset][jOffsetM4] = laplacian[0][iOffset][jOffset - 4] + laplacian[0][iOffset][jOffset - 3] + laplacian[0][iOffset][jOffset - 2] + laplacian[0][iOffset][jOffset - 1] + laplacian[0][iOffset][jOffset];
@@ -7101,8 +7156,14 @@ void AdaptiveLoopFilter::deriveVariance(const CPelBuf &srcLuma, const Area &blkD
           laplacian[0][iOffsetM4][jOffsetM4] = laplacian[0][iOffsetM4 - 1][jOffsetM4] - laplacian[2][iOffsetM4 - 1][jOffsetM4] + laplacian[2][iOffset][jOffsetM4];
           laplacian[1][iOffsetM4][jOffsetM4] = laplacian[1][iOffsetM4 - 1][jOffsetM4] - laplacian[3][iOffsetM4 - 1][jOffsetM4] + laplacian[3][iOffset][jOffsetM4];
         }
+#endif
 
+#if JVET_AJ0237_INTERNAL_12BIT
+        int bdShift = 2 * std::max(0, bits - 10); // twice the number of bits above 10 (0 for bits <= 10); presumably doubled because variance scales with the square of the sample range - TODO confirm. NOTE(review): loop-invariant, could be computed once outside the loops
+        laplacian[VARIANCE][iOffsetM4][jOffsetM4] = (uint32_t)((13 * ((numSample * tempData[1][iOffsetM4][jOffsetM4] - tempData[0][iOffsetM4][jOffsetM4] * tempData[0][iOffsetM4][jOffsetM4] + offset) >> 3)) >> (14 + bdShift)); // same formula as the non-12-bit branch, evaluated on the int64_t accumulators with bdShift extra bits of right shift, then narrowed to uint32_t
+#else
         laplacian[VARIANCE][iOffsetM4][jOffsetM4] = (13 * ((numSample * laplacian[1][iOffsetM4][jOffsetM4] - laplacian[0][iOffsetM4][jOffsetM4] * laplacian[0][iOffsetM4][jOffsetM4] + offset) >> 3)) >> 14;
+#endif
       }
     }
     src += stride2;
@@ -7184,7 +7245,11 @@ void AdaptiveLoopFilter::deriveFixedFilterResultsPerBlkChroma(AlfClassifier ***c
 
   if (useSimd)
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    m_deriveVariance(src, blk, blk, laplacian, bits);
+#else
     m_deriveVariance(src, blk, blk, laplacian);
+#endif
     m_deriveClassificationLaplacian(src, blk, blk, laplacian, ALF_CLASSIFIER_FL_CHROMA);
     m_calcClass0(classifier[ALF_NUM_CLASSIFIER + 1], blk, blk, ALF_CLASSIFIER_FL_CHROMA + 10, 1, NUM_DIR_FIX, NUM_ACT_FIX, bits, 2, mappingDir, laplacian);
     alfFixedFilterBlk(classifier[ALF_NUM_CLASSIFIER + 1], src, blk, blk, srcBeforeDb, fixedFilterResults, m_picWidth, fixedFilterSetIdx, targetFixedFilterSetInd, 0, clpRng, clippingValues, false
@@ -7195,7 +7260,11 @@ void AdaptiveLoopFilter::deriveFixedFilterResultsPerBlkChroma(AlfClassifier ***c
   }
   else
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    deriveVariance(src, blk, blk, laplacian, bits);
+#else
     deriveVariance(src, blk, blk, laplacian);
+#endif
     deriveClassificationLaplacian(src, blk, blk, laplacian, ALF_CLASSIFIER_FL_CHROMA);
     calcClass0Var(classifier[ALF_NUM_CLASSIFIER + 1], blk, blk, ALF_CLASSIFIER_FL_CHROMA + 10, 1, NUM_DIR_FIX, NUM_ACT_FIX, bits, 2, mappingDir, laplacian);
     alfFixedFilterBlkNonSimd(classifier[ALF_NUM_CLASSIFIER + 1], src, blk, blk, srcBeforeDb, fixedFilterResults, m_picWidth, fixedFilterSetIdx, targetFixedFilterSetInd, 0, clpRng, clippingValues, false
@@ -7704,8 +7773,12 @@ void AdaptiveLoopFilter::deriveFixedFilterResultsPerBlk( AlfClassifier **classif
   if(useSimd)
   {
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
+#if JVET_AJ0237_INTERNAL_12BIT
+    m_deriveVariance(srcLuma, blkCur, blkCur, laplacian, bits);
+#else
     m_deriveVariance(srcLuma, blkCur, blkCur, laplacian);
 #endif
+#endif
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
     m_deriveClassificationLaplacian(srcLuma, blkCur, blkCur, laplacian, ALF_CLASSIFIER_FL);
 #else
@@ -7715,8 +7788,12 @@ void AdaptiveLoopFilter::deriveFixedFilterResultsPerBlk( AlfClassifier **classif
   else
   {
 #if JVET_AE0139_ALF_IMPROVED_FIXFILTER
+#if JVET_AJ0237_INTERNAL_12BIT
+    deriveVariance(srcLuma, blkCur, blkCur, laplacian, bits);
+#else
     deriveVariance(srcLuma, blkCur, blkCur, laplacian);
 #endif
+#endif
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
     deriveClassificationLaplacian(srcLuma, blkCur, blkCur, laplacian, ALF_CLASSIFIER_FL);
 #else
@@ -8126,7 +8203,11 @@ void AdaptiveLoopFilter::gaussFiltering(CodingStructure &cs, Pel ***gaussPic, co
   int padSize = ALF_PADDING_SIZE_GAUSS_RESULTS;
   int shift = 10;
   const int numCoeff = 12;
+#if JVET_AJ0237_INTERNAL_12BIT
+  int diffTH = 32 << std::max(0, cs.sps->getBitDepth(CHANNEL_TYPE_LUMA) - 10); // legacy value 32 scaled by one bit per luma bit above 10; stays 32 for bit depths <= 10
+#else
   int diffTH = 32;
+#endif
 #if JVET_AJ0188_CODING_INFO_CLASSIFICATION
   const bool isIntraSlice = cs.slice->isIntra();
   const bool isSpsAdjust  = cs.sps->getAlfLumaFixedFilterAdjust();
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.h b/source/Lib/CommonLib/AdaptiveLoopFilter.h
index d2dacb490..6a31efba7 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.h
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.h
@@ -180,14 +180,22 @@ public:
   void deriveFixedFilterResultsBlk( AlfClassifier*** classifier, const CPelBuf& srcLuma, const CPelBuf& srcLumaBeforeDb, const Area& blkDst, const Area& blk, const int bits, CodingStructure &cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int winIdx, int fixedFilterSetIdx );
   void deriveFixedFilterResults( AlfClassifier*** classifier, const CPelBuf& srcLuma, const CPelBuf& srcLumaBeforeDb, const Area& blkDst, const Area& blk, CodingStructure &cs, int winIdx, int fixedFilterSetIdx );
   static void calcClass0Var( AlfClassifier **classifier, const Area &blkDst, const Area &cu, int dirWindSize, int classDir, int noDir, int noAct, int bitDepth, int subBlkSize, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS] );
+#if JVET_AJ0237_INTERNAL_12BIT
+  static void deriveVariance(const CPelBuf& srcLuma, const Area& blkDst, const Area& blk, uint32_t ***laplacian, int bd); // last parameter is named 'bits' in the definition, where it sets the extra variance shift 2 * max(0, bits - 10)
+#else
   static void deriveVariance( const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, uint32_t ***laplacian );
+#endif
   void deriveFixedFilterResultsCtuBoundary( AlfClassifier ***classifier, Pel ***fixedFilterResults, const CPelBuf &srcLuma, const CPelBuf &srcLumaBeforeDb, const Area &blkDst, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS], uint8_t* ctuEnableFlagLuma, uint8_t* ctuEnableOnlineLuma, int ctuIdx, int classifierIdx
 #if JVET_AJ0188_CODING_INFO_CLASSIFICATION
     , const CPelBuf& srcCodingInfo, const CPelBuf& srcResi
 #endif
     );
   void deriveFixedFilterResultsPerBlk( AlfClassifier ***classifier, Pel ***fixedFilterResults, const CPelBuf &srcLuma, const CPelBuf &srcLumaBeforeDb, const Area &blkCur, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS], const int classifierIdx );
+#if JVET_AJ0237_INTERNAL_12BIT
+  void(*m_deriveVariance)(const CPelBuf& srcLuma, const Area& blkDst, const Area& blk, uint32_t ***variance, int bd);
+#else
   void(*m_deriveVariance)(const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, uint32_t ***variance);
+#endif
 #if JVET_AG0157_ALF_CHROMA_FIXED_FILTER
   void deriveFixedFilterResultsCtuBoundaryChroma(AlfClassifier ***classifier, Pel ***fixedFilterResults, const CPelBuf &src, const CPelBuf &srcBeforeDb, const Area &blkDst, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS], uint8_t* ctuEnableFlag, int ctuIdx);
   void deriveFixedFilterResultsPerBlkChroma(AlfClassifier ***classifier, Pel ***fixedFilterResults, const CPelBuf &src, const CPelBuf &srcBeforeDb, const Area &blk, const int bits, CodingStructure& cs, const ClpRng &clpRng, const Pel clippingValues[4], int qp, int fixedFilterSetIdx, int mappingDir[NUM_DIR_FIX][NUM_DIR_FIX], uint32_t **laplacian[NUM_DIRECTIONS]);
diff --git a/source/Lib/CommonLib/AlfParameters.h b/source/Lib/CommonLib/AlfParameters.h
index 0a507c46b..1712e1ba5 100644
--- a/source/Lib/CommonLib/AlfParameters.h
+++ b/source/Lib/CommonLib/AlfParameters.h
@@ -584,6 +584,9 @@ struct ScaleAlf
   bool usePrev;
 
   int apsIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+  int bitDepth; // assigned from bDepth in init(); used by setMinMax() as the right-shift amount. NOTE(review): no other assignment is visible in this patch - confirm init() always runs before setMinMax(), and that setMinMax()'s default lumaMax = 1024 is still intended when bitDepth != 10
+#endif
 
   void reset() 
   {
@@ -608,14 +611,20 @@ struct ScaleAlf
   void setMinMax( const Pel lumaMin = 0, const Pel lumaMax = 1024, const bool bCheckClassifier = true ) 
   {
     const int c = classifierIdx;
+#if !JVET_AJ0237_INTERNAL_12BIT
     const int bitDepth = 10;
+#endif
     idxClassMin = (!bCheckClassifier || c == 1) ? ((lumaMin * ALF_NUM_CLASSES_CLASSIFIER[c]) >> bitDepth) : 0 ;
     idxClassMax = (!bCheckClassifier || c == 1) ? ((lumaMax * ALF_NUM_CLASSES_CLASSIFIER[c]) >> bitDepth) : (ALF_NUM_CLASSES_CLASSIFIER[c] - 1) ;
 
     initMinMaxDone = true;
   }
 
-  void init( const int f, const int a, const int c ) 
+#if JVET_AJ0237_INTERNAL_12BIT
+  void init(const int f, const int a, const int c, const int bDepth)
+#else
+  void init( const int f, const int a, const int c )
+#endif
   {
     filterSetIndex  = f;
     alt_num         = a;
@@ -625,7 +634,9 @@ struct ScaleAlf
 
     idxClassMin = 0 ;
     idxClassMax = ALF_NUM_CLASSES_CLASSIFIER[c] - 1 ;
-
+#if JVET_AJ0237_INTERNAL_12BIT
+    bitDepth = bDepth;
+#endif
     initDone = true;
   }
 
diff --git a/source/Lib/CommonLib/BilateralFilter.cpp b/source/Lib/CommonLib/BilateralFilter.cpp
index 7d3b615d8..0f6c0f4e3 100644
--- a/source/Lib/CommonLib/BilateralFilter.cpp
+++ b/source/Lib/CommonLib/BilateralFilter.cpp
@@ -62,6 +62,9 @@ BilateralFilter::BilateralFilter()
   initBilateralFilterX86();
 #endif
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+  internalBitDepth = 10; // default; with 10 every bdShift derived as max(0, internalBitDepth - 10) is 0 unless setInternalBitDepth() changes it
+#endif
 }
 
 BilateralFilter::~BilateralFilter()
@@ -101,6 +104,12 @@ const char* BilateralFilter::getFilterLutParameters(int16_t* block, const int st
   int h = floorLog2(height);
   int mad = m_calcMAD(block, stride, width, height, w + h);
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  int bdShift = std::max(0, internalBitDepth - 10); // number of bits above 10; 0 for bit depths <= 10
+  int offset = (bdShift == 0) ? 0 : (1 << (bdShift - 1)); // rounding term: half of (1 << bdShift), or 0 when no shift is applied
+  mad = (mad + offset) >> bdShift; // rounded right shift of the MAD by bdShift, applied before the clamping below
+#endif
+
   w = std::min(w, 7);
   h = std::min(h, 7);
   mad = std::min(mad >> 4, 15);
@@ -163,7 +172,11 @@ const char* BilateralFilter::getFilterLutParameters(int16_t* block, const int st
 }
 #endif
 
+#if JVET_AJ0237_INTERNAL_12BIT
+inline void bifApplyLut(int diff, int& res, int cutBitsNum, int bitsRound, int bitsRound2, int shift, const char* lutRowPtr, int lutShift, int bdShift)
+#else
 inline void bifApplyLut(int diff, int& res, int cutBitsNum, int bitsRound, int bitsRound2, int shift, const char* lutRowPtr, int lutShift)
+#endif
 {
   int sg0 = diff >> shift;
   int v0 = (diff + sg0) ^ sg0;
@@ -177,14 +190,26 @@ inline void bifApplyLut(int diff, int& res, int cutBitsNum, int bitsRound, int b
   int idx = (v0 + 4) >> 3;
   idx = 15 + ((idx - 15) & ((idx - 15) >> shift));
   int w0 = lutRowPtr[idx] >> lutShift;
+#endif
+#if JVET_AJ0237_INTERNAL_12BIT
+  w0 = (w0 << bdShift); // scale the LUT-derived value up by bdShift bits before res is formed from w0 and sg0 below
 #endif
   res = (w0 + sg0) ^ sg0;
 }
 
+#if JVET_AJ0237_INTERNAL_12BIT
+void BilateralFilter::blockBilateralFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum, int bdShift)
+#else
 void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum)
+#endif
 {
   int pad = NUMBER_PADDED_SAMPLES;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  cutBitsNum += bdShift; // increase the caller-supplied cutBitsNum by bdShift; the adjusted value is what the bifApplyLut calls below receive
+#endif
+
+
   int padwidth = iWidthExtSIMD;
   int downbuffer[128];
   int downleftbuffer[129];
@@ -204,13 +229,25 @@ void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t
   {
     int pixel = block[(-1 + pad)*padwidth + x + pad];
     int below = block[(-1 + pad + 1)*padwidth + x + pad];
+#if JVET_AJ0237_INTERNAL_12BIT
+    bifApplyLut(below - pixel, downbuffer[x], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift1, bdShift);
+#else
     bifApplyLut(below - pixel, downbuffer[x], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift1);
+#endif
 
     int belowright = block[(-1 + pad + 1)*padwidth + x + pad + 1];
+#if JVET_AJ0237_INTERNAL_12BIT
+    bifApplyLut(belowright - pixel, downrightbuffer[1][x + 1], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2, bdShift);
+#else
     bifApplyLut(belowright - pixel, downrightbuffer[1][x + 1], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2);
+#endif
 
     int belowleft = block[(-1 + pad + 1)*padwidth + x + pad - 1];
+#if JVET_AJ0237_INTERNAL_12BIT
+    bifApplyLut(belowleft - pixel, downleftbuffer[x], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2, bdShift);
+#else
     bifApplyLut(belowleft - pixel, downleftbuffer[x], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2);
+#endif
   }
   int width = uiWidth;
   for( int y = 0; y < uiHeight; y++ )
@@ -220,15 +257,27 @@ void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t
     int pixel = rowStart[-1];
 
     int right = rowStart[0], rightmod = 0;
+#if JVET_AJ0237_INTERNAL_12BIT
+    bifApplyLut(right - pixel, rightmod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift1, bdShift);
+#else
     bifApplyLut(right - pixel, rightmod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift1);
+#endif
 
     pixel = rowStart[-padwidth - 1];
     int belowright = right;
+#if JVET_AJ0237_INTERNAL_12BIT
+    bifApplyLut(belowright - pixel, downrightbuffer[(y + 1) % 2][0], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2, bdShift);
+#else
     bifApplyLut(belowright - pixel, downrightbuffer[(y + 1) % 2][0], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2);
+#endif
 
     pixel = rowStart[-padwidth + width];
     int belowleft = rowStart[width - 1];
+#if JVET_AJ0237_INTERNAL_12BIT
+    bifApplyLut(belowleft - pixel, downleftbuffer[width], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2, bdShift);
+#else
     bifApplyLut(belowleft - pixel, downleftbuffer[width], cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2);
+#endif
 
     for( int x = 0; x < uiWidth; x++ )
     {
@@ -242,12 +291,20 @@ void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t
       modsum += leftmod;
 
       right = rowStart[x + 1];
+#if JVET_AJ0237_INTERNAL_12BIT
+      bifApplyLut(right - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift1, bdShift);
+#else
       bifApplyLut(right - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift1);
+#endif
       modsum += mod;
       rightmod = mod;
 
       int below = rowStart[x + padwidth];
+#if JVET_AJ0237_INTERNAL_12BIT
+      bifApplyLut(below - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift1, bdShift);
+#else
       bifApplyLut(below - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift1);
+#endif
       modsum += mod;
       downbuffer[x] = mod;
 
@@ -258,12 +315,20 @@ void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t
       modsum += aboveleftmod;
 
       int belowleft = rowStart[x + padwidth - 1];
+#if JVET_AJ0237_INTERNAL_12BIT
+      bifApplyLut(belowleft - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2, bdShift);
+#else
       bifApplyLut(belowleft - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2);
+#endif
       modsum += mod;
       downleftbuffer[x] = mod;
 
       int belowright = rowStart[x + padwidth + 1];
+#if JVET_AJ0237_INTERNAL_12BIT
+      bifApplyLut(belowright - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2, bdShift);
+#else
       bifApplyLut(belowright - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift2);
+#endif
       modsum += mod;
       downrightbuffer[y % 2][x + 1] = mod;
 
@@ -272,23 +337,43 @@ void BilateralFilter::blockBilateralFilterDiamond5x5( uint32_t uiWidth, uint32_t
       // speed when SIMD is turned off.
 
       int above = rowStart[x - 2 * padwidth];
+#if JVET_AJ0237_INTERNAL_12BIT
+      bifApplyLut(above - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift3, bdShift);
+#else
       bifApplyLut(above - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift3);
+#endif
       modsum += mod;
 
       below = rowStart[x + 2 * padwidth];
+#if JVET_AJ0237_INTERNAL_12BIT
+      bifApplyLut(below - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift3, bdShift);
+#else
       bifApplyLut(below - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift3);
+#endif
       modsum += mod;
 
       int left = rowStart[x - 2];
+#if JVET_AJ0237_INTERNAL_12BIT
+      bifApplyLut(left - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift3, bdShift);
+#else
       bifApplyLut(left - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift3);
+#endif
       modsum += mod;
 
       right = rowStart[x + 2];
+#if JVET_AJ0237_INTERNAL_12BIT
+      bifApplyLut(right - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift3, bdShift);
+#else
       bifApplyLut(right - pixel, mod, cutBitsNum, bitsRound, bitsRound2, shift, lutRowPtr, lutShift3);
+#endif
       modsum += mod;
 
 #if JVET_AF0112_BIF_DYNAMIC_SCALING
+#if JVET_AJ0237_INTERNAL_12BIT
+      blkFilt[(y + pad) * padwidth + x + pad] = ((int16_t)((modsum * bfac + (bifRoundAdd << 3)) >> (bifRoundShift + 3))); // same expression as the non-12-bit branch but without its intermediate (uint16_t) cast; the int result is narrowed directly to int16_t
+#else
       blkFilt[(y + pad) * padwidth + x + pad] = ((int16_t)((uint16_t)((modsum * bfac + (bifRoundAdd << 3)) >> (bifRoundShift + 3))));
+#endif
 #else
       blkFilt[(y + pad) * padwidth + x + pad] = (( int16_t ) (( uint16_t ) ((modsum*bfac + bifRoundAdd) >> bifRoundShift)));
 #endif
@@ -592,7 +677,12 @@ void BilateralFilter::bilateralFilterRDOdiamond5x5(const ComponentID compID, Pel
     CHECK(doReshape, "Reshape domain is not used for chroma");
 #endif
   }
+#if JVET_AJ0237_INTERNAL_12BIT
+  int bdShift = std::max(0, internalBitDepth - 10);
+  m_bilateralFilterDiamond5x5(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, piReco, uiRecStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, true, lutRowPtr, false, cutBitsNum, bdShift);
+#else
   m_bilateralFilterDiamond5x5(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, piReco, uiRecStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, true, lutRowPtr, false, cutBitsNum);
+#endif
 
   if( !useReco )
   {
@@ -619,6 +709,9 @@ void BilateralFilter::bilateralFilterDiamond5x5( const ComponentID compID, const
 #endif
 )
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  int bdShift = std::max(0, internalBitDepth - 10);
+#endif
 #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
   const int scaleX = getChannelTypeScaleX( toChannelType( compID ), currTU.cu->cs->pcv->chrFormat );
   const int scaleY = getChannelTypeScaleY( toChannelType( compID ), currTU.cu->cs->pcv->chrFormat );
@@ -902,7 +995,11 @@ void BilateralFilter::bilateralFilterDiamond5x5( const ComponentID compID, const
         int bifRoundAdd = BIF_ROUND_ADD >> currTU.cs->pps->getBIFStrength();
         int bifRoundShift = BIF_ROUND_SHIFT - currTU.cs->pps->getBIFStrength();
 
+#if JVET_AJ0237_INTERNAL_12BIT
+        m_bilateralFilterDiamond5x5(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, false, lutRowPtr, false, cutBitsNum, bdShift);
+#else
         m_bilateralFilterDiamond5x5(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, false, lutRowPtr, false, cutBitsNum);
+#endif
 
         xStart = xEnd;
       }
@@ -1168,8 +1265,11 @@ void BilateralFilter::bilateralFilterDiamond5x5( const ComponentID compID, const
 
     int bifRoundAdd = BIF_ROUND_ADD >> currTU.cs->pps->getBIFStrength();
     int bifRoundShift = BIF_ROUND_SHIFT - currTU.cs->pps->getBIFStrength();
-
+#if JVET_AJ0237_INTERNAL_12BIT
+    m_bilateralFilterDiamond5x5(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, false, lutRowPtr, noClip, cutBitsNum, bdShift);
+#else
     m_bilateralFilterDiamond5x5(uiWidth, uiHeight, tempblock, tempblockFiltered, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, false, lutRowPtr, noClip, cutBitsNum);
+#endif
   }
 }
 void BilateralFilter::clipNotBilaterallyFilteredBlocks(const ComponentID compID, const CPelUnitBuf& src, PelUnitBuf& rec, const ClpRng& clpRng, TransformUnit & currTU)
@@ -1595,6 +1695,11 @@ const char* BilateralFilter::getFilterLutParametersChroma(int16_t* block, const
   int h = floorLog2(heightForStrength);
 
   int mad = m_calcMAD(block, stride, width, height, floorLog2(width) + floorLog2(height));
+#if JVET_AJ0237_INTERNAL_12BIT
+  int bdShift = std::max(0, internalBitDepth - 10); // number of bits above 10; 0 for bit depths <= 10
+  int offset = (bdShift == 0) ? 0 : (1 << (bdShift - 1)); // rounding term: half of (1 << bdShift), or 0 when no shift is applied
+  mad = (mad + offset) >> bdShift; // rounded right shift of the MAD by bdShift, applied before the clamping below
+#endif
 
   w = std::min(w, 7);
   h = std::min(h, 7);
diff --git a/source/Lib/CommonLib/BilateralFilter.h b/source/Lib/CommonLib/BilateralFilter.h
index 27cece33b..3cba3eefc 100644
--- a/source/Lib/CommonLib/BilateralFilter.h
+++ b/source/Lib/CommonLib/BilateralFilter.h
@@ -67,8 +67,13 @@ private:
   Pel *tempblock = (Pel*)tempblockSIMD;
   Pel* tempblockFiltered = (Pel*)tempblockFilteredSIMD;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  void (*m_bilateralFilterDiamond5x5)(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum, int bdShift);
+  static void blockBilateralFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum, int bdShift);
+#else
   void (*m_bilateralFilterDiamond5x5)(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum);
   static void blockBilateralFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum);
+#endif
 
 #if JVET_AF0112_BIF_DYNAMIC_SCALING
   int (*m_calcMAD)(int16_t* block, int stride, int width, int height, int whlog2);
@@ -213,12 +218,19 @@ private:
     { 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, },
   };
 #endif
+
+#if JVET_AJ0237_INTERNAL_12BIT
+  int internalBitDepth; // assigned by setInternalBitDepth(); NOTE(review): no initializer is visible in this header - confirm it is set before it is first read
+#endif
 public:
   BilateralFilter();
   ~BilateralFilter();
 
   void create();
   void destroy();
+#if JVET_AJ0237_INTERNAL_12BIT
+  void setInternalBitDepth(int bdDepth) { internalBitDepth = bdDepth; } // stores bdDepth in internalBitDepth as-is; no range check is applied
+#endif
 #if JVET_V0094_BILATERAL_FILTER
   void bilateralFilterRDOdiamond5x5(const ComponentID compID, PelBuf& resiBuf, const CPelBuf& predBuf, PelBuf& recoBuf, int32_t qp, const CPelBuf& recIPredBuf, const ClpRng& clpRng, TransformUnit & currTU, bool useReco, bool doReshape = false, std::vector<Pel>* pLUT = nullptr);
   void bilateralFilterPicRDOperCTU(const ComponentID compID, CodingStructure& cs, PelUnitBuf& src,BIFCabacEst* bifCABACEstimator);
@@ -243,8 +255,13 @@ public:
 
 #if ENABLE_SIMD_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER_ENABLE_SIMD
 #ifdef TARGET_SIMD_X86
+#if JVET_AJ0237_INTERNAL_12BIT
+  template<X86_VEXT vext>
+  static void simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum, int bdShift); // same parameter list as the #else declaration plus a trailing int bdShift, matching m_bilateralFilterDiamond5x5
+#else
   template<X86_VEXT vext>
   static void simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum);
+#endif
 #if JVET_AF0112_BIF_DYNAMIC_SCALING
   template<X86_VEXT vext>
   static int simdCalcMAD(int16_t* block, int stride, int width, int height, int whlog2);
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index 29b7eb3dd..9bebc2f78 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -1419,6 +1419,10 @@ static const int CCCM_MAX_REF_SAMPLES     = 4 * ( 2 * CCCM_WINDOW_SIZE * ( 2 * M
 #else
 static const int CCCM_MAX_REF_SAMPLES     = ( 2 * CCCM_WINDOW_SIZE * ( 2 * MAX_CU_SIZE + CCCM_WINDOW_SIZE ) );
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+static const int CCCM_MATRIX_BITS_HBD     = 32; // NOTE(review): presumably the above-10-bit counterpart of CCCM_MATRIX_BITS - its use is not visible here, confirm
+static const int CCCM_DECIM_BITS_HBD      = 22; // value DECIM_BITS(x) yields when x > 10
+#endif
 #if JVET_AB0174_CCCM_DIV_FREE
 static const int CCCM_MATRIX_BITS         = 22;
 static const int CCCM_DECIM_BITS          = 16;
@@ -1426,7 +1430,9 @@ static const int CCCM_DECIM_BITS          = 16;
 static const int CCCM_MATRIX_BITS         = 28;
 static const int CCCM_DECIM_BITS          = 22;
 #endif
+#if !JVET_AJ0237_INTERNAL_12BIT // the fixed rounding constant is compiled out in the 12-bit build, where the decimal-bit count depends on bit depth (see DECIM_BITS)
 static const int CCCM_DECIM_ROUND         = ( 1 << (CCCM_DECIM_BITS - 1 ) );
+#endif
 #if JVET_AB0143_CCCM_TS
 #if MMLM
 #if JVET_AC0054_GLCCCM
@@ -1531,6 +1537,11 @@ static const int MAX_DELTA_QP   =                                   7;      ///<
 static const int MAX_TESTED_QPs =   ( 1 + 1 + ( MAX_DELTA_QP << 1 ) );      ///< dqp=0 +- max_delta_qp + lossless mode
 
 static const int COM16_C806_TRANS_PREC =                            0;
+
+#if JVET_AJ0237_INTERNAL_12BIT // DECIM_BITS(x): CCCM_DECIM_BITS_HBD when x is above 10, CCCM_DECIM_BITS otherwise
+#define DECIM_BITS(x)                           ( (x) > 10 ? CCCM_DECIM_BITS_HBD : CCCM_DECIM_BITS )
+#endif
+
 #if IF_12TAP
 #define NTAPS_LUMA(x)                           ( (x) == 0 ? 12 : 8 )  // 12-tap filter for index 0. 8-tap fitler for other indices.
 #else
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index 70b847afd..806653b25 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -347,6 +347,9 @@ InterPrediction::InterPrediction()
 #if JVET_AG0276_NLIC
   m_skipDoLic = false;
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+  m_dmvrCostLambda = 1; // neutral scale of 1 at construction; init() recomputes it from bitDepth
+#endif
 }
 
 InterPrediction::~InterPrediction()
@@ -570,10 +573,18 @@ void InterPrediction::destroy()
 #if INTER_LIC || (TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM) || JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC
 #if JVET_Z0153_IBC_EXT_REF
 #if JVET_AJ0172_IBC_ITMP_ALIGN_REF_AREA
+#if JVET_AJ0237_INTERNAL_12BIT
+void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape, const int picWidth, const int picHeight, const int bitDepth ) // 12-bit build appends bitDepth, which the body uses to derive m_dmvrCostLambda
+#else
 void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape, const int picWidth, const int picHeight )
+#endif
+#else
+#if JVET_AJ0237_INTERNAL_12BIT
+void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape, const int picWidth, const int bitDepth ) // variant without picHeight; bitDepth is likewise appended last
 #else
 void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape, const int picWidth )
 #endif
+#endif
 #else
 void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape )
 #endif
@@ -593,6 +604,10 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, cons
   }
 
   m_currChromaFormat = chromaFormatIDC;
+#if JVET_AJ0237_INTERNAL_12BIT
+  m_dmvrCostLambda = 1 << std::max(0, std::min(14, bitDepth) - 10); // 14 is the maximum possible DMVR internal precision value, 10 is the baseline; result is 1 for bitDepth <= 10, 4 at 12, and capped at 16 from 14 upwards
+#endif
+
   if( m_acYuvPred[REF_PIC_LIST_0][COMPONENT_Y] == nullptr ) // check if first is null (in which case, nothing initialised yet)
   {
 #if JVET_AF0057
@@ -15486,7 +15501,11 @@ Distortion InterPrediction::deriveBcwBlending( PredictionUnit& pu, bool bUniDir[
   blendModel.params[1] = (int)((bcwModel.params[1] + offsetA) >> shiftA);
   blendModel.params[2] = (int)((bcwModel.params[2] + offsetA) >> shiftA);
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  blendModel.shift = bcwModel.decimBits - shiftA - bcwBlendingLog2WeightBase; // takes the decimal-bit count from the model itself instead of the compile-time CCCM_DECIM_BITS
+#else
   blendModel.shift = CCCM_DECIM_BITS - shiftA - bcwBlendingLog2WeightBase;
+#endif
   blendModel.offset = blendModel.shift ? (1 << (blendModel.shift - 1)) : 0;
   if (blendModel.shift < 0)
   {
@@ -27643,7 +27662,11 @@ bool InterPrediction::processBDMVRPU2Dir(PredictionUnit& pu, bool subPURefine[2]
 #else
   Distortion initCost = xBDMVRGetMatchingError(pu, mvInitial_PU, bUseMR, false);
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+  if (initCost < lumaArea * m_dmvrCostLambda) // early-out threshold is the luma area scaled by m_dmvrCostLambda (derived from bit depth in init())
+#else
   if (initCost < lumaArea)
+#endif
   {
     subPURefine[0] = false;
     subPURefine[1] = false;
@@ -27661,7 +27684,11 @@ bool InterPrediction::processBDMVRPU2Dir(PredictionUnit& pu, bool subPURefine[2]
 #else
   minCost = xBDMVRMvOneTemplateHPelSquareSearch<1>(mvFinal, initCost, pu, mvInitial_PU, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, bUseMR, false);
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+  subPURefine[0] = minCost >= lumaArea * m_dmvrCostLambda; // flag is set only when the cost reaches the m_dmvrCostLambda-scaled luma area
+#else
   subPURefine[0] = minCost >= lumaArea;
+#endif
   finalMvDir[0] = mvFinal[0];
 #if JVET_AA0093_REFINED_MOTION_FOR_ARMC
   }
@@ -27678,7 +27705,11 @@ bool InterPrediction::processBDMVRPU2Dir(PredictionUnit& pu, bool subPURefine[2]
 #else
   minCost = xBDMVRMvOneTemplateHPelSquareSearch<2>(mvFinal, initCost, pu, mvInitial_PU, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, bUseMR, false);
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+  subPURefine[1] = minCost >= lumaArea * m_dmvrCostLambda; // same scaled-area test as for subPURefine[0], applied to the second direction's cost
+#else
   subPURefine[1] = minCost >= lumaArea;
+#endif
   finalMvDir[1] = mvFinal[1];
 #if JVET_AA0093_REFINED_MOTION_FOR_ARMC
   }
@@ -27732,7 +27763,11 @@ void InterPrediction::processBDMVRSubPU(PredictionUnit& pu, bool subPURefine)
   Mv         mvFinal[2] = { pu.mv[0], pu.mv[1] };
   Mv         mvOffset;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  const Distortion earlyTerminateTh = dx * dy * m_dmvrCostLambda; // dx * dy threshold scaled by m_dmvrCostLambda
+#else
   const Distortion earlyTerminateTh = dx * dy;
+#endif
   const int adaptiveSearchRangeHor = (dx >> 1) < BDMVR_INTME_RANGE ? (dx >> 1) : BDMVR_INTME_RANGE;
   const int adaptiveSearchRangeVer = (dy >> 1) < BDMVR_INTME_RANGE ? (dy >> 1) : BDMVR_INTME_RANGE;
   const bool adaptRange = (adaptiveSearchRangeHor != BDMVR_INTME_RANGE || adaptiveSearchRangeVer != BDMVR_INTME_RANGE);
@@ -28154,7 +28189,11 @@ void InterPrediction::bmAdaptiveAffineIntSearch(const PredictionUnit &pu, Mv(&mv
   }
   else
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    bmCostShift = bitDepth > 8 ? 2 : 0; // constant shift of 2 for any bitDepth above 8; equals the #else branch at bitDepth 10 but no longer grows beyond it
+#else
     bmCostShift = bitDepth > 8 ? bitDepth - 8 : 0;
+#endif
   }
 #else
   bmCostShift = 0;
@@ -28585,7 +28624,11 @@ void InterPrediction::bmAffineIntSearch(const PredictionUnit &pu, Mv(&mvOffset)[
   }
   else
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    bmCostShift = bitDepth > 8 ? 2 : 0; // constant shift of 2 for any bitDepth above 8; equals the #else branch at bitDepth 10 but no longer grows beyond it
+#else
     bmCostShift = bitDepth > 8 ? bitDepth - 8 : 0;
+#endif
   }
 #else
   bmCostShift = 0;
@@ -28802,7 +28845,11 @@ void InterPrediction::xInitBilateralMatching(const int width, const int height,
   }
   else
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    m_bmCostShift = bitDepth > 8 ? 2 : 0; // constant shift of 2 for any bitDepth above 8; equals the #else branch at bitDepth 10 but no longer grows beyond it
+#else
     m_bmCostShift = bitDepth > 8 ? bitDepth - 8 : 0;
+#endif
   }
 #else
   m_bmCostShift = 0;
@@ -29395,7 +29442,11 @@ bool InterPrediction::processBDMVR4Affine(PredictionUnit& pu
         minCost = xGetBilateralMatchingErrorAffine(pu, pu.mvAffi, true);
       }
       const int  lumaArea = pu.lumaSize().area();
+#if JVET_AJ0237_INTERNAL_12BIT
+      const bool isTooSmallDist = minCost < lumaArea * m_dmvrCostLambda; // cost counts as too small below the m_dmvrCostLambda-scaled luma area
+#else
       const bool isTooSmallDist = minCost < lumaArea;
+#endif
       if (!isTooSmallDist)
       {
         minCost = xBDMVRMv6ParameterSearchAffine(minCost, pu);
@@ -30555,7 +30606,11 @@ bool InterPrediction::processBDMVR4AdaptiveAffine(PredictionUnit& pu, Mv(&mvAffi
 #else
   bmAdaptiveAffineIntSearch(pu, mvFinalPUL0, minCostL0, mvFinalPUL1, minCostL1);
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+  const int lumaArea = pu.lumaSize().area() * m_dmvrCostLambda; // here the scale is folded into lumaArea itself, so the comparisons that follow use it unmodified
+#else
   const int lumaArea = pu.lumaSize().area();
+#endif
 
   //  sub-pel search for L0
   if (minCostL0 > lumaArea)
@@ -30680,7 +30735,11 @@ bool InterPrediction::processBDMVR4AdaptiveAffine(PredictionUnit& pu, Mv(&mvAffi
           {
             minCostL0 = xGetBilateralMatchingErrorAffine(pu, pu.mvAffi, true);
           }
+#if JVET_AJ0237_INTERNAL_12BIT
+          const int  lumaArea = pu.lumaSize().area() * m_dmvrCostLambda; // pre-scaled by m_dmvrCostLambda; compared against minCostL0 just below
+#else
           const int  lumaArea = pu.lumaSize().area();
+#endif
           const bool isTooSmallDist = minCostL0 < lumaArea;
           if (!isTooSmallDist)
           {
@@ -30761,7 +30820,11 @@ bool InterPrediction::processBDMVR4AdaptiveAffine(PredictionUnit& pu, Mv(&mvAffi
           {
             minCostL1 = xGetBilateralMatchingErrorAffine(pu, pu.mvAffi, true);
           }
+#if JVET_AJ0237_INTERNAL_12BIT
+          const int  lumaArea = pu.lumaSize().area() * m_dmvrCostLambda; // pre-scaled by m_dmvrCostLambda; compared against minCostL1 just below
+#else
           const int  lumaArea = pu.lumaSize().area();
+#endif
           const bool isTooSmallDist = minCostL1 < lumaArea;
           if (!isTooSmallDist)
           {
@@ -30848,7 +30911,11 @@ bool InterPrediction::processBDMVR4AdaptiveAffine(PredictionUnit& pu, Mv(&mvAffi
           {
             minCost = xGetBilateralMatchingErrorAffine(pu, pu.mvAffi, true);
           }
+#if JVET_AJ0237_INTERNAL_12BIT
+          const int  lumaArea = pu.lumaSize().area() * m_dmvrCostLambda; // pre-scaled by m_dmvrCostLambda; compared against minCost just below
+#else
           const int  lumaArea = pu.lumaSize().area();
+#endif
           const bool isTooSmallDist = minCost < lumaArea;
           if (!isTooSmallDist)
           {
@@ -30934,7 +31001,11 @@ bool InterPrediction::processBDMVR(PredictionUnit& pu)
 #else
       minCost = xBDMVRGetMatchingError(pu, mvInitial_PU, bUseMR, false);
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+      if (minCost >= lumaArea * m_dmvrCostLambda) // the <1> half-pel square search below runs only when the cost reaches the scaled luma area
+#else
       if (minCost >= lumaArea)
+#endif
       {
 #if JVET_AI0185_ADAPTIVE_COST_IN_MERGE_MODE
         minCost = xBDMVRMvOneTemplateHPelSquareSearch<1>(mvFinal_PU, minCost, pu, mvInitial_PU, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, bUseMR, useHadmard);
@@ -30950,7 +31021,11 @@ bool InterPrediction::processBDMVR(PredictionUnit& pu)
 #else
       minCost = xBDMVRGetMatchingError(pu, mvInitial_PU, bUseMR, false);
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+      if (minCost >= lumaArea * m_dmvrCostLambda) // the <2> half-pel square search below runs only when the cost reaches the scaled luma area
+#else
       if (minCost >= lumaArea)
+#endif
       {
 #if JVET_AI0185_ADAPTIVE_COST_IN_MERGE_MODE
         minCost = xBDMVRMvOneTemplateHPelSquareSearch<2>(mvFinal_PU, minCost, pu, mvInitial_PU, 2, MV_FRACTIONAL_BITS_INTERNAL - 1, bUseMR, useHadmard);
@@ -31106,7 +31181,11 @@ bool InterPrediction::processBDMVR(PredictionUnit& pu)
     minCost = xBDMVRMvSquareSearch( mvFinal_PU, minCost, pu, mvInitial_PU, 2, MV_FRACTIONAL_BITS_INTERNAL - 1,     bUseMR, false );
 #endif
 
+#if JVET_AJ0237_INTERNAL_12BIT
+    subPURefine = minCost >= (lumaArea * m_dmvrCostLambda); // flag is set only when the cost reaches the m_dmvrCostLambda-scaled luma area
+#else
     subPURefine = minCost >= lumaArea;
+#endif
 #if JVET_AG0067_DMVR_EXTENSIONS
     pu.mv[REF_PIC_LIST_0] = (mvFinal_PU[0] - puOrgMv[0]).scaleMv(scale0) + puOrgMv[0];
     pu.mv[REF_PIC_LIST_1] = (mvFinal_PU[1] - puOrgMv[1]).scaleMv(scale1) + puOrgMv[1];
@@ -31188,7 +31267,11 @@ bool InterPrediction::processBDMVR(PredictionUnit& pu)
   Mv         mvFinal[2] = { pu.mv[0], pu.mv[1] };
   Mv         mvOffset;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  const Distortion earlyTerminateTh = dx * dy * m_dmvrCostLambda; // dx * dy threshold scaled by m_dmvrCostLambda
+#else
   const Distortion earlyTerminateTh = dx * dy;
+#endif
   const int adaptiveSearchRangeHor = (dx >> 1) < BDMVR_INTME_RANGE ? (dx >> 1) : BDMVR_INTME_RANGE;
   const int adaptiveSearchRangeVer = (dy >> 1) < BDMVR_INTME_RANGE ? (dy >> 1) : BDMVR_INTME_RANGE;
   const bool adaptRange = (adaptiveSearchRangeHor != BDMVR_INTME_RANGE || adaptiveSearchRangeVer != BDMVR_INTME_RANGE);
@@ -32077,7 +32160,11 @@ Distortion InterPrediction::xBDMVRMvIntPelFullSearch(Mv&mvOffset, Distortion cur
   }
   else
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    int32_t precisionAdj = cDistParam.bitDepth > 8 ? 2 : 0; // fixed right-shift of 2 above 8 bits, where the #else branch shifts by bitDepth - 8
+#else
     int32_t precisionAdj = cDistParam.bitDepth > 8 ? cDistParam.bitDepth - 8 : 0;
+#endif
     curBestCost = cDistParam.distFunc(cDistParam) >> precisionAdj;
   }
 #else
@@ -32124,7 +32211,11 @@ Distortion InterPrediction::xBDMVRMvIntPelFullSearch(Mv&mvOffset, Distortion cur
       }
       else
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        int32_t precisionAdj = cDistParam.bitDepth > 8 ? 2 : 0; // fixed right-shift of 2 above 8 bits, where the #else branch shifts by bitDepth - 8
+#else
         int32_t precisionAdj = cDistParam.bitDepth > 8 ? cDistParam.bitDepth - 8 : 0;
+#endif
         m_sadEnlargeArrayBilMrg[searchOffsetIdx] = cDistParam.distFunc(cDistParam) >> precisionAdj;
       }
 #else
@@ -33093,7 +33184,11 @@ Distortion InterPrediction::xBDMVRGetMatchingError(const PredictionUnit& pu, con
   }
   else
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    int32_t precisionAdj = cDistParam.bitDepth > 8 ? 2 : 0; // fixed right-shift of 2 above 8 bits, where the #else branch shifts by bitDepth - 8
+#else
     int32_t precisionAdj = cDistParam.bitDepth > 8 ? cDistParam.bitDepth - 8 : 0;
+#endif
     return cDistParam.distFunc(cDistParam) >> precisionAdj;
   }
 #else
@@ -33259,7 +33354,11 @@ Distortion InterPrediction::xBDMVRGetMatchingError(const PredictionUnit& pu, con
   }
   else
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    int32_t precisionAdj = cDistParam.bitDepth > 8 ? 2 : 0; // fixed right-shift of 2 above 8 bits, where the #else branch shifts by bitDepth - 8
+#else
     int32_t precisionAdj = cDistParam.bitDepth > 8 ? cDistParam.bitDepth - 8 : 0;
+#endif
     return cDistParam.distFunc( cDistParam ) >> precisionAdj;
   }
 #else
@@ -41495,6 +41594,10 @@ std::vector<Mv> InterPrediction::deriveMVDFromMVSDIdxAffineSI(PredictionUnit& pu
             {
               const ComponentID ch = ComponentID(chan);
 
+#if JVET_AJ0237_INTERNAL_12BIT
+              const int maxValue = (1 << slice.getSPS()->getBitDepth(toChannelType(ch))) - 1; // 2^bitDepth - 1 for this component's channel type; used as the upper clip bound in the loop below
+#endif
+
               Pel *piTxtBuff =
                 pPadBuffYUV->getBuf(blkUnitAreaBuff)
                   .bufs[ch]
@@ -41512,7 +41615,11 @@ std::vector<Mv> InterPrediction::deriveMVDFromMVSDIdxAffineSI(PredictionUnit& pu
                   piTxtBuff[idx] += CompDiff[chan];
 
                   piTxtBuff[idx] = (piTxtBuff[idx] < 0) ? 0 : piTxtBuff[idx];
+#if JVET_AJ0237_INTERNAL_12BIT
+                  piTxtBuff[idx] = (piTxtBuff[idx] > maxValue) ? maxValue : piTxtBuff[idx]; // upper clip now follows the SPS bit depth instead of the hard-coded 10-bit maximum 1023
+#else
                   piTxtBuff[idx] = (piTxtBuff[idx] > 1023) ? 1023 : piTxtBuff[idx];
+#endif
                 }
                 piTxtBuff += iStrideBuff;
                 piTmpBuff += iStrideTmp;
diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h
index 9e7ba7f3e..f1d9664ef 100644
--- a/source/Lib/CommonLib/InterPrediction.h
+++ b/source/Lib/CommonLib/InterPrediction.h
@@ -310,6 +310,10 @@ protected:
   PelStorage           m_obmcPelStorage;
 #endif
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  uint8_t              m_dmvrCostLambda; // multiplier for the BDMVR area thresholds; init() sets it to a power of two between 1 and 16
+#endif
+
   ChromaFormat         m_currChromaFormat;
 
   ComponentID          m_maxCompIDToPred;      ///< tells the predictor to only process the components up to (inklusive) this one - useful to skip chroma components during RD-search
@@ -687,10 +691,18 @@ public:
 #if INTER_LIC || (TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM) || JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING || JVET_Z0061_TM_OBMC
 #if JVET_Z0153_IBC_EXT_REF
 #if JVET_AJ0172_IBC_ITMP_ALIGN_REF_AREA
+#if JVET_AJ0237_INTERNAL_12BIT
+  void    init                (RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape, const int picWidth, const int picHeight, const int bitDepth); // 12-bit build appends bitDepth after picHeight
+#else
   void    init                (RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape, const int picWidth, const int picHeight);
+#endif
+#else
+#if JVET_AJ0237_INTERNAL_12BIT
+  void    init                (RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape, const int picWidth, const int bitDepth); // variant without picHeight; bitDepth is appended after picWidth
 #else
   void    init                (RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape, const int picWidth);
 #endif
+#endif
 #else
   void    init                (RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, Reshape* reshape);
 #endif
diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp
index 09be2f148..914cceb7d 100644
--- a/source/Lib/CommonLib/InterpolationFilter.cpp
+++ b/source/Lib/CommonLib/InterpolationFilter.cpp
@@ -1596,9 +1596,17 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int
     if (biMCForDMVR)
     {
       int shift10BitOut, offset;
+#if JVET_AJ0237_INTERNAL_12BIT
+      if ((clpRng.bd - IF_INTERNAL_PREC_BILINEAR(clpRng.bd)) > 0) // the macro is min(IF_INTERNAL_PREC, bd), so this is positive only when clpRng.bd exceeds IF_INTERNAL_PREC
+#else
       if ((clpRng.bd - IF_INTERNAL_PREC_BILINEAR) > 0)
+#endif
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        shift10BitOut = (clpRng.bd - IF_INTERNAL_PREC_BILINEAR(clpRng.bd)); // in this branch equals clpRng.bd - IF_INTERNAL_PREC
+#else
         shift10BitOut = (clpRng.bd - IF_INTERNAL_PREC_BILINEAR);
+#endif
         offset = (1 << (shift10BitOut - 1));
         for (row = 0; row < height; row++)
         {
@@ -1612,7 +1620,11 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int
       }
       else
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        shift10BitOut = (IF_INTERNAL_PREC_BILINEAR(clpRng.bd) - clpRng.bd); // this else branch implies bd <= IF_INTERNAL_PREC, where the macro returns bd itself, so the shift is 0
+#else
         shift10BitOut = (IF_INTERNAL_PREC_BILINEAR - clpRng.bd);
+#endif
         for (row = 0; row < height; row++)
         {
           for (col = 0; col < width; col++)
@@ -1649,9 +1661,17 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int
     if (biMCForDMVR)
     {
       int shift10BitOut, offset;
+#if JVET_AJ0237_INTERNAL_12BIT
+      if ((clpRng.bd - IF_INTERNAL_PREC_BILINEAR(clpRng.bd)) > 0) // the macro is min(IF_INTERNAL_PREC, bd), so this is positive only when clpRng.bd exceeds IF_INTERNAL_PREC
+#else
       if ((clpRng.bd - IF_INTERNAL_PREC_BILINEAR) > 0)
+#endif
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        shift10BitOut = (clpRng.bd - IF_INTERNAL_PREC_BILINEAR(clpRng.bd)); // in this branch equals clpRng.bd - IF_INTERNAL_PREC
+#else
         shift10BitOut = (clpRng.bd - IF_INTERNAL_PREC_BILINEAR);
+#endif
         offset = (1 << (shift10BitOut - 1));
         for (row = 0; row < height; row++)
         {
@@ -1665,7 +1685,11 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int
       }
       else
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        shift10BitOut = (IF_INTERNAL_PREC_BILINEAR(clpRng.bd) - clpRng.bd); // this else branch implies bd <= IF_INTERNAL_PREC, where the macro returns bd itself, so the shift is 0
+#else
         shift10BitOut = (IF_INTERNAL_PREC_BILINEAR - clpRng.bd);
+#endif
         for (row = 0; row < height; row++)
         {
           for (col = 0; col < width; col++)
@@ -1865,7 +1889,11 @@ void InterpolationFilter::filter(const ClpRng& clpRng, Pel const *src, int srcSt
   {
     if( isFirst )
     {
+#if JVET_AJ0237_INTERNAL_12BIT
+      shift = IF_FILTER_PREC_BILINEAR - (IF_INTERNAL_PREC_BILINEAR(clpRng.bd) - clpRng.bd); // for bd <= IF_INTERNAL_PREC the bracket is 0 and shift equals IF_FILTER_PREC_BILINEAR; above that the bracket is negative and shift grows
+#else
       shift = IF_FILTER_PREC_BILINEAR - (IF_INTERNAL_PREC_BILINEAR - clpRng.bd);
+#endif
       offset = 1 << (shift - 1);
     }
     else
diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h
index 827258848..4b494a388 100644
--- a/source/Lib/CommonLib/InterpolationFilter.h
+++ b/source/Lib/CommonLib/InterpolationFilter.h
@@ -52,7 +52,11 @@
 #define IF_FILTER_PREC    6 ///< Log2 of sum of filter taps
 #endif
 #define IF_INTERNAL_OFFS (1<<(IF_INTERNAL_PREC-1)) ///< Offset used internally
+#if JVET_AJ0237_INTERNAL_12BIT // bilinear internal precision becomes a function of bit depth: bd itself, capped at IF_INTERNAL_PREC, instead of the fixed 10
+#define IF_INTERNAL_PREC_BILINEAR(bd) std::min(IF_INTERNAL_PREC, int(bd))
+#else
 #define IF_INTERNAL_PREC_BILINEAR 10 ///< Number of bits for internal precision
+#endif
 #define IF_FILTER_PREC_BILINEAR   4  ///< Bilinear filter coeff precision so that intermediate value will not exceed 16 bit for SIMD - bit exact
 #if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
 #define IF_INTERNAL_FRAC_BITS(bd) std::max(2, IF_INTERNAL_PREC - int(bd))
diff --git a/source/Lib/CommonLib/IntraPrediction.cpp b/source/Lib/CommonLib/IntraPrediction.cpp
index c18de13d3..c6631009b 100644
--- a/source/Lib/CommonLib/IntraPrediction.cpp
+++ b/source/Lib/CommonLib/IntraPrediction.cpp
@@ -16521,7 +16521,11 @@ void IntraPrediction::reorderPLT(CodingStructure& cs, Partitioner& partitioner,
 }
 
 #if MMLM && LMS_LINEAR_MODEL
+#if JVET_AJ0237_INTERNAL_12BIT
+int IntraPrediction::xCalcLMParametersGeneralized(int64_t x, int64_t y, int64_t xx, int64_t xy, int count, int bitDepth, int& a, int& b, int& iShift) // the four sums are widened from int to int64_t; the outputs a, b and iShift stay int
+#else
 int IntraPrediction::xCalcLMParametersGeneralized(int x, int y, int xx, int xy, int count, int bitDepth, int &a, int &b, int &iShift)
+#endif
 {
 
   uint32_t uiInternalBitDepth = bitDepth;
@@ -16537,23 +16541,42 @@ int IntraPrediction::xCalcLMParametersGeneralized(int x, int y, int xx, int xy,
 
   int iCountShift = g_aucLog2[count];
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  int iTempShift = uiInternalBitDepth + iCountShift - ((uiInternalBitDepth > 10) ? 31 : 15); // above 10 bits the subtracted constant is 31 instead of 15, so the down-shift below starts 16 bits later
+#else
   int iTempShift = uiInternalBitDepth + iCountShift - 15;
+#endif
 
   if (iTempShift > 0)
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    x = (x + ((int64_t)1 << (iTempShift - 1))) >> iTempShift; // rounding right-shift by iTempShift; the rounding term is built in 64 bits to match the widened operand
+    y = (y + ((int64_t)1 << (iTempShift - 1))) >> iTempShift; // same rounding shift for y
+    xx = (xx + ((int64_t)1 << (iTempShift - 1))) >> iTempShift; // same rounding shift for xx
+    xy = (xy + ((int64_t)1 << (iTempShift - 1))) >> iTempShift; // same rounding shift for xy
+#else
     x = (x + (1 << (iTempShift - 1))) >> iTempShift;
     y = (y + (1 << (iTempShift - 1))) >> iTempShift;
     xx = (xx + (1 << (iTempShift - 1))) >> iTempShift;
     xy = (xy + (1 << (iTempShift - 1))) >> iTempShift;
+#endif
     iCountShift -= iTempShift;
   }
   /////// xCalcLMParameters
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  int64_t avgX = x >> iCountShift; // x divided by 2^iCountShift (quotient part)
+  int64_t avgY = y >> iCountShift; // y divided by 2^iCountShift (quotient part)
+
+  int64_t RErrX = x & ((1 << iCountShift) - 1); // low iCountShift bits of x, i.e. the remainder; NOTE(review): the mask is still built as a 32-bit int - confirm iCountShift stays below 31
+  int64_t RErrY = y & ((1 << iCountShift) - 1); // low iCountShift bits of y, i.e. the remainder
+#else
   int avgX = x >> iCountShift;
   int avgY = y >> iCountShift;
 
   int RErrX = x & ((1 << iCountShift) - 1);
   int RErrY = y & ((1 << iCountShift) - 1);
+#endif
 
   int iB = 7;
   iShift = 13 - iB;
@@ -16566,19 +16589,33 @@ int IntraPrediction::xCalcLMParametersGeneralized(int x, int y, int xx, int xy,
   }
   else
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    int64_t a1 = xy - (avgX * avgY << iCountShift) - avgX * RErrY - avgY * RErrX; // same expression as the #else branch, evaluated in 64 bits because every operand is now int64_t
+    int64_t a2 = xx - (avgX * avgX << iCountShift) - 2 * avgX * RErrX; // same expression as the #else branch, evaluated in 64 bits
+#else
     int a1 = xy - (avgX * avgY << iCountShift) - avgX * RErrY - avgY * RErrX;
     int a2 = xx - (avgX * avgX << iCountShift) - 2 * avgX * RErrX;
+#endif
     const int iShiftA1 = uiInternalBitDepth - 2;
     const int iShiftA2 = 5;
     const int iAccuracyShift = uiInternalBitDepth + 4;
 
     int iScaleShiftA2 = 0;
     int iScaleShiftA1 = 0;
+
+#if JVET_AJ0237_INTERNAL_12BIT
+    int64_t a1s = a1; // working copy of a1, kept in 64 bits
+    int64_t a2s = a2; // working copy of a2, kept in 64 bits
+
+    iScaleShiftA1 = a1 == 0 ? 0 : floorLog2Uint64(abs(a1)) - iShiftA1; // 64-bit log2 helper replaces floorLog2; NOTE(review): abs is unqualified on an int64_t - confirm the 64-bit overload (not int abs(int)) is the one selected
+    iScaleShiftA2 = a2 == 0 ? 0 : floorLog2Uint64(abs(a2)) - iShiftA2; // same for a2; a zero input maps to a shift of 0 without calling the helper
+#else
     int a1s = a1;
     int a2s = a2;
 
     iScaleShiftA1 = a1 == 0 ? 0 : floorLog2(abs(a1)) - iShiftA1;
     iScaleShiftA2 = a2 == 0 ? 0 : floorLog2(abs(a2)) - iShiftA2;
+#endif
 
     if (iScaleShiftA1 < 0)
     {
@@ -16599,7 +16636,11 @@ int IntraPrediction::xCalcLMParametersGeneralized(int x, int y, int xx, int xy,
     if (a2s >= 32)
     {
       uint32_t a2t = m_auShiftLM[a2s - 32];
+#if JVET_AJ0237_INTERNAL_12BIT
+      a = int(a1s * a2t); // product is formed in 64 bits (a1s is int64_t) and then explicitly narrowed to the int output
+#else
       a = a1s * a2t;
+#endif
     }
     else
     {
@@ -16625,8 +16666,11 @@ int IntraPrediction::xCalcLMParametersGeneralized(int x, int y, int xx, int xy,
 
     iShift = (iShift + iB) - n;
     a = a >> n;
-
+#if JVET_AJ0237_INTERNAL_12BIT
+    b = int(avgY - ((a * avgX) >> iShift)); // a * avgX is evaluated in 64 bits (avgX is int64_t); only the final offset is narrowed to int
+#else
     b = avgY - ((a * avgX) >> iShift);
+#endif
   }
   return 0;
 }
@@ -16699,7 +16743,11 @@ int IntraPrediction::xLMSampleClassifiedTraining(int count, int mean, int meanC,
     }
   }
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  int64_t x[2], y[2], xy[2], xx[2]; // per-group accumulators (two groups), widened to int64_t; zeroed in the loop right below
+#else
   int x[2], y[2], xy[2], xx[2];
+#endif
   for (int group = 0; group < 2; group++)
   {
     x[group] = y[group] = xy[group] = xx[group] = 0;
@@ -16851,7 +16899,11 @@ void IntraPrediction::xGetLMParametersLMS(const PredictionUnit &pu, const Compon
   srcColor0 = temp.bufAt(0, 0);
   curChroma0 = getPredictorPtr(compID);
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  int64_t x = 0, y = 0, xx = 0, xy = 0; // accumulators widened from int to int64_t
+#else
   int x = 0, y = 0, xx = 0, xy = 0;
+#endif
   int iCountShift = 0;
   unsigned uiInternalBitDepth = sps.getBitDepth(CHANNEL_TYPE_CHROMA);
 
@@ -17091,25 +17143,42 @@ void IntraPrediction::xGetLMParametersLMS(const PredictionUnit &pu, const Compon
       return;
     }
   }
-
+#if JVET_AJ0237_INTERNAL_12BIT
+  int iTempShift = uiInternalBitDepth + iCountShift - ((uiInternalBitDepth > 10) ? 31 : 15); // above 10 bits the subtracted constant is 31 instead of 15, so the down-shift below starts 16 bits later
+#else
   int iTempShift = uiInternalBitDepth + iCountShift - 15;
+#endif
 
   if (iTempShift > 0)
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    x = (x + ((int64_t)1 << (iTempShift - 1))) >> iTempShift; // rounding right-shift by iTempShift; the rounding term is built in 64 bits to match the widened operand
+    y = (y + ((int64_t)1 << (iTempShift - 1))) >> iTempShift; // same rounding shift for y
+    xx = (xx + ((int64_t)1 << (iTempShift - 1))) >> iTempShift; // same rounding shift for xx
+    xy = (xy + ((int64_t)1 << (iTempShift - 1))) >> iTempShift; // same rounding shift for xy
+#else
     x = (x + (1 << (iTempShift - 1))) >> iTempShift;
     y = (y + (1 << (iTempShift - 1))) >> iTempShift;
     xx = (xx + (1 << (iTempShift - 1))) >> iTempShift;
     xy = (xy + (1 << (iTempShift - 1))) >> iTempShift;
+#endif
     iCountShift -= iTempShift;
   }
 
   /////// xCalcLMParameters
+#if JVET_AJ0237_INTERNAL_12BIT
+  int64_t avgX = x >> iCountShift; // x divided by 2^iCountShift (quotient part)
+  int64_t avgY = y >> iCountShift; // y divided by 2^iCountShift (quotient part)
 
+  int64_t RErrX = x & ((1 << iCountShift) - 1); // low iCountShift bits of x, i.e. the remainder; NOTE(review): the mask is still built as a 32-bit int - confirm iCountShift stays below 31
+  int64_t RErrY = y & ((1 << iCountShift) - 1); // low iCountShift bits of y, i.e. the remainder
+#else
   int avgX = x >> iCountShift;
   int avgY = y >> iCountShift;
 
   int RErrX = x & ((1 << iCountShift) - 1);
   int RErrY = y & ((1 << iCountShift) - 1);
+#endif
 
   int iB = 7;
   int a      = 0;
@@ -17122,20 +17191,32 @@ void IntraPrediction::xGetLMParametersLMS(const PredictionUnit &pu, const Compon
   }
   else
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    int64_t a1 = xy - (avgX * avgY << iCountShift) - avgX * RErrY - avgY * RErrX; // same expression as the #else branch, evaluated in 64 bits because every operand is now int64_t
+    int64_t a2 = xx - (avgX * avgX << iCountShift) - 2 * avgX * RErrX; // same expression as the #else branch, evaluated in 64 bits
+#else
     int a1 = xy - (avgX * avgY << iCountShift) - avgX * RErrY - avgY * RErrX;
     int a2 = xx - (avgX * avgX << iCountShift) - 2 * avgX * RErrX;
+#endif
     const int iShiftA1 = uiInternalBitDepth - 2;
     const int iShiftA2 = 5;
     const int iAccuracyShift = uiInternalBitDepth + 4;
 
     int iScaleShiftA2 = 0;
     int iScaleShiftA1 = 0;
+#if JVET_AJ0237_INTERNAL_12BIT
+    int64_t a1s = a1; // working copy of a1, kept in 64 bits
+    int64_t a2s = a2; // working copy of a2, kept in 64 bits
+
+    iScaleShiftA1 = a1 == 0 ? 0 : floorLog2Uint64(abs(a1)) - iShiftA1; // 64-bit log2 helper replaces floorLog2; NOTE(review): abs is unqualified on an int64_t - confirm the 64-bit overload (not int abs(int)) is the one selected
+    iScaleShiftA2 = a2 == 0 ? 0 : floorLog2Uint64(abs(a2)) - iShiftA2; // same for a2; a zero input maps to a shift of 0 without calling the helper
+#else
     int a1s = a1;
     int a2s = a2;
 
     iScaleShiftA1 = a1 == 0 ? 0 : floorLog2(abs(a1)) - iShiftA1;
     iScaleShiftA2 = a2 == 0 ? 0 : floorLog2(abs(a2)) - iShiftA2;
-
+#endif
     if (iScaleShiftA1 < 0)
     {
       iScaleShiftA1 = 0;
@@ -17155,7 +17236,11 @@ void IntraPrediction::xGetLMParametersLMS(const PredictionUnit &pu, const Compon
     if (a2s >= 32)
     {
       uint32_t a2t = m_auShiftLM[a2s - 32];
+#if JVET_AJ0237_INTERNAL_12BIT
+      a = int(a1s * a2t); // product is formed in 64 bits (a1s is int64_t) and then explicitly narrowed to int
+#else
       a = a1s * a2t;
+#endif
     }
     else
     {
@@ -17181,7 +17266,11 @@ void IntraPrediction::xGetLMParametersLMS(const PredictionUnit &pu, const Compon
 
     iShift = (iShift + iB) - n;
     a = a >> n;
+#if JVET_AJ0237_INTERNAL_12BIT
+    b = int(avgY - ((a * avgX) >> iShift)); // a * avgX is evaluated in 64 bits (avgX is int64_t); only the final offset is narrowed to int
+#else
     b = avgY - ((a * avgX) >> iShift);
+#endif
 
     cclmModel.setFirstModel( a, b, iShift );
   }
@@ -23308,7 +23397,11 @@ void IntraPrediction::xCclmApplyModel(const PredictionUnit &pu, const ComponentI
       samples[0] = refLumaBlk.at(x, y);   // C
       samples[1] = cccmModel.bias();
 
+#if JVET_AJ0237_INTERNAL_12BIT
+      piPred.at(x, y) = ClipPel<Pel>(Pel((cccmModel.params[0] * samples[0] + cccmModel.params[1] * samples[1] + cccmModel.decimRound) >> cccmModel.decimBits), clpRng); // rounding term and shift come from the model's decimRound / decimBits members instead of the CCCM_DECIM_* constants
+#else
       piPred.at(x, y) = ClipPel<Pel>(Pel((cccmModel.params[0] * samples[0] + cccmModel.params[1] * samples[1] + CCCM_DECIM_ROUND) >> CCCM_DECIM_BITS), clpRng);
+#endif
     }
   }
 }
@@ -24292,7 +24385,11 @@ void IntraPrediction::combineCcpAndInter(PredictionUnit& pu, PelBuf& inPredCb, P
 #define DIV_INTR_BITS      (DIV_PREC_BITS - DIV_SLOT_BITS)
 #define DIV_INTR_ROUND     (1 << DIV_INTR_BITS >> 1)
 
+#if JVET_AJ0237_INTERNAL_12BIT
+int64_t xDivide(int64_t num, int64_t denom, int decimBits) // Note: assumes positive denominator; decimBits takes the place of the fixed CCCM_DECIM_BITS in the result scaling
+#else
 int64_t xDivide(int64_t num, int64_t denom) // Note: assumes positive denominator
+#endif
 {
   static const int pow2W[8] = {   214,   153,   113,    86,    67,    53,    43,    35  }; // DIV_PREC_BITS_POW2
   static const int pow2O[8] = {  4822,  5952,  6624,  6792,  6408,  5424,  3792,  1466  }; // DIV_PREC_BITS
@@ -24306,11 +24403,19 @@ int64_t xDivide(int64_t num, int64_t denom) // Note: assumes positive denominato
 
   int scale     = ((pow2W[diffFull] * ((normDiff2 * normDiff2) >> DIV_PREC_BITS)) >> DIV_PREC_BITS_POW2) - (normDiff2 >> 1) + pow2B[diffFull];
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  return ((num << (decimBits - DIV_PREC_BITS)) * scale + round) >> shift; // same formula as the #else branch with decimBits as the scaling exponent; NOTE(review): assumes decimBits >= DIV_PREC_BITS, otherwise the left-shift count is negative - confirm callers
+#else
   return ( (num << (CCCM_DECIM_BITS - DIV_PREC_BITS)) * scale + round) >> shift;
+#endif
 }
 
 #if JVET_AC0053_GAUSSIAN_SOLVER
+#if JVET_AJ0237_INTERNAL_12BIT
+void xGetDivScaleRoundShift(int64_t denom, int decimBits, int& scale, int& round, int& shift) // Note: assumes positive denominator; decimBits is inserted as the second parameter, ahead of the three outputs
+#else
 void xGetDivScaleRoundShift(int64_t denom, int &scale, int &round, int &shift) // Note: assumes positive denominator
+#endif
 {
   static const int pow2W[8] = {   214,   153,   113,    86,    67,    53,    43,    35  }; // DIV_PREC_BITS_POW2
   static const int pow2O[8] = {  4822,  5952,  6624,  6792,  6408,  5424,  3792,  1466  }; // DIV_PREC_BITS
@@ -24323,7 +24428,11 @@ void xGetDivScaleRoundShift(int64_t denom, int &scale, int &round, int &shift) /
   int normDiff2 = normDiff - pow2O[diffFull];
 
   scale         = ((pow2W[diffFull] * ((normDiff2 * normDiff2) >> DIV_PREC_BITS)) >> DIV_PREC_BITS_POW2) - (normDiff2 >> 1) + pow2B[diffFull];
+#if JVET_AJ0237_INTERNAL_12BIT
+  scale       <<= decimBits - DIV_PREC_BITS; // shift by the caller-supplied decimBits instead of the fixed CCCM_DECIM_BITS
+#else
   scale       <<= CCCM_DECIM_BITS - DIV_PREC_BITS;
+#endif // JVET_AJ0237_INTERNAL_12BIT
 }
 #endif
 
@@ -24333,8 +24442,22 @@ void xGetDivScaleRoundShift(int64_t denom, int &scale, int &round, int &shift) /
 #undef DIV_INTR_BITS
 #undef DIV_INTR_ROUND
 
+#if JVET_AJ0237_INTERNAL_12BIT
+int xCccmDivideLowPrec(int64_t num, int64_t denom, int decimBits) // returns the xDivide result shifted down by decimBits, as int
+#else
 int xCccmDivideLowPrec(int64_t num, int64_t denom)
+#endif // JVET_AJ0237_INTERNAL_12BIT
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  if (num < 0) // negative numerator: divide the magnitude, then restore the sign
+  {
+    return -int(xDivide(-num, denom, decimBits) >> decimBits);
+  }
+  else
+  {
+    return int(xDivide(num, denom, decimBits) >> decimBits);
+  }
+#else
   if ( num < 0 )
   {
     return -int(xDivide(-num, denom) >> CCCM_DECIM_BITS);
@@ -24343,13 +24466,21 @@ int xCccmDivideLowPrec(int64_t num, int64_t denom)
   {
     return int(xDivide(num, denom) >> CCCM_DECIM_BITS);
   }
+#endif
 }
 
+#if JVET_AJ0237_INTERNAL_12BIT
+int64_t xCccmDivide(int64_t num, int64_t denom, int decimBits) // Note: assumes positive denominator
+{
+  return xDivide(num, denom, decimBits); // thin wrapper: forwards all arguments to xDivide unchanged
+}
+#else
 int64_t xCccmDivide(int64_t num, int64_t denom) // Note: assumes positive denominator
 {
   return xDivide(num, denom);
 }
 #endif
+#endif
 
 #if JVET_AD0120_LBCCP || JVET_AG0154_DECODER_DERIVED_CCP_FUSION
 #if JVET_AA0057_CCCM || JVET_AG0154_DECODER_DERIVED_CCP_FUSION
@@ -26328,7 +26459,12 @@ int IntraPrediction::xBvgCccmCalcBlkAver(const PredictionUnit& pu) const
     }
   }
 #if JVET_AB0174_CCCM_DIV_FREE
+#if JVET_AJ0237_INTERNAL_12BIT
+  const int bd = pu.cu->slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA); // luma bit depth from the SPS
+  return numSamples == 0 ? (1 << (bd - 1)) : xCccmDivideLowPrec(sumSamples, numSamples, DECIM_BITS(bd)); // no samples: fall back to 2^(bd-1), i.e. the 512 of the #else branch at 10 bit
+#else
   return numSamples == 0 ? 512 : xCccmDivideLowPrec(sumSamples, numSamples);
+#endif // JVET_AJ0237_INTERNAL_12BIT
 #else
   return numSamples == 0 ? 512 : ( sumSamples + numSamples/2) / numSamples;
 #endif
@@ -26856,7 +26996,12 @@ int IntraPrediction::xCccmCalcRefAver(const PredictionUnit& pu
 #endif
 
 #if JVET_AB0174_CCCM_DIV_FREE
+#if JVET_AJ0237_INTERNAL_12BIT
+  const int bd = pu.cu->slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA); // luma bit depth from the SPS
+  return numSamples == 0 ? (1 << (bd - 1)) : xCccmDivideLowPrec(sumSamples, numSamples, DECIM_BITS(bd)); // no samples: fall back to 2^(bd-1), i.e. the 512 of the #else branch at 10 bit
+#else
   return numSamples == 0 ? 512 : xCccmDivideLowPrec(sumSamples, numSamples);
+#endif // JVET_AJ0237_INTERNAL_12BIT
 #else
   return numSamples == 0 ? 512 : ( sumSamples + numSamples/2) / numSamples;
 #endif
@@ -27787,7 +27936,11 @@ int IntraPrediction::xCflmCalcRefAver(const PredictionUnit& pu, const CompArea&
   }
 
 #if JVET_AD0184_REMOVAL_OF_DIVISION_OPERATIONS
+#if JVET_AJ0237_INTERNAL_12BIT
+  return numSamples == 0 ? (1 << (pu.cu->slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) - 1)) : PU::getMeanValue(sumSamples + (numSamples >> 1), numSamples); // no samples: use 2^(bitDepth-1), which equals the 512 of the #else branch at 10 bit
+#else
   return numSamples == 0 ? 512 : PU::getMeanValue( sumSamples + (numSamples >> 1), numSamples);
+#endif // JVET_AJ0237_INTERNAL_12BIT
 #else
   return numSamples == 0 ? 512 : (sumSamples + numSamples / 2) / numSamples;
 #endif
@@ -27797,7 +27950,11 @@ int IntraPrediction::xCflmCalcRefAver(const PredictionUnit& pu, const CompArea&
 #if JVET_AA0057_CCCM || JVET_AB0092_GLM_WITH_LUMA || JVET_AC0119_LM_CHROMA_FUSION || JVET_AG0058_EIP || JVET_AG0154_DECODER_DERIVED_CCP_FUSION
     
 #if JVET_AC0053_GAUSSIAN_SOLVER
+#if JVET_AJ0237_INTERNAL_12BIT
+void CccmCovariance::gaussBacksubstitution( TCccmCoeff* x, int numEq, int col, int round, int bits) // round/bits are passed straight to FIXED_MULT
+#else
 void CccmCovariance::gaussBacksubstitution( TCccmCoeff* x, int numEq, int col )
+#endif // JVET_AJ0237_INTERNAL_12BIT
 {
   x[numEq-1] = C[numEq-1][col];
 
@@ -27807,7 +27964,11 @@ void CccmCovariance::gaussBacksubstitution( TCccmCoeff* x, int numEq, int col )
 
     for( int j = i+1; j < numEq; j++ )
     {
+#if JVET_AJ0237_INTERNAL_12BIT
+      x[i] -= FIXED_MULT(C[i][j], x[j], round, bits); // fixed-point product using the caller-supplied rounding offset and shift
+#else
       x[i] -= FIXED_MULT(C[i][j], x[j]);
+#endif // JVET_AJ0237_INTERNAL_12BIT
     }
   }
 }
@@ -27825,6 +27986,11 @@ void CccmCovariance::gaussElimination( TCccmCoeff A[CCCM_NUM_PARAMS_MAX][CCCM_NU
 #else
   int reg = 2 << (bd - 8);
 #endif
+
+#if JVET_AJ0237_INTERNAL_12BIT
+  const int decimBits = DECIM_BITS(bd); // shift amount derived from the bit depth bd
+  const int decimRound = (1 << (decimBits - 1)); // half of 1 << decimBits; used as the rounding offset in FIXED_MULT
+#endif // JVET_AJ0237_INTERNAL_12BIT
   
   // Create an [M][M+2] matrix system (could have been done already when calculating auto/cross-correlations)
   for( int i = 0; i < numEq; i++ )
@@ -27847,7 +28013,11 @@ void CccmCovariance::gaussElimination( TCccmCoeff A[CCCM_NUM_PARAMS_MAX][CCCM_NU
 #if JVET_AB0174_CCCM_DIV_FREE
     int scale, round, shift;
     
+#if JVET_AJ0237_INTERNAL_12BIT
+    xGetDivScaleRoundShift(diag, decimBits, scale, round, shift); // fills scale/round/shift for denominator diag, using decimBits
+#else
     xGetDivScaleRoundShift(diag, scale, round, shift);
+#endif // JVET_AJ0237_INTERNAL_12BIT
 #endif
 
     for( int j = i+1; j < numEq+numFilters; j++ )
@@ -27867,7 +28037,11 @@ void CccmCovariance::gaussElimination( TCccmCoeff A[CCCM_NUM_PARAMS_MAX][CCCM_NU
       // On row j all elements with k < i+1 are now zero (not zeroing those here as backsubstitution does not need them)
       for( int k = i + 1; k < numEq+numFilters; k++ )
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[k] -= FIXED_MULT(scale, src[k], decimRound, decimBits); // (scale * src[k] + decimRound) >> decimBits
+#else
          dst[k] -= FIXED_MULT(scale, src[k]);
+#endif // JVET_AJ0237_INTERNAL_12BIT
       }
     }
   }
@@ -27875,12 +28049,21 @@ void CccmCovariance::gaussElimination( TCccmCoeff A[CCCM_NUM_PARAMS_MAX][CCCM_NU
   // Solve with backsubstitution
   if ( numFilters == 2 )
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    gaussBacksubstitution(x0, numEq, colChr0, decimRound, decimBits); // pass the rounding offset and shift computed from bd above
+    gaussBacksubstitution(x1, numEq, colChr1, decimRound, decimBits);
+#else
     gaussBacksubstitution(x0, numEq, colChr0);
     gaussBacksubstitution(x1, numEq, colChr1);
+#endif // JVET_AJ0237_INTERNAL_12BIT
   }
   else
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    gaussBacksubstitution(x0, numEq, colChr0, decimRound, decimBits); // single-filter case: only x0 is solved
+#else
     gaussBacksubstitution(x0, numEq, colChr0);
+#endif // JVET_AJ0237_INTERNAL_12BIT
   }
 }
 
@@ -28055,7 +28238,11 @@ void CccmCovariance::solve1( const Pel A[CCCM_NUM_PARAMS_MAX][CCCM_REF_SAMPLES_M
 #endif
 
   // Scale the matrix and vector to selected dynamic range
+#if JVET_AJ0237_INTERNAL_12BIT
+  int matrixShift = ((model.bd > 10) ? CCCM_MATRIX_BITS_HBD : 28) - 2 * model.bd - ceilLog2(sampleNum); // base is CCCM_MATRIX_BITS_HBD instead of 28 when bd > 10; identical to the #else branch otherwise
+#else
   int matrixShift = 28 - 2 * model.bd - ceilLog2( sampleNum );
+#endif // JVET_AJ0237_INTERNAL_12BIT
 
   if( matrixShift > 0 )
   {
@@ -28104,8 +28291,12 @@ void CccmCovariance::solve1( const Pel A[CCCM_NUM_PARAMS_MAX][CCCM_REF_SAMPLES_M
 
 #if JVET_AB0174_CCCM_DIV_FREE
   // Add the chroma offset to bias term (after shifting up by CCCM_DECIM_BITS and down by cccmModelCb.bd - 1)
+#if JVET_AJ0237_INTERNAL_12BIT
+  model.params[numParams - 1] += chromaOffset << (model.decimBits - (model.bd - 1)); // same bias update as the #else branch, using the model's decimBits instead of CCCM_DECIM_BITS
+#else
   model.params[numParams - 1] += chromaOffset << (CCCM_DECIM_BITS - (model.bd - 1));
 #endif
+#endif // JVET_AB0174_CCCM_DIV_FREE
 }
 
 #if JVET_AB0174_CCCM_DIV_FREE
@@ -28174,7 +28365,11 @@ void CccmCovariance::solve2( const Pel A[CCCM_NUM_PARAMS_MAX][CCCM_REF_SAMPLES_M
   // Scale the matrix and vector to selected dynamic range
   CHECK( modelCb.bd != modelCr.bd, "Bitdepth of Cb and Cr is different" );
 #if JVET_AE0059_INTER_CCCM
+#if JVET_AJ0237_INTERNAL_12BIT
+  int matrixShift = ((modelCb.bd > 10) ? CCCM_MATRIX_BITS_HBD : (interCccmMode ? 28 : CCCM_MATRIX_BITS)) - 2 * modelCb.bd - ceilLog2(sampleNum); // bd > 10 overrides both bases with CCCM_MATRIX_BITS_HBD; otherwise identical to the #else branch
+#else
   int matrixShift = (interCccmMode ? 28 : CCCM_MATRIX_BITS) - 2 * modelCb.bd - ceilLog2( sampleNum );
+#endif // JVET_AJ0237_INTERNAL_12BIT
 #else
   int matrixShift = CCCM_MATRIX_BITS - 2 * modelCb.bd - ceilLog2( sampleNum );
 #endif
@@ -28242,9 +28437,14 @@ void CccmCovariance::solve2( const Pel A[CCCM_NUM_PARAMS_MAX][CCCM_REF_SAMPLES_M
 
 #if JVET_AB0174_CCCM_DIV_FREE
   // Add the chroma offset to bias term (after shifting up by CCCM_DECIM_BITS and down by cccmModelCb.bd - 1)
+#if JVET_AJ0237_INTERNAL_12BIT
+  modelCb.params[numParams - 1] += chromaOffsetCb << (modelCb.decimBits - (modelCb.bd - 1)); // same bias update as the #else branch, using each model's decimBits
+  modelCr.params[numParams - 1] += chromaOffsetCr << (modelCr.decimBits - (modelCr.bd - 1));
+#else
   modelCb.params[numParams - 1] += chromaOffsetCb << (CCCM_DECIM_BITS - (modelCb.bd - 1));
   modelCr.params[numParams - 1] += chromaOffsetCr << (CCCM_DECIM_BITS - (modelCr.bd - 1));
 #endif
+#endif // JVET_AB0174_CCCM_DIV_FREE
 }
 #endif
 
@@ -30445,8 +30645,16 @@ void CccmCovariance::solveEip(const TCccmCoeff* A, const TCccmCoeff* Y, const in
   {
     regularizationParam = (sampleNum <= REGULARIZED_EIP_L2_SAMPLE_THRESHOLD) ? REGULARIZED_EIP_L2_SMALL * numParams : REGULARIZED_EIP_L2_LARGE * numParams;
   }
+#if JVET_AJ0237_INTERNAL_12BIT
+  regularizationParam <<= 2 * std::max(0, model.bd - 10); // shift by 2 bits per bit of depth above 10 (no-op for bd <= 10)
+#endif // JVET_AJ0237_INTERNAL_12BIT
+#else
+#if JVET_AJ0237_INTERNAL_12BIT
+  int regShift = 2 * std::max(0, model.bd - 10); // 2 bits per bit of depth above 10, 0 otherwise
+  const int regularizationParam = ((sampleNum <= REGULARIZED_EIP_L2_SAMPLE_THRESHOLD) ? REGULARIZED_EIP_L2_SMALL * numParams : REGULARIZED_EIP_L2_LARGE * numParams) << regShift; // the #else value, scaled up by regShift
 #else
   const int regularizationParam = (sampleNum <= REGULARIZED_EIP_L2_SAMPLE_THRESHOLD) ? REGULARIZED_EIP_L2_SMALL * numParams : REGULARIZED_EIP_L2_LARGE * numParams;
+#endif // JVET_AJ0237_INTERNAL_12BIT
 #endif
   for (int coli0 = 0; coli0 < numParams - 1; coli0++) // The last term (bias) is not regularized.
   {
@@ -30467,7 +30675,11 @@ void CccmCovariance::solveEip(const TCccmCoeff* A, const TCccmCoeff* Y, const in
   }
 #endif
   // Scale the matrix and vector to selected dynamic range
+#if JVET_AJ0237_INTERNAL_12BIT
+  int matrixShift = ((model.bd > 10) ? CCCM_MATRIX_BITS_HBD : 28) - 2 * model.bd - ceilLog2(sampleNum); // base is CCCM_MATRIX_BITS_HBD instead of 28 when bd > 10; identical to the #else branch otherwise
+#else
   int matrixShift = 28 - 2 * model.bd - ceilLog2(sampleNum);
+#endif // JVET_AJ0237_INTERNAL_12BIT
 
   if (matrixShift > 0)
   {
@@ -30515,8 +30727,12 @@ void CccmCovariance::solveEip(const TCccmCoeff* A, const TCccmCoeff* Y, const in
 
 #if JVET_AB0174_CCCM_DIV_FREE
   // Add the chroma offset to bias term (after shifting up by CCCM_DECIM_BITS and down by cccmModelCb.bd - 1)
+#if JVET_AJ0237_INTERNAL_12BIT
+  model.params[numParams - 1] += lumaOffset << (model.decimBits - (model.bd - 1)); // same bias update as the #else branch, using the model's decimBits instead of CCCM_DECIM_BITS
+#else
   model.params[numParams - 1] += lumaOffset << (CCCM_DECIM_BITS - (model.bd - 1));
 #endif
+#endif // JVET_AB0174_CCCM_DIV_FREE
 }
 
 void IntraPrediction::initEipParams(const PredictionUnit& pu, const ComponentID compId)
diff --git a/source/Lib/CommonLib/IntraPrediction.h b/source/Lib/CommonLib/IntraPrediction.h
index 9601371d8..66ae8a1e5 100644
--- a/source/Lib/CommonLib/IntraPrediction.h
+++ b/source/Lib/CommonLib/IntraPrediction.h
@@ -145,7 +145,11 @@ typedef short TrainDataType;
 
 #if JVET_AA0057_CCCM || JVET_AB0092_GLM_WITH_LUMA || JVET_AC0119_LM_CHROMA_FUSION || JVET_AG0058_EIP || JVET_AG0154_DECODER_DERIVED_CCP_FUSION
 typedef int64_t TCccmCoeff;
+#if JVET_AJ0237_INTERNAL_12BIT
+#define FIXED_MULT(x, y, round, bits) TCccmCoeff((int64_t(x)*(y) + (round)) >> (bits) ) // fixed-point product (x*y + round) >> bits; round/bits are parenthesized so expression arguments keep their own precedence
+#else
 #define FIXED_MULT(x, y) TCccmCoeff((int64_t(x)*(y) + CCCM_DECIM_ROUND) >> CCCM_DECIM_BITS )
+#endif // JVET_AJ0237_INTERNAL_12BIT
 #if !JVET_AB0174_CCCM_DIV_FREE
 #define FIXED_DIV(x, y)  TCccmCoeff((int64_t(x)    << CCCM_DECIM_BITS ) / (y) )
 #endif
@@ -157,6 +161,10 @@ struct CccmModel
     bd = bitdepth;
     midVal = ( 1 << ( bitdepth - 1 ) );
     params.resize( num );
+#if JVET_AJ0237_INTERNAL_12BIT
+    decimBits = DECIM_BITS(bd); // shift amount derived from the model bit depth
+    decimRound = (1 << (decimBits - 1)); // half of 1 << decimBits; rounding offset applied before >> decimBits
+#endif // JVET_AJ0237_INTERNAL_12BIT
   }
 
   ~CccmModel() {}
@@ -164,6 +172,10 @@ struct CccmModel
   std::vector<TCccmCoeff> params;
   int        bd;
   int        midVal;
+#if JVET_AJ0237_INTERNAL_12BIT
+  int        decimRound; // 1 << (decimBits - 1), assigned together with bd and midVal
+  int        decimBits; // DECIM_BITS(bd), assigned together with bd and midVal
+#endif // JVET_AJ0237_INTERNAL_12BIT
   
   const int getNumParams() const
   {
@@ -176,7 +188,11 @@ struct CccmModel
 
     std::fill( params.begin(), params.end(), 0 );
 
+#if JVET_AJ0237_INTERNAL_12BIT
+    params[numParams - 1] = (TCccmCoeff)1 << decimBits; // Default bias to 1 (stored as 1 << decimBits); the cast makes the shift happen in TCccmCoeff
+#else
     params[numParams - 1] = 1 << CCCM_DECIM_BITS; // Default bias to 1
+#endif // JVET_AJ0237_INTERNAL_12BIT
   }
 
   Pel convolve(Pel* vector)
@@ -188,7 +204,11 @@ struct CccmModel
       sum += params[i] * vector[i];
     }
 
+#if JVET_AJ0237_INTERNAL_12BIT
+    return Pel( (sum + decimRound) >> decimBits); // add the half-step offset, then shift down by decimBits
+#else
     return Pel( (sum + CCCM_DECIM_ROUND ) >> CCCM_DECIM_BITS );
+#endif // JVET_AJ0237_INTERNAL_12BIT
   }
   
   Pel nonlinear(const Pel val) { return (val * val + midVal) >> bd; }
@@ -238,7 +258,11 @@ private:
   TCccmCoeff C[CCCM_NUM_PARAMS_MAX][CCCM_NUM_PARAMS_MAX + 2];
 
 #if JVET_AC0053_GAUSSIAN_SOLVER
+#if JVET_AJ0237_INTERNAL_12BIT
+  void gaussBacksubstitution       ( TCccmCoeff* x, int numEq, int col, int round, int bits); // adds round/bits parameters compared with the #else declaration
+#else
   void gaussBacksubstitution       ( TCccmCoeff* x, int numEq, int col );
+#endif // JVET_AJ0237_INTERNAL_12BIT
 #if JVET_AE0059_INTER_CCCM
   void gaussElimination            ( TCccmCoeff A[CCCM_NUM_PARAMS_MAX][CCCM_NUM_PARAMS_MAX], TCccmCoeff* y0, TCccmCoeff* x0, TCccmCoeff* y1, TCccmCoeff* x1, int numEq, int numFilters, int bd, const bool interCccmMode = false);
 #else
@@ -659,7 +683,11 @@ public:
     int b;
     int shift;
   };
+#if JVET_AJ0237_INTERNAL_12BIT
+  int xCalcLMParametersGeneralized(int64_t x, int64_t y, int64_t xx, int64_t xy, int count, int bitDepth, int& a, int& b, int& iShift); // x, y, xx, xy are int64_t here (int in the #else declaration)
+#else
   int xCalcLMParametersGeneralized(int x, int y, int xx, int xy, int count, int bitDepth, int &a, int &b, int &iShift);
+#endif // JVET_AJ0237_INTERNAL_12BIT
   int xLMSampleClassifiedTraining (int count, int mean, int meanC, int lumaSamples[], int chrmSamples[], int bitDepth, MMLMParameters parameters[]);
 #if JVET_AG0136_INTRA_TMP_LIC
   std::array<int, 7>& getMemLicParams(const int licIdc, const int idx) { return m_memLicParams[licIdc][idx]; }
diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp
index c3e95010c..e1e609a75 100644
--- a/source/Lib/CommonLib/Picture.cpp
+++ b/source/Lib/CommonLib/Picture.cpp
@@ -1810,6 +1810,9 @@ void Picture::calcLumaClpParams()
     clipDeltaShift = ADAPTIVE_CLIP_SHIFT_DELTA_VALUE_0;
     cs->slice->setAdaptiveClipQuant(false);
   }
+#if JVET_AJ0237_INTERNAL_12BIT
+  clipDeltaShift += std::max(0, cs->sps->getBitDepth(toChannelType(COMPONENT_Y)) - 10); // add 1 to the shift per luma bit above 10 (unchanged at 10 bit or below)
+#endif // JVET_AJ0237_INTERNAL_12BIT
   int       pelMaxOF  = 0;
   int       pelMinOF  = (1 << cs->sps->getBitDepth(toChannelType(COMPONENT_Y))) - 1;
   const int orgPelMin = pelMin;
diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp
index 3dfaab825..b4aba4478 100644
--- a/source/Lib/CommonLib/Rom.cpp
+++ b/source/Lib/CommonLib/Rom.cpp
@@ -625,14 +625,26 @@ MsgLevel g_verbosity = VERBOSE;
 #if JVET_Y0141_SIGN_PRED_IMPROVE
 #if JVET_W0119_LFNST_EXTENSION || EXTENDED_LFNST
 #if JVET_AJ0175_NSPT_FOR_NONREG_MODES
+#if JVET_AJ0237_INTERNAL_12BIT
+int16_t* g_resiBorderTemplateLFNST[NUM_NSPT_BLOCK_TYPES][6][6][210]; // int16_t entries here, int8_t in the #else build
+#else
 int8_t * g_resiBorderTemplateLFNST[NUM_NSPT_BLOCK_TYPES][6][6][210];
+#endif // JVET_AJ0237_INTERNAL_12BIT
+#else // !JVET_AJ0175_NSPT_FOR_NONREG_MODES
+#if JVET_AJ0237_INTERNAL_12BIT
+int16_t* g_resiBorderTemplateLFNST[6][6][210]; // int16_t entries here, int8_t in the #else build
 #else
 int8_t * g_resiBorderTemplateLFNST[6][6][210];
 #endif
+#endif // JVET_AJ0175_NSPT_FOR_NONREG_MODES
 #else
 int8_t * g_resiBorderTemplateLFNST[6][6][16];
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+int16_t* g_resiBorderTemplate[6][6][NUM_TRANS_TYPE * NUM_TRANS_TYPE]; // int16_t entries here, int8_t in the #else build
+#else
 int8_t * g_resiBorderTemplate[6][6][NUM_TRANS_TYPE*NUM_TRANS_TYPE];
+#endif // JVET_AJ0237_INTERNAL_12BIT
 #else
 const int8_t * g_resiBorderTemplate[6][6][NUM_TRANS_TYPE*NUM_TRANS_TYPE];
 #endif
diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h
index eb045838d..6f832c7a0 100644
--- a/source/Lib/CommonLib/Rom.h
+++ b/source/Lib/CommonLib/Rom.h
@@ -82,14 +82,26 @@ void destroyMipFilters();
 #if JVET_Y0141_SIGN_PRED_IMPROVE
 #if JVET_W0119_LFNST_EXTENSION || EXTENDED_LFNST
 #if JVET_AJ0175_NSPT_FOR_NONREG_MODES
+#if JVET_AJ0237_INTERNAL_12BIT
+extern       int16_t* g_resiBorderTemplateLFNST[NUM_NSPT_BLOCK_TYPES][6][6][210]; // int16_t entries here, int8_t in the #else build
+#else
 extern       int8_t * g_resiBorderTemplateLFNST[NUM_NSPT_BLOCK_TYPES][6][6][210];
+#endif // JVET_AJ0237_INTERNAL_12BIT
+#else // !JVET_AJ0175_NSPT_FOR_NONREG_MODES
+#if JVET_AJ0237_INTERNAL_12BIT
+extern       int16_t* g_resiBorderTemplateLFNST[6][6][210]; // int16_t entries here, int8_t in the #else build
 #else
 extern       int8_t * g_resiBorderTemplateLFNST[6][6][210];
 #endif
+#endif // JVET_AJ0175_NSPT_FOR_NONREG_MODES
 #else
 extern       int8_t * g_resiBorderTemplateLFNST[6][6][16];
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+extern       int16_t* g_resiBorderTemplate[6][6][NUM_TRANS_TYPE * NUM_TRANS_TYPE]; // int16_t entries here, int8_t in the #else build
+#else
 extern       int8_t * g_resiBorderTemplate[6][6][NUM_TRANS_TYPE*NUM_TRANS_TYPE];
+#endif // JVET_AJ0237_INTERNAL_12BIT
 #else
 extern const int8_t * g_resiBorderTemplate[6][6][NUM_TRANS_TYPE*NUM_TRANS_TYPE];
 #endif
diff --git a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp
index 04f37c011..f2a946054 100644
--- a/source/Lib/CommonLib/SampleAdaptiveOffset.cpp
+++ b/source/Lib/CommonLib/SampleAdaptiveOffset.cpp
@@ -2158,7 +2158,12 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
   const int edgePosXB  = g_ccSaoEdgePosX[edgeDir][1], edgePosYB = g_ccSaoEdgePosY[edgeDir][1];
   const int bandCmp    = g_ccSaoBandTab [bandIdc][0];
   const int bandNum    = g_ccSaoBandTab [bandIdc][1];
+#if JVET_AJ0237_INTERNAL_12BIT
+  const int bdShift = std::max(0, bitDepth - 10); // number of bits of depth above 10, never negative
+  const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift; // table threshold scaled up by bdShift bits
+#else
   const int edgeThrVal = g_ccSaoEdgeThr [edgeIdc][edgeThr];
+#endif // JVET_AJ0237_INTERNAL_12BIT
   const int edgeNum    = g_ccSaoEdgeNum [edgeIdc][0];
   const int edgeNumUni = g_ccSaoEdgeNum [edgeIdc][1];
   const int srcStrideE = edgeCmp == COMPONENT_Y ? srcStrideY : edgeCmp == COMPONENT_Cb ? srcStrideU : srcStrideV;
@@ -2216,7 +2221,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
           const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
         
           const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
           const Pel *colY = srcY + x;
           const Pel *colA = srcY + x + srcStrideY * candPosYYA + candPosYXA;
@@ -2295,7 +2304,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
           const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
         
           const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
           const Pel *colY = srcY + x;
           const Pel *colA = srcY + x + srcStrideY * candPosYYA + candPosYXA;
@@ -2369,7 +2382,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
         const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
         
         const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
         const Pel *colY = srcY + x;
         const Pel *colA = srcY + x + srcStrideY * candPosYYA + candPosYXA;
@@ -2436,7 +2453,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
           const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
         
           const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
           const Pel *colY = srcY + x;
           const Pel *colA = srcY + x + srcStrideY * candPosYYA + candPosYXA;
@@ -2504,7 +2525,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
         const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
         
         const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
         const Pel *colY = srcY + x;
         const Pel *colA = srcY + x + srcStrideY * candPosYYA + candPosYXA;
@@ -2573,7 +2598,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
         const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
         
         const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
         const Pel *colY = srcY + x;
         const Pel *colA = srcY + x + srcStrideY * candPosYYA + candPosYXA;
@@ -2640,7 +2669,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
           const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
         
           const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
           const Pel *colY = srcY + x;
           const Pel *colA = srcY + x + srcStrideY * candPosYYA + candPosYXA;
@@ -2708,7 +2741,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
         const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
         
         const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
         const Pel *colY = srcY + x;
         const Pel *colA = srcY + x + srcStrideY * candPosYYA + candPosYXA;
@@ -2788,7 +2825,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
           const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
           
           const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
           const Pel *colY = srcY + (x << chromaScaleX);
           const Pel *colA = srcY + (x << chromaScaleX) + srcStrideY * candPosYYA + candPosYXA;
@@ -2868,7 +2909,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
           const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
           
           const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
           const Pel *colY = srcY + (x << chromaScaleX);
           const Pel *colA = srcY + (x << chromaScaleX) + srcStrideY * candPosYYA + candPosYXA;
@@ -2943,7 +2988,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
         const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
         
         const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
         const Pel *colY = srcY + (x << chromaScaleX);
         const Pel *colA = srcY + (x << chromaScaleX) + srcStrideY * candPosYYA + candPosYXA;
@@ -3011,7 +3060,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
           const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
           
           const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
           const Pel *colY = srcY + (x << chromaScaleX);
           const Pel *colA = srcY + (x << chromaScaleX) + srcStrideY * candPosYYA + candPosYXA;
@@ -3080,7 +3133,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
         const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
         
         const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
         const Pel *colY = srcY + (x << chromaScaleX);
         const Pel *colA = srcY + (x << chromaScaleX) + srcStrideY * candPosYYA + candPosYXA;
@@ -3150,7 +3207,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
         const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
         
         const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
         const Pel *colY = srcY + (x << chromaScaleX);
         const Pel *colA = srcY + (x << chromaScaleX) + srcStrideY * candPosYYA + candPosYXA;
@@ -3218,7 +3279,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
           const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
           
           const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
           const Pel *colY = srcY + (x << chromaScaleX);
           const Pel *colA = srcY + (x << chromaScaleX) + srcStrideY * candPosYYA + candPosYXA;
@@ -3287,7 +3352,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClipEdge(const ComponentID compID,
         const int bandIdx  = (*col[bandCmp] * bandNum) >> bitDepth;
         
         const int classIdx = bandIdx * edgeNum + edgeIdx;
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
 #else
         const Pel *colY = srcY + (x << chromaScaleX);
         const Pel *colA = srcY + (x << chromaScaleX) + srcStrideY * candPosYYA + candPosYXA;
@@ -3538,7 +3607,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY;
         srcU += srcStrideU * chromaScaleYM1;
@@ -3565,7 +3638,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
             const int classIdx = bandIdx;
 
             // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+            dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
             dst[x] = dst[x] + offset[classIdx];
+#endif
           }
           srcY += srcStrideY;
           srcU += srcStrideU * ((y & 0x1) | chromaScaleYM1);
@@ -3604,7 +3681,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
 
         srcY += srcStrideY;
@@ -3638,7 +3719,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
         const int classIdx = bandIdx;
 
         // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
       }
 
       srcY += srcStrideY;
@@ -3666,7 +3751,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
 
         srcY += srcStrideY;
@@ -3700,7 +3789,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY;
         srcU += srcStrideU * ((y & 0x1) | chromaScaleYM1);
@@ -3727,7 +3820,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY;
         srcU += srcStrideU * ((y & 0x1) | chromaScaleYM1);
@@ -3760,7 +3857,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY;
         srcU += srcStrideU * ((y & 0x1) | chromaScaleYM1);
@@ -3793,7 +3894,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY;
         srcU += srcStrideU * ((y & 0x1) | chromaScaleYM1);
@@ -3822,7 +3927,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
         const int classIdx = bandIdx;
 
         // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
       }
       break;
     }
@@ -3850,7 +3959,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + offset[classIdx] * (1 << m_offsetStepLog2[compID]); // scale by 2^step; multiply (not <<) stays defined for negative offsets
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY;
         srcU += srcStrideU * ((y & 0x1) | chromaScaleYM1);
@@ -3883,7 +3996,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY;
         srcU += srcStrideU * ((y & 0x1) | chromaScaleYM1);
@@ -3911,7 +4028,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
         const int classIdx = bandIdx;
 
         // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
       }
       break;
     }
@@ -3950,7 +4071,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY << chromaScaleY;
         srcU += srcStrideU;
@@ -3977,7 +4102,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
             const int classIdx = bandIdx;
 
             // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+            dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
             dst[x] = dst[x] + offset[classIdx];
+#endif
           }
           srcY += srcStrideY << chromaScaleY;
           srcU += srcStrideU;
@@ -4016,7 +4145,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
 
         srcY += srcStrideY << chromaScaleY;
@@ -4050,7 +4183,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
         const int classIdx = bandIdx;
 
         // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
       }
 
       srcY += srcStrideY << chromaScaleY;
@@ -4078,7 +4215,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
 
         srcY += srcStrideY << chromaScaleY;
@@ -4112,7 +4253,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY << chromaScaleY;
         srcU += srcStrideU;
@@ -4139,7 +4284,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY << chromaScaleY;
         srcU += srcStrideU;
@@ -4172,7 +4321,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY << chromaScaleY;
         srcU += srcStrideU;
@@ -4205,7 +4358,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY << chromaScaleY;
         srcU += srcStrideU;
@@ -4234,7 +4391,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
         const int classIdx = bandIdx;
 
         // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
       }
       break;
     }
@@ -4262,7 +4423,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY << chromaScaleY;
         srcU += srcStrideU;
@@ -4295,7 +4460,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
           const int classIdx = bandIdx;
 
           // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+          dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
           dst[x] = dst[x] + offset[classIdx];
+#endif
         }
         srcY += srcStrideY << chromaScaleY;
         srcU += srcStrideU;
@@ -4322,7 +4491,11 @@ void SampleAdaptiveOffset::offsetBlockCcSaoNoClip(const ComponentID compID, cons
         const int classIdx = bandIdx;
 
         // dst[x] = ClipPel<int>(dst[x] + offset[classIdx], clpRng);
+#if JVET_AJ0237_INTERNAL_12BIT
+        dst[x] = dst[x] + (offset[classIdx] << m_offsetStepLog2[compID]);
+#else
         dst[x] = dst[x] + offset[classIdx];
+#endif
       }
     }
     break;
diff --git a/source/Lib/CommonLib/Slice.h b/source/Lib/CommonLib/Slice.h
index 29864a45a..ec4cababa 100644
--- a/source/Lib/CommonLib/Slice.h
+++ b/source/Lib/CommonLib/Slice.h
@@ -3531,8 +3531,15 @@ public:
   void                        setColRefIdx( uint32_t refIdx)                             { m_colRefIdx = refIdx;                                                                       }
   uint32_t                    getColRefIdx()                                             { return m_colRefIdx;                                                                         }
 #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
+#if JVET_AJ0237_INTERNAL_12BIT
+  void                        setCostForARMC(uint32_t cost, int bitDepth)               { const int extraBits = std::max(0, bitDepth - 10); m_costForARMC = cost << extraBits;          } // stores cost left-shifted by max(0, bitDepth - 10) bits
+#else
   void                        setCostForARMC(uint32_t cost)                             { m_costForARMC = cost;                                                                        }
+#endif
   uint32_t                    getCostForARMC()                                          { return m_costForARMC;                                                                        }
+#if JVET_AJ0237_INTERNAL_12BIT
+  uint32_t                    getCostForARMC(int bitDepth) const                        { return m_costForARMC >> (std::max(0, bitDepth - 10));                                        } // for header parsing/writing purpose: returns the stored cost right-shifted by max(0, bitDepth - 10) bits; const because it only reads m_costForARMC
+#endif
 #endif
 #if JVET_AC0185_ENHANCED_TEMPORAL_MOTION_DERIVATION
   void                        setPicColFromL0Flag2nd(bool val)                          { m_picColFromL0Flag2nd = val;                                                                 }
@@ -4084,6 +4091,9 @@ public:
 #endif
   void                        checkColRefIdx(uint32_t curSliceSegmentIdx, const Picture* pic);
 #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
+#if JVET_AJ0237_INTERNAL_12BIT
+  uint32_t                    getCostForARMC(int bitDepth) const                                 { const int extraBits = std::max(0, bitDepth - 10); return m_costForARMC >> extraBits; } // for header parsing/writing purpose: stored cost right-shifted by max(0, bitDepth - 10) bits
+#endif
   uint32_t                    getCostForARMC() const                                 { return m_costForARMC;                                         }
 #endif
 #if JVET_Y0134_TMVP_NAMVP_CAND_REORDERING
@@ -4284,7 +4294,11 @@ public:
   void                        setExtAmvpLevel(int b)                            { m_extAmvpLevel = b;                                    }
 #endif
 #if JVET_AA0093_DIVERSITY_CRITERION_FOR_ARMC
+#if JVET_AJ0237_INTERNAL_12BIT
+  void                        setCostForARMC(uint32_t cost, int bitDepth)                          { const int extraBits = std::max(0, bitDepth - 10); m_costForARMC = cost << extraBits; } // stores cost left-shifted by max(0, bitDepth - 10) bits
+#else
   void                        setCostForARMC(uint32_t cost)                          { m_costForARMC = cost;                                         }
+#endif
 #endif
   void                        setBiDirPred( bool b, int refIdx0, int refIdx1 ) { m_biDirPred = b; m_symRefIdx[0] = refIdx0; m_symRefIdx[1] = refIdx1; }
   bool                        getBiDirPred() const { return m_biDirPred; }
diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp
index ac707949d..c731c3976 100644
--- a/source/Lib/CommonLib/TrQuant.cpp
+++ b/source/Lib/CommonLib/TrQuant.cpp
@@ -2529,7 +2529,11 @@ void TrQuant::predCoeffSigns(TransformUnit &tu, const ComponentID compID, const
   ComponentID residCompID = compID;
   bool bJccrWithCr = bIsJCCR && !(tu.jointCbCr >> 1);
 #if JVET_AI0096_SIGN_PRED_BIT_DEPTH_FIX
+#if JVET_AJ0237_INTERNAL_12BIT
+  const int signPredShift = SIGN_PRED_RESIDUAL_BITS;
+#else
   const int signPredShift = 10 + SIGN_PRED_RESIDUAL_BITS  - tu.cs->sps->getBitDepth(toChannelType(COMPONENT_Y));
+#endif
   const int signPredOffset = 1 << (signPredShift - 1);
 #endif
   if(bJccrWithCr)
@@ -2627,14 +2631,23 @@ void TrQuant::predCoeffSigns(TransformUnit &tu, const ComponentID compID, const
     int spArea = tu.cs->sps->getSignPredArea();
     int signPredWidth = std::min((int)width, spArea);
     int signPredHeight = std::min((int)height, spArea);
+#if JVET_AJ0237_INTERNAL_12BIT
+    int16_t* pTemplate = (int16_t*)xMalloc(int16_t, stride * h * w);
+    AreaBuf<int16_t> templateBuf(pTemplate, stride, length, h * w);
+#else
     int8_t         *pTemplate      = (int8_t *) xMalloc(int8_t, stride * h * w);
     AreaBuf<int8_t> templateBuf(pTemplate, stride, length, h * w);
+#endif
 #else
     int8_t *pTemplate = (int8_t *) xMalloc(int8_t, stride * SIGN_PRED_FREQ_RANGE * SIGN_PRED_FREQ_RANGE);
     AreaBuf<int8_t> templateBuf(pTemplate, stride, length, SIGN_PRED_FREQ_RANGE * SIGN_PRED_FREQ_RANGE);
 #endif
     Position prev(0,0);
+#if JVET_AJ0237_INTERNAL_12BIT
+    int16_t* templ = templateBuf.buf;
+#else
     int8_t *templ = templateBuf.buf;
+#endif
 #if JVET_Y0141_SIGN_PRED_IMPROVE
     for (int j = 0; j < signPredHeight*signPredWidth; ++j)
     {
@@ -2660,8 +2673,12 @@ void TrQuant::predCoeffSigns(TransformUnit &tu, const ComponentID compID, const
 
       for (uint32_t i = 0; i < height; i++)
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        templ[i] = (int16_t)(*pelResi);
+#else
         CHECK(*pelResi < -128 || *pelResi > 127, "value exceeds 8-bit range");
         templ[i] = (int8_t)(*pelResi);
+#endif
         pelResi -= resi.stride;
       }
 
@@ -2669,8 +2686,12 @@ void TrQuant::predCoeffSigns(TransformUnit &tu, const ComponentID compID, const
 
       for (uint32_t i = 0; i < width; i++)
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        templ[i + height] = (int16_t)pelResi[i];
+#else
         CHECK(pelResi[i] < -128 || pelResi[i] > 127, "value exceeds 8-bit range");
         templ[i + height] = (int8_t) pelResi[i];
+#endif
       }
 #if !JVET_Y0141_SIGN_PRED_IMPROVE
       templ += templateBuf.stride;
@@ -2701,9 +2722,15 @@ void TrQuant::predCoeffSigns(TransformUnit &tu, const ComponentID compID, const
     PelBuf   resi(memTmpResid, width, height);
     int signPredHeight = 4;
     int signPredWidth = 4;
+#if JVET_AJ0237_INTERNAL_12BIT
+    int16_t* pTemplate = (int16_t*)xMalloc(int16_t, stride * signPredHeight * signPredWidth);
+    AreaBuf<int16_t> templateBuf(pTemplate, stride, length, signPredHeight* signPredWidth);
+    int16_t* templ = templateBuf.buf;
+#else
     int8_t         *pTemplate      = (int8_t *) xMalloc(int8_t, stride * signPredHeight * signPredWidth);
     AreaBuf<int8_t> templateBuf(pTemplate, stride, length, signPredHeight * signPredWidth);
     int8_t *templ = templateBuf.buf;
+#endif
     for (int j = 0; j < signPredHeight*signPredWidth; ++j)
     {
       coeff.fill(0);
@@ -2721,8 +2748,12 @@ void TrQuant::predCoeffSigns(TransformUnit &tu, const ComponentID compID, const
 
       for (uint32_t i = 0; i < height; i++)
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        templ[i] = (int16_t)(*pelResi);
+#else
         CHECK(*pelResi < -128 || *pelResi > 127, "value exceeds 8-bit range");
         templ[i] = (int8_t)(*pelResi);
+#endif
         pelResi -= resi.stride;
       }
 
@@ -2730,8 +2761,12 @@ void TrQuant::predCoeffSigns(TransformUnit &tu, const ComponentID compID, const
 
       for (uint32_t i = 0; i < width; i++)
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        templ[i + height] = (int16_t)pelResi[i];
+#else
         CHECK(pelResi[i] < -128 || pelResi[i] > 127, "value exceeds 8-bit range");
         templ[i + height] = (int8_t) pelResi[i];
+#endif
       }
       templ += templateBuf.stride;
     }
@@ -2838,23 +2873,44 @@ void TrQuant::predCoeffSigns(TransformUnit &tu, const ComponentID compID, const
   const uint32_t w = std::min(uiWidth, (uint32_t)SIGN_PRED_FREQ_RANGE);
   const uint32_t h = std::min(uiHeight, (uint32_t)SIGN_PRED_FREQ_RANGE);
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  AreaBuf<const int16_t> templateNormalizedBuf =
+    (lfnstEnabled ? AreaBuf<const int16_t>()
+                  : AreaBuf<const int16_t>(g_resiBorderTemplate[log2Width - 2][log2Height - 2][actualTrIdx], stride,
+                                          length, w * h));
+#else
   AreaBuf<const int8_t> templateNormalizedBuf =
     (lfnstEnabled ? AreaBuf<const int8_t>()
                   : AreaBuf<const int8_t>(g_resiBorderTemplate[log2Width - 2][log2Height - 2][actualTrIdx], stride,
                                           length, w * h));
+#endif
 #if JVET_AJ0175_NSPT_FOR_NONREG_MODES
   bool allowNSPT = CU::isNSPTAllowed( tu, compID, uiWidth, uiHeight, spsIntraLfnstEnabled && CU::isIntra( *( tu.cu ) ) );
   int  nsptBucketIdx = allowNSPT ? PU::getNSPTBucket(tu) : 0;
+#if JVET_AJ0237_INTERNAL_12BIT
+  AreaBuf<const int16_t> templateLfnstNormalizedBuf =
+    (lfnstEnabled ? AreaBuf<const int16_t>(g_resiBorderTemplateLFNST[nsptBucketIdx][log2Width - 2][log2Height - 2][actualLfnstIdx],
+                                          stride, length, signPredWidth * signPredHeight)
+                  : AreaBuf<const int16_t>());
+#else
   AreaBuf<const int8_t> templateLfnstNormalizedBuf =
     (lfnstEnabled ? AreaBuf<const int8_t>(g_resiBorderTemplateLFNST[nsptBucketIdx][log2Width - 2][log2Height - 2][actualLfnstIdx],
                                           stride, length, signPredWidth * signPredHeight)
                   : AreaBuf<const int8_t>());
+#endif
+#else
+#if JVET_AJ0237_INTERNAL_12BIT
+  AreaBuf<const int16_t> templateLfnstNormalizedBuf =
+    (lfnstEnabled ? AreaBuf<const int16_t>(g_resiBorderTemplateLFNST[log2Width - 2][log2Height - 2][actualLfnstIdx],
+                                          stride, length, signPredWidth * signPredHeight)
+                  : AreaBuf<const int16_t>());
 #else
   AreaBuf<const int8_t> templateLfnstNormalizedBuf =
     (lfnstEnabled ? AreaBuf<const int8_t>(g_resiBorderTemplateLFNST[log2Width - 2][log2Height - 2][actualLfnstIdx],
                                           stride, length, signPredWidth * signPredHeight)
                   : AreaBuf<const int8_t>());
 #endif
+#endif
 #else
   AreaBuf<const int8_t> templateNormalizedBuf(g_resiBorderTemplate[log2Width - 2][log2Height - 2][actualTrIdx], stride,
                                               length, SIGN_PRED_FREQ_RANGE * SIGN_PRED_FREQ_RANGE);
@@ -2915,7 +2971,11 @@ void TrQuant::predCoeffSigns(TransformUnit &tu, const ComponentID compID, const
     CHECK(coeffVal == 0, "coefficient value should be nonzero");
 #endif
 
+#if JVET_AJ0237_INTERNAL_12BIT
+    const int16_t* templateBasisVec;
+#else
     const int8_t *templateBasisVec;
+#endif
 
 #if JVET_Y0141_SIGN_PRED_IMPROVE
     if (lfnstEnabled)
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index a2ab6604d..d5e901ea3 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -525,6 +525,7 @@
 #define JVET_Z0150_MEMORY_USAGE_PRINT                     1 // JVET-Z0150: Print memory usage
 #define JVET_Z0118_GDR                                    1 // JVET-Z0118: GDR
 #define JVET_AD0169_SMALL_SCALE_DOWNSAMPLING              1 // JVET-AD0169: Downsampling filters in range 1.1 to 1.35 based on Kaiser(7) windowed sinc
+#define JVET_AJ0237_INTERNAL_12BIT                        1 // JVET-AJ0237: Modifications for better operation at 12-bit internal bitdepth
 
 #if JVET_Z0118_GDR
 #define GDR_LEAK_TEST                                     0
diff --git a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
index bd42c0c21..b6f449d96 100644
--- a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
+++ b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
@@ -3761,7 +3761,11 @@ static void simdFilter13x13BlkExtDbResiDirect(
     adjustShift -= shiftPrecis; // add more precision
   }
   const int shift = adjustShift;
+#if JVET_AJ0237_INTERNAL_12BIT
+  const Pel currBase = 1 << (clpRng.bd - 1);
+#else
   const Pel currBase = 512;
+#endif
   int round = 1 << (shift - 1);
 
 #if !( USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION )
@@ -4976,7 +4980,11 @@ static void simdFilter13x13BlkExtDbResi(
     adjustShift -= shiftPrecis; // add more precision
   }
   const int shift = adjustShift;
+#if JVET_AJ0237_INTERNAL_12BIT
+  const Pel currBase = 1 << (clpRng.bd - 1);
+#else
   const Pel currBase = 512;
+#endif
   int round = 1 << (shift - 1);
 
 #if !( USE_AVX2 && JVET_AJ0188_CODING_INFO_CLASSIFICATION )
@@ -6115,7 +6123,11 @@ static void simdGaussFiltering(CodingStructure &cs, Pel ***gaussPic, const CPelB
     int clipIdx = gaussClipIdxTable[filterSetIdx][i];
     gaussClipTable[i] = clippingValues[clipIdx];
   }
+#if JVET_AJ0237_INTERNAL_12BIT
+  int16_t diffTH = 32 << std::max(0, cs.sps->getBitDepth(CHANNEL_TYPE_LUMA) - 10);
+#else
   int16_t diffTH = 32;
+#endif
 
 #if JVET_AJ0188_CODING_INFO_CLASSIFICATION
   const bool isIntraSlice = cs.slice->isIntra();
@@ -8372,9 +8384,19 @@ static void simdFixFilter9x9Db9Blk(AlfClassifier **classifier, const CPelBuf &sr
 #endif
 }
 
+#if JVET_AJ0237_INTERNAL_12BIT
+template<X86_VEXT vext>
+static void simdDeriveVariance(const CPelBuf& srcLuma, const Area& blkDst, const Area& blk, uint32_t ***variance, int bits)
+#else
 template<X86_VEXT vext>
 static void simdDeriveVariance(const CPelBuf &srcLuma, const Area &blkDst, const Area &blk, uint32_t ***variance)
+#endif
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  // temporary buffer, could be optimized
+  int64_t tempData[2][(256 + 10) >> 1][((256 + 16) >> 1) + 8] = { { { 0 } } };
+  int bdShift = 2 * std::max(0, bits - 10);
+#endif
   const size_t imgStride = srcLuma.stride;
   const Pel *  srcExt = srcLuma.buf;
   int fl = DIST_CLASS;
@@ -8386,16 +8408,19 @@ static void simdDeriveVariance(const CPelBuf &srcLuma, const Area &blkDst, const
   int numSample2 = 128 * 128;
   int offset = numSample2 >> 1;
 
+#if !JVET_AJ0237_INTERNAL_12BIT
   int num[8]{ numSample, numSample, numSample, numSample, numSample, numSample, numSample, numSample };
   int mul[8]{ 13, 13, 13, 13, 13, 13, 13, 13 };
   int off[8]{ offset, offset, offset, offset, offset, offset, offset, offset };
+#endif
 #if USE_AVX2
   if (vext >= AVX2 && (blk.width % 32) == 0)
   {
+#if !JVET_AJ0237_INTERNAL_12BIT
     __m256i n = _mm256_loadu_si256((__m256i *) num);
     __m256i m13 = _mm256_loadu_si256((__m256i *) mul);
     __m256i o = _mm256_loadu_si256((__m256i *) off);
-
+#endif
     const int posX = blk.pos().x;
     const int posY = blk.pos().y;
 
@@ -8525,6 +8550,41 @@ static void simdDeriveVariance(const CPelBuf &srcLuma, const Area &blkDst, const
 
         if (i == 8)
         {
+#if JVET_AJ0237_INTERNAL_12BIT
+          for (int kk = 0; kk < 4; kk++)
+          {
+            __m256i x8Low   = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[2][iOffset - 4][jOffset + kk * 4]));
+            __m256i y8Low   = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[3][iOffset - 4][jOffset + kk * 4]));
+            __m256i x6Low   = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[2][iOffset - 3][jOffset + kk * 4]));
+            __m256i y6Low   = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[3][iOffset - 3][jOffset + kk * 4]));
+            __m256i x4Low   = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[2][iOffset - 2][jOffset + kk * 4]));
+            __m256i y4Low   = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[3][iOffset - 2][jOffset + kk * 4]));
+            __m256i x2Low   = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[2][iOffset - 1][jOffset + kk * 4]));
+            __m256i y2Low   = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[3][iOffset - 1][jOffset + kk * 4]));
+            __m256i sumLow  = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[2][iOffset    ][jOffset + kk * 4]));
+            __m256i sum2Low = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[3][iOffset    ][jOffset + kk * 4]));
+
+            x8Low = _mm256_add_epi64(sumLow, x8Low);
+            y8Low = _mm256_add_epi64(sum2Low, y8Low);
+
+            x4Low = _mm256_add_epi64(x6Low, x4Low);
+            y4Low = _mm256_add_epi64(y6Low, y4Low);
+
+            x2Low = _mm256_add_epi64(x8Low, x2Low);
+            y2Low = _mm256_add_epi64(y8Low, y2Low);
+
+            sumLow  = _mm256_add_epi64(x4Low, x2Low);
+            sum2Low = _mm256_add_epi64(y4Low, y2Low);
+
+            _mm256_storeu_si256((__m256i*) &tempData[0][iOffset - 4][jOffset + kk * 4], sumLow);
+            _mm256_storeu_si256((__m256i*) &tempData[1][iOffset - 4][jOffset + kk * 4], sum2Low);
+
+            variance[VARIANCE][iOffset - 4][jOffset + kk * 4] = (uint32_t)((13 * ((numSample * tempData[1][iOffset - 4][jOffset + kk * 4] - tempData[0][iOffset - 4][jOffset + kk * 4] * tempData[0][iOffset - 4][jOffset + kk * 4] + offset) >> 3)) >> (14 + bdShift));
+            variance[VARIANCE][iOffset - 4][jOffset + kk * 4 + 1] = (uint32_t)((13 * ((numSample * tempData[1][iOffset - 4][jOffset + kk * 4 + 1] - tempData[0][iOffset - 4][jOffset + kk * 4 + 1] * tempData[0][iOffset - 4][jOffset + kk * 4 + 1] + offset) >> 3)) >> (14 + bdShift));
+            variance[VARIANCE][iOffset - 4][jOffset + kk * 4 + 2] = (uint32_t)((13 * ((numSample * tempData[1][iOffset - 4][jOffset + kk * 4 + 2] - tempData[0][iOffset - 4][jOffset + kk * 4 + 2] * tempData[0][iOffset - 4][jOffset + kk * 4 + 2] + offset) >> 3)) >> (14 + bdShift));
+            variance[VARIANCE][iOffset - 4][jOffset + kk * 4 + 3] = (uint32_t)((13 * ((numSample * tempData[1][iOffset - 4][jOffset + kk * 4 + 3] - tempData[0][iOffset - 4][jOffset + kk * 4 + 3] * tempData[0][iOffset - 4][jOffset + kk * 4 + 3] + offset) >> 3)) >> (14 + bdShift));
+          }
+#else
           x8 = _mm256_loadu_si256((__m256i *)&variance[2][iOffset - 4][jOffset]);
           y8 = _mm256_loadu_si256((__m256i *)&variance[3][iOffset - 4][jOffset]);
           x6 = _mm256_loadu_si256((__m256i *)&variance[2][iOffset - 3][jOffset]);
@@ -8583,9 +8643,37 @@ static void simdDeriveVariance(const CPelBuf &srcLuma, const Area &blkDst, const
           summ2 = _mm256_srli_epi32(summ2, 14);
           _mm256_storeu_si256((__m256i *) &variance[VARIANCE][iOffset - 4][jOffset], sum2);
           _mm256_storeu_si256((__m256i *) &variance[VARIANCE][iOffset - 4][jOffset + 8], summ2);
+#endif
         }
         else if (i > 8)
         {
+#if JVET_AJ0237_INTERNAL_12BIT
+          for (int kk = 0; kk < 4; kk++)
+          {
+            __m256i x8Low = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) &variance[2][iOffset - 5][jOffset + kk * 4]));
+            __m256i y8Low = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) &variance[3][iOffset - 5][jOffset + kk * 4]));
+
+            __m256i x6Low = _mm256_loadu_si256((__m256i*) &tempData[0][iOffset - 5][jOffset + kk * 4]);
+            __m256i y6Low = _mm256_loadu_si256((__m256i*) &tempData[1][iOffset - 5][jOffset + kk * 4]);
+
+            __m256i sumLow  = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) &variance[2][iOffset][jOffset + kk * 4]));
+            __m256i sum2Low = _mm256_cvtepi32_epi64(_mm_loadu_si128((__m128i*) &variance[3][iOffset][jOffset + kk * 4]));
+
+            x6Low = _mm256_sub_epi64(x6Low, x8Low);
+            y6Low = _mm256_sub_epi64(y6Low, y8Low);
+
+            sumLow  = _mm256_add_epi64(x6Low, sumLow);
+            sum2Low = _mm256_add_epi64(y6Low, sum2Low);
+
+            _mm256_storeu_si256((__m256i*)& tempData[0][iOffset - 4][jOffset + kk * 4], sumLow);
+            _mm256_storeu_si256((__m256i*)& tempData[1][iOffset - 4][jOffset + kk * 4], sum2Low);
+
+            variance[VARIANCE][iOffset - 4][jOffset + kk * 4] = (uint32_t)((13 * ((numSample * tempData[1][iOffset - 4][jOffset + kk * 4] - tempData[0][iOffset - 4][jOffset + kk * 4] * tempData[0][iOffset - 4][jOffset + kk * 4] + offset) >> 3)) >> (14 + bdShift));
+            variance[VARIANCE][iOffset - 4][jOffset + kk * 4 + 1] = (uint32_t)((13 * ((numSample * tempData[1][iOffset - 4][jOffset + kk * 4 + 1] - tempData[0][iOffset - 4][jOffset + kk * 4 + 1] * tempData[0][iOffset - 4][jOffset + kk * 4 + 1] + offset) >> 3)) >> (14 + bdShift));
+            variance[VARIANCE][iOffset - 4][jOffset + kk * 4 + 2] = (uint32_t)((13 * ((numSample * tempData[1][iOffset - 4][jOffset + kk * 4 + 2] - tempData[0][iOffset - 4][jOffset + kk * 4 + 2] * tempData[0][iOffset - 4][jOffset + kk * 4 + 2] + offset) >> 3)) >> (14 + bdShift));
+            variance[VARIANCE][iOffset - 4][jOffset + kk * 4 + 3] = (uint32_t)((13 * ((numSample * tempData[1][iOffset - 4][jOffset + kk * 4 + 3] - tempData[0][iOffset - 4][jOffset + kk * 4 + 3] * tempData[0][iOffset - 4][jOffset + kk * 4 + 3] + offset) >> 3)) >> (14 + bdShift));
+          }
+#else
           x8 = _mm256_loadu_si256((__m256i *)&variance[2][iOffset - 5][jOffset]);
           xx8 = _mm256_loadu_si256((__m256i *)&variance[2][iOffset - 5][jOffset + 8]);
           y8 = _mm256_loadu_si256((__m256i *)&variance[3][iOffset - 5][jOffset]);
@@ -8625,6 +8713,7 @@ static void simdDeriveVariance(const CPelBuf &srcLuma, const Area &blkDst, const
           summ2 = _mm256_srli_epi32(summ2, 14);
           _mm256_storeu_si256((__m256i *) &variance[VARIANCE][iOffset - 4][jOffset], sum2);
           _mm256_storeu_si256((__m256i *) &variance[VARIANCE][iOffset - 4][jOffset + 8], summ2);
+#endif
         }
       }
 
@@ -8633,10 +8722,11 @@ static void simdDeriveVariance(const CPelBuf &srcLuma, const Area &blkDst, const
   else
   {
 #endif
+#if !JVET_AJ0237_INTERNAL_12BIT
     __m128i n = _mm_loadu_si128((__m128i *) num);
     __m128i m13 = _mm_loadu_si128((__m128i *) mul);
     __m128i o = _mm_loadu_si128((__m128i *) off);
-
+#endif
     const int posX = blk.pos().x;
     const int posY = blk.pos().y;
 
@@ -8706,6 +8796,39 @@ static void simdDeriveVariance(const CPelBuf &srcLuma, const Area &blkDst, const
 
         if (i == 8)
         {
+#if JVET_AJ0237_INTERNAL_12BIT
+          for (int kk = 0; kk < 2; kk++)
+          {
+            __m128i x8Low   = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[2][iOffset - 4][jOffset + kk * 2]));
+            __m128i y8Low   = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[3][iOffset - 4][jOffset + kk * 2]));
+            __m128i x6Low   = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[2][iOffset - 3][jOffset + kk * 2]));
+            __m128i y6Low   = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[3][iOffset - 3][jOffset + kk * 2]));
+            __m128i x4Low   = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[2][iOffset - 2][jOffset + kk * 2]));
+            __m128i y4Low   = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[3][iOffset - 2][jOffset + kk * 2]));
+            __m128i x2Low   = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[2][iOffset - 1][jOffset + kk * 2]));
+            __m128i y2Low   = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[3][iOffset - 1][jOffset + kk * 2]));
+            __m128i sumLow  = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[2][iOffset    ][jOffset + kk * 2]));
+            __m128i sum2Low = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[3][iOffset    ][jOffset + kk * 2]));
+
+            x8Low   = _mm_add_epi64(sumLow, x8Low);
+            y8Low   = _mm_add_epi64(sum2Low, y8Low);
+
+            x4Low   = _mm_add_epi64(x6Low, x4Low);
+            y4Low   = _mm_add_epi64(y6Low, y4Low);
+
+            x2Low   = _mm_add_epi64(x8Low, x2Low);
+            y2Low   = _mm_add_epi64(y8Low, y2Low);
+
+            sumLow  = _mm_add_epi64(x4Low, x2Low);
+            sum2Low = _mm_add_epi64(y4Low, y2Low);
+
+            _mm_storeu_si128((__m128i*) &tempData[0][iOffset - 4][jOffset + kk * 2], sumLow);
+            _mm_storeu_si128((__m128i*) &tempData[1][iOffset - 4][jOffset + kk * 2], sum2Low);
+
+            variance[VARIANCE][iOffset - 4][jOffset + kk * 2] = (uint32_t)((13 * ((numSample * tempData[1][iOffset - 4][jOffset + kk * 2] - tempData[0][iOffset - 4][jOffset + kk * 2] * tempData[0][iOffset - 4][jOffset + kk * 2] + offset) >> 3)) >> (14 + bdShift));
+            variance[VARIANCE][iOffset - 4][jOffset + kk * 2 + 1] = (uint32_t)((13 * ((numSample * tempData[1][iOffset - 4][jOffset + kk * 2 + 1] - tempData[0][iOffset - 4][jOffset + kk * 2 + 1] * tempData[0][iOffset - 4][jOffset + kk * 2 + 1] + offset) >> 3)) >> (14 + bdShift));
+          }
+#else
           x8 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 4][jOffset]);
           y8 = _mm_loadu_si128((__m128i *)&variance[3][iOffset - 4][jOffset]);
           x6 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 3][jOffset]);
@@ -8737,9 +8860,33 @@ static void simdDeriveVariance(const CPelBuf &srcLuma, const Area &blkDst, const
           sum2 = _mm_mullo_epi32(sum2, m13);
           sum2 = _mm_srli_epi32(sum2, 14);
           _mm_storeu_si128((__m128i *) &variance[VARIANCE][iOffset - 4][jOffset], sum2);
+#endif
         }
         else if (i > 8)
         {
+#if JVET_AJ0237_INTERNAL_12BIT
+          for (int kk = 0; kk < 2; kk++) // each step handles two 64-bit lanes: columns jOffset + kk*2 and jOffset + kk*2 + 1
+          {
+            __m128i x8Low = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[2][iOffset - 5][jOffset + kk * 2])); // widened to 64-bit lanes
+            __m128i y8Low = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[3][iOffset - 5][jOffset + kk * 2]));
+            __m128i x6Low = _mm_loadu_si128((__m128i*) &tempData[0][iOffset - 5][jOffset + kk * 2]); // previously stored totals, two elements per vector
+            __m128i y6Low = _mm_loadu_si128((__m128i*) &tempData[1][iOffset - 5][jOffset + kk * 2]);
+            __m128i sumLow  = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[2][iOffset][jOffset + kk * 2]));
+            __m128i sum2Low = _mm_cvtepi32_epi64(_mm_loadu_si128((__m128i*) & variance[3][iOffset][jOffset + kk * 2]));
+
+            x6Low = _mm_sub_epi64(x6Low, x8Low); // 64-bit lane arithmetic: epi32 ops would drop borrows between the two halves of each lane
+            y6Low = _mm_sub_epi64(y6Low, y8Low);
+
+            sumLow  = _mm_add_epi64(x6Low, sumLow); // same lane width as the _mm_add_epi64 accumulation in the preceding branch
+            sum2Low = _mm_add_epi64(y6Low, sum2Low);
+
+            _mm_storeu_si128((__m128i*) & tempData[0][iOffset - 4][jOffset + kk * 2], sumLow);
+            _mm_storeu_si128((__m128i*) & tempData[1][iOffset - 4][jOffset + kk * 2], sum2Low);
+
+            variance[VARIANCE][iOffset - 4][jOffset + kk * 2] = (uint32_t)((13 * ((numSample * tempData[1][iOffset - 4][jOffset + kk * 2] - tempData[0][iOffset - 4][jOffset + kk * 2] * tempData[0][iOffset - 4][jOffset + kk * 2] + offset) >> 3)) >> (14 + bdShift));
+            variance[VARIANCE][iOffset - 4][jOffset + kk * 2 + 1] = (uint32_t)((13 * ((numSample * tempData[1][iOffset - 4][jOffset + kk * 2 + 1] - tempData[0][iOffset - 4][jOffset + kk * 2 + 1] * tempData[0][iOffset - 4][jOffset + kk * 2 + 1] + offset) >> 3)) >> (14 + bdShift));
+          }
+#else
           x8 = _mm_loadu_si128((__m128i *)&variance[2][iOffset - 5][jOffset]);
           y8 = _mm_loadu_si128((__m128i *)&variance[3][iOffset - 5][jOffset]);
           x6 = _mm_loadu_si128((__m128i *)&variance[0][iOffset - 5][jOffset]);
@@ -8761,6 +8908,7 @@ static void simdDeriveVariance(const CPelBuf &srcLuma, const Area &blkDst, const
           sum2 = _mm_mullo_epi32(sum2, m13);
           sum2 = _mm_srli_epi32(sum2, 14);
           _mm_storeu_si128((__m128i *) &variance[VARIANCE][iOffset - 4][jOffset], sum2);
+#endif
         }
       }
 
diff --git a/source/Lib/CommonLib/x86/BilateralFilterX86.h b/source/Lib/CommonLib/x86/BilateralFilterX86.h
index 9e3262765..2c60e66cc 100644
--- a/source/Lib/CommonLib/x86/BilateralFilterX86.h
+++ b/source/Lib/CommonLib/x86/BilateralFilterX86.h
@@ -48,7 +48,11 @@
 #if USE_AVX2
 
 #if JVET_AF0112_BIF_DYNAMIC_SCALING
+#if JVET_AJ0237_INTERNAL_12BIT
+inline void simdBifApplyLut(__m256i& val, __m256i& acc, int cutBitsNum, __m256i& bitsRound, __m256i& bitsRound2, __m256i& lut, int bdShift)
+#else
 inline void simdBifApplyLut(__m256i& val, __m256i& acc, int cutBitsNum, __m256i& bitsRound, __m256i& bitsRound2, __m256i& lut)
+#endif
 #else
 inline void simdBifApplyLut(__m256i& val, __m256i& acc, __m256i& lut, int lutShift)
 #endif
@@ -74,23 +78,40 @@ inline void simdBifApplyLut(__m256i& val, __m256i& acc, __m256i& lut, int lutShi
   diffabs = _mm256_shuffle_epi8(lut, diffabs); /* lut */
   diffabs = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(diffabs)); /* back to 16-bit */
   diffabs = _mm256_srai_epi16(diffabs, lutShift); /* diagonal shift! */
+#endif
+#if JVET_AJ0237_INTERNAL_12BIT
+  diffabs = _mm256_slli_epi16(diffabs, bdShift);
 #endif
   diffabs = _mm256_sign_epi16(diffabs, val); /* add original sign */
   acc = _mm256_add_epi16(diffabs, acc); /* add to acc */
 }
 
+#if JVET_AJ0237_INTERNAL_12BIT
+template<X86_VEXT vext>
+void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum, int bdShift)
+#else
 template<X86_VEXT vext>
 void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum)
+#endif
 {
   //if( uiWidth < 4 || ( uiWidth < 8 && isRDO ) )
   if (uiWidth < 4)
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    return blockBilateralFilterDiamond5x5(uiWidth, uiHeight, block, blkFilt, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, isRDO, lutRowPtr, noClip, cutBitsNum, bdShift);
+#else
     return blockBilateralFilterDiamond5x5(uiWidth, uiHeight, block, blkFilt, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, isRDO, lutRowPtr, noClip, cutBitsNum);
+#endif
   }
 
   int pad = 2;
   int padwidth = iWidthExtSIMD;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  cutBitsNum += bdShift;
+#endif
+
+
   __m256i center, left, right, up, down, lu, ld, ru, rd, acc, roundAdd, clipmin, clipmax, inputVals;
   __m256i ll, rr, uu, dd;
   __m128i lutTmp;
@@ -106,7 +127,11 @@ void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight,
   __m256i lut2 = _mm256_set_m128i(lutTmp, lutTmp);
   lutTmp = _mm_loadu_si128((__m128i*)(lutRowPtr + 32));
   __m256i lut3 = _mm256_set_m128i(lutTmp, lutTmp);
+#if JVET_AJ0237_INTERNAL_12BIT
+  __m256i mmBfac = _mm256_unpacklo_epi16(_mm256_set1_epi16(bfac), _mm256_set1_epi16(1));
+#else
   __m256i mmBfac = _mm256_set1_epi16(bfac);
+#endif
   roundAdd = _mm256_set1_epi16(bifRoundAdd << 3);
   __m256i bitsRound = _mm256_set1_epi16(1 << (cutBitsNum - 2));
   __m256i bitsRound2 = _mm256_set1_epi16((1 << (cutBitsNum - 2)) + (1 << (cutBitsNum - 1)));
@@ -160,6 +185,22 @@ void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight,
 
       // apply LUT
 #if JVET_AF0112_BIF_DYNAMIC_SCALING
+#if JVET_AJ0237_INTERNAL_12BIT
+      simdBifApplyLut(left, acc, cutBitsNum, bitsRound, bitsRound2, lut1, bdShift);
+      simdBifApplyLut(right, acc, cutBitsNum, bitsRound, bitsRound2, lut1, bdShift);
+      simdBifApplyLut(up, acc, cutBitsNum, bitsRound, bitsRound2, lut1, bdShift);
+      simdBifApplyLut(down, acc, cutBitsNum, bitsRound, bitsRound2, lut1, bdShift);
+
+      simdBifApplyLut(lu, acc, cutBitsNum, bitsRound, bitsRound2, lut2, bdShift);
+      simdBifApplyLut(ld, acc, cutBitsNum, bitsRound, bitsRound2, lut2, bdShift);
+      simdBifApplyLut(ru, acc, cutBitsNum, bitsRound, bitsRound2, lut2, bdShift);
+      simdBifApplyLut(rd, acc, cutBitsNum, bitsRound, bitsRound2, lut2, bdShift);
+
+      simdBifApplyLut(ll, acc, cutBitsNum, bitsRound, bitsRound2, lut3, bdShift);
+      simdBifApplyLut(rr, acc, cutBitsNum, bitsRound, bitsRound2, lut3, bdShift);
+      simdBifApplyLut(uu, acc, cutBitsNum, bitsRound, bitsRound2, lut3, bdShift);
+      simdBifApplyLut(dd, acc, cutBitsNum, bitsRound, bitsRound2, lut3, bdShift);
+#else
       simdBifApplyLut(left, acc, cutBitsNum, bitsRound, bitsRound2, lut1);
       simdBifApplyLut(right, acc, cutBitsNum, bitsRound, bitsRound2, lut1);
       simdBifApplyLut(up, acc, cutBitsNum, bitsRound, bitsRound2, lut1);
@@ -174,6 +215,7 @@ void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight,
       simdBifApplyLut(rr, acc, cutBitsNum, bitsRound, bitsRound2, lut3);
       simdBifApplyLut(uu, acc, cutBitsNum, bitsRound, bitsRound2, lut3);
       simdBifApplyLut(dd, acc, cutBitsNum, bitsRound, bitsRound2, lut3);
+#endif
 #else
       simdBifApplyLut(left, acc, lut, lutShift1);
       simdBifApplyLut(right, acc, lut, lutShift1);
@@ -193,9 +235,21 @@ void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight,
 
       // TU scaling
 #if JVET_AF0112_BIF_DYNAMIC_SCALING
+#if JVET_AJ0237_INTERNAL_12BIT
+      __m256i accLow  = _mm256_unpacklo_epi16(acc, roundAdd);
+      __m256i accHigh = _mm256_unpackhi_epi16(acc, roundAdd);
+      __m256i accLowPack = _mm256_madd_epi16(accLow, mmBfac);
+      __m256i accHighPack = _mm256_madd_epi16(accHigh, mmBfac);
+
+      accLow = _mm256_srai_epi32(accLowPack, bifRoundShift + 3);
+      accHigh = _mm256_srai_epi32(accHighPack, bifRoundShift + 3);
+
+      acc = _mm256_packs_epi32(accLow, accHigh);
+#else
       acc = _mm256_mullo_epi16(acc, mmBfac);
       acc = _mm256_adds_epi16(acc, roundAdd);
       acc = _mm256_srai_epi16(acc, bifRoundShift + 3);
+#endif
 #else
       if (bfac == 2)
       {
@@ -293,7 +347,11 @@ int BilateralFilter::simdCalcMAD(int16_t* block, int stride, int width, int heig
 #else // USE_AVX2
 
 #if JVET_AF0112_BIF_DYNAMIC_SCALING
+#if JVET_AJ0237_INTERNAL_12BIT
+inline void simdBifApplyLut(__m128i& val, __m128i& acc, int cutBitsNum, __m128i& bitsRound, __m128i& bitsRound2, __m128i& lut, int bdShift)
+#else
 inline void simdBifApplyLut(__m128i& val, __m128i& acc, int cutBitsNum, __m128i& bitsRound, __m128i& bitsRound2, __m128i& lut)
+#endif
 #else
 inline void simdBifApplyLut(__m128i& val, __m128i& acc, __m128i& lut, int lutShift)
 #endif
@@ -318,23 +376,39 @@ inline void simdBifApplyLut(__m128i& val, __m128i& acc, __m128i& lut, int lutShi
   diffabs = _mm_shuffle_epi8(lut, diffabs); /* lut */
   diffabs = _mm_cvtepi8_epi16(diffabs); /* back to 16-bit */
   diffabs = _mm_srai_epi16(diffabs, lutShift); /* diagonal shift! */
+#endif
+#if JVET_AJ0237_INTERNAL_12BIT
+  diffabs = _mm_slli_epi16(diffabs, bdShift);
 #endif
   diffabs = _mm_sign_epi16(diffabs, val); /* add original sign */
   acc = _mm_add_epi16(diffabs, acc); /* add to acc */
 }
 
+#if JVET_AJ0237_INTERNAL_12BIT
+template<X86_VEXT vext>
+void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum, int bdShift)
+#else
 template<X86_VEXT vext>
 void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight, int16_t block[], int16_t blkFilt[], const ClpRng& clpRng, Pel* recPtr, int recStride, int iWidthExtSIMD, int bfac, int bifRoundAdd, int bifRoundShift, bool isRDO, const char* lutRowPtr, bool noClip, int cutBitsNum)
+#endif
 {
   //if( uiWidth < 4 || ( uiWidth < 8 && isRDO ) )
   if( uiWidth < 4 )
   {
+#if JVET_AJ0237_INTERNAL_12BIT
+    return blockBilateralFilterDiamond5x5(uiWidth, uiHeight, block, blkFilt, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, isRDO, lutRowPtr, noClip, cutBitsNum, bdShift);
+#else
     return blockBilateralFilterDiamond5x5(uiWidth, uiHeight, block, blkFilt, clpRng, recPtr, recStride, iWidthExtSIMD, bfac, bifRoundAdd, bifRoundShift, isRDO, lutRowPtr, noClip, cutBitsNum);
+#endif
   }
 
   int pad = 2;
   int padwidth = iWidthExtSIMD;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  cutBitsNum += bdShift;
+#endif
+
   __m128i center, left, right, up, down, lu, ld, ru, rd, acc, roundAdd, clipmin, clipmax, inputVals;
   __m128i ll, rr, uu, dd;
 
@@ -346,7 +420,11 @@ void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight,
   __m128i lut1 = _mm_loadu_si128((__m128i*)(lutRowPtr));
   __m128i lut2 = _mm_loadu_si128((__m128i*)(lutRowPtr + 16));
   __m128i lut3 = _mm_loadu_si128((__m128i*)(lutRowPtr + 32));
+#if JVET_AJ0237_INTERNAL_12BIT
+  __m128i mmBfac = _mm_unpacklo_epi16(_mm_set1_epi16(bfac), _mm_set1_epi16(1));
+#else
   __m128i mmBfac = _mm_set1_epi16(bfac);
+#endif
   roundAdd = _mm_set1_epi16(bifRoundAdd << 3);
   __m128i bitsRound = _mm_set1_epi16(1 << (cutBitsNum - 2));
   __m128i bitsRound2 = _mm_set1_epi16((1 << (cutBitsNum - 2)) + (1 << (cutBitsNum - 1)));
@@ -399,6 +477,22 @@ void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight,
       
       // apply LUT
 #if JVET_AF0112_BIF_DYNAMIC_SCALING
+#if JVET_AJ0237_INTERNAL_12BIT
+      simdBifApplyLut(left, acc, cutBitsNum, bitsRound, bitsRound2, lut1, bdShift);
+      simdBifApplyLut(right, acc, cutBitsNum, bitsRound, bitsRound2, lut1, bdShift);
+      simdBifApplyLut(up, acc, cutBitsNum, bitsRound, bitsRound2, lut1, bdShift);
+      simdBifApplyLut(down, acc, cutBitsNum, bitsRound, bitsRound2, lut1, bdShift);
+
+      simdBifApplyLut(lu, acc, cutBitsNum, bitsRound, bitsRound2, lut2, bdShift);
+      simdBifApplyLut(ld, acc, cutBitsNum, bitsRound, bitsRound2, lut2, bdShift);
+      simdBifApplyLut(ru, acc, cutBitsNum, bitsRound, bitsRound2, lut2, bdShift);
+      simdBifApplyLut(rd, acc, cutBitsNum, bitsRound, bitsRound2, lut2, bdShift);
+
+      simdBifApplyLut(ll, acc, cutBitsNum, bitsRound, bitsRound2, lut3, bdShift);
+      simdBifApplyLut(rr, acc, cutBitsNum, bitsRound, bitsRound2, lut3, bdShift);
+      simdBifApplyLut(uu, acc, cutBitsNum, bitsRound, bitsRound2, lut3, bdShift);
+      simdBifApplyLut(dd, acc, cutBitsNum, bitsRound, bitsRound2, lut3, bdShift);
+#else
       simdBifApplyLut(left, acc, cutBitsNum, bitsRound, bitsRound2, lut1);
       simdBifApplyLut(right, acc, cutBitsNum, bitsRound, bitsRound2, lut1);
       simdBifApplyLut(up, acc, cutBitsNum, bitsRound, bitsRound2, lut1);
@@ -413,6 +507,7 @@ void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight,
       simdBifApplyLut(rr, acc, cutBitsNum, bitsRound, bitsRound2, lut3);
       simdBifApplyLut(uu, acc, cutBitsNum, bitsRound, bitsRound2, lut3);
       simdBifApplyLut(dd, acc, cutBitsNum, bitsRound, bitsRound2, lut3);
+#endif
 #else
       simdBifApplyLut(left, acc, lut, lutShift1);
       simdBifApplyLut(right, acc, lut, lutShift1);
@@ -432,9 +527,21 @@ void BilateralFilter::simdFilterDiamond5x5(uint32_t uiWidth, uint32_t uiHeight,
       
       // TU scaling
 #if JVET_AF0112_BIF_DYNAMIC_SCALING
+#if JVET_AJ0237_INTERNAL_12BIT
+      __m128i accLow  = _mm_unpacklo_epi16(acc, roundAdd);
+      __m128i accHigh = _mm_unpackhi_epi16(acc, roundAdd);
+      __m128i accLowPack = _mm_madd_epi16(accLow, mmBfac);
+      __m128i accHighPack = _mm_madd_epi16(accHigh, mmBfac);
+
+      accLow = _mm_srai_epi32(accLowPack, bifRoundShift + 3);
+      accHigh = _mm_srai_epi32(accHighPack, bifRoundShift + 3);
+
+      acc = _mm_packs_epi32(accLow, accHigh);
+#else
       acc = _mm_mullo_epi16(acc, mmBfac);
       acc = _mm_adds_epi16(acc, roundAdd);
       acc = _mm_srai_epi16(acc, bifRoundShift + 3);
+#endif
 #else
       if (bfac == 2)
       {
diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h
index 5730bd2ec..b5028b225 100644
--- a/source/Lib/CommonLib/x86/BufferX86.h
+++ b/source/Lib/CommonLib/x86/BufferX86.h
@@ -3907,7 +3907,11 @@ void getAbsoluteDifferencePerSample_SSE(Pel* dst, int dstStride, const Pel* src0
 template <X86_VEXT vext, uint8_t maskType>
 int64_t getMaskedSampleSum_SSE(Pel* src, int srcStride, int width, int height, int bitDepth, short* weightMask, int maskStepX, int maskStride, int maskStride2)
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  if ((width & 7) != 0 || bitDepth > 12)
+#else
   if ((width & 7) != 0  || bitDepth > 10)
+#endif
   {
     return getMaskedSampleSumCore<maskType>(src, srcStride, width, height, bitDepth, weightMask, maskStepX, maskStride, maskStride2);
   }
diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
index 0d92aca83..039b840f0 100644
--- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h
+++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
@@ -79,6 +79,11 @@ static void fullPelCopySSE( const ClpRng& clpRng, const void*_src, int srcStride
 #endif
   int headroom_offset = 1 << ( headroom - 1 );
   int offset   = IF_INTERNAL_OFFS;
+
+#if JVET_AJ0237_INTERNAL_12BIT
+  int dmvrHeadRoom = IF_INTERNAL_PREC_BILINEAR(clpRng.bd) - clpRng.bd; // in the current setup, dmvr headroom should either be 0 or negative
+#endif
+
   __m128i voffset  = _mm_set1_epi16( offset );
   __m128i voffset_headroom  = _mm_set1_epi16( headroom_offset );
 
@@ -106,7 +111,29 @@ static void fullPelCopySSE( const ClpRng& clpRng, const void*_src, int srcStride
 #if MCIF_SIMD_NEW
         if (biMCForDMVR)
         {
+#if JVET_AJ0237_INTERNAL_12BIT
+          if ((isFirst == isLast) || (isFirst && dmvrHeadRoom == 0))
+          {
+            vsum = vsrc;
+          }
+          else if (isFirst)
+          {
+            if (dmvrHeadRoom > 0)
+            {
+              vsum = _mm_slli_epi16(vsrc, dmvrHeadRoom);
+            }
+            else
+            {
+              vsum = _mm_srai_epi16(vsrc, -dmvrHeadRoom);
+            }
+          }
+          else
+          {
+            CHECK(1, "Impossible to have isFirst being false and isLast being true, when biMCForDMVR is true");
+          }
+#else
           vsum = _mm_min_epi16(vibdimax, _mm_max_epi16(vibdimin, vsrc));
+#endif
         }
         else if (isFirst == isLast)
         {
@@ -152,6 +179,10 @@ static void fullPelCopyVerSSE(const ClpRng& clpRng, const void*_src, int srcStri
   int headroom = IF_INTERNAL_PREC - clpRng.bd;
   int headroom_offset = 1 << (headroom - 1);
   int offset = IF_INTERNAL_OFFS;
+#if JVET_AJ0237_INTERNAL_12BIT
+  int dmvrHeadRoom = IF_INTERNAL_PREC_BILINEAR(clpRng.bd) - clpRng.bd; // in the current setup, dmvr headroom should either be 0 or negative
+#endif
+
   __m128i voffset = _mm_set1_epi16(offset);
   __m128i voffset_headroom = _mm_set1_epi16(headroom_offset);
 
@@ -176,7 +207,29 @@ static void fullPelCopyVerSSE(const ClpRng& clpRng, const void*_src, int srcStri
         }
         if (biMCForDMVR)
         {
+#if JVET_AJ0237_INTERNAL_12BIT
+          if ((isFirst == isLast) || (isFirst && dmvrHeadRoom == 0))
+          {
+            vsum = vsrc;
+          }
+          else if (isFirst)
+          {
+            if (dmvrHeadRoom > 0)
+            {
+              vsum = _mm_slli_epi16(vsrc, dmvrHeadRoom);
+            }
+            else
+            {
+              vsum = _mm_srai_epi16(vsrc, -dmvrHeadRoom);
+            }
+          }
+          else
+          {
+            CHECK(1, "Impossible to have isFirst being false and isLast being true, when biMCForDMVR is true");
+          }
+#else
           vsum = _mm_min_epi16(vibdimax, _mm_max_epi16(vibdimin, vsrc));
+#endif
         }
         else if (isFirst == isLast)
         {
@@ -218,6 +271,10 @@ static void fullPelCopySSE_M4(const ClpRng& clpRng, const void*_src, ptrdiff_t s
   int headroom = IF_INTERNAL_PREC - clpRng.bd;
   int headroom_offset = 1 << (headroom - 1);
   int offset = IF_INTERNAL_OFFS;
+#if JVET_AJ0237_INTERNAL_12BIT
+  int dmvrHeadRoom = IF_INTERNAL_PREC_BILINEAR(clpRng.bd) - clpRng.bd; // in the current setup, dmvr headroom should either be 0 or negative
+#endif
+
   __m128i voffset = _mm_set1_epi16(offset);
   __m128i voffset_headroom = _mm_set1_epi16(headroom_offset);
   __m128i vibdimin = _mm_set1_epi16(clpRng.min);
@@ -243,7 +300,29 @@ static void fullPelCopySSE_M4(const ClpRng& clpRng, const void*_src, ptrdiff_t s
       }
       if (biMCForDMVR)
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        if ((isFirst == isLast) || (isFirst && dmvrHeadRoom == 0))
+        {
+          vsum = vsrc;
+        }
+        else if (isFirst)
+        {
+          if (dmvrHeadRoom > 0)
+          {
+            vsum = _mm_slli_epi16(vsrc, dmvrHeadRoom);
+          }
+          else
+          {
+            vsum = _mm_srai_epi16(vsrc, -dmvrHeadRoom);
+          }
+        }
+        else
+        {
+          CHECK(1, "Impossible to have isFirst being false and isLast being true, when biMCForDMVR is true");
+        }
+#else
         vsum = _mm_min_epi16(vibdimax, _mm_max_epi16(vibdimin, vsrc));
+#endif
       }
       else if (isFirst == isLast)
       {
@@ -280,6 +359,10 @@ static void fullPelCopyVerSSE_M4(const ClpRng& clpRng, const void*_src, ptrdiff_
   int headroom = IF_INTERNAL_PREC - clpRng.bd;
   int headroom_offset = 1 << (headroom - 1);
   int offset = IF_INTERNAL_OFFS;
+#if JVET_AJ0237_INTERNAL_12BIT
+  int dmvrHeadRoom = IF_INTERNAL_PREC_BILINEAR(clpRng.bd) - clpRng.bd; // in the current setup, dmvr headroom should either be 0 or negative
+#endif
+
   __m128i voffset = _mm_set1_epi16(offset);
   __m128i voffset_headroom = _mm_set1_epi16(headroom_offset);
   __m128i vibdimin = _mm_set1_epi16(clpRng.min);
@@ -301,7 +384,29 @@ static void fullPelCopyVerSSE_M4(const ClpRng& clpRng, const void*_src, ptrdiff_
       }
       if (biMCForDMVR)
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        if ((isFirst == isLast) || (isFirst && dmvrHeadRoom == 0))
+        {
+          vsum = vsrc;
+        }
+        else if (isFirst)
+        {
+          if (dmvrHeadRoom > 0)
+          {
+            vsum = _mm_slli_epi16(vsrc, dmvrHeadRoom);
+          }
+          else
+          {
+            vsum = _mm_srai_epi16(vsrc, -dmvrHeadRoom);
+          }
+        }
+        else
+        {
+          CHECK(1, "Impossible to have isFirst being false and isLast being true, when biMCForDMVR is true");
+        }
+#else
         vsum = _mm_min_epi16(vibdimax, _mm_max_epi16(vibdimin, vsrc));
+#endif
       }
       else if (isFirst == isLast)
       {
@@ -355,6 +460,9 @@ static void fullPelCopyAVX2( const ClpRng& clpRng, const void*_src, int srcStrid
   __m256i vibdimax = _mm256_set1_epi16( clpRng.max );
   __m256i vsrc, vsum;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  int dmvrHeadRoom = IF_INTERNAL_PREC_BILINEAR(clpRng.bd) - clpRng.bd; // in the current setup, dmvr headroom should either be 0 or negative
+#endif
 
   for( int row = 0; row < height; row++ )
   {
@@ -376,7 +484,29 @@ static void fullPelCopyAVX2( const ClpRng& clpRng, const void*_src, int srcStrid
 #if MCIF_SIMD_NEW
         if (biMCForDMVR)
         {
+#if JVET_AJ0237_INTERNAL_12BIT
+          if ((isFirst == isLast) || (isFirst && dmvrHeadRoom == 0))
+          {
+            vsum = vsrc;
+          }
+          else if (isFirst)
+          {
+            if (dmvrHeadRoom > 0)
+            {
+              vsum = _mm256_slli_epi16(vsrc, dmvrHeadRoom);
+            }
+            else
+            {
+              vsum = _mm256_srai_epi16(vsrc, -dmvrHeadRoom);
+            }
+          }
+          else
+          {
+            CHECK(1, "Impossible to have isFirst being false and isLast being true, when biMCForDMVR is true");
+          }
+#else
           vsum = _mm256_min_epi16(vibdimax, _mm256_max_epi16(vibdimin, vsrc));
+#endif
         }
         else if (isFirst == isLast)
         {
@@ -426,6 +556,10 @@ static void fullPelCopyVerAVX2(const ClpRng& clpRng, const void*_src, int srcStr
   int offset = 1 << (headroom - 1);
   int internal_offset = IF_INTERNAL_OFFS;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  int dmvrHeadRoom = IF_INTERNAL_PREC_BILINEAR(clpRng.bd) - clpRng.bd; // in the current setup, dmvr headroom should either be 0 or negative
+#endif
+
   __m256i vinternal_offset = _mm256_set1_epi16(internal_offset);
   __m256i vheadroom_offset = _mm256_set1_epi16(offset);
 
@@ -452,7 +586,29 @@ static void fullPelCopyVerAVX2(const ClpRng& clpRng, const void*_src, int srcStr
         }
         if (biMCForDMVR)
         {
+#if JVET_AJ0237_INTERNAL_12BIT
+          if ((isFirst == isLast) || (isFirst && dmvrHeadRoom == 0))
+          {
+            vsum = vsrc;
+          }
+          else if (isFirst)
+          {
+            if (dmvrHeadRoom > 0)
+            {
+              vsum = _mm256_slli_epi16(vsrc, dmvrHeadRoom);
+            }
+            else
+            {
+              vsum = _mm256_srai_epi16(vsrc, -dmvrHeadRoom);
+            }
+          }
+          else
+          {
+            CHECK(1, "Impossible to have isFirst being false and isLast being true, when biMCForDMVR is true");
+          }
+#else
           vsum = _mm256_min_epi16(vibdimax, _mm256_max_epi16(vibdimin, vsrc));
+#endif
         }
         else if (isFirst == isLast)
         {
@@ -3262,7 +3418,11 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel
   {
     if( isFirst )
     {
+#if JVET_AJ0237_INTERNAL_12BIT
+      shift = IF_FILTER_PREC_BILINEAR - (IF_INTERNAL_PREC_BILINEAR(clpRng.bd) - clpRng.bd);
+#else
       shift = IF_FILTER_PREC_BILINEAR - (IF_INTERNAL_PREC_BILINEAR - clpRng.bd);
+#endif
       offset = 1 << (shift - 1);
     }
     else
diff --git a/source/Lib/CommonLib/x86/IntraX86.h b/source/Lib/CommonLib/x86/IntraX86.h
index 8d03a3abc..d505792cd 100644
--- a/source/Lib/CommonLib/x86/IntraX86.h
+++ b/source/Lib/CommonLib/x86/IntraX86.h
@@ -2723,7 +2723,11 @@ bool xPredIntraOpt_SIMD(PelBuf &pDst, const PredictionUnit &pu, const uint32_t m
   const int addShift = 1 << 13;
 
   const __m128i offset = _mm_set1_epi32( addShift );
+#if JVET_AJ0237_INTERNAL_12BIT
+  const __m128i max = _mm_set1_epi32((1 << clpRng.bd) - 1);
+#else
   const __m128i max = _mm_set1_epi32( 1023 );
+#endif
   const __m128i zeros = _mm_setzero_si128();
   __m128i vmat[ 4 ], vcoef[ 4 ], vsrc;
 
@@ -2862,6 +2866,27 @@ int64_t calcAeipGroupSumSIMD(const Pel* src1, const Pel* src2, const int numSamp
   __m256i vzero = _mm256_setzero_si256();
   __m256i vsum32 = vzero;
   const int samplesBySIMD = (numSamples >> 4) << 4;
+#if JVET_AJ0237_INTERNAL_12BIT
+  int64_t sum = 0;
+  const int simdSampleBatchCnt = (samplesBySIMD >> 4) >> 2;
+  for (int batchIdx = 0; batchIdx < simdSampleBatchCnt; batchIdx++)
+  {
+    vsum32 = vzero;
+    for (; i < ((batchIdx + 1) * 64); i += 16)
+    {
+      __m256i vsrc1 = _mm256_lddqu_si256((__m256i*)(&src1[i]));
+      __m256i vsrc2 = _mm256_lddqu_si256((__m256i*)(&src2[i]));
+      __m256i vsumtemp = _mm256_madd_epi16(vsrc1, vsrc2);
+      vsum32 = _mm256_add_epi32(vsum32, vsumtemp);
+    }
+    vsum32 = _mm256_hadd_epi32(vsum32, vsum32);
+    vsum32 = _mm256_hadd_epi32(vsum32, vsum32);
+    sum += (_mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32)) + _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute2x128_si256(vsum32, vsum32, 0x11))));
+  }
+  if (i < samplesBySIMD)
+  {
+    vsum32 = vzero;
+#endif
   for (; i < samplesBySIMD; i += 16)
   {
     __m256i vsrc1 = _mm256_lddqu_si256((__m256i*)(&src1[i]));
@@ -2871,11 +2896,37 @@ int64_t calcAeipGroupSumSIMD(const Pel* src1, const Pel* src2, const int numSamp
   }
   vsum32 = _mm256_hadd_epi32(vsum32, vsum32);
   vsum32 = _mm256_hadd_epi32(vsum32, vsum32);
+#if JVET_AJ0237_INTERNAL_12BIT
+  sum += (_mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32)) + _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute2x128_si256(vsum32, vsum32, 0x11))));
+  }
+#else
   int64_t sum = (_mm_cvtsi128_si32(_mm256_castsi256_si128(vsum32)) + _mm_cvtsi128_si32(_mm256_castsi256_si128(_mm256_permute2x128_si256(vsum32, vsum32, 0x11))));
+#endif
 #else
   __m128i vzero = _mm_setzero_si128();
   __m128i vsum32 = vzero;
   const int samplesBySIMD = (numSamples >> 4) << 4;
+#if JVET_AJ0237_INTERNAL_12BIT
+  int64_t sum = 0;
+  const int simdSampleBatchCnt = (samplesBySIMD >> 4) >> 2;
+  for (int batchIdx = 0; batchIdx < simdSampleBatchCnt; batchIdx++)
+  {
+    vsum32 = vzero;
+    for (; i < ((batchIdx + 1) * 64); i += 8)
+    {
+      __m128i vsrc1 = _mm_loadu_si128((__m128i*)(&src1[i]));
+      __m128i vsrc2 = _mm_loadu_si128((__m128i*)(&src2[i]));
+      __m128i vsumtemp = _mm_madd_epi16(vsrc1, vsrc2);
+      vsum32 = _mm_add_epi32(vsum32, vsumtemp);
+    }
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e));
+    vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));
+    sum += _mm_cvtsi128_si32(vsum32);
+  }
+  if (i < samplesBySIMD)
+  {
+    vsum32 = vzero;
+#endif
   for (; i < samplesBySIMD; i += 8)
   {
     __m128i vsrc1 = _mm_loadu_si128((__m128i*)(&src1[i]));
@@ -2885,7 +2936,12 @@ int64_t calcAeipGroupSumSIMD(const Pel* src1, const Pel* src2, const int numSamp
   }
   vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0x4e));   // 01001110
   vsum32 = _mm_add_epi32(vsum32, _mm_shuffle_epi32(vsum32, 0xb1));   // 10110001
+#if JVET_AJ0237_INTERNAL_12BIT
+  sum += _mm_cvtsi128_si32(vsum32);
+  }
+#else
   int64_t sum = _mm_cvtsi128_si32(vsum32);
+#endif
 #endif
   for (; i < numSamples; i++)
   {
diff --git a/source/Lib/CommonLib/x86/RdCostX86.h b/source/Lib/CommonLib/x86/RdCostX86.h
index 5a877a07e..f0c53c1a2 100644
--- a/source/Lib/CommonLib/x86/RdCostX86.h
+++ b/source/Lib/CommonLib/x86/RdCostX86.h
@@ -50,7 +50,11 @@ template<X86_VEXT vext >
 Distortion RdCost::xGetSSE_SIMD( const DistParam &rcDtParam )
 {
 #if DIST_SSE_ENABLE
+#if JVET_AJ0237_INTERNAL_12BIT
+  if (rcDtParam.org.width < 4 || rcDtParam.bitDepth > 12 || rcDtParam.applyWeight)
+#else
   if (rcDtParam.org.width < 4 || rcDtParam.bitDepth > 10 || rcDtParam.applyWeight)
+#endif
 #else
   if( rcDtParam.bitDepth > 10 )
 #endif
@@ -179,7 +183,11 @@ Distortion RdCost::xGetSSE_SIMD( const DistParam &rcDtParam )
 template<int iWidth, X86_VEXT vext >
 Distortion RdCost::xGetSSE_NxN_SIMD( const DistParam &rcDtParam )
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  if (rcDtParam.bitDepth > 12 || rcDtParam.applyWeight)
+#else
   if( rcDtParam.bitDepth > 10 || rcDtParam.applyWeight )
+#endif
     return RdCost::xGetSSE( rcDtParam );
 
   const Torg* pSrc1     = (const Torg*)rcDtParam.org.buf;
@@ -354,7 +362,11 @@ Distortion RdCost::xGetSSE_NxN_SIMD( const DistParam &rcDtParam )
 template<X86_VEXT vext> 
 Distortion RdCost::xGetSSE_NxN_SIMD(const DistParam &rcDtParam)
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  if (rcDtParam.bitDepth > 12 || rcDtParam.applyWeight)
+#else
   if (rcDtParam.bitDepth > 10 || rcDtParam.applyWeight)
+#endif
     return RdCost::xGetSSE(rcDtParam);
 
   const Torg *pSrc1       = (const Torg *) rcDtParam.org.buf;
@@ -500,7 +512,11 @@ Distortion RdCost::xGetSSE_NxN_SIMD(const DistParam &rcDtParam)
 template< X86_VEXT vext >
 Distortion RdCost::xGetSAD_SIMD( const DistParam &rcDtParam )
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  if (rcDtParam.org.width < 4 || rcDtParam.bitDepth > 12 || rcDtParam.applyWeight)
+#else
   if( rcDtParam.org.width < 4 || rcDtParam.bitDepth > 10 || rcDtParam.applyWeight )
+#endif
     return RdCost::xGetSAD( rcDtParam );
 
   const short* pSrc1   = (const short*)rcDtParam.org.buf;
@@ -593,7 +609,11 @@ Distortion RdCost::xGetSAD_SIMD( const DistParam &rcDtParam )
 template< X86_VEXT vext >
 Distortion RdCost::xGetSAD_IBD_SIMD(const DistParam &rcDtParam)
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  if (rcDtParam.org.width < 4 || rcDtParam.bitDepth > 12 || rcDtParam.applyWeight)
+#else
   if (rcDtParam.org.width < 4 || rcDtParam.bitDepth > 10 || rcDtParam.applyWeight)
+#endif
     return RdCost::xGetSAD(rcDtParam);
 
   const short* src0 = (const short*)rcDtParam.org.buf;
@@ -631,7 +651,11 @@ Distortion RdCost::xGetSAD_IBD_SIMD(const DistParam &rcDtParam)
 template< int iWidth, X86_VEXT vext >
 Distortion RdCost::xGetSAD_NxN_SIMD( const DistParam &rcDtParam )
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  if (rcDtParam.bitDepth > 12 || rcDtParam.applyWeight)
+#else
   if( rcDtParam.bitDepth > 10 || rcDtParam.applyWeight )
+#endif
     return RdCost::xGetSAD( rcDtParam );
 
   //  assert( rcDtParam.iCols == iWidth);
@@ -2493,7 +2517,11 @@ static uint32_t xCalcHAD8x16_AVX2( const Pel* piOrg, const Pel* piCur, const int
 template< X86_VEXT vext >
 Distortion RdCost::xGetSADwMask_SIMD( const DistParam &rcDtParam )
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  if (rcDtParam.org.width < 4 || rcDtParam.bitDepth > 12 || rcDtParam.applyWeight)
+#else
   if (rcDtParam.org.width < 4  || rcDtParam.bitDepth > 10 || rcDtParam.applyWeight)
+#endif
     return RdCost::xGetSADwMask( rcDtParam );
 
   const short* src1   = (const short*)rcDtParam.org.buf;
@@ -2583,7 +2611,11 @@ Distortion RdCost::xGetSADwMask_SIMD( const DistParam &rcDtParam )
 template<X86_VEXT vext>
 Distortion RdCost::xGetHADs_SIMD( const DistParam &rcDtParam )
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  if (rcDtParam.bitDepth > 12 || rcDtParam.applyWeight)
+#else
   if( rcDtParam.bitDepth > 10 || rcDtParam.applyWeight )
+#endif
   {
     return RdCost::xGetHADs( rcDtParam );
   }
@@ -2842,7 +2874,11 @@ Distortion RdCost::xGetMRSAD_SIMD(const DistParam &rcDtParam)
 {
   int width = rcDtParam.org.width;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  if (width < 4 || rcDtParam.bitDepth > 12 || rcDtParam.applyWeight)
+#else
   if (width < 4 || rcDtParam.bitDepth > 10 || rcDtParam.applyWeight)
+#endif
   {
     return RdCost::xGetMRSAD(rcDtParam);
   }
@@ -3056,7 +3092,11 @@ Distortion RdCost::xGetTMErrorFull_SIMD(const DistParam& rcDtParam)
 {
   if ( rcDtParam.org.width < 4
     || ( trueAfalseL && (rcDtParam.org.width & 15) ) // (Above template) multiple of 16
+#if JVET_AJ0237_INTERNAL_12BIT
+    || rcDtParam.bitDepth > 12
+#else
     || rcDtParam.bitDepth > 10
+#endif
     || rcDtParam.applyWeight
   )
   {
diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp
index 955a7d0b2..8d36328a2 100644
--- a/source/Lib/DecoderLib/DecLib.cpp
+++ b/source/Lib/DecoderLib/DecLib.cpp
@@ -1119,9 +1119,17 @@ void DecLib::finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl )
 #if JVET_AA0096_MC_BOUNDARY_PADDING
   m_cFrameMcPadPrediction.init(&m_cRdCost, pcSlice->getSPS()->getChromaFormatIdc(), pcSlice->getSPS()->getMaxCUHeight(),
 #if JVET_AJ0172_IBC_ITMP_ALIGN_REF_AREA
+#if JVET_AJ0237_INTERNAL_12BIT
+                               NULL, m_pcPic->getPicWidthInLumaSamples(),m_pcPic->getPicHeightInLumaSamples(), pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA));
+#else
                                NULL, m_pcPic->getPicWidthInLumaSamples(),m_pcPic->getPicHeightInLumaSamples());
+#endif
+#else
+#if JVET_AJ0237_INTERNAL_12BIT
+                               NULL, m_pcPic->getPicWidthInLumaSamples(), pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA));
 #else
                                NULL, m_pcPic->getPicWidthInLumaSamples());
+#endif
 #endif
   m_cFrameMcPadPrediction.mcFramePad(m_pcPic, *(m_pcPic->slices[0]));
 #endif
@@ -1969,6 +1977,10 @@ void DecLib::xActivateParameterSets( const InputNALUnit nalu )
                    sps->getMaxCUWidth(), sps->getMaxCUHeight(),
                    maxDepth,
                    log2SaoOffsetScaleLuma, log2SaoOffsetScaleChroma );
+#if JVET_AJ0237_INTERNAL_12BIT
+    m_cSAO.m_bilateralFilter.setInternalBitDepth(sps->getBitDepth(CHANNEL_TYPE_LUMA));
+    m_cBilateralFilter.setInternalBitDepth(sps->getBitDepth(CHANNEL_TYPE_LUMA));
+#endif
 #if JVET_W0066_CCSAO
     pSlice->m_ccSaoControl[COMPONENT_Y ] = m_cSAO.getCcSaoControlIdc(COMPONENT_Y);
     pSlice->m_ccSaoControl[COMPONENT_Cb] = m_cSAO.getCcSaoControlIdc(COMPONENT_Cb);
@@ -1979,10 +1991,18 @@ void DecLib::xActivateParameterSets( const InputNALUnit nalu )
 #if INTER_LIC || (TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM) || JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING
 #if JVET_Z0153_IBC_EXT_REF
 #if JVET_AJ0172_IBC_ITMP_ALIGN_REF_AREA
+#if JVET_AJ0237_INTERNAL_12BIT
+    m_cInterPred.init(&m_cRdCost, sps->getChromaFormatIdc(), sps->getMaxCUHeight(), &m_cReshaper, sps->getMaxPicWidthInLumaSamples(),sps->getMaxPicHeightInLumaSamples(), sps->getBitDepth(CHANNEL_TYPE_LUMA));
+#else
     m_cInterPred.init(&m_cRdCost, sps->getChromaFormatIdc(), sps->getMaxCUHeight(), &m_cReshaper, sps->getMaxPicWidthInLumaSamples(),sps->getMaxPicHeightInLumaSamples());
+#endif
+#else
+#if JVET_AJ0237_INTERNAL_12BIT
+    m_cInterPred.init(&m_cRdCost, sps->getChromaFormatIdc(), sps->getMaxCUHeight(), &m_cReshaper, sps->getMaxPicWidthInLumaSamples(), sps->getBitDepth(CHANNEL_TYPE_LUMA));
 #else
     m_cInterPred.init(&m_cRdCost, sps->getChromaFormatIdc(), sps->getMaxCUHeight(), &m_cReshaper, sps->getMaxPicWidthInLumaSamples());
 #endif
+#endif
 #else
     m_cInterPred.init( &m_cRdCost, sps->getChromaFormatIdc(), sps->getMaxCUHeight(), &m_cReshaper);
 #endif
@@ -3086,6 +3106,9 @@ bool DecLib::xDecodeSlice(InputNALUnit &nalu, int &iSkipFrame, int iPOCLastDispl
     {
       clipDeltaShift = ADAPTIVE_CLIP_SHIFT_DELTA_VALUE_0;
     }
+#if JVET_AJ0237_INTERNAL_12BIT
+    clipDeltaShift += std::max(0, pcSlice->getSPS()->getBitDepth(toChannelType(COMPONENT_Y)) - 10); // add one shift bit per luma bit above 10; adds 0 at 10 bits or fewer
+#endif
     if (pcSlice->getSliceType() != I_SLICE)
     {
       int deltaMax = pcSlice->getLumaPelMax();
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index f7a37c708..103394181 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -4711,7 +4711,11 @@ void HLSyntaxReader::parseScaleAlf( Slice* pcSlice, SPS* sps, ParameterSetManage
     {
       ScaleAlf& curScaleAlfParam = pcSlice->getAlfScale( i , j );
 
+#if JVET_AJ0237_INTERNAL_12BIT
+      curScaleAlfParam.init(apsIdx, j, alfParam.lumaClassifierIdx[j], sps->getBitDepth(CHANNEL_TYPE_LUMA));
+#else
       curScaleAlfParam.init( apsIdx, j, alfParam.lumaClassifierIdx[j] );
+#endif
       curScaleAlfParam.apsIdx = apsIdx;
 
       if ( !bReadUseAlfScale ) 
@@ -5807,7 +5811,11 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
     if (index != -1)
     {
       lambdaCanBePredicted = true;
+#if JVET_AJ0237_INTERNAL_12BIT
+      pcSlice->setCostForARMC(sps->getLambdaVal((int)index), sps->getBitDepth(CHANNEL_TYPE_LUMA));
+#else
       pcSlice->setCostForARMC(sps->getLambdaVal((int) index));
+#endif
     }
     if (!lambdaCanBePredicted)
     {
@@ -5816,7 +5824,11 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
 #else
       READ_CODE(9, uiCode, "Lambda");
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+      pcSlice->setCostForARMC((uint32_t)uiCode, sps->getBitDepth(CHANNEL_TYPE_LUMA));
+#else
       pcSlice->setCostForARMC((uint32_t)uiCode);
+#endif
     }
   }
 #endif
@@ -5861,6 +5873,9 @@ void HLSyntaxReader::parseSliceHeader (Slice* pcSlice, PicHeader* picHeader, Par
   {
     clipDeltaShift = ADAPTIVE_CLIP_SHIFT_DELTA_VALUE_0;
   }
+#if JVET_AJ0237_INTERNAL_12BIT
+  clipDeltaShift += std::max(0, sps->getBitDepth(toChannelType(COMPONENT_Y)) - 10); // add one shift bit per luma bit above 10; adds 0 at 10 bits or fewer
+#endif
   if (pcSlice->getSliceType() == I_SLICE)
   {
     READ_SVLC(iCode, "clip_luma_pel_max");
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
index 38e77f597..79bad478e 100644
--- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
+++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
@@ -7051,7 +7051,11 @@ void EncAdaptiveLoopFilter::alfCorrection( CodingStructure& cs, const PelUnitBuf
           {
             curScaleAlfEncParam.reset();
 
+#if JVET_AJ0237_INTERNAL_12BIT
+            curScaleAlfParam.init(filterSetIndex, alt_num, classifierIdx, cs.slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA));
+#else
             curScaleAlfParam.init( filterSetIndex, alt_num, classifierIdx );
+#endif
             curScaleAlfParam.setMinMax( cs.slice->getLumaPelMin(), cs.slice->getLumaPelMax() );
 
             const int apsIdx = cs.slice->getTileGroupApsIdLuma()[filterSetIndex - NUM_FIXED_FILTER_SETS];
@@ -7061,7 +7065,11 @@ void EncAdaptiveLoopFilter::alfCorrection( CodingStructure& cs, const PelUnitBuf
           CHECK( curScaleAlfParam.classifierIdx != classifierIdx || curScaleAlfParam.filterSetIndex != filterSetIndex || curScaleAlfParam.alt_num != alt_num, "alfCorrection() failed.");
 
           char coeffBits = m_coeffBitsApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS][alt_num];
+#if JVET_AJ0237_INTERNAL_12BIT
+          const Pel currBase = 1 << (curScaleAlfParam.bitDepth - 1); // 2^(bitDepth-1); evaluates to the fixed 512 of the #else branch when bitDepth is 10
+#else
           const Pel currBase = 512; // 10-bits
+#endif
 
           if ( !bModeAnalysis )
           {
@@ -9463,7 +9471,11 @@ void EncAdaptiveLoopFilter::countLumaSwingGreaterThanThreshold(const Pel* luma,
 void EncAdaptiveLoopFilter::countChromaSampleValueNearMidPoint(const Pel* chroma, int chromaStride, int height, int width, int log2BlockWidth, int log2BlockHeight, uint64_t* chromaSampleCountNearMidPoint, int chromaSampleCountNearMidPointStride)
 {
   const int midPoint  = (1 << m_inputBitDepth[CH_C]) >> 1;
+#if JVET_AJ0237_INTERNAL_12BIT
+  const int threshold = 16 << std::max(0, m_inputBitDepth[CH_C] - 10); // 16 for chroma input depths up to 10 bits, doubled for every bit above 10
+#else
   const int threshold = 16;
+#endif
 
   for (int y = 0; y < height; y += (1 << log2BlockHeight))
   {
diff --git a/source/Lib/EncoderLib/EncCu.cpp b/source/Lib/EncoderLib/EncCu.cpp
index f06e35c17..bcf1afb77 100644
--- a/source/Lib/EncoderLib/EncCu.cpp
+++ b/source/Lib/EncoderLib/EncCu.cpp
@@ -122,6 +122,9 @@ void EncCu::create( EncCfg* encCfg )
 #if JVET_V0094_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER
   m_bilateralFilter = new BilateralFilter();;
   m_bilateralFilter->create();
+#if JVET_AJ0237_INTERNAL_12BIT
+  m_bilateralFilter->setInternalBitDepth(encCfg->getBitDepth(CHANNEL_TYPE_LUMA));
+#endif
 #endif
 
   unsigned      uiMaxWidth    = encCfg->getMaxCUWidth();
diff --git a/source/Lib/EncoderLib/EncGOP.cpp b/source/Lib/EncoderLib/EncGOP.cpp
index 472f0680d..fb066e072 100644
--- a/source/Lib/EncoderLib/EncGOP.cpp
+++ b/source/Lib/EncoderLib/EncGOP.cpp
@@ -2707,11 +2707,19 @@ void EncGOP::compressGOP(int iPOCLast, int iNumPicRcvd, PicList &rcListPic, std:
       if (index != -1)
       {
         const SPS* sps = pcSlice->getSPS();
+#if JVET_AJ0237_INTERNAL_12BIT
+        pcSlice->setCostForARMC(sps->getLambdaVal(index), sps->getBitDepth(CHANNEL_TYPE_LUMA));
+#else
         pcSlice->setCostForARMC(sps->getLambdaVal(index));
+#endif
       }
       else
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        pcSlice->setCostForARMC((uint32_t)LAMBDA_DEC_SIDE[min(max(pcSlice->getSliceQp(), 0), MAX_QP)], pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA));
+#else
         pcSlice->setCostForARMC((uint32_t) LAMBDA_DEC_SIDE[min(max(pcSlice->getSliceQp(), 0), MAX_QP)]);
+#endif
       }
 
       if (pcSlice->getCheckLDC())
@@ -2738,12 +2746,20 @@ void EncGOP::compressGOP(int iPOCLast, int iNumPicRcvd, PicList &rcListPic, std:
         }
         if (mindist != 1 )
         {
+#if JVET_AJ0237_INTERNAL_12BIT
+          pcSlice->setCostForARMC((uint32_t)LAMBDA_DEC_SIDE[min(max(pcSlice->getSliceQp() - 4, 0), MAX_QP)], pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA));
+#else
           pcSlice->setCostForARMC((uint32_t) LAMBDA_DEC_SIDE[min(max(pcSlice->getSliceQp() - 4, 0), MAX_QP)]);
+#endif
         }
       }
       else
       {
+#if JVET_AJ0237_INTERNAL_12BIT
+        pcSlice->setCostForARMC((uint32_t)LAMBDA_DEC_SIDE[min(max(pcSlice->getSliceQp() - 4, 0), MAX_QP)], pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA));
+#else
         pcSlice->setCostForARMC((uint32_t) LAMBDA_DEC_SIDE[min(max(pcSlice->getSliceQp() - 4, 0), MAX_QP)]);
+#endif
       }
     }
 #endif
@@ -3803,6 +3819,9 @@ void EncGOP::compressGOP(int iPOCLast, int iNumPicRcvd, PicList &rcListPic, std:
         if ( saoSize.width != picWidth || saoSize.height != picHeight ) 
         {
           m_pcSAO->create(picWidth, picHeight, chromaFormatIDC, maxCUWidth, maxCUHeight, maxTotalCUDepth, log2SaoOffsetScaleLuma, log2SaoOffsetScaleChroma);
+#if JVET_AJ0237_INTERNAL_12BIT
+          m_pcSAO->m_bilateralFilter.setInternalBitDepth(pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA));
+#endif
           m_pcSAO->setReshaper(m_pcReshaper);
         }
 
@@ -4852,9 +4871,17 @@ void EncGOP::compressGOP(int iPOCLast, int iNumPicRcvd, PicList &rcListPic, std:
 #if JVET_AA0096_MC_BOUNDARY_PADDING
     m_pcFrameMcPadPrediction->init(m_pcEncLib->getRdCost(), pcSlice->getSPS()->getChromaFormatIdc(),
 #if JVET_AJ0172_IBC_ITMP_ALIGN_REF_AREA
+#if JVET_AJ0237_INTERNAL_12BIT
+                                   pcSlice->getSPS()->getMaxCUHeight(), NULL, pcPic->getPicWidthInLumaSamples(),pcPic->getPicHeightInLumaSamples(), pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA));
+#else
                                    pcSlice->getSPS()->getMaxCUHeight(), NULL, pcPic->getPicWidthInLumaSamples(),pcPic->getPicHeightInLumaSamples());
+#endif
+#else
+#if JVET_AJ0237_INTERNAL_12BIT
+                                   pcSlice->getSPS()->getMaxCUHeight(), NULL, pcPic->getPicWidthInLumaSamples(), pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA));
 #else
                                    pcSlice->getSPS()->getMaxCUHeight(), NULL, pcPic->getPicWidthInLumaSamples());
+#endif
 #endif
     m_pcFrameMcPadPrediction->mcFramePad(pcPic, *(pcPic->slices[0]));
     m_pcFrameMcPadPrediction->destroy();
diff --git a/source/Lib/EncoderLib/EncGOP.h b/source/Lib/EncoderLib/EncGOP.h
index 907cf60ff..7fad71fa8 100644
--- a/source/Lib/EncoderLib/EncGOP.h
+++ b/source/Lib/EncoderLib/EncGOP.h
@@ -140,8 +140,10 @@ private:
   PicList*                m_pcListPic;
 
   HLSWriter*              m_HLSWriter;
+#if !JVET_AJ0237_INTERNAL_12BIT
 #if JVET_V0094_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER
   BilateralFilter         m_cBilateralFilter;
+#endif
 #endif
   LoopFilter*             m_pcLoopFilter;
 
@@ -214,6 +216,12 @@ private:
 #endif
 
 public:
+#if JVET_AJ0237_INTERNAL_12BIT
+#if JVET_V0094_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER
+  BilateralFilter         m_cBilateralFilter; // declared in the public section in this configuration; EncLib::create() reaches it as m_cGOPEncoder.m_cBilateralFilter to call setInternalBitDepth()
+#endif
+#endif
+
   EncGOP();
   virtual ~EncGOP();
 
diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp
index 964180341..613ba5a02 100644
--- a/source/Lib/EncoderLib/EncLib.cpp
+++ b/source/Lib/EncoderLib/EncLib.cpp
@@ -101,6 +101,9 @@ void EncLib::create( const int layerId )
   m_iPOCLast = m_compositeRefEnabled ? -2 : -1;
   // create processing unit classes
   m_cGOPEncoder.        create( );
+#if JVET_AJ0237_INTERNAL_12BIT && (JVET_V0094_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER)
+  m_cGOPEncoder.m_cBilateralFilter.setInternalBitDepth(m_bitDepth[CHANNEL_TYPE_LUMA]); // EncGOP declares m_cBilateralFilter only when a bilateral-filter macro is on, so the guard must match; m_bitDepth is indexed by channel type
+#endif
 #if ENABLE_SPLIT_PARALLELISM
 #if ENABLE_SPLIT_PARALLELISM
   m_numCuEncStacks  = m_numSplitThreads == 1 ? 1 : NUM_RESERVERD_SPLIT_JOBS;
@@ -131,6 +134,9 @@ void EncLib::create( const int layerId )
   m_cCuEncoder.         create( this );
 #if JVET_V0094_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER
   m_bilateralFilter.    create();
+#if JVET_AJ0237_INTERNAL_12BIT
+  m_bilateralFilter.setInternalBitDepth(m_bitDepth[CHANNEL_TYPE_LUMA]); // m_bitDepth is indexed by channel type elsewhere in this function
+#endif
 #endif
 #endif
 #if JVET_J0090_MEMORY_BANDWITH_MEASURE
@@ -219,6 +225,9 @@ void EncLib::create( const int layerId )
     m_cEncSAO.create(m_iSourceWidth, m_iSourceHeight, m_chromaFormatIDC, m_maxCUWidth, m_maxCUHeight, floorLog2(m_maxCUWidth) - m_log2MinCUSize, (uint32_t)std::max(0, m_bitDepth[CHANNEL_TYPE_LUMA] - MAX_SAO_TRUNCATED_BITDEPTH), (uint32_t)std::max(0, m_bitDepth[CHANNEL_TYPE_CHROMA] - MAX_SAO_TRUNCATED_BITDEPTH));
 #endif
     m_cEncSAO.createEncData(m_saoCtuBoundary, numCtuInFrame);
+#if JVET_AJ0237_INTERNAL_12BIT
+    m_cEncSAO.m_bilateralFilter.setInternalBitDepth(m_bitDepth[CHANNEL_TYPE_LUMA]); // same channel-type index as the m_cEncSAO.create() call above
+#endif
   }
 }
 
diff --git a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
index e850b03da..04ff642e8 100644
--- a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
+++ b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
@@ -413,6 +413,9 @@ void EncSampleAdaptiveOffset::SAOProcess( CodingStructure& cs, bool* sliceEnable
     if(!cs.sps->getSAOEnabledFlag() && (cs.pps->getUseBIF() || cs.pps->getUseChromaBIF()))
     {
       bilateralFilter.create();
+#if JVET_AJ0237_INTERNAL_12BIT
+      bilateralFilter.setInternalBitDepth(cs.sps->getBitDepth(CHANNEL_TYPE_LUMA));
+#endif
       if( cs.pps->getUseBIF() )
       {
         bilateralFilter.bilateralFilterPicRDOperCTU( COMPONENT_Y, cs, src, bifCABACEstimator ); // Filters from src to res
@@ -468,6 +471,9 @@ void EncSampleAdaptiveOffset::SAOProcess( CodingStructure& cs, bool* sliceEnable
   if( cs.pps->getUseBIF() || cs.pps->getUseChromaBIF() )
   {
     bilateralFilter.create();
+#if JVET_AJ0237_INTERNAL_12BIT
+    bilateralFilter.setInternalBitDepth(cs.sps->getBitDepth(CHANNEL_TYPE_LUMA));
+#endif
     if( cs.pps->getUseBIF() )
     {
       bilateralFilter.bilateralFilterPicRDOperCTU( COMPONENT_Y, cs, src, bifCABACEstimator ); // Filters from src to res'
@@ -774,9 +780,18 @@ int64_t EncSampleAdaptiveOffset::getDistortion(const int channelBitDepth, int ty
   return dist;
 }
 
+#if JVET_AJ0237_INTERNAL_12BIT
+inline int64_t EncSampleAdaptiveOffset::estSaoDist(int64_t count, int64_t offset, int64_t diffSum, int shift, int bdShift)
+#else
 inline int64_t EncSampleAdaptiveOffset::estSaoDist(int64_t count, int64_t offset, int64_t diffSum, int shift)
+#endif
 {
+#if JVET_AJ0237_INTERNAL_12BIT
+  const int64_t tmpOffset = offset * (int64_t(1) << bdShift); // scale offset by 2^bdShift; multiply instead of shifting because offset may be negative and << on a negative value is undefined before C++20
+  return ((count * tmpOffset * tmpOffset - diffSum * tmpOffset * 2) >> shift); // count*o^2 - 2*diffSum*o with the scaled offset o, then reduced by shift
+#else
   return (( count*offset*offset-diffSum*offset*2 ) >> shift);
+#endif
 }
 
 
@@ -1154,6 +1169,9 @@ void EncSampleAdaptiveOffset::decideBlkParams(CodingStructure& cs, bool* sliceEn
 #if JVET_V0094_BILATERAL_FILTER || JVET_X0071_CHROMA_BILATERAL_FILTER
   BilateralFilter bilateralFilter;
   bilateralFilter.create();
+#if JVET_AJ0237_INTERNAL_12BIT
+  bilateralFilter.setInternalBitDepth(cs.sps->getBitDepth(CHANNEL_TYPE_LUMA));
+#endif
 #endif
   
   const TempCtx ctxPicStart ( m_ctxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
@@ -3011,6 +3029,19 @@ void EncSampleAdaptiveOffset::CCSAOProcess(CodingStructure& cs, const double* la
   }
 #endif
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  if (!cs.slice->isIntra() && !cs.slice->getCheckLDC() && (cs.slice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA) > 10) && (cs.slice->getSliceQp() > 45) && (m_picWidth * m_picHeight <= 1920 * 1080))
+  { // NOTE(review): encoder-side early exit; the rationale for the QP > 45 and 1920x1080 thresholds is not stated in this patch - confirm with the proposal
+    for (int compIdx = COMPONENT_Y; compIdx < MAX_NUM_COMPONENT; compIdx++)
+    {
+      ComponentID compID = (ComponentID)compIdx;
+      m_ccSaoComParam.reset(compID); // clear this component's CCSAO parameters
+      memset(m_ccSaoControl[compID], 0, sizeof(uint8_t) * m_numCTUsInPic); // zero the per-CTU control entry of every CTU in the picture
+    }
+    return; // leave before the code below runs, with all components reset and zeroed
+  }
+#endif
+
   PelUnitBuf orgYuv = cs.getOrgBuf(); 
   PelUnitBuf dstYuv = cs.getRecoBuf();
   PelUnitBuf srcYuv = m_ccSaoBuf.getBuf( cs.area );
@@ -3413,6 +3444,10 @@ void EncSampleAdaptiveOffset::setupInitCcSaoParam(CodingStructure& cs, const Com
   initCcSaoParam.reset();
   memset(initCcSaoControl, 0, sizeof(uint8_t) * m_numCTUsInPic);
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  const int shift = 2 * DISTORTION_PRECISION_ADJUSTMENT(cs.sps->getBitDepth(toChannelType(compID))); // bit depth of the component being processed, as deriveCcSaoRDO() does for the same getCcSaoDistortion() call
+#endif
+
   if (setNum == 1)
   {
     std::fill_n(initCcSaoControl, m_numCTUsInPic, 1);
@@ -3432,7 +3467,11 @@ void EncSampleAdaptiveOffset::setupInitCcSaoParam(CodingStructure& cs, const Com
 #if JVET_AE0151_CCSAO_HISTORY_OFFSETS_AND_EXT_EO
       getCcSaoDistortion(compID, setIdx
                        , bestCcSaoParam.setType[setIdx] == CCSAO_SET_TYPE_BAND ? blkStats : blkStatsEdge
+#if JVET_AJ0237_INTERNAL_12BIT
+                       , bestCcSaoParam.offset, trainingDistortion, shift);
+#else
                        , bestCcSaoParam.offset, trainingDistortion);
+#endif
 #else
       if (bestCcSaoParam.setType[setIdx] == 0) /* band */
       {
@@ -3941,6 +3980,10 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
   const int srcStrideTab[MAX_NUM_COMPONENT] = { srcStrideY, srcStrideU, srcStrideU };
 #endif
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  const int bdShift = std::max(0, bitDepth - 10); // number of bits above 10 (0 otherwise); applied below as a left shift to the g_ccSaoEdgeThr thresholds
+#endif
+
 #if JVET_Z0105_LOOP_FILTER_VIRTUAL_BOUNDARY
   int x, y, startX, startY, endX, endY;
   int firstLineStartX, firstLineEndX;
@@ -3997,7 +4040,11 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
                 const int edgeNumUni = g_ccSaoEdgeNum[edgeIdc][1];
                 for (int edgeThr = 0; edgeThr < MAX_CCSAO_EDGE_THR; edgeThr++)
                 {
+#if JVET_AJ0237_INTERNAL_12BIT
+                  const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift;
+#else
                   const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr];
+#endif
                   const int edgeIdxA = getCcSaoEdgeIdx(*colE, *colA, edgeThrVal, edgeIdc);
                   const int edgeIdxB = getCcSaoEdgeIdx(*colE, *colB, edgeThrVal, edgeIdc);
                   const int edgeIdx  = edgeIdxA * edgeNumUni + edgeIdxB;
@@ -4067,7 +4114,11 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
                 const int edgeNumUni = g_ccSaoEdgeNum[edgeIdc][1];
                 for (int edgeThr = 0; edgeThr < MAX_CCSAO_EDGE_THR; edgeThr++)
                 {
+#if JVET_AJ0237_INTERNAL_12BIT
+                  const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift;
+#else
                   const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr];
+#endif
                   const int edgeIdxA = getCcSaoEdgeIdx(*colE, *colA, edgeThrVal, edgeIdc);
                   const int edgeIdxB = getCcSaoEdgeIdx(*colE, *colB, edgeThrVal, edgeIdc);
                   const int edgeIdx  = edgeIdxA * edgeNumUni + edgeIdxB;
@@ -4131,7 +4182,11 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
               const int edgeNumUni = g_ccSaoEdgeNum[edgeIdc][1];
               for (int edgeThr = 0; edgeThr < MAX_CCSAO_EDGE_THR; edgeThr++)
               {
+#if JVET_AJ0237_INTERNAL_12BIT
+                const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift;
+#else
                 const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr];
+#endif
                 const int edgeIdxA = getCcSaoEdgeIdx(*colE, *colA, edgeThrVal, edgeIdc);
                 const int edgeIdxB = getCcSaoEdgeIdx(*colE, *colB, edgeThrVal, edgeIdc);
                 const int edgeIdx  = edgeIdxA * edgeNumUni + edgeIdxB;
@@ -4188,7 +4243,11 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
                 const int edgeNumUni = g_ccSaoEdgeNum[edgeIdc][1];
                 for (int edgeThr = 0; edgeThr < MAX_CCSAO_EDGE_THR; edgeThr++)
                 {
+#if JVET_AJ0237_INTERNAL_12BIT
+                  const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift;
+#else
                   const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr];
+#endif
                   const int edgeIdxA = getCcSaoEdgeIdx(*colE, *colA, edgeThrVal, edgeIdc);
                   const int edgeIdxB = getCcSaoEdgeIdx(*colE, *colB, edgeThrVal, edgeIdc);
                   const int edgeIdx  = edgeIdxA * edgeNumUni + edgeIdxB;
@@ -4252,7 +4311,11 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
               const int edgeNumUni = g_ccSaoEdgeNum[edgeIdc][1];
               for (int edgeThr = 0; edgeThr < MAX_CCSAO_EDGE_THR; edgeThr++)
               {
+#if JVET_AJ0237_INTERNAL_12BIT
+                const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift;
+#else
                 const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr];
+#endif
                 const int edgeIdxA = getCcSaoEdgeIdx(*colE, *colA, edgeThrVal, edgeIdc);
                 const int edgeIdxB = getCcSaoEdgeIdx(*colE, *colB, edgeThrVal, edgeIdc);
                 const int edgeIdx  = edgeIdxA * edgeNumUni + edgeIdxB;
@@ -4309,7 +4372,11 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
                 const int edgeNumUni = g_ccSaoEdgeNum[edgeIdc][1];
                 for (int edgeThr = 0; edgeThr < MAX_CCSAO_EDGE_THR; edgeThr++)
                 {
+#if JVET_AJ0237_INTERNAL_12BIT
+                  const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift;
+#else
                   const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr];
+#endif
                   const int edgeIdxA = getCcSaoEdgeIdx(*colE, *colA, edgeThrVal, edgeIdc);
                   const int edgeIdxB = getCcSaoEdgeIdx(*colE, *colB, edgeThrVal, edgeIdc);
                   const int edgeIdx  = edgeIdxA * edgeNumUni + edgeIdxB;
@@ -4779,7 +4846,11 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
                 const int edgeNumUni = g_ccSaoEdgeNum[edgeIdc][1];
                 for (int edgeThr = 0; edgeThr < MAX_CCSAO_EDGE_THR; edgeThr++)
                 {
+#if JVET_AJ0237_INTERNAL_12BIT
+                  const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift;
+#else
                   const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr];
+#endif
                   const int edgeIdxA = getCcSaoEdgeIdx(*colE, *colA, edgeThrVal, edgeIdc);
                   const int edgeIdxB = getCcSaoEdgeIdx(*colE, *colB, edgeThrVal, edgeIdc);
                   const int edgeIdx  = edgeIdxA * edgeNumUni + edgeIdxB;
@@ -4849,7 +4920,11 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
                 const int edgeNumUni = g_ccSaoEdgeNum[edgeIdc][1];
                 for (int edgeThr = 0; edgeThr < MAX_CCSAO_EDGE_THR; edgeThr++)
                 {
+#if JVET_AJ0237_INTERNAL_12BIT
+                  const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift;
+#else
                   const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr];
+#endif
                   const int edgeIdxA = getCcSaoEdgeIdx(*colE, *colA, edgeThrVal, edgeIdc);
                   const int edgeIdxB = getCcSaoEdgeIdx(*colE, *colB, edgeThrVal, edgeIdc);
                   const int edgeIdx  = edgeIdxA * edgeNumUni + edgeIdxB;
@@ -4913,7 +4988,11 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
               const int edgeNumUni = g_ccSaoEdgeNum[edgeIdc][1];
               for (int edgeThr = 0; edgeThr < MAX_CCSAO_EDGE_THR; edgeThr++)
               {
+#if JVET_AJ0237_INTERNAL_12BIT
+                const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift;
+#else
                 const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr];
+#endif
                 const int edgeIdxA = getCcSaoEdgeIdx(*colE, *colA, edgeThrVal, edgeIdc);
                 const int edgeIdxB = getCcSaoEdgeIdx(*colE, *colB, edgeThrVal, edgeIdc);
                 const int edgeIdx  = edgeIdxA * edgeNumUni + edgeIdxB;
@@ -4970,7 +5049,11 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
                 const int edgeNumUni = g_ccSaoEdgeNum[edgeIdc][1];
                 for (int edgeThr = 0; edgeThr < MAX_CCSAO_EDGE_THR; edgeThr++)
                 {
+#if JVET_AJ0237_INTERNAL_12BIT
+                  const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift;
+#else
                   const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr];
+#endif
                   const int edgeIdxA = getCcSaoEdgeIdx(*colE, *colA, edgeThrVal, edgeIdc);
                   const int edgeIdxB = getCcSaoEdgeIdx(*colE, *colB, edgeThrVal, edgeIdc);
                   const int edgeIdx  = edgeIdxA * edgeNumUni + edgeIdxB;
@@ -5034,7 +5117,11 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
               const int edgeNumUni = g_ccSaoEdgeNum[edgeIdc][1];
               for (int edgeThr = 0; edgeThr < MAX_CCSAO_EDGE_THR; edgeThr++)
               {
+#if JVET_AJ0237_INTERNAL_12BIT
+                const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift;
+#else
                 const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr];
+#endif
                 const int edgeIdxA = getCcSaoEdgeIdx(*colE, *colA, edgeThrVal, edgeIdc);
                 const int edgeIdxB = getCcSaoEdgeIdx(*colE, *colB, edgeThrVal, edgeIdc);
                 const int edgeIdx  = edgeIdxA * edgeNumUni + edgeIdxB;
@@ -5091,7 +5178,11 @@ void EncSampleAdaptiveOffset::getCcSaoBlkStatsEdgeNew(const ComponentID compID,
                 const int edgeNumUni = g_ccSaoEdgeNum[edgeIdc][1];
                 for (int edgeThr = 0; edgeThr < MAX_CCSAO_EDGE_THR; edgeThr++)
                 {
+#if JVET_AJ0237_INTERNAL_12BIT
+                  const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr] << bdShift;
+#else
                   const int edgeThrVal = g_ccSaoEdgeThr[edgeIdc][edgeThr];
+#endif
                   const int edgeIdxA = getCcSaoEdgeIdx(*colE, *colA, edgeThrVal, edgeIdc);
                   const int edgeIdxB = getCcSaoEdgeIdx(*colE, *colB, edgeThrVal, edgeIdc);
                   const int edgeIdx  = edgeIdxA * edgeNumUni + edgeIdxB;
@@ -6726,14 +6817,23 @@ void EncSampleAdaptiveOffset::deriveCcSaoOffsets(const ComponentID compID, const
 {
   int quantOffsets[MAX_CCSAO_CLASS_NUM] = { 0 };
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  int shift = 2 * DISTORTION_PRECISION_ADJUSTMENT(bitDepth);
+#endif
+
   for(int k = 0; k < MAX_CCSAO_CLASS_NUM; k++)
   {
     if(frameStats[setIdx].count[k] == 0)
       continue;
 
     quantOffsets[k] =
+#if JVET_AJ0237_INTERNAL_12BIT
+      (int) xRoundIbdi(bitDepth, (double)(frameStats[setIdx].diff [k] << DISTORTION_PRECISION_ADJUSTMENT(bitDepth))
+                               / (double)((int64_t)frameStats[setIdx].count[k] << m_offsetStepLog2[compID])); // denominator is count * 2^m_offsetStepLog2[compID]; the #else branch divides by count alone
+#else
       (int) xRoundIbdi(bitDepth, (double)(frameStats[setIdx].diff [k] << DISTORTION_PRECISION_ADJUSTMENT(bitDepth))
                                / (double)(frameStats[setIdx].count[k]));
+#endif
     quantOffsets[k] = Clip3(-MAX_CCSAO_OFFSET_THR, MAX_CCSAO_OFFSET_THR, quantOffsets[k]);
   }
 
@@ -6744,7 +6844,11 @@ void EncSampleAdaptiveOffset::deriveCcSaoOffsets(const ComponentID compID, const
     cost[k] = m_lambda[compID];
     if (quantOffsets[k] != 0)
     {
+#if JVET_AJ0237_INTERNAL_12BIT
+      quantOffsets[k] = estCcSaoIterOffset(m_lambda[compID], quantOffsets[k], frameStats[setIdx].count[k], frameStats[setIdx].diff[k], shift, m_offsetStepLog2[compID], dist[k], cost[k], MAX_CCSAO_OFFSET_THR);
+#else
       quantOffsets[k] = estCcSaoIterOffset(m_lambda[compID], quantOffsets[k], frameStats[setIdx].count[k], frameStats[setIdx].diff[k], 0, 0, dist[k], cost[k], MAX_CCSAO_OFFSET_THR);
+#endif
     }
   }
 
@@ -6757,7 +6861,11 @@ void EncSampleAdaptiveOffset::deriveCcSaoOffsets(const ComponentID compID, const
 
 void EncSampleAdaptiveOffset::getCcSaoDistortion(const ComponentID compID, const int setIdx, CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM]
                                                , short offset[MAX_CCSAO_SET_NUM][MAX_CCSAO_CLASS_NUM]
+#if JVET_AJ0237_INTERNAL_12BIT
+                                               , int64_t* trainingDistortion[MAX_CCSAO_SET_NUM], const int shift)
+#else
                                                , int64_t* trainingDistortion[MAX_CCSAO_SET_NUM])
+#endif
 {
   ::memset(trainingDistortion[setIdx], 0, sizeof(int64_t) * m_numCTUsInPic);
 
@@ -6766,7 +6874,11 @@ void EncSampleAdaptiveOffset::getCcSaoDistortion(const ComponentID compID, const
     for (int k = 0; k < MAX_CCSAO_CLASS_NUM; k++)
     {
       trainingDistortion[setIdx][ctbIdx]
+#if JVET_AJ0237_INTERNAL_12BIT
+        += estSaoDist(blkStats[setIdx][ctbIdx].count[k], offset[setIdx][k], blkStats[setIdx][ctbIdx].diff[k], shift, m_offsetStepLog2[compID]); // index by component, as deriveCcSaoOffsets() does
+#else
         += estSaoDist(blkStats[setIdx][ctbIdx].count[k], offset[setIdx][k], blkStats[setIdx][ctbIdx].diff[k], 0);
+#endif
     }
   }
 }
@@ -7064,6 +7176,10 @@ void EncSampleAdaptiveOffset::deriveCcSaoRDO(CodingStructure& cs, const Componen
 
   const TempCtx ctxStartCcSaoControlFlag  ( m_ctxCache, SubCtx( Ctx::CcSaoControlIdc, m_CABACEstimator->getCtx() ) );
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  const int shift = 2 * DISTORTION_PRECISION_ADJUSTMENT(cs.sps->getBitDepth(toChannelType(compID)));
+#endif
+
   int    trainingIter = 0;
   bool   keepTraining = true;
   bool   improved = false;
@@ -7081,7 +7197,11 @@ void EncSampleAdaptiveOffset::deriveCcSaoRDO(CodingStructure& cs, const Componen
         {
           getCcSaoDistortion(compID, setIdx
                            , tempCcSaoParam.setType[setIdx] == CCSAO_SET_TYPE_BAND ? blkStats : blkStatsEdge
+#if JVET_AJ0237_INTERNAL_12BIT
+                           , tempCcSaoParam.offset, trainingDistortion, shift);
+#else
                            , tempCcSaoParam.offset, trainingDistortion);
+#endif
         }
         else
         {
@@ -7099,12 +7219,20 @@ void EncSampleAdaptiveOffset::deriveCcSaoRDO(CodingStructure& cs, const Componen
 #endif
         {
           deriveCcSaoOffsets(compID, cs.sps->getBitDepth(toChannelType(compID)), setIdx, frameStats, tempCcSaoParam.offset);
+#if JVET_AJ0237_INTERNAL_12BIT
+          getCcSaoDistortion(compID, setIdx, blkStats, tempCcSaoParam.offset, trainingDistortion, shift);
+#else
           getCcSaoDistortion(compID, setIdx, blkStats, tempCcSaoParam.offset, trainingDistortion);
+#endif
         }
         else
         {
           deriveCcSaoOffsets(compID, cs.sps->getBitDepth(toChannelType(compID)), setIdx, frameStatsEdge, tempCcSaoParam.offset);
+#if JVET_AJ0237_INTERNAL_12BIT
+          getCcSaoDistortion(compID, setIdx, blkStatsEdge, tempCcSaoParam.offset, trainingDistortion, shift);
+#else
           getCcSaoDistortion(compID, setIdx, blkStatsEdge, tempCcSaoParam.offset, trainingDistortion);
+#endif
         }
 #else
         deriveCcSaoOffsets(compID, cs.sps->getBitDepth(toChannelType(compID)), setIdx, frameStats,
diff --git a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h
index 7cfd6e040..1ef5732cb 100644
--- a/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h
+++ b/source/Lib/EncoderLib/EncSampleAdaptiveOffset.h
@@ -216,7 +216,11 @@ private: //methods
   void deriveModeMergeRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], bool* sliceEnabled, std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost );
   int64_t getDistortion(const int channelBitDepth, int typeIdc, int typeAuxInfo, int* offsetVal, SAOStatData& statData);
   void deriveOffsets(ComponentID compIdx, const int channelBitDepth, int typeIdc, SAOStatData& statData, int* quantOffsets, int& typeAuxInfo);
+#if JVET_AJ0237_INTERNAL_12BIT
+  inline int64_t estSaoDist(int64_t count, int64_t offset, int64_t diffSum, int shift, int bdShift = 0);
+#else
   inline int64_t estSaoDist(int64_t count, int64_t offset, int64_t diffSum, int shift);
+#endif
   inline int estIterOffset(int typeIdx, double lambda, int offsetInput, int64_t count, int64_t diffSum, int shift, int bitIncrease, int64_t& bestDist, double& bestCost, int offsetTh );
   void addPreDBFStatistics(std::vector<SAOStatData**>& blkStats);
 #if JVET_W0066_CCSAO
@@ -335,8 +339,13 @@ private: //methods
                         , CcSaoStatData frameStats[MAX_CCSAO_SET_NUM]
                         , short offset[MAX_CCSAO_SET_NUM][MAX_CCSAO_CLASS_NUM]);
   inline int estCcSaoIterOffset(const double lambda, const int offsetInput, const int64_t count, const int64_t diffSum, const int shift, const int bitIncrease, int64_t& bestDist, double& bestCost, const int offsetTh);
+#if JVET_AJ0237_INTERNAL_12BIT
+  void getCcSaoDistortion(const ComponentID compID, const int setIdx, CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM]
+                        , short offset[MAX_CCSAO_SET_NUM][MAX_CCSAO_CLASS_NUM], int64_t* trainingDistortion[MAX_CCSAO_SET_NUM], const int shift);
+#else
   void getCcSaoDistortion(const ComponentID compID, const int setIdx, CcSaoStatData* blkStats[MAX_CCSAO_SET_NUM]
                         , short offset[MAX_CCSAO_SET_NUM][MAX_CCSAO_CLASS_NUM], int64_t* trainingDistortion[MAX_CCSAO_SET_NUM]);
+#endif
 #if JVET_Y0106_CCSAO_EDGE_CLASSIFIER && !JVET_AE0151_CCSAO_HISTORY_OFFSETS_AND_EXT_EO
   void getCcSaoDistortionEdge(const ComponentID compID, const int setIdx,
                               CcSaoStatData *blkStatsEdge[MAX_CCSAO_SET_NUM],
diff --git a/source/Lib/EncoderLib/EncTemporalFilter.cpp b/source/Lib/EncoderLib/EncTemporalFilter.cpp
index 5cbf713dd..17e202fec 100644
--- a/source/Lib/EncoderLib/EncTemporalFilter.cpp
+++ b/source/Lib/EncoderLib/EncTemporalFilter.cpp
@@ -349,6 +349,16 @@ void EncTemporalFilter::subsampleLuma(const PelStorage &input, PelStorage &outpu
   output.extendBorderPel(m_padding, m_padding);
 }
 
+#if JVET_AJ0237_INTERNAL_12BIT
+int64_t EncTemporalFilter::motionErrorLuma(const PelStorage& orig,
+  const PelStorage& buffer,
+  const int x,
+  const int y,
+  int dx,
+  int dy,
+  const int bs,
+  const int64_t besterror) const
+#else
 int EncTemporalFilter::motionErrorLuma(const PelStorage &orig,
   const PelStorage &buffer,
   const int x,
@@ -357,13 +367,18 @@ int EncTemporalFilter::motionErrorLuma(const PelStorage &orig,
   int dy,
   const int bs,
   const int besterror = 8 * 8 * 1024 * 1024) const
+#endif
 {
   const Pel* origOrigin = orig.Y().buf;
   const int  origStride = orig.Y().stride;
   const Pel* buffOrigin = buffer.Y().buf;
   const int  buffStride = buffer.Y().stride;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  int64_t error = 0;
+#else
   int error = 0;
+#endif
   if (((dx | dy) & 0xF) == 0)
   {
     dx /= m_motionVectorFactor;
@@ -454,6 +469,12 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
   const int origWidth  = orig.Y().width;
   const int origHeight = orig.Y().height;
 
+#if JVET_AJ0237_INTERNAL_12BIT
+  const int bitShift = 2 * (16 - m_internalBitDepth[CHANNEL_TYPE_LUMA]); // 12 at 10-bit, 8 at 12-bit, 16 at 8-bit
+  const int denorm = 204800 / (1 << bitShift); // 50 at 10-bit, i.e. the constant used when the macro is off
+  const double offset = 20480.0 / (1 << bitShift); // 5.0 at 10-bit; floating-point division so the value is not truncated to 0 at 8-bit
+#endif
+
 #if JVET_V0056
   for (int blockY = 0; blockY + blockSize <= origHeight; blockY += stepSize)
   {
@@ -489,7 +510,11 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
             if ((testx >= 0) && (testx < origWidth / (2 * blockSize)) && (testy >= 0) && (testy < origHeight / (2 * blockSize)))
             {
               MotionVector old = previous->get(testx, testy);
+#if JVET_AJ0237_INTERNAL_12BIT
+              int64_t error = motionErrorLuma(orig, buffer, blockX, blockY, old.x * factor, old.y * factor, blockSize, best.error);
+#else
               int error = motionErrorLuma(orig, buffer, blockX, blockY, old.x * factor, old.y * factor, blockSize, best.error);
+#endif
               if (error < best.error)
               {
                 best.set(old.x * factor, old.y * factor, error);
@@ -498,7 +523,11 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
           }
         }
 #if JVET_V0056
+#if JVET_AJ0237_INTERNAL_12BIT
+        int64_t error = motionErrorLuma(orig, buffer, blockX, blockY, 0, 0, blockSize, best.error);
+#else
         int error = motionErrorLuma(orig, buffer, blockX, blockY, 0, 0, blockSize, best.error);
+#endif
         if (error < best.error)
         {
           best.set(0, 0, error);
@@ -510,7 +539,11 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
       {
         for (int x2 = prevBest.x / m_motionVectorFactor - range; x2 <= prevBest.x / m_motionVectorFactor + range; x2++)
         {
+#if JVET_AJ0237_INTERNAL_12BIT
+          int64_t error = motionErrorLuma(orig, buffer, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, best.error);
+#else
           int error = motionErrorLuma(orig, buffer, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, best.error);
+#endif
           if (error < best.error)
           {
             best.set(x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, error);
@@ -525,7 +558,11 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
         {
           for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x + doubleRange; x2 += 4)
           {
+#if JVET_AJ0237_INTERNAL_12BIT
+            int64_t error = motionErrorLuma(orig, buffer, blockX, blockY, x2, y2, blockSize, best.error);
+#else
             int error = motionErrorLuma(orig, buffer, blockX, blockY, x2, y2, blockSize, best.error);
+#endif
             if (error < best.error)
             {
               best.set(x2, y2, error);
@@ -539,7 +576,11 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
         {
           for (int x2 = prevBest.x - doubleRange; x2 <= prevBest.x + doubleRange; x2++)
           {
+#if JVET_AJ0237_INTERNAL_12BIT
+            int64_t error = motionErrorLuma(orig, buffer, blockX, blockY, x2, y2, blockSize, best.error);
+#else
             int error = motionErrorLuma(orig, buffer, blockX, blockY, x2, y2, blockSize, best.error);
+#endif
             if (error < best.error)
             {
               best.set(x2, y2, error);
@@ -552,7 +593,11 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
       if (blockY > 0)
       {
         MotionVector aboveMV = mvs.get(blockX / stepSize, (blockY - stepSize) / stepSize);
+#if JVET_AJ0237_INTERNAL_12BIT
+        int64_t error = motionErrorLuma(orig, buffer, blockX, blockY, aboveMV.x, aboveMV.y, blockSize, best.error);
+#else
         int error = motionErrorLuma(orig, buffer, blockX, blockY, aboveMV.x, aboveMV.y, blockSize, best.error);
+#endif
         if (error < best.error)
         {
           best.set(aboveMV.x, aboveMV.y, error);
@@ -561,7 +606,11 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
       if (blockX > 0)
       {
         MotionVector leftMV = mvs.get((blockX - stepSize) / stepSize, blockY / stepSize);
+#if JVET_AJ0237_INTERNAL_12BIT
+        int64_t error = motionErrorLuma(orig, buffer, blockX, blockY, leftMV.x, leftMV.y, blockSize, best.error);
+#else
         int error = motionErrorLuma(orig, buffer, blockX, blockY, leftMV.x, leftMV.y, blockSize, best.error);
+#endif
         if (error < best.error)
         {
           best.set(leftMV.x, leftMV.y, error);
@@ -589,7 +638,11 @@ void EncTemporalFilter::motionEstimationLuma(Array2D<MotionVector> &mvs, const P
           variance = variance + (pix - avg) * (pix - avg);
         }
       }
+#if JVET_AJ0237_INTERNAL_12BIT
+      best.error = (int)(20 * ((best.error + offset) / (variance + offset)) + (best.error / (blockSize * blockSize)) / denorm);
+#else
       best.error = (int)(20 * ((best.error + 5.0) / (variance + 5.0)) + (best.error / (blockSize * blockSize)) / 50);
+#endif
 #endif
       mvs.get(blockX / stepSize, blockY / stepSize) = best;
     }
@@ -731,6 +784,12 @@ void EncTemporalFilter::bilateralFilter(const PelStorage &orgPic,
     const double weightScaling = overallStrength * (isChroma(compID) ? m_chromaFactor : 0.4);
     const Pel maxSampleValue   = (1 << m_internalBitDepth[toChannelType(compID)]) - 1;
     const double bitDepthDiffWeighting = 1024.0 / (maxSampleValue + 1);
+
+#if JVET_AJ0237_INTERNAL_12BIT
+    const int bitShift = 2 * (16 - m_internalBitDepth[toChannelType(compID)]); // 12 at 10-bit, 8 at 12-bit, 16 at 8-bit
+    const double offset = 20480.0 / (1 << bitShift); // 5.0 at 10-bit; floating-point division keeps the (diffsum + offset) denominator non-zero at 8-bit
+#endif
+
 #if JVET_V0056
     const int lumaBlockSize = 8;
     const int csx = getComponentScaleX(compID, m_chromaFormatIDC);
@@ -783,7 +842,11 @@ void EncTemporalFilter::bilateralFilter(const PelStorage &orgPic,
             const int cntV = blockSizeX * blockSizeY;
             const int cntD = 2 * cntV - blockSizeX - blockSizeY;
             srcFrameInfo[i].mvs.get( x / blockSizeX, y / blockSizeY ).noise =
+#if JVET_AJ0237_INTERNAL_12BIT
+              ( int ) round( (15.0 * cntD / cntV * variance + offset) / (diffsum + offset) );
+#else
               ( int ) round( (15.0 * cntD / cntV * variance + 5.0) / (diffsum + 5.0) );
+#endif
           }
         }
         double minError = 9999999;
@@ -795,7 +858,11 @@ void EncTemporalFilter::bilateralFilter(const PelStorage &orgPic,
         for (int i = 0; i < numRefs; i++)
         {
 #if JVET_V0056
+#if JVET_AJ0237_INTERNAL_12BIT
+          const int64_t error = srcFrameInfo[i].mvs.get(x / blockSizeX, y / blockSizeY).error;
+#else
           const int error = srcFrameInfo[i].mvs.get(x / blockSizeX, y / blockSizeY).error;
+#endif
           const int noise = srcFrameInfo[i].mvs.get(x / blockSizeX, y / blockSizeY).noise;
 #endif
           const Pel* pCorrectedPelPtr = srcFrameInfo[i].picBuffer.bufs[c].buf + (y * srcFrameInfo[i].picBuffer.bufs[c].stride + x);
diff --git a/source/Lib/EncoderLib/EncTemporalFilter.h b/source/Lib/EncoderLib/EncTemporalFilter.h
index 38b4f3279..c5cb475d3 100644
--- a/source/Lib/EncoderLib/EncTemporalFilter.h
+++ b/source/Lib/EncoderLib/EncTemporalFilter.h
@@ -50,14 +50,26 @@
 struct MotionVector
 {
   int x, y;
+#if JVET_AJ0237_INTERNAL_12BIT
+  int64_t error; // 64-bit error field when JVET_AJ0237_INTERNAL_12BIT is enabled (plain int otherwise)
+#else
   int error;
+#endif
 #if JVET_V0056
   int noise;
+#if JVET_AJ0237_INTERNAL_12BIT
+  MotionVector() : x(0), y(0), error(INT_LEAST64_MAX), noise(0) {} // zero vector and noise; error starts at INT_LEAST64_MAX
+#else
   MotionVector() : x(0), y(0), error(INT_LEAST32_MAX), noise(0) {}
+#endif
 #else
   MotionVector() : x(0), y(0), error(INT_LEAST32_MAX) {}
 #endif
+#if JVET_AJ0237_INTERNAL_12BIT
+  void set(int vectorX, int vectorY, int64_t errorValue) { x = vectorX; y = vectorY; error = errorValue; } // overwrites x, y and error; noise is not touched
+#else
   void set(int vectorX, int vectorY, int errorValue) { x = vectorX; y = vectorY; error = errorValue; }
+#endif
 };
 
 template <class T>
@@ -169,7 +181,11 @@ private:
 
   // Private functions
   void subsampleLuma(const PelStorage &input, PelStorage &output, const int factor = 2) const;
+#if JVET_AJ0237_INTERNAL_12BIT
+  int64_t motionErrorLuma(const PelStorage& orig, const PelStorage& buffer, const int x, const int y, int dx, int dy, const int bs, const int64_t besterror) const;
+#else
   int motionErrorLuma(const PelStorage &orig, const PelStorage &buffer, const int x, const int y, int dx, int dy, const int bs, const int besterror) const;
+#endif
   void motionEstimationLuma(Array2D<MotionVector> &mvs, const PelStorage &orig, const PelStorage &buffer, const int bs,
     const Array2D<MotionVector> *previous=0, const int factor = 1, const bool doubleRes = false) const;
   void motionEstimation(Array2D<MotionVector> &mvs, const PelStorage &orgPic, const PelStorage &buffer, const PelStorage &origSubsampled2, const PelStorage &origSubsampled4) const;
diff --git a/source/Lib/EncoderLib/InterSearch.cpp b/source/Lib/EncoderLib/InterSearch.cpp
index f4824391e..ff0f3e365 100644
--- a/source/Lib/EncoderLib/InterSearch.cpp
+++ b/source/Lib/EncoderLib/InterSearch.cpp
@@ -498,10 +498,18 @@ void InterSearch::init( EncCfg*        pcEncCfg,
 #if INTER_LIC || (TM_AMVP || TM_MRG || JVET_Z0084_IBC_TM) || JVET_W0090_ARMC_TM || JVET_Z0056_GPM_SPLIT_MODE_REORDERING
 #if JVET_Z0153_IBC_EXT_REF
 #if JVET_AJ0172_IBC_ITMP_ALIGN_REF_AREA
+#if JVET_AJ0237_INTERNAL_12BIT
+  InterPrediction::init( pcRdCost, cform, maxCUHeight, m_pcReshape, curPicWidthY, curPicHeightY, pcEncCfg->getBitDepth(CHANNEL_TYPE_LUMA));
+#else
   InterPrediction::init( pcRdCost, cform, maxCUHeight, m_pcReshape, curPicWidthY, curPicHeightY );
+#endif
+#else
+#if JVET_AJ0237_INTERNAL_12BIT
+  InterPrediction::init( pcRdCost, cform, maxCUHeight, m_pcReshape, curPicWidthY, pcEncCfg->getBitDepth(CHANNEL_TYPE_LUMA));
 #else
   InterPrediction::init( pcRdCost, cform, maxCUHeight, m_pcReshape, curPicWidthY );
 #endif
+#endif
 #else
   InterPrediction::init( pcRdCost, cform, maxCUHeight, m_pcReshape );
 #endif
diff --git a/source/Lib/EncoderLib/IntraSearch.cpp b/source/Lib/EncoderLib/IntraSearch.cpp
index f8f9b5bd8..8453b1015 100644
--- a/source/Lib/EncoderLib/IntraSearch.cpp
+++ b/source/Lib/EncoderLib/IntraSearch.cpp
@@ -114,8 +114,10 @@ IntraSearch::IntraSearch()
   m_dimdPredBuf = nullptr;
   m_obicPredBuf = nullptr;
 #endif
+#if !JVET_AJ0237_INTERNAL_12BIT
   m_truncBinBits = nullptr;
   m_escapeNumBins = nullptr;
+#endif
   m_minErrorIndexMap = nullptr;
   for (unsigned i = 0; i < (MAXPLTSIZE + 1); i++)
   {
@@ -324,6 +326,7 @@ void IntraSearch::destroy()
   m_obicPredBuf = nullptr;
 #endif
   m_isInitialized = false;
+#if !JVET_AJ0237_INTERNAL_12BIT
   if (m_truncBinBits != nullptr)
   {
     for (unsigned i = 0; i < m_symbolSize; i++)
@@ -339,6 +342,7 @@ void IntraSearch::destroy()
     delete[] m_escapeNumBins;
     m_escapeNumBins = nullptr;
   }
+#endif
   if (m_indexError[0] != nullptr)
   {
     for (unsigned i = 0; i < (MAXPLTSIZE + 1); i++)
@@ -597,6 +601,7 @@ void IntraSearch::init( EncCfg*        pcEncCfg,
   m_isInitialized = true;
   if (pcEncCfg->getPLTMode())
   {
+#if !JVET_AJ0237_INTERNAL_12BIT
     m_symbolSize = (1 << bitDepthY); // pixel values are within [0, SymbolSize-1] with size SymbolSize
     if (m_truncBinBits == nullptr)
     {
@@ -611,6 +616,7 @@ void IntraSearch::init( EncCfg*        pcEncCfg,
       m_escapeNumBins = new uint16_t[m_symbolSize];
     }
     initTBCTable(bitDepthY);
+#endif
     if (m_indexError[0] == nullptr)
     {
       for (unsigned i = 0; i < (MAXPLTSIZE + 1); i++)
@@ -9119,7 +9125,11 @@ void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& partitione
       {
         if (lossless)
         {
+#if JVET_AJ0237_INTERNAL_12BIT
+          rate += getEpExGolombNumBins(curPel[comp], 5);
+#else
           rate += m_escapeNumBins[curPel[comp]];
+#endif
         }
         else
         {
@@ -9132,7 +9142,11 @@ void IntraSearch::preCalcPLTIndexRD(CodingStructure& cs, Partitioner& partitione
           {
             error += tmpErr * tmpErr;
           }
+#if JVET_AJ0237_INTERNAL_12BIT
+          rate += getEpExGolombNumBins(paPixelValue[comp], 5);
+#else
           rate += m_escapeNumBins[paPixelValue[comp]];   // encode quantized escape color
+#endif
         }
       }
       double rdCost = (double)error + m_pcRdCost->getLambda()*(double)rate;
@@ -9439,7 +9453,11 @@ double IntraSearch::rateDistOptPLT(
       rdCost = MAX_DOUBLE;
       return rdCost;
     }
+#if JVET_AJ0237_INTERNAL_12BIT
+    rdCost += m_pcRdCost->getLambda() * (getTruncBinBits((runIndex > refIndex) ? runIndex - 1 : runIndex, (scanPos == 0) ? (indexMaxValue + 1) : indexMaxValue) << SCALE_BITS);
+#else
     rdCost += m_pcRdCost->getLambda()*(m_truncBinBits[(runIndex > refIndex) ? runIndex - 1 : runIndex][(scanPos == 0) ? (indexMaxValue + 1) : indexMaxValue] << SCALE_BITS);
+#endif
   }
   rdCost += m_indexError[runIndex][m_scanOrder[scanPos].idx] * (1 << SCALE_BITS);
   if (scanPos > 0)
@@ -9508,6 +9526,7 @@ uint32_t IntraSearch::getTruncBinBits(uint32_t symbol, uint32_t maxSymbol)
   return idxCodeBit;
 }
 
+#if !JVET_AJ0237_INTERNAL_12BIT
 void IntraSearch::initTBCTable(int bitDepth)
 {
   for (uint32_t i = 0; i < m_symbolSize; i++)
@@ -9527,6 +9546,7 @@ void IntraSearch::initTBCTable(int bitDepth)
     m_escapeNumBins[i] = getEpExGolombNumBins(i, 5);
   }
 }
+#endif
 
 void IntraSearch::calcPixelPred(CodingStructure& cs, Partitioner& partitioner, uint32_t yPos, uint32_t xPos, ComponentID compBegin, uint32_t numComp)
 {
@@ -9680,7 +9700,11 @@ void IntraSearch::derivePLTLossy(CodingStructure& cs, Partitioner& partitioner,
 
   TransformUnit &tu = *cs.getTU(partitioner.chType);
   QpParam cQP(tu, compBegin);
+#if JVET_AJ0237_INTERNAL_12BIT
+  int qp = cQP.Qp(true) - 6 * (channelBitDepth_L - 8);
+#else
   int qp = cQP.Qp(true) - 12;
+#endif
   qp = (qp < 0) ? 0 : ((qp > 56) ? 56 : qp);
   int errorLimit = g_paletteQuant[qp];
   if (lossless)
diff --git a/source/Lib/EncoderLib/IntraSearch.h b/source/Lib/EncoderLib/IntraSearch.h
index 081574c43..d08dc5706 100644
--- a/source/Lib/EncoderLib/IntraSearch.h
+++ b/source/Lib/EncoderLib/IntraSearch.h
@@ -742,9 +742,11 @@ protected:
   CtxCache*       m_ctxCache;
 
   bool            m_isInitialized;
+#if !JVET_AJ0237_INTERNAL_12BIT
   uint32_t        m_symbolSize;
   uint16_t**      m_truncBinBits;
   uint16_t*       m_escapeNumBins;
+#endif
   bool            m_bestEscape;
   double*         m_indexError[MAXPLTSIZE + 1];
   uint8_t*        m_minErrorIndexMap; // store the best index in terms of distortion for each pixel
@@ -932,7 +934,9 @@ protected:
   void     deriveIndexMap         (CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, uint32_t numComp, PLTScanMode pltScanMode, double& dCost, bool* idxExist);
   bool     deriveSubblockIndexMap(CodingStructure& cs, Partitioner& partitioner, ComponentID compBegin, PLTScanMode pltScanMode, int minSubPos, int maxSubPos, const BinFracBits& fracBitsPltRunType, const BinFracBits* fracBitsPltIndexINDEX, const BinFracBits* fracBitsPltIndexCOPY, const double minCost, bool useRotate);
   double   rateDistOptPLT         (bool RunType, uint8_t RunIndex, bool prevRunType, uint8_t prevRunIndex, uint8_t aboveRunIndex, bool& prevCodedRunType, int& prevCodedRunPos, int scanPos, uint32_t width, int dist, int indexMaxValue, const BinFracBits* IndexfracBits, const BinFracBits& TypefracBits);
+#if !JVET_AJ0237_INTERNAL_12BIT
   void     initTBCTable           (int bitDepth);
+#endif
   uint32_t getTruncBinBits        (uint32_t symbol, uint32_t maxSymbol);
   uint32_t getEpExGolombNumBins   (uint32_t symbol, uint32_t count);
   void xGetNextISPMode                    ( ModeInfo& modeInfo, const ModeInfo* lastMode, const Size cuSize );
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index b92443151..af74cd1fb 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -3604,7 +3604,11 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
     bool lambdaCanBePredicted = false;
     if (index !=-1)
     {
+#if JVET_AJ0237_INTERNAL_12BIT
+      if (pcSlice->getSPS()->getLambdaVal(index) == pcSlice->getCostForARMC(pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA)))
+#else
       if(pcSlice->getSPS()->getLambdaVal(index) == pcSlice->getCostForARMC())
+#endif
       {
         lambdaCanBePredicted = true;
       }
@@ -3612,7 +3616,11 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
     if (!lambdaCanBePredicted)
     {
 #if JVET_AB0082
+#if JVET_AJ0237_INTERNAL_12BIT
+      WRITE_CODE(pcSlice->getCostForARMC(pcSlice->getSPS()->getBitDepth(CHANNEL_TYPE_LUMA)), 10, "Lambda");
+#else
       WRITE_CODE(pcSlice->getCostForARMC(), 10, "Lambda");
+#endif
 #else
       WRITE_CODE(pcSlice->getCostForARMC(), 9, "Lambda");
 #endif
@@ -3648,6 +3656,9 @@ void HLSWriter::codeSliceHeader         ( Slice* pcSlice )
   {
     clipDeltaShift = ADAPTIVE_CLIP_SHIFT_DELTA_VALUE_0;
   }
+#if JVET_AJ0237_INTERNAL_12BIT
+  clipDeltaShift += std::max(0, pcSlice->getSPS()->getBitDepth(toChannelType(COMPONENT_Y)) - 10);
+#endif
   if (pcSlice->getSliceType() == I_SLICE)
   {
 #if JVET_AI0096_ADAPTIVE_CLIPPING_BIT_DEPTH_FIX
-- 
GitLab