From 8607019780a132f7e01d6c5c2c5a6b3748bdb01b Mon Sep 17 00:00:00 2001
From: Karl Sharman <karl.sharman@sony.com>
Date: Wed, 17 Jun 2020 01:10:08 +0200
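Subject: [PATCH] JVET-R0351: High bit depth coding

Add high bit depth coding support behind JVET_R0351_HIGH_BIT_DEPTH_SUPPORT:
- ALF clipping values are stored and passed as Pel instead of short.
- Interpolation and weighting shifts use the new IF_INTERNAL_FRAC_BITS(bd)
  macro.
- COEFF_MIN/COEFF_MAX are compiled out; CoeffCodingContext gains
  minCoeff()/maxCoeff(), derived from the SPS maximum log2 transform
  dynamic range.
- rightShift_round() no longer evaluates 1 << -1 when shift is 0.
- High-precision forward DCT-2 matrices are added for
  RExt__HIGH_PRECISION_FORWARD_TRANSFORM builds.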
---
 source/Lib/CommonLib/AdaptiveLoopFilter.cpp   |  25 +-
 source/Lib/CommonLib/AdaptiveLoopFilter.h     |  33 ++-
 source/Lib/CommonLib/AlfParameters.h          |   8 +
 source/Lib/CommonLib/Buffer.cpp               |  32 +++
 source/Lib/CommonLib/Buffer.h                 |   5 +
 source/Lib/CommonLib/CommonDef.h              |   6 +
 source/Lib/CommonLib/ContextModelling.cpp     |   4 +
 source/Lib/CommonLib/ContextModelling.h       |  24 ++
 source/Lib/CommonLib/DepQuant.cpp             |   8 +
 source/Lib/CommonLib/InterPrediction.cpp      |  20 ++
 source/Lib/CommonLib/InterpolationFilter.cpp  |  16 ++
 source/Lib/CommonLib/InterpolationFilter.h    |   3 +
 source/Lib/CommonLib/Rom.cpp                  |   8 +
 source/Lib/CommonLib/Rom.h                    |   8 +
 source/Lib/CommonLib/RomTr.cpp                |  86 ++++++-
 source/Lib/CommonLib/TrQuant.cpp              |  41 ++-
 source/Lib/CommonLib/TrQuant.h                |   5 +
 source/Lib/CommonLib/TrQuant_EMT.cpp          | 237 +++++++++++++++++-
 source/Lib/CommonLib/TypeDef.h                |   7 +
 source/Lib/CommonLib/WeightPrediction.cpp     |  12 +
 .../Lib/CommonLib/x86/AdaptiveLoopFilterX86.h |   8 +
 .../CommonLib/x86/InterpolationFilterX86.h    |  16 ++
 source/Lib/DecoderLib/CABACReader.cpp         |  25 ++
 source/Lib/DecoderLib/VLCReader.cpp           |   4 +
 .../Lib/EncoderLib/EncAdaptiveLoopFilter.cpp  |  92 +++++++
 source/Lib/EncoderLib/EncAdaptiveLoopFilter.h |   8 +
 source/Lib/EncoderLib/VLCWriter.cpp           |   4 +
 27 files changed, 733 insertions(+), 12 deletions(-)

diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
index c82551538..9e16dcac0 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.cpp
@@ -501,7 +501,11 @@ void AdaptiveLoopFilter::ALFProcess(CodingStructure& cs)
               deriveClassification( m_classifier, buf.get(COMPONENT_Y), blkDst, blkSrc );
               short filterSetIndex = alfCtuFilterIndex[ctuIdx];
               short *coeff;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+              Pel *clip;
+#else
               short *clip;
+#endif
               if (filterSetIndex >= NUM_FIXED_FILTER_SETS)
               {
                 coeff = m_coeffApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS];
@@ -565,7 +569,11 @@ void AdaptiveLoopFilter::ALFProcess(CodingStructure& cs)
           deriveClassification( m_classifier, tmpYuv.get( COMPONENT_Y ), blk, blk );
           short filterSetIndex = alfCtuFilterIndex[ctuIdx];
           short *coeff;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+          Pel *clip;
+#else
           short *clip;
+#endif
           if (filterSetIndex >= NUM_FIXED_FILTER_SETS)
           {
             coeff = m_coeffApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS];
@@ -662,7 +670,11 @@ void AdaptiveLoopFilter::reconstructCoeff( AlfParam& alfParam, ChannelType chann
   {
     int numFilters = isLuma( channel ) ? alfParam.numLumaFilters : 1;
     short* coeff = isLuma( channel ) ? alfParam.lumaCoeff : alfParam.chromaCoeff[altIdx];
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    Pel* clipp = isLuma( channel ) ? alfParam.lumaClipp : alfParam.chromaClipp[altIdx];
+#else
     short* clipp = isLuma( channel ) ? alfParam.lumaClipp : alfParam.chromaClipp[altIdx];
+#endif
 
     for( int filterIdx = 0; filterIdx < numFilters; filterIdx++ )
     {
@@ -1058,7 +1070,11 @@ void AdaptiveLoopFilter::deriveClassificationBlk(AlfClassifier **classifier, int
 template<AlfFilterType filtType>
 void AdaptiveLoopFilter::filterBlk(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
                                    const Area &blkDst, const Area &blk, const ComponentID compId,
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+                                   const short *filterSet, const Pel *fClipSet, const ClpRng &clpRng,
+#else
                                    const short *filterSet, const short *fClipSet, const ClpRng &clpRng,
+#endif
                                    CodingStructure &cs, const int vbCTUHeight, int vbPos)
 {
   CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2");
@@ -1087,8 +1103,11 @@ void AdaptiveLoopFilter::filterBlk(AlfClassifier **classifier, const PelUnitBuf
   const Pel *pImg0, *pImg1, *pImg2, *pImg3, *pImg4, *pImg5, *pImg6;
 
   const short *coef = filterSet;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const Pel *clip = fClipSet;
+#else
   const short *clip = fClipSet;
-
+#endif
   const int shift = m_NUM_BITS - 1;
 
   const int offset = 1 << ( shift - 1 );
@@ -1225,7 +1244,9 @@ void AdaptiveLoopFilter::filterBlk(AlfClassifier **classifier, const PelUnitBuf
         {
 
 
+          // Accumulate in int whatever the width of Pel: a Pel accumulator would be
+          // narrower than the original int if Pel is a 16-bit type (TODO confirm in TypeDef.h).
           int sum = 0;
           const Pel curr = pImg0[+0];
           if( filtType == ALF_FILTER_7 )
           {
diff --git a/source/Lib/CommonLib/AdaptiveLoopFilter.h b/source/Lib/CommonLib/AdaptiveLoopFilter.h
index 74c9bdbd0..be9819ada 100644
--- a/source/Lib/CommonLib/AdaptiveLoopFilter.h
+++ b/source/Lib/CommonLib/AdaptiveLoopFilter.h
@@ -67,10 +67,17 @@ enum Direction
 class AdaptiveLoopFilter
 {
 public:
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT // Pel-typed variant of clipALF()
+  static inline Pel clipALF(const Pel clip, const Pel ref, const Pel val0, const Pel val1)
+  {
+    return Clip3<Pel>(-clip, +clip, val0-ref) + Clip3<Pel>(-clip, +clip, val1-ref); // each difference is clamped to [-clip, +clip], then the two are summed
+  }
+#else
   static inline int clipALF(const int clip, const short ref, const short val0, const short val1)
   {
     return Clip3<int>(-clip, +clip, val0-ref) + Clip3<int>(-clip, +clip, val1-ref);
   }
+#endif
 
   static constexpr int AlfNumClippingValues[MAX_NUM_CHANNEL_TYPE] = { 4, 4 };
   static constexpr int MaxAlfNumClippingValues = 4;
@@ -99,8 +106,12 @@ public:
   template<AlfFilterType filtType>
   static void filterBlk(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
                         const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
-                        const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
-                        int vbPos);
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+                       const Pel *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+#else
+                       const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+#endif
+                       int vbPos);
   void (*m_deriveClassificationBlk)(AlfClassifier **classifier, int **laplacian[NUM_DIRECTIONS], const CPelBuf &srcLuma,
                                     const Area &blkDst, const Area &blk, const int shift, const int vbCTUHeight,
                                     int vbPos);
@@ -116,11 +127,19 @@ public:
   uint8_t* getCcAlfControlIdc(const ComponentID compID)   { return m_ccAlfFilterControl[compID-1]; }
   void (*m_filter5x5Blk)(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
                          const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+                         const Pel *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+#else
                          const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+#endif
                          int vbPos);
   void (*m_filter7x7Blk)(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
                          const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+                         const Pel *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+#else
                          const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+#endif
                          int vbPos);
 
 #ifdef TARGET_SIMD_X86
@@ -138,8 +157,13 @@ protected:
   static const int             m_fixedFilterSetCoeff[ALF_FIXED_FILTER_NUM][MAX_NUM_ALF_LUMA_COEFF];
   short                        m_fixedFilterSetCoeffDec[NUM_FIXED_FILTER_SETS][MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
   short                        m_coeffApsLuma[ALF_CTB_MAX_NUM_APS][MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES];
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  Pel                          m_clippApsLuma[ALF_CTB_MAX_NUM_APS][MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES];
+  Pel                          m_clipDefault[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
+#else
   short                        m_clippApsLuma[ALF_CTB_MAX_NUM_APS][MAX_NUM_ALF_LUMA_COEFF * MAX_NUM_ALF_CLASSES];
   short                        m_clipDefault[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
+#endif
   bool                         m_created = false;
   short                        m_chromaCoeffFinal[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF];
   AlfParam*                    m_alfParamChroma;
@@ -148,8 +172,13 @@ protected:
   std::vector<AlfFilterShape>  m_filterShapes[MAX_NUM_CHANNEL_TYPE];
   AlfClassifier**              m_classifier;
   short                        m_coeffFinal[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  Pel                          m_clippFinal[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
+  Pel                          m_chromaClippFinal[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF];
+#else
   short                        m_clippFinal[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF];
   short                        m_chromaClippFinal[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF];
+#endif
   int**                        m_laplacian[NUM_DIRECTIONS];
   int *                        m_laplacianPtr[NUM_DIRECTIONS][m_CLASSIFICATION_BLK_SIZE + 5];
   int m_laplacianData[NUM_DIRECTIONS][m_CLASSIFICATION_BLK_SIZE + 5][m_CLASSIFICATION_BLK_SIZE + 5];
diff --git a/source/Lib/CommonLib/AlfParameters.h b/source/Lib/CommonLib/AlfParameters.h
index 989952d81..fe7f23ae8 100644
--- a/source/Lib/CommonLib/AlfParameters.h
+++ b/source/Lib/CommonLib/AlfParameters.h
@@ -129,10 +129,18 @@ struct AlfParam
   bool                         enabledFlag[MAX_NUM_COMPONENT];                          // alf_slice_enable_flag, alf_chroma_idc
   bool                         nonLinearFlag[MAX_NUM_CHANNEL_TYPE];                     // alf_[luma/chroma]_clip_flag
   short                        lumaCoeff[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; // alf_coeff_luma_delta[i][j]
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  Pel                          lumaClipp[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; // alf_clipp_luma_[i][j]
+#else
   short                        lumaClipp[MAX_NUM_ALF_CLASSES * MAX_NUM_ALF_LUMA_COEFF]; // alf_clipp_luma_[i][j]
+#endif
   int                          numAlternativesChroma;                                                  // alf_chroma_num_alts_minus_one + 1
   short                        chromaCoeff[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF]; // alf_coeff_chroma[i]
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  Pel                          chromaClipp[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF]; // alf_clipp_chroma[i]
+#else
   short                        chromaClipp[MAX_NUM_ALF_ALTERNATIVES_CHROMA][MAX_NUM_ALF_CHROMA_COEFF]; // alf_clipp_chroma[i]
+#endif
   short                        filterCoeffDeltaIdx[MAX_NUM_ALF_CLASSES];                // filter_coeff_delta[i]
   bool                         alfLumaCoeffFlag[MAX_NUM_ALF_CLASSES];                   // alf_luma_coeff_flag[i]
   int                          numLumaFilters;                                          // number_of_filters_minus1 + 1
diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp
index b1ed883e4..3db48cdb6 100644
--- a/source/Lib/CommonLib/Buffer.cpp
+++ b/source/Lib/CommonLib/Buffer.cpp
@@ -94,16 +94,32 @@ void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Str
     for (int x = 0; x < width; x += 4)
     {
       b = tmpx * (gradX0[x] - gradX1[x]) + tmpy * (gradY0[x] - gradY1[x]);
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      dst[x] = ClipPel(rightShift((src0[x] + src1[x] + b + offset), shift), clpRng);
+#else
       dst[x] = ClipPel((int16_t)rightShift((src0[x] + src1[x] + b + offset), shift), clpRng);
+#endif
 
       b = tmpx * (gradX0[x + 1] - gradX1[x + 1]) + tmpy * (gradY0[x + 1] - gradY1[x + 1]);
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      dst[x + 1] = ClipPel(rightShift((src0[x + 1] + src1[x + 1] + b + offset), shift), clpRng);
+#else
       dst[x + 1] = ClipPel((int16_t)rightShift((src0[x + 1] + src1[x + 1] + b + offset), shift), clpRng);
+#endif
 
       b = tmpx * (gradX0[x + 2] - gradX1[x + 2]) + tmpy * (gradY0[x + 2] - gradY1[x + 2]);
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      dst[x + 2] = ClipPel(rightShift((src0[x + 2] + src1[x + 2] + b + offset), shift), clpRng);
+#else
       dst[x + 2] = ClipPel((int16_t)rightShift((src0[x + 2] + src1[x + 2] + b + offset), shift), clpRng);
+#endif
 
       b = tmpx * (gradX0[x + 3] - gradX1[x + 3]) + tmpy * (gradY0[x + 3] - gradY1[x + 3]);
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      dst[x + 3] = ClipPel(rightShift((src0[x + 3] + src1[x + 3] + b + offset), shift), clpRng);
+#else
       dst[x + 3] = ClipPel((int16_t)rightShift((src0[x + 3] + src1[x + 3] + b + offset), shift), clpRng);
+#endif
     }
     dst += dstStride;       src0 += src0Stride;     src1 += src1Stride;
     gradX0 += gradStride; gradX1 += gradStride; gradY0 += gradStride; gradY1 += gradStride;
@@ -361,7 +377,11 @@ void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBu
   const unsigned src2Stride = other2.stride;
   const unsigned destStride = stride;
   const int clipbd = clpRng.bd;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const int shiftNum = IF_INTERNAL_FRAC_BITS(clipbd) + log2WeightBase;
+#else
   const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase;
+#endif
   const int offset = (1 << (shiftNum - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
 
 #define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src0[ADDR]*w0 + src2[ADDR]*w1 + offset ), shiftNum ), clpRng )
@@ -454,7 +474,11 @@ void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const
   const unsigned src2Stride = other2.stride;
   const unsigned destStride =        stride;
   const int     clipbd      = clpRng.bd;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const int shiftNum = IF_INTERNAL_FRAC_BITS(clipbd) + 1;
+#else
   const int     shiftNum    = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + 1;
+#endif
   const int     offset      = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
 
 #if ENABLE_SIMD_OPT_BUFFER && defined(TARGET_SIMD_X86)
@@ -489,7 +513,11 @@ void AreaBuf<Pel>::toLast( const ClpRng& clpRng )
   const uint32_t srcStride = stride;
 
   const int  clipbd    = clpRng.bd;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const int shiftNum = IF_INTERNAL_FRAC_BITS(clipbd);
+#else
   const int  shiftNum  = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
+#endif
   const int  offset    = ( 1 << ( shiftNum - 1 ) ) + IF_INTERNAL_OFFS;
 
   if (width == 1)
@@ -562,7 +590,11 @@ void AreaBuf<Pel>::roundToOutputBitdepth( const AreaBuf<const Pel> &src, const C
   const unsigned destStride = stride;
 
   const int32_t clipbd            = clpRng.bd;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const int32_t shiftDefault      = IF_INTERNAL_FRAC_BITS(clipbd);
+#else
   const int32_t shiftDefault      = std::max<int>(2, (IF_INTERNAL_PREC - clipbd));
+#endif
   const int32_t offsetDefault     = (1<<(shiftDefault-1)) + IF_INTERNAL_OFFS;
 
   if( width == 1 )
diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h
index 5719f521c..4dfe5a27c 100644
--- a/source/Lib/CommonLib/Buffer.h
+++ b/source/Lib/CommonLib/Buffer.h
@@ -440,8 +440,13 @@ void AreaBuf<T>::removeWeightHighFreq(const AreaBuf<T>& other, const bool bClip,
   {
 #endif
     int normalizer = ((1 << 16) + (bcwWeight > 0 ? (bcwWeight >> 1) : -(bcwWeight >> 1))) / bcwWeight;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    Intermediate_Int weight0 = normalizer << log2WeightBase;
+    Intermediate_Int weight1 = bcwWeightOther * normalizer;
+#else
     int weight0 = normalizer << log2WeightBase;
     int weight1 = bcwWeightOther * normalizer;
+#endif
 #define REM_HF_INC  \
   src += srcStride; \
   dst += dstStride; \
diff --git a/source/Lib/CommonLib/CommonDef.h b/source/Lib/CommonLib/CommonDef.h
index 2fa1f4a5e..40bf83cdf 100644
--- a/source/Lib/CommonLib/CommonDef.h
+++ b/source/Lib/CommonLib/CommonDef.h
@@ -203,8 +203,10 @@ static const int MAX_BDOF_APPLICATION_REGION =                     16;
 static const int MAX_CPB_CNT =                                     32; ///< Upper bound of (cpb_cnt_minus1 + 1)
 static const int MAX_NUM_LAYER_IDS =                               64;
 static const int COEF_REMAIN_BIN_REDUCTION =                        5; ///< indicates the level at which the VLC transitions from Golomb-Rice to TU+EG(k)
+#if !JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
 static const int COEFF_MIN =                                   -32768;
 static const int COEFF_MAX =                                    32767;
+#endif
 static const int CU_DQP_TU_CMAX =                                   5; ///< max number bins for truncated unary
 static const int CU_DQP_EG_k =                                      0; ///< expgolomb order
 
@@ -629,7 +631,11 @@ const char* read_x86_extension(const std::string &extStrId);
 template <typename ValueType> inline ValueType leftShift       (const ValueType value, const int shift) { return (shift >= 0) ? ( value                                  << shift) : ( value                                   >> -shift); }
 template <typename ValueType> inline ValueType rightShift      (const ValueType value, const int shift) { return (shift >= 0) ? ( value                                  >> shift) : ( value                                   << -shift); }
 template <typename ValueType> inline ValueType leftShift_round (const ValueType value, const int shift) { return (shift >= 0) ? ( value                                  << shift) : ((value + (ValueType(1) << (-shift - 1))) >> -shift); }
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT // shift == 0 takes the pass-through branch here, so (ValueType(1) << (shift - 1)) is never evaluated with a negative count
+template <typename ValueType> inline ValueType rightShift_round(const ValueType value, const int shift) { return (shift > 0) ? ((value + (ValueType(1) << (shift - 1))) >> shift) : ( value                                   << -shift); }
+#else
 template <typename ValueType> inline ValueType rightShift_round(const ValueType value, const int shift) { return (shift >= 0) ? ((value + (ValueType(1) << (shift - 1))) >> shift) : ( value                                   << -shift); }
+#endif
 
 static inline int floorLog2(uint32_t x)
 {
diff --git a/source/Lib/CommonLib/ContextModelling.cpp b/source/Lib/CommonLib/ContextModelling.cpp
index 56562db08..34d20b05a 100644
--- a/source/Lib/CommonLib/ContextModelling.cpp
+++ b/source/Lib/CommonLib/ContextModelling.cpp
@@ -69,6 +69,10 @@ CoeffCodingContext::CoeffCodingContext( const TransformUnit& tu, ComponentID com
   , m_lastShiftX                (0)
   , m_lastShiftY                (0)
   , m_TrafoBypass               (tu.cs->sps->getSpsRangeExtension().getTransformSkipContextEnabledFlag() && (tu.mtsIdx[m_compID] == MTS_SKIP))
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  , m_minCoeff                  (-(1 << tu.cs->sps->getMaxLog2TrDynamicRange(m_chType)))
+  , m_maxCoeff                  ((1 << tu.cs->sps->getMaxLog2TrDynamicRange(m_chType)) - 1)
+#endif
   , m_scanPosLast               (-1)
   , m_subSetId                  (-1)
   , m_subSetPos                 (-1)
diff --git a/source/Lib/CommonLib/ContextModelling.h b/source/Lib/CommonLib/ContextModelling.h
index b06839a3b..10160254f 100644
--- a/source/Lib/CommonLib/ContextModelling.h
+++ b/source/Lib/CommonLib/ContextModelling.h
@@ -102,6 +102,11 @@ public:
   void            decimateNumCtxBins(int n) { m_remainingContextBins -= n; }
   void            increaseNumCtxBins(int n) { m_remainingContextBins += n; }
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  TCoeff          minCoeff()                                const { return m_minCoeff; } // const member, initialised in the constructor to -(1 << maxLog2TrDynamicRange)
+  TCoeff          maxCoeff()                                const { return m_maxCoeff; } // const member, initialised in the constructor to (1 << maxLog2TrDynamicRange) - 1
+#endif
+
   unsigned sigCtxIdAbs( int scanPos, const TCoeff* coeff, const int state )
   {
     const uint32_t posY      = m_scan[scanPos].y;
@@ -241,6 +246,13 @@ public:
     return m_tsLrg1FlagCtxSet(numPos);
   }
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  template <typename T> int sgn(T val) // returns -1, 0 or +1 according to the sign of val
+  {
+    return (T(0) < val) - (val < T(0)); // (val is positive) minus (val is negative)
+  }
+
+#endif
   unsigned signCtxIdAbsTS(int scanPos, const TCoeff* coeff, int bdpcm)
   {
     const uint32_t  posY = m_scan[scanPos].y;
@@ -252,11 +264,19 @@ public:
 
     if (posX > 0)
     {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      rightSign = sgn(pData[-1]);
+#else
       rightSign = pData[-1];
+#endif
     }
     if (posY > 0)
     {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      belowSign = sgn(pData[-(int)m_width]);
+#else
       belowSign = pData[-(int)m_width];
+#endif
     }
 
     if ((rightSign == 0 && belowSign == 0) || ((rightSign*belowSign) < 0))
@@ -380,6 +400,10 @@ private:
   const int                 m_lastShiftX;
   const int                 m_lastShiftY;
   const bool                m_TrafoBypass;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const TCoeff              m_minCoeff;
+  const TCoeff              m_maxCoeff;
+#endif
   // modified
   int                       m_scanPosLast;
   int                       m_subSetId;
diff --git a/source/Lib/CommonLib/DepQuant.cpp b/source/Lib/CommonLib/DepQuant.cpp
index 2bb92ba14..72f65178e 100644
--- a/source/Lib/CommonLib/DepQuant.cpp
+++ b/source/Lib/CommonLib/DepQuant.cpp
@@ -1084,7 +1084,11 @@ namespace DQIntern
         }
 #undef UPDATE
         TCoeff sumGt1 = sumAbs1 - sumNum;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+        m_sigFracBits = m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + std::min<TCoeff>( (sumAbs1+1)>>1, 3 )];
+#else
         m_sigFracBits = m_sigFracBitsArray[scanInfo.sigCtxOffsetNext + std::min( (sumAbs1+1)>>1, 3 )];
+#endif
         m_coeffFracBits = m_gtxFracBitsArray[scanInfo.gtxCtxOffsetNext + (sumGt1 < 4 ? sumGt1 : 4)];
 
         TCoeff  sumAbs = m_absLevelsAndCtxInit[8 + scanInfo.nextInsidePos] >> 8;
@@ -1198,7 +1202,11 @@ namespace DQIntern
       TCoeff  sumNum  =   tinit        & 7;
       TCoeff  sumAbs1 = ( tinit >> 3 ) & 31;
       TCoeff  sumGt1  = sumAbs1        - sumNum;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      m_sigFracBits   = m_sigFracBitsArray[ scanInfo.sigCtxOffsetNext + std::min<TCoeff>( (sumAbs1+1)>>1, 3 ) ];
+#else
       m_sigFracBits   = m_sigFracBitsArray[ scanInfo.sigCtxOffsetNext + std::min( (sumAbs1+1)>>1, 3 ) ];
+#endif
       m_coeffFracBits = m_gtxFracBitsArray[ scanInfo.gtxCtxOffsetNext + ( sumGt1  < 4 ? sumGt1  : 4 ) ];
     }
   }
diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp
index 4adf3ac3c..ce9c6c1d5 100644
--- a/source/Lib/CommonLib/InterPrediction.cpp
+++ b/source/Lib/CommonLib/InterPrediction.cpp
@@ -779,7 +779,11 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
   JVET_J0090_SET_CACHE_ENABLE((srcPadStride == 0) && (bioApplied == false)); // Enabled only in non-DMVR-non-BDOF process, In DMVR process, srcPadStride is always non-zero
   if (bioApplied && compID == COMPONENT_Y)
   {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    const int shift = IF_INTERNAL_FRAC_BITS(clpRng.bd);
+#else
     const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
+#endif
     int xOffset = (xFrac < 8) ? 1 : 0;
     int yOffset = (yFrac < 8) ? 1 : 0;
     const Pel* refPel = refBuf.buf - yOffset * refBuf.stride - xOffset;
@@ -1192,7 +1196,11 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction
       }
       if (enablePROF)
       {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+        const int shift = IF_INTERNAL_FRAC_BITS(clpRng.bd);
+#else
         const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
+#endif
         const int xOffset = xFrac >> 3;
         const int yOffset = yFrac >> 3;
 
@@ -1219,15 +1227,23 @@ void InterPrediction::xPredAffineBlk(const ComponentID &compID, const Prediction
         PelBuf gradYBuf = gradYExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2);
         g_pelBufOP.profGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, gradXBuf.stride, gradXBuf.buf, gradYBuf.buf, clpRng.bd);
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+        const Pel offset = (1 << (shift - 1)) + IF_INTERNAL_OFFS;
+#else
         const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
         const Pel offset = (1 << (shiftNum - 1)) + IF_INTERNAL_OFFS;
+#endif
         Pel* src = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
         Pel* gX = gradXBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
         Pel* gY = gradYBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
 
         Pel * dstY = dstBuf.bufAt(w, h);
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+        g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, bi, shift, offset, clpRng);
+#else
         g_pelBufOP.applyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, bi, shiftNum, offset, clpRng);
+#endif
       }
       }
     }
@@ -1280,7 +1296,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf
 
   const ClpRng& clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y);
   const int   bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const int   shiftNum = IF_INTERNAL_FRAC_BITS(bitDepth) + 1;
+#else
   const int   shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
+#endif
   const int   offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
   const int   limit = ( 1 << 4 ) - 1;
 
diff --git a/source/Lib/CommonLib/InterpolationFilter.cpp b/source/Lib/CommonLib/InterpolationFilter.cpp
index ba76ca68f..22148635c 100644
--- a/source/Lib/CommonLib/InterpolationFilter.cpp
+++ b/source/Lib/CommonLib/InterpolationFilter.cpp
@@ -415,7 +415,11 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int
   }
   else if ( isFirst )
   {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    const int shift = IF_INTERNAL_FRAC_BITS(clpRng.bd);
+#else
     const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
+#endif
 
     if (biMCForDMVR)
     {
@@ -464,7 +468,11 @@ void InterpolationFilter::filterCopy( const ClpRng& clpRng, const Pel *src, int
   }
   else
   {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    const int shift = IF_INTERNAL_FRAC_BITS(clpRng.bd);
+#else
     const int shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
+#endif
 
     if (biMCForDMVR)
     {
@@ -567,7 +575,11 @@ void InterpolationFilter::filter(const ClpRng& clpRng, Pel const *src, int srcSt
   src -= ( N/2 - 1 ) * cStride;
 
   int offset;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  int headRoom = IF_INTERNAL_FRAC_BITS(clpRng.bd);
+#else
   int headRoom = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
+#endif
   int shift    = IF_FILTER_PREC;
   // with the current settings (IF_INTERNAL_PREC = 14 and IF_FILTER_PREC = 6), though headroom can be
   // negative for bit depths greater than 14, shift will remain non-negative for bit depths of 8->20
@@ -899,7 +911,11 @@ void InterpolationFilter::xWeightedGeoBlk(const PredictionUnit &pu, const uint32
   const char    log2WeightBase = 3;
   const ClpRng  clipRng = pu.cu->slice->clpRngs().comp[compIdx];
   const int32_t clipbd = clipRng.bd;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const int32_t shiftWeighted = IF_INTERNAL_FRAC_BITS(clipbd) + log2WeightBase;
+#else
   const int32_t shiftWeighted = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + log2WeightBase;
+#endif
   const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
   const uint32_t scaleX = getComponentScaleX(compIdx, pu.chromaFormat);
   const uint32_t scaleY = getComponentScaleY(compIdx, pu.chromaFormat);
diff --git a/source/Lib/CommonLib/InterpolationFilter.h b/source/Lib/CommonLib/InterpolationFilter.h
index 58a811c38..bae5ddda6 100644
--- a/source/Lib/CommonLib/InterpolationFilter.h
+++ b/source/Lib/CommonLib/InterpolationFilter.h
@@ -50,6 +50,9 @@
 #define IF_INTERNAL_OFFS (1<<(IF_INTERNAL_PREC-1)) ///< Offset used internally
 #define IF_INTERNAL_PREC_BILINEAR 10 ///< Number of bits for internal precision
 #define IF_FILTER_PREC_BILINEAR   4  ///< Bilinear filter coeff precision so that intermediate value will not exceed 16 bit for SIMD - bit exact
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+#define IF_INTERNAL_FRAC_BITS(bd) std::max(2, IF_INTERNAL_PREC - int(bd)) ///< IF_INTERNAL_PREC minus the bit depth bd, never less than 2
+#endif
 /**
  * \brief Interpolation filter class
  */
diff --git a/source/Lib/CommonLib/Rom.cpp b/source/Lib/CommonLib/Rom.cpp
index 61814be66..d00d1da9e 100644
--- a/source/Lib/CommonLib/Rom.cpp
+++ b/source/Lib/CommonLib/Rom.cpp
@@ -723,7 +723,11 @@ void initGeoTemplate()
     if (g_angle2mask[angleIdx] == -1)
       continue;
     g_globalGeoWeights[g_angle2mask[angleIdx]] = new int16_t[GEO_WEIGHT_MASK_SIZE * GEO_WEIGHT_MASK_SIZE];
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    g_globalGeoEncSADmask[g_angle2mask[angleIdx]] = new Pel[GEO_WEIGHT_MASK_SIZE * GEO_WEIGHT_MASK_SIZE];
+#else
     g_globalGeoEncSADmask[g_angle2mask[angleIdx]] = new int16_t[GEO_WEIGHT_MASK_SIZE * GEO_WEIGHT_MASK_SIZE];
+#endif
 
     int distanceX = angleIdx;
     int distanceY = (distanceX + (GEO_NUM_ANGLES >> 2)) % GEO_NUM_ANGLES;
@@ -775,7 +779,11 @@ void initGeoTemplate()
 }
 int16_t** g_GeoParams;
 int16_t*  g_globalGeoWeights   [GEO_NUM_PRESTORED_MASK];
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+Pel*      g_globalGeoEncSADmask[GEO_NUM_PRESTORED_MASK];
+#else
 int16_t*  g_globalGeoEncSADmask[GEO_NUM_PRESTORED_MASK];
+#endif
 int16_t   g_weightOffset       [GEO_NUM_PARTITION_MODE][GEO_NUM_CU_SIZE][GEO_NUM_CU_SIZE][2];
 int8_t    g_angle2mask[GEO_NUM_ANGLES] = { 0, -1, 1, 2, 3, 4, -1, -1, 5, -1, -1, 4, 3, 2, 1, -1, 0, -1, 1, 2, 3, 4, -1, -1, 5, -1, -1, 4, 3, 2, 1, -1 };
 int8_t    g_Dis[GEO_NUM_ANGLES] = { 8, 8, 8, 8, 4, 4, 2, 1, 0, -1, -2, -4, -4, -8, -8, -8, -8, -8, -8, -8, -4, -4, -2, -1, 0, 1, 2, 4, 4, 8, 8, 8 };
diff --git a/source/Lib/CommonLib/Rom.h b/source/Lib/CommonLib/Rom.h
index 54e7a3b1c..42d43aa10 100644
--- a/source/Lib/CommonLib/Rom.h
+++ b/source/Lib/CommonLib/Rom.h
@@ -77,7 +77,11 @@ extern const int g_quantScales   [2/*0=4^n blocks, 1=2*4^n blocks*/][SCALING_LIS
 extern const int g_invQuantScales[2/*0=4^n blocks, 1=2*4^n blocks*/][SCALING_LIST_REM_NUM];          // IQ(QP%6)
 
 static const int g_numTransformMatrixSizes = 6;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT && RExt__HIGH_PRECISION_FORWARD_TRANSFORM // same guard as the 16384-scaled matrices in RomTr.cpp
+static const int g_transformMatrixShift[TRANSFORM_NUMBER_OF_DIRECTIONS] = { 14, 6 }; // 14 = log2(16384), 6 = log2(64): the two matrix scales used in RomTr.cpp
+#else
 static const int g_transformMatrixShift[TRANSFORM_NUMBER_OF_DIRECTIONS] = {  6, 6 };
+#endif
 
 
 // ====================================================================================================================
@@ -224,7 +228,11 @@ const int g_IBCBufferSize = 256 * 128;
 void initGeoTemplate();
 extern int16_t** g_GeoParams;
 extern int16_t*  g_globalGeoWeights   [GEO_NUM_PRESTORED_MASK];
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+extern Pel*      g_globalGeoEncSADmask[GEO_NUM_PRESTORED_MASK];
+#else
 extern int16_t*  g_globalGeoEncSADmask[GEO_NUM_PRESTORED_MASK];
+#endif
 extern int16_t   g_weightOffset       [GEO_NUM_PARTITION_MODE][GEO_NUM_CU_SIZE][GEO_NUM_CU_SIZE][2];
 extern int8_t    g_angle2mask         [GEO_NUM_ANGLES];
 extern int8_t    g_Dis[GEO_NUM_ANGLES];
diff --git a/source/Lib/CommonLib/RomTr.cpp b/source/Lib/CommonLib/RomTr.cpp
index a60611a63..722df0e3c 100644
--- a/source/Lib/CommonLib/RomTr.cpp
+++ b/source/Lib/CommonLib/RomTr.cpp
@@ -345,6 +345,90 @@
   {  b, -d,  f, -h,  j, -l,  n, -p,  r, -t,  v, -x,  z, -B,  D, -F,  E, -C,  A, -y,  w, -u,  s, -q,  o, -m,  k, -i,  g, -e,  c, -a,}, \
 }
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT && RExt__HIGH_PRECISION_FORWARD_TRANSFORM
+//--------------------------------------------------------------------------------------------------
+// DCT-2
+const TMatrixCoeff g_trCoreDCT2P2[TRANSFORM_NUMBER_OF_DIRECTIONS][2][2] =
+{
+  DEFINE_DCT2_P2_MATRIX(16384),
+  DEFINE_DCT2_P2_MATRIX(64)
+};
+
+const TMatrixCoeff g_trCoreDCT2P4 [TRANSFORM_NUMBER_OF_DIRECTIONS][4][4]   =
+{
+  DEFINE_DCT2_P4_MATRIX  (16384, 21266,  9224),
+  DEFINE_DCT2_P4_MATRIX  (   64,    83,    36)
+};
+
+const TMatrixCoeff g_trCoreDCT2P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8] =
+{
+  DEFINE_DCT2_P8_MATRIX(16384, 21266,  9224, 22813, 19244, 12769,  4563),
+  DEFINE_DCT2_P8_MATRIX(64,    83,    36,    89,    75,    50,    18)
+};
+
+const TMatrixCoeff g_trCoreDCT2P16[TRANSFORM_NUMBER_OF_DIRECTIONS][16][16] =
+{
+  DEFINE_DCT2_P16_MATRIX(16384, 21266,  9224, 22813, 19244, 12769,  4563, 23120, 22063, 20450, 17972, 14642, 11109,  6446,  2316),
+  DEFINE_DCT2_P16_MATRIX(   64,    83,    36,    89,    75,    50,    18,    90,    87,    80,    70,    57,    43,    25,     9)
+};
+
+const TMatrixCoeff g_trCoreDCT2P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32] =
+{
+  DEFINE_DCT2_P32_MATRIX(16384, 21266,  9224, 22813, 19244, 12769,  4563, 23120, 22063, 20450, 17972, 14642, 11109,  6446 , 2316, 23106, 22852, 22445, 21848, 20995, 19810, 18601, 17143, 15718, 13853, 11749,  9846,  7908,  5573,  3281,   946),
+  DEFINE_DCT2_P32_MATRIX(   64,    83,    36,    89,    75,    50,    18,    90,    87,    80,    70,    57,    43,    25,     9,    90,    90,    88,    85,    82,    78,    73,    67,    61,    54,    46,    38,    31,    22,    13,     4)
+};
+
+const TMatrixCoeff g_trCoreDCT2P64[TRANSFORM_NUMBER_OF_DIRECTIONS][64][64] = // first matrix is 16384-scaled, second is 64-scaled; the leading 31 coefficients of each row match g_trCoreDCT2P32
+{
+  DEFINE_DCT2_P64_MATRIX(16384, 21266,  9224, 22813, 19244, 12769,  4563, 23120, 22063, 20450, 17972, 14642, 11109,  6446,  2316, 23106, 22852, 22445, 21848, 20995, 19810, 18601, 17143, 15718, 13853, 11749,  9846,  7908,  5573,  3281,   946, 23360, 23053, 23048, 23023, 22610, 22339, 21936, 21502, 21266, 20730, 20251, 19726, 18731, 18201, 17638, 16604, 15881, 15084, 14322, 13340, 12238, 11330, 10493,  9428,  8426,  7100,  6151,  5101,  3848,  2734,  1754,   574),
+  DEFINE_DCT2_P64_MATRIX(   64,    83,    36,    89,    75,    50,    18,    90,    87,    80,    70,    57,    43,    25,     9,    90,    90,    88,    85,    82,    78,    73,    67,    61,    54,    46,    38,    31,    22,    13,     4,    91,    90,    90,    90,    88,    87,    86,    84,    83,    81,    79,    77,    73,    71,    69,    65,    62,    59,    56,    52,    48,    44,    41,    37,    33,    28,    24,    20,    15,    11,     7,     2)
+};
+
+// DCT-8
+const TMatrixCoeff g_trCoreDCT8P4[TRANSFORM_NUMBER_OF_DIRECTIONS][4][4] =
+{
+  DEFINE_DCT8_P4_MATRIX(21505, 18893, 14081,  7425),
+  DEFINE_DCT8_P4_MATRIX(84,     74,     55,     29)
+};
+const TMatrixCoeff g_trCoreDCT8P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8] =
+{
+  DEFINE_DCT8_P8_MATRIX(22018, 21790, 19958, 18154, 15363, 11754,  8148,  4350),
+  DEFINE_DCT8_P8_MATRIX(   86,    85,    78,    71,    60,    46,    32,    17)
+};
+const TMatrixCoeff g_trCoreDCT8P16[TRANSFORM_NUMBER_OF_DIRECTIONS][16][16] =
+{
+  DEFINE_DCT8_P16_MATRIX(22569, 22542, 22202, 21664, 20754, 19738, 18787, 17369, 15781, 14044, 12209, 10360,  8498,  6421,  4295,  1967),
+  DEFINE_DCT8_P16_MATRIX(   88,    88,    87,    85,    81,    77,    73,    68,    62,    55,    48,    40,    33,    25,    17,     8)
+};
+const TMatrixCoeff g_trCoreDCT8P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32] =
+{
+  DEFINE_DCT8_P32_MATRIX(23065, 23136, 22715, 22533, 22544, 22053, 21901, 21463, 21131, 20385, 20019, 19708, 19007, 18415, 17448, 16894, 16143, 15230, 14312, 13616, 12679, 11526, 10770,  9720,  8606,  7734,  6623,  5414,  4478,  3225,  2291,  1043),
+  DEFINE_DCT8_P32_MATRIX(   90,    90,    89,    88,    87,    86,    85,    84,    82,    80,    78,    77,    74,    72,    68,    66,    63,    60,    56,    53,    50,    46,    42,    38,    34,    30,    26,    21,    17,    13,     9,     4)
+};
+
+// DST-7
+const TMatrixCoeff g_trCoreDST7P4[TRANSFORM_NUMBER_OF_DIRECTIONS][4][4] =
+{
+  DEFINE_DST7_P4_MATRIX( 7425, 14081, 18893, 21505),
+  DEFINE_DST7_P4_MATRIX(   29,    55,    74,    84)
+};
+const TMatrixCoeff g_trCoreDST7P8[TRANSFORM_NUMBER_OF_DIRECTIONS][8][8] =
+{
+  DEFINE_DST7_P8_MATRIX( 4350,  8148, 11754, 15363, 18154, 19958, 21790, 22018),
+  DEFINE_DST7_P8_MATRIX(   17,    32,    46,    60,    71,    78,    85,    86)
+};
+const TMatrixCoeff g_trCoreDST7P16[TRANSFORM_NUMBER_OF_DIRECTIONS][16][16] =
+{
+  DEFINE_DST7_P16_MATRIX(1967,  4295,  6421,  8498, 10360, 12209, 14044, 15781, 17369, 18787, 19738, 20754, 21664, 22202, 22542, 22569),
+  DEFINE_DST7_P16_MATRIX(   8,    17,    25,    33,    40,    48,    55,    62,    68,    73,    77,    81,    85,    87,    88,    88)
+};
+const TMatrixCoeff g_trCoreDST7P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32] =
+{
+  DEFINE_DST7_P32_MATRIX( 1043,  2291,  3225,  4478,  5414,  6623,  7734,  8606,  9720, 10770, 11526, 12679, 13616, 14312, 15230, 16143, 16894, 17448, 18415, 19007, 19708, 20019, 20385, 21131, 21463, 21901, 22053, 22544, 22533, 22715, 23136, 23065),
+  DEFINE_DST7_P32_MATRIX(    4,     9,    13,    17,    21,    26,    30,    34,    38,    42,    46,    50,    53,    56,    60,    63,    66,    68,    72,    74,    77,    78,    80,    82,    84,    85,    86,    87,    88,    89,    90,    90)
+};
+
+#else
 //--------------------------------------------------------------------------------------------------
 // DCT-2
 const TMatrixCoeff g_trCoreDCT2P2[TRANSFORM_NUMBER_OF_DIRECTIONS][2][2] =
@@ -426,5 +510,5 @@ const TMatrixCoeff g_trCoreDST7P32[TRANSFORM_NUMBER_OF_DIRECTIONS][32][32] =
   DEFINE_DST7_P32_MATRIX(4,     9,    13,    17,    21,    26,    30,    34,    38,    42,    46,    50,    53,    56,    60,    63,    66,    68,    72,    74,    77,    78,    80,    82,    84,    85,    86,    87,    88,    89,    90,    90),
   DEFINE_DST7_P32_MATRIX(4,     9,    13,    17,    21,    26,    30,    34,    38,    42,    46,    50,    53,    56,    60,    63,    66,    68,    72,    74,    77,    78,    80,    82,    84,    85,    86,    87,    88,    89,    90,    90)
 };
-
+#endif
 //--------------------------------------------------------------------------------------------------
diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp
index 7532aea6f..69908d9af 100644
--- a/source/Lib/CommonLib/TrQuant.cpp
+++ b/source/Lib/CommonLib/TrQuant.cpp
@@ -229,18 +229,30 @@ void TrQuant::init( const Quant* otherQuant,
   }
 }
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+void TrQuant::fwdLfnstNxN( TCoeff* src, TCoeff* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize )
+#else
 void TrQuant::fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize )
+#endif
 {
   const int8_t* trMat  = ( size > 4 ) ? g_lfnst8x8[ mode ][ index ][ 0 ] : g_lfnst4x4[ mode ][ index ][ 0 ];
   const int     trSize = ( size > 4 ) ? 48 : 16;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  TCoeff           coef;
+  TCoeff*          out    = dst;
+#else
   int           coef;
   int*          out    = dst;
-
+#endif
   assert( index < 3 );
 
   for( int j = 0; j < zeroOutSize; j++ )
   {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    TCoeff*          srcPtr   = src;
+#else
     int*          srcPtr   = src;
+#endif
     const int8_t* trMatTmp = trMat;
     coef = 0;
     for( int i = 0; i < trSize; i++ )
@@ -254,29 +266,46 @@ void TrQuant::fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32
   ::memset( out, 0, ( trSize - zeroOutSize ) * sizeof( int ) );
 }
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+void TrQuant::invLfnstNxN( TCoeff* src, TCoeff* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize, const int maxLog2TrDynamicRange )
+{
+#else
 void TrQuant::invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize )
 {
   int             maxLog2TrDynamicRange =  15;
+#endif
   const TCoeff    outputMinimum         = -( 1 << maxLog2TrDynamicRange );
   const TCoeff    outputMaximum         =  ( 1 << maxLog2TrDynamicRange ) - 1;
   const int8_t*   trMat                 =  ( size > 4 ) ? g_lfnst8x8[ mode ][ index ][ 0 ] : g_lfnst4x4[ mode ][ index ][ 0 ];
   const int       trSize                =  ( size > 4 ) ? 48 : 16;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  TCoeff          resi;
+  TCoeff*         out                   =  dst;
+#else
   int             resi;
   int*            out                   =  dst;
-
+#endif
   assert( index < 3 );
 
   for( int j = 0; j < trSize; j++ )
   {
     resi = 0;
     const int8_t* trMatTmp = trMat;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    TCoeff*       srcPtr   = src;
+#else
     int*          srcPtr   = src;
+#endif
     for( int i = 0; i < zeroOutSize; i++ )
     {
       resi += *srcPtr++ * *trMatTmp;
       trMatTmp += trSize;
     }
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    *out++ = Clip3<TCoeff>( outputMinimum, outputMaximum, ( resi + 64 ) >> 7 );
+#else
     *out++ = Clip3( outputMinimum, outputMaximum, ( int ) ( resi + 64 ) >> 7 );
+#endif
     trMat++;
   }
 }
@@ -309,6 +338,9 @@ bool TrQuant::getTransposeFlag( uint32_t intraMode )
 
 void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID )
 {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const int maxLog2TrDynamicRange = tu.cs->sps->getMaxLog2TrDynamicRange(toChannelType(compID));
+#endif
   const CompArea& area     = tu.blocks[ compID ];
   const uint32_t  width    = area.width;
   const uint32_t  height   = area.height;
@@ -352,8 +384,11 @@ void TrQuant::xInvLfnst( const TransformUnit &tu, const ComponentID compID )
             scanPtr++;
           }
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+          invLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[ intraMode ], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16, maxLog2TrDynamicRange );
+#else
           invLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[ intraMode ], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 );
-
+#endif
           lfnstTemp = m_tempOutMatrix; // inverse spectral rearrangement
 
           if( transposeFlag )
diff --git a/source/Lib/CommonLib/TrQuant.h b/source/Lib/CommonLib/TrQuant.h
index 50f893da8..619f743f9 100644
--- a/source/Lib/CommonLib/TrQuant.h
+++ b/source/Lib/CommonLib/TrQuant.h
@@ -79,8 +79,13 @@ public:
   );
   void getTrTypes(const TransformUnit tu, const ComponentID compID, int &trTypeHor, int &trTypeVer);
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  void fwdLfnstNxN( TCoeff* src, TCoeff* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize );
+  void invLfnstNxN( TCoeff* src, TCoeff* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize, const int maxLog2TrDynamicRange );
+#else
   void fwdLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize );
   void invLfnstNxN( int* src, int* dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize );
+#endif
 
   uint32_t getLFNSTIntraMode( int wideAngPredMode );
   bool     getTransposeFlag ( uint32_t intraMode  );
diff --git a/source/Lib/CommonLib/TrQuant_EMT.cpp b/source/Lib/CommonLib/TrQuant_EMT.cpp
index b21ede257..82e34f176 100644
--- a/source/Lib/CommonLib/TrQuant_EMT.cpp
+++ b/source/Lib/CommonLib/TrQuant_EMT.cpp
@@ -51,7 +51,11 @@
 void fastForwardDCT2_B2(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2)
 {
   int j;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  TCoeff E, O;
+#else
   int E, O;
+#endif
   TCoeff add = (shift > 0) ? (1 << (shift - 1)) : 0;
 
   const TMatrixCoeff *iT = g_trCoreDCT2P2[TRANSFORM_FORWARD][0];
@@ -85,8 +89,13 @@ void fastForwardDCT2_B2(const TCoeff *src, TCoeff *dst, int shift, int line, int
 void fastInverseDCT2_B2(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
 {
   int j;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  TCoeff E, O;
+  TCoeff add = 1 << (shift - 1);
+#else
   int E, O;
   int add = 1 << (shift - 1);
+#endif
 
   const TMatrixCoeff *iT = g_trCoreDCT2P2[TRANSFORM_INVERSE][0];
 
@@ -98,8 +107,13 @@ void fastInverseDCT2_B2(const TCoeff *src, TCoeff *dst, int shift, int line, int
     O = iT[2] * (src[0] - src[line]);
 
     /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    dst[0] = Clip3<TCoeff>(outputMinimum, outputMaximum, (E + add) >> shift);
+    dst[1] = Clip3<TCoeff>(outputMinimum, outputMaximum, (O + add) >> shift);
+#else
     dst[0] = Clip3(outputMinimum, outputMaximum, (E + add) >> shift);
     dst[1] = Clip3(outputMinimum, outputMaximum, (O + add) >> shift);
+#endif
 
     src++;
     dst += 2;
@@ -176,8 +190,13 @@ void fastForwardDCT2_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int
 void fastInverseDCT2_B4( const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum )
 {
   int j;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  TCoeff E[2], O[2];
+  TCoeff add = 1 << ( shift - 1 );
+#else
   int E[2], O[2];
   int add = 1 << ( shift - 1 );
+#endif
 
   const TMatrixCoeff *iT = g_trCoreDCT2P4[TRANSFORM_INVERSE][0];
 
@@ -191,10 +210,17 @@ void fastInverseDCT2_B4( const TCoeff *src, TCoeff *dst, int shift, int line, in
     E[1] = iT[0 * 4 + 1] * src[   0] + iT[2 * 4 + 1] * src[2 * line];
 
     /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    dst[0] = Clip3<TCoeff>( outputMinimum, outputMaximum, ( E[0] + O[0] + add ) >> shift );
+    dst[1] = Clip3<TCoeff>( outputMinimum, outputMaximum, ( E[1] + O[1] + add ) >> shift );
+    dst[2] = Clip3<TCoeff>( outputMinimum, outputMaximum, ( E[1] - O[1] + add ) >> shift );
+    dst[3] = Clip3<TCoeff>( outputMinimum, outputMaximum, ( E[0] - O[0] + add ) >> shift );
+#else
     dst[0] = Clip3( outputMinimum, outputMaximum, ( E[0] + O[0] + add ) >> shift );
     dst[1] = Clip3( outputMinimum, outputMaximum, ( E[1] + O[1] + add ) >> shift );
     dst[2] = Clip3( outputMinimum, outputMaximum, ( E[1] - O[1] + add ) >> shift );
     dst[3] = Clip3( outputMinimum, outputMaximum, ( E[0] - O[0] + add ) >> shift );
+#endif
 
     src++;
     dst += 4;
@@ -209,7 +235,11 @@ void fastInverseDCT2_B4( const TCoeff *src, TCoeff *dst, int shift, int line, in
 template< int uiTrSize >
 inline void _fastInverseMM( const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum, const TMatrixCoeff* iT )
 {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const TCoeff rnd_factor = 1 << (shift - 1);
+#else
   const int  rnd_factor  = 1 << (shift - 1);
+#endif
   const int  reducedLine = line - iSkipLine;
   const int  cutoff      = uiTrSize - iSkipLine2;
 
@@ -217,12 +247,20 @@ inline void _fastInverseMM( const TCoeff *src, TCoeff *dst, int shift, int line,
   {
     for( int j = 0; j<uiTrSize; j++ )
     {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      TCoeff iSum = 0;
+#else
       int iSum = 0;
+#endif
       for( int k = 0; k<cutoff; k++)
       {
         iSum += src[k*line + i] * iT[k*uiTrSize + j];
       }
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      dst[i*uiTrSize + j] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iSum + rnd_factor) >> shift);
+#else
       dst[i*uiTrSize + j] = Clip3(outputMinimum, outputMaximum, (int)(iSum + rnd_factor) >> shift);
+#endif
     }
   }
 
@@ -236,7 +274,11 @@ inline void _fastInverseMM( const TCoeff *src, TCoeff *dst, int shift, int line,
 template< int uiTrSize >
 inline void _fastForwardMM( const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TMatrixCoeff* tc )
 {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const TCoeff rnd_factor = 1 << (shift - 1);
+#else
   const int  rnd_factor  = 1 << (shift - 1);
+#endif
   const int  reducedLine = line - iSkipLine;
   const int  cutoff      = uiTrSize - iSkipLine2;
   TCoeff *pCoef;
@@ -247,7 +289,11 @@ inline void _fastForwardMM( const TCoeff *src, TCoeff *dst, int shift, int line,
     const TMatrixCoeff* iT = tc;
     for( int j = 0; j<cutoff; j++ )
     {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      TCoeff iSum = 0;
+#else
       int iSum = 0;
+#endif
       for( int k = 0; k<uiTrSize; k++ )
       {
         iSum += src[k] * iT[k];
@@ -344,9 +390,15 @@ void fastForwardDCT2_B8( const TCoeff *src, TCoeff *dst, int shift, int line, in
 void fastInverseDCT2_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
 {
   int j, k;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  TCoeff E[4], O[4];
+  TCoeff EE[2], EO[2];
+  TCoeff add = 1 << (shift - 1);
+#else
   int E[4], O[4];
   int EE[2], EO[2];
   int add = 1 << (shift - 1);
+#endif
 
   const TMatrixCoeff *iT = g_trCoreDCT2P8[TRANSFORM_INVERSE][0];
 
@@ -372,8 +424,13 @@ void fastInverseDCT2_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int
 
     for( k = 0; k < 4; k++ )
     {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      dst[k    ] = Clip3<TCoeff>( outputMinimum, outputMaximum, ( E[    k] + O[    k] + add ) >> shift );
+      dst[k + 4] = Clip3<TCoeff>( outputMinimum, outputMaximum, ( E[3 - k] - O[3 - k] + add ) >> shift );
+#else
       dst[k    ] = Clip3( outputMinimum, outputMaximum, ( E[    k] + O[    k] + add ) >> shift );
       dst[k + 4] = Clip3( outputMinimum, outputMaximum, ( E[3 - k] - O[3 - k] + add ) >> shift );
+#endif
     }
     src++;
     dst += 8;
@@ -465,10 +522,17 @@ void fastForwardDCT2_B16(const TCoeff *src, TCoeff *dst, int shift, int line, in
 void fastInverseDCT2_B16( const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum )
 {
   int j, k;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  TCoeff E  [8], O  [8];
+  TCoeff EE [4], EO [4];
+  TCoeff EEE[2], EEO[2];
+  TCoeff add = 1 << ( shift - 1 );
+#else
   int E  [8], O  [8];
   int EE [4], EO [4];
   int EEE[2], EEO[2];
   int add = 1 << ( shift - 1 );
+#endif
 
   const TMatrixCoeff *iT = g_trCoreDCT2P16[TRANSFORM_INVERSE][0];
 
@@ -504,8 +568,13 @@ void fastInverseDCT2_B16( const TCoeff *src, TCoeff *dst, int shift, int line, i
     }
     for( k = 0; k < 8; k++ )
     {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      dst[k    ] = Clip3<TCoeff>( outputMinimum, outputMaximum, ( E[    k] + O[    k] + add ) >> shift );
+      dst[k + 8] = Clip3<TCoeff>( outputMinimum, outputMaximum, ( E[7 - k] - O[7 - k] + add ) >> shift );
+#else
       dst[k    ] = Clip3( outputMinimum, outputMaximum, ( E[    k] + O[    k] + add ) >> shift );
       dst[k + 8] = Clip3( outputMinimum, outputMaximum, ( E[7 - k] - O[7 - k] + add ) >> shift );
+#endif
     }
     src++;
     dst += 16;
@@ -607,13 +676,21 @@ void fastForwardDCT2_B32( const TCoeff *src, TCoeff *dst, int shift, int line, i
 */
 void fastInverseDCT2_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, const TCoeff outputMinimum, const TCoeff outputMaximum)
 {
-
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  int j, k; // loop counters stay int, matching the #else branch and the other fastInverseDCT2_B* kernels
+  TCoeff E[16], O[16]; // even/odd partial sums are held in the coefficient type
+  TCoeff EE[8], EO[8];
+  TCoeff EEE[4], EEO[4];
+  TCoeff EEEE[2], EEEO[2];
+  TCoeff add = 1 << (shift - 1); // rounding offset added before the final right shift
+#else
   int j, k;
   int E[16], O[16];
   int EE[8], EO[8];
   int EEE[4], EEO[4];
   int EEEE[2], EEEO[2];
   int add = 1 << (shift - 1);
+#endif
 
   const TMatrixCoeff *iT = g_trCoreDCT2P32[TRANSFORM_INVERSE][0];
 
@@ -659,8 +736,13 @@ void fastInverseDCT2_B32(const TCoeff *src, TCoeff *dst, int shift, int line, in
     }
     for (k = 0;k<16;k++)
     {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      dst[k] = Clip3<TCoeff>(outputMinimum, outputMaximum, (E[k] + O[k] + add) >> shift);
+      dst[k + 16] = Clip3<TCoeff>(outputMinimum, outputMaximum, (E[15 - k] - O[15 - k] + add) >> shift);
+#else
       dst[k] = Clip3(outputMinimum, outputMaximum, (E[k] + O[k] + add) >> shift);
       dst[k + 16] = Clip3(outputMinimum, outputMaximum, (E[15 - k] - O[15 - k] + add) >> shift);
+#endif
     }
     src++;
     dst += 32;
@@ -851,8 +933,13 @@ void fastInverseDCT2_B64(const TCoeff *src, TCoeff *dst, int shift, int line, in
     }
     for (k = 0;k<32;k++)
     {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      dst[k] = Clip3<TCoeff>(outputMinimum, outputMaximum, (E[k] + O[k] + rnd_factor) >> shift);
+      dst[k + 32] = Clip3<TCoeff>(outputMinimum, outputMaximum, (E[31 - k] - O[31 - k] + rnd_factor) >> shift);
+#else
       dst[k] = Clip3(outputMinimum, outputMaximum, (E[k] + O[k] + rnd_factor) >> shift);
       dst[k + 32] = Clip3(outputMinimum, outputMaximum, (E[31 - k] - O[31 - k] + rnd_factor) >> shift);
+#endif
     }
     src++;
     dst += uiTrSize;
@@ -871,7 +958,11 @@ void fastForwardDST7_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int
 
   const TMatrixCoeff *iT = g_trCoreDST7P4[TRANSFORM_FORWARD][0];
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  TCoeff c[4];
+#else
   int c[4];
+#endif
   TCoeff *pCoeff = dst;
   const int  reducedLine = line - iSkipLine;
   for (i = 0; i<reducedLine; i++)
@@ -918,10 +1009,17 @@ void fastInverseDST7_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int
     c[2] = src[0 * line] - src[3 * line];
     c[3] = iT[2] * src[1 * line];
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    dst[0] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[0] * c[0] + iT[1] * c[1] + c[3] + rnd_factor) >> shift);
+    dst[1] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[1] * c[2] - iT[0] * c[1] + c[3] + rnd_factor) >> shift);
+    dst[2] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[2] * (src[0 * line] - src[2 * line] + src[3 * line]) + rnd_factor) >> shift);
+    dst[3] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[1] * c[0] + iT[0] * c[2] - c[3] + rnd_factor) >> shift);
+#else
     dst[0] = Clip3(outputMinimum, outputMaximum, (iT[0] * c[0] + iT[1] * c[1] + c[3] + rnd_factor) >> shift);
     dst[1] = Clip3(outputMinimum, outputMaximum, (iT[1] * c[2] - iT[0] * c[1] + c[3] + rnd_factor) >> shift);
     dst[2] = Clip3(outputMinimum, outputMaximum, (iT[2] * (src[0 * line] - src[2 * line] + src[3 * line]) + rnd_factor) >> shift);
     dst[3] = Clip3(outputMinimum, outputMaximum, (iT[1] * c[0] + iT[0] * c[2] - c[3] + rnd_factor) >> shift);
+#endif
 
     dst += 4;
     src++;
@@ -1037,6 +1135,28 @@ void fastInverseDST7_B16(const TCoeff *src, TCoeff *dst, int shift, int line, in
 
     t = iT[10] * src[5 * line];
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    dst[ 2] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[ 2]*d[0] + iT[ 8]*d[1] + iT[14]*d[2] + iT[11]*d[3] + iT[ 5]*d[4] + add ) >> shift);
+    dst[ 5] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[ 5]*d[0] + iT[14]*d[1] + iT[ 2]*d[2] - iT[ 8]*d[3] - iT[11]*d[4] + add ) >> shift);
+    dst[ 8] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[ 8]*d[0] + iT[ 5]*d[1] - iT[11]*d[2] - iT[ 2]*d[3] + iT[14]*d[4] + add ) >> shift);
+    dst[11] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[11]*d[0] - iT[ 2]*d[1] - iT[ 5]*d[2] + iT[14]*d[3] - iT[ 8]*d[4] + add ) >> shift);
+    dst[14] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[14]*d[0] - iT[11]*d[1] + iT[ 8]*d[2] - iT[ 5]*d[3] + iT[ 2]*d[4] + add ) >> shift);
+
+    dst[10] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[10]*(src[ 0*line]-src[ 2*line]+src[ 3*line]-src[5*line]
+                                                                +src[ 6*line]-src[ 8*line]+src[ 9*line]-src[11*line]
+                                                                +src[12*line]-src[14*line]+src[15*line]) + add ) >> shift);
+
+    dst[ 0] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[0]*a[0] + iT[9]*b[0] + iT[2]*a[1] + iT[7]*b[1] + iT[4]*a[2] + iT[5]*b[2] + iT[6]*a[3] + iT[3]*b[3] + iT[8]*a[4] + iT[1]*b[4] + t + add ) >> shift);
+    dst[ 1] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[1]*c[0] - iT[8]*b[0] + iT[5]*c[1] - iT[4]*b[1] + iT[9]*c[2] - iT[0]*b[2] + iT[2]*a[3] + iT[7]*c[3] + iT[6]*a[4] + iT[3]*c[4] + t + add ) >> shift);
+    dst[ 3] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[3]*a[0] + iT[6]*b[0] + iT[0]*c[1] + iT[9]*a[1] + iT[1]*a[2] + iT[8]*c[2] + iT[4]*c[3] - iT[5]*b[3] - iT[2]*a[4] - iT[7]*b[4] - t + add ) >> shift);
+    dst[ 4] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[4]*c[0] - iT[5]*b[0] + iT[6]*c[1] + iT[3]*a[1] + iT[7]*a[2] + iT[2]*b[2] - iT[1]*c[3] + iT[8]*b[3] - iT[9]*c[4] - iT[0]*a[4] - t + add ) >> shift);
+    dst[ 6] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[6]*a[0] + iT[3]*b[0] + iT[9]*c[1] + iT[0]*a[1] - iT[1]*a[2] - iT[8]*b[2] - iT[4]*c[3] - iT[5]*a[3] - iT[2]*c[4] + iT[7]*b[4] + t + add ) >> shift);
+    dst[ 7] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[7]*c[0] - iT[2]*b[0] + iT[8]*a[1] + iT[1]*b[1] - iT[6]*c[2] + iT[3]*b[2] - iT[9]*a[3] - iT[0]*b[3] + iT[5]*c[4] - iT[4]*b[4] + t + add ) >> shift);
+    dst[ 9] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[9]*a[0] + iT[0]*b[0] + iT[2]*c[1] - iT[7]*b[1] - iT[5]*c[2] - iT[4]*a[2] + iT[3]*a[3] + iT[6]*b[3] + iT[8]*c[4] - iT[1]*b[4] - t + add ) >> shift);
+    dst[12] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[1]*c[0] + iT[8]*a[0] - iT[5]*a[1] - iT[4]*b[1] - iT[0]*c[2] + iT[9]*b[2] + iT[7]*c[3] - iT[2]*b[3] - iT[6]*c[4] - iT[3]*a[4] + t + add ) >> shift);
+    dst[13] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[7]*c[0] + iT[2]*a[0] - iT[8]*c[1] + iT[1]*b[1] + iT[3]*c[2] - iT[6]*b[2] + iT[0]*a[3] + iT[9]*b[3] - iT[5]*a[4] - iT[4]*b[4] + t + add ) >> shift);
+    dst[15] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[4]*c[0] + iT[5]*a[0] - iT[3]*c[1] - iT[6]*a[1] + iT[2]*c[2] + iT[7]*a[2] - iT[1]*c[3] - iT[8]*a[3] + iT[0]*c[4] + iT[9]*a[4] - t + add ) >> shift);
+#else
     dst[ 2] = Clip3(outputMinimum, outputMaximum, (int)( iT[ 2]*d[0] + iT[ 8]*d[1] + iT[14]*d[2] + iT[11]*d[3] + iT[ 5]*d[4] + add ) >> shift);
     dst[ 5] = Clip3(outputMinimum, outputMaximum, (int)( iT[ 5]*d[0] + iT[14]*d[1] + iT[ 2]*d[2] - iT[ 8]*d[3] - iT[11]*d[4] + add ) >> shift);
     dst[ 8] = Clip3(outputMinimum, outputMaximum, (int)( iT[ 8]*d[0] + iT[ 5]*d[1] - iT[11]*d[2] - iT[ 2]*d[3] + iT[14]*d[4] + add ) >> shift);
@@ -1057,7 +1177,7 @@ void fastInverseDST7_B16(const TCoeff *src, TCoeff *dst, int shift, int line, in
     dst[12] = Clip3(outputMinimum, outputMaximum, (int)( iT[1]*c[0] + iT[8]*a[0] - iT[5]*a[1] - iT[4]*b[1] - iT[0]*c[2] + iT[9]*b[2] + iT[7]*c[3] - iT[2]*b[3] - iT[6]*c[4] - iT[3]*a[4] + t + add ) >> shift);
     dst[13] = Clip3(outputMinimum, outputMaximum, (int)( iT[7]*c[0] + iT[2]*a[0] - iT[8]*c[1] + iT[1]*b[1] + iT[3]*c[2] - iT[6]*b[2] + iT[0]*a[3] + iT[9]*b[3] - iT[5]*a[4] - iT[4]*b[4] + t + add ) >> shift);
     dst[15] = Clip3(outputMinimum, outputMaximum, (int)( iT[4]*c[0] + iT[5]*a[0] - iT[3]*c[1] - iT[6]*a[1] + iT[2]*c[2] + iT[7]*a[2] - iT[1]*c[3] - iT[8]*a[3] + iT[0]*c[4] + iT[9]*a[4] - t + add ) >> shift);
-
+#endif
     src++;
     dst += 16;
   }
@@ -1209,6 +1329,42 @@ void fastInverseDST7_B32(const TCoeff *src, TCoeff *dst, int shift, int line, in
     t[0] = iT[12] * src[6*line] + iT[25] * src[19*line];
     t[1] = iT[25] * src[6*line] - iT[12] * src[19*line];
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    dst[ 0] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[0] * a[1][0] - iT[11] * a[8][0] + iT[13] * a[7][0] + iT[24] * a[4][5] - iT[1] * a[8][5] + iT[10] * a[1][5] + iT[14] * a[4][0] + iT[23] * a[7][5] + iT[2] * a[1][1] - iT[9] * a[8][1] + iT[15] * a[7][1] + iT[22] * a[4][4] - iT[3] * a[8][4] + iT[8] * a[1][4] + iT[16] * a[4][1] + iT[21] * a[7][4] + iT[4] * a[1][2] - iT[7] * a[8][2] + iT[17] * a[7][2] + iT[20] * a[4][3] - iT[5] * a[8][3] + iT[6] * a[1][3] + iT[18] * a[4][2] + iT[19] * a[7][3] + t[0] + add) >> shift);
+    dst[ 1] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[4][2] - iT[11] * a[6][2] + iT[13] * a[0][3] + iT[24] * a[5][2] + iT[1] * a[2][0] + iT[10] * a[7][0] + iT[14] * a[5][5] - iT[23] * a[9][5] + iT[2] * a[7][2] + iT[9] * a[2][2] - iT[15] * a[9][3] + iT[22] * a[5][3] - iT[3] * a[6][0] - iT[8] * a[4][0] + iT[16] * a[5][0] + iT[21] * a[0][5] - iT[4] * a[4][1] - iT[7] * a[6][1] + iT[17] * a[0][4] + iT[20] * a[5][1] + iT[5] * a[2][1] + iT[6] * a[7][1] + iT[18] * a[5][4] - iT[19] * a[9][4] + t[1] + add) >> shift);
+    dst[ 2] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[2][4] - iT[11] * a[3][4] + iT[13] * a[0][4] + iT[24] * a[1][4] + iT[1] * a[4][3] + iT[10] * a[7][2] + iT[14] * a[1][2] - iT[23] * a[8][2] + iT[2] * a[3][0] - iT[9] * a[6][5] - iT[15] * a[8][0] + iT[22] * a[9][5] - iT[3] * a[6][4] + iT[8] * a[3][1] + iT[16] * a[9][4] - iT[21] * a[8][1] + iT[4] * a[7][3] + iT[7] * a[4][2] - iT[17] * a[8][3] + iT[20] * a[1][3] - iT[5] * a[3][5] - iT[6] * a[2][5] + iT[18] * a[1][5] + iT[19] * a[0][5] + t[1] + add) >> shift);
+    dst[ 3] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[0] * a[5][4] + iT[11] * a[0][1] - iT[13] * a[4][4] - iT[24] * a[6][4] - iT[1] * a[1][3] - iT[10] * a[0][3] + iT[14] * a[2][3] + iT[23] * a[3][3] - iT[2] * a[0][4] - iT[9] * a[1][4] + iT[15] * a[3][4] + iT[22] * a[2][4] + iT[3] * a[0][0] + iT[8] * a[5][5] - iT[16] * a[6][5] - iT[21] * a[4][5] + iT[4] * a[5][0] - iT[7] * a[9][0] + iT[17] * a[7][5] + iT[20] * a[2][5] - iT[5] * a[8][2] + iT[6] * a[9][3] - iT[18] * a[6][3] + iT[19] * a[3][2] + t[0] + add) >> shift);
+    dst[ 5] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[1][5] + iT[11] * a[8][5] - iT[13] * a[7][5] - iT[24] * a[4][0] + iT[1] * a[5][1] + iT[10] * a[0][4] - iT[14] * a[4][1] - iT[23] * a[6][1] - iT[2] * a[8][3] + iT[9] * a[9][2] - iT[15] * a[6][2] + iT[22] * a[3][3] - iT[3] * a[0][2] - iT[8] * a[1][2] + iT[16] * a[3][2] + iT[21] * a[2][2] - iT[4] * a[9][4] + iT[7] * a[5][4] + iT[17] * a[2][1] + iT[20] * a[7][1] + iT[5] * a[1][0] - iT[6] * a[8][0] + iT[18] * a[7][0] + iT[19] * a[4][5] - t[0] + add) >> shift);
+    dst[ 6] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[7][5] - iT[11] * a[2][5] + iT[13] * a[9][0] - iT[24] * a[5][0] + iT[1] * a[3][4] - iT[10] * a[6][1] - iT[14] * a[8][4] + iT[23] * a[9][1] + iT[2] * a[4][2] + iT[9] * a[7][3] + iT[15] * a[1][3] - iT[22] * a[8][3] - iT[3] * a[2][2] - iT[8] * a[3][2] + iT[16] * a[0][2] + iT[21] * a[1][2] - iT[4] * a[6][4] - iT[7] * a[4][4] + iT[17] * a[5][4] + iT[20] * a[0][1] + iT[5] * a[7][0] + iT[6] * a[2][0] - iT[18] * a[9][5] + iT[19] * a[5][5] - t[1] + add) >> shift);
+    dst[ 7] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[6][3] - iT[11] * a[4][3] + iT[13] * a[5][3] + iT[24] * a[0][2] + iT[1] * a[7][1] + iT[10] * a[4][4] - iT[14] * a[8][1] + iT[23] * a[1][1] - iT[2] * a[7][5] - iT[9] * a[4][0] + iT[15] * a[8][5] - iT[22] * a[1][5] + iT[3] * a[7][3] + iT[8] * a[2][3] - iT[16] * a[9][2] + iT[21] * a[5][2] - iT[4] * a[6][5] + iT[7] * a[3][0] + iT[17] * a[9][5] - iT[20] * a[8][0] + iT[5] * a[6][1] - iT[6] * a[3][4] - iT[18] * a[9][1] + iT[19] * a[8][4] - t[1] + add) >> shift);
+    dst[ 8] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[1][1] - iT[11] * a[0][1] + iT[13] * a[2][1] + iT[24] * a[3][1] + iT[1] * a[1][3] - iT[10] * a[8][3] + iT[14] * a[7][3] + iT[23] * a[4][2] - iT[2] * a[9][1] + iT[9] * a[8][4] - iT[15] * a[3][4] + iT[22] * a[6][1] + iT[3] * a[5][5] + iT[8] * a[0][0] - iT[16] * a[4][5] - iT[21] * a[6][5] + iT[4] * a[0][5] + iT[7] * a[1][5] - iT[17] * a[3][5] - iT[20] * a[2][5] + iT[5] * a[5][3] - iT[6] * a[9][3] + iT[18] * a[7][2] + iT[19] * a[2][2] - t[0] + add) >> shift);
+    dst[10] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[0] * a[8][3] - iT[11] * a[1][3] - iT[13] * a[4][2] - iT[24] * a[7][3] - iT[1] * a[8][0] + iT[10] * a[1][0] + iT[14] * a[4][5] + iT[23] * a[7][0] + iT[2] * a[5][3] + iT[9] * a[0][2] - iT[15] * a[4][3] - iT[22] * a[6][3] - iT[3] * a[5][0] - iT[8] * a[0][5] + iT[16] * a[4][0] + iT[21] * a[6][0] + iT[4] * a[1][4] + iT[7] * a[0][4] - iT[17] * a[2][4] - iT[20] * a[3][4] - iT[5] * a[1][1] - iT[6] * a[0][1] + iT[18] * a[2][1] + iT[19] * a[3][1] + t[0] + add) >> shift);
+    dst[11] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[0] * a[7][0] + iT[11] * a[2][0] - iT[13] * a[9][5] + iT[24] * a[5][5] + iT[1] * a[2][5] + iT[10] * a[7][5] + iT[14] * a[5][0] - iT[23] * a[9][0] - iT[2] * a[2][1] - iT[9] * a[3][1] + iT[15] * a[0][1] + iT[22] * a[1][1] - iT[3] * a[7][4] - iT[8] * a[4][1] + iT[16] * a[8][4] - iT[21] * a[1][4] + iT[4] * a[3][2] - iT[7] * a[6][3] - iT[17] * a[8][2] + iT[20] * a[9][3] + iT[5] * a[4][2] + iT[6] * a[6][2] - iT[18] * a[0][3] - iT[19] * a[5][2] + t[1] + add) >> shift);
+    dst[13] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[0] * a[9][5] - iT[11] * a[8][0] + iT[13] * a[3][0] - iT[24] * a[6][5] - iT[1] * a[8][5] + iT[10] * a[9][0] - iT[14] * a[6][0] + iT[23] * a[3][5] + iT[2] * a[5][4] - iT[9] * a[9][4] + iT[15] * a[7][1] + iT[22] * a[2][1] - iT[3] * a[1][4] + iT[8] * a[8][4] - iT[16] * a[7][4] - iT[21] * a[4][1] - iT[4] * a[0][2] - iT[7] * a[5][3] + iT[17] * a[6][3] + iT[20] * a[4][3] + iT[5] * a[0][3] + iT[6] * a[1][3] - iT[18] * a[3][3] - iT[19] * a[2][3] + t[0] + add) >> shift);
+    dst[15] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[9][1] + iT[11] * a[5][1] + iT[13] * a[2][4] + iT[24] * a[7][4] + iT[1] * a[9][3] - iT[10] * a[5][3] - iT[14] * a[2][2] - iT[23] * a[7][2] - iT[2] * a[9][5] + iT[9] * a[5][5] + iT[15] * a[2][0] + iT[22] * a[7][0] + iT[3] * a[9][4] - iT[8] * a[8][1] + iT[16] * a[3][1] - iT[21] * a[6][4] - iT[4] * a[9][2] + iT[7] * a[8][3] - iT[17] * a[3][3] + iT[20] * a[6][2] + iT[5] * a[9][0] - iT[6] * a[8][5] + iT[18] * a[3][5] - iT[19] * a[6][0] - t[0] + add) >> shift);
+    dst[16] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[0] * a[4][4] + iT[11] * a[7][1] + iT[13] * a[1][1] - iT[24] * a[8][1] + iT[1] * a[6][2] - iT[10] * a[3][3] - iT[14] * a[9][2] + iT[23] * a[8][3] - iT[2] * a[6][1] - iT[9] * a[4][1] + iT[15] * a[5][1] + iT[22] * a[0][4] - iT[3] * a[4][5] - iT[8] * a[6][5] + iT[16] * a[0][0] + iT[21] * a[5][5] - iT[4] * a[6][0] + iT[7] * a[3][5] + iT[17] * a[9][0] - iT[20] * a[8][5] + iT[5] * a[6][3] + iT[6] * a[4][3] - iT[18] * a[5][3] - iT[19] * a[0][2] - t[1] + add) >> shift);
+    dst[17] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[7][2] - iT[11] * a[4][3] + iT[13] * a[8][2] - iT[24] * a[1][2] + iT[1] * a[7][1] + iT[10] * a[2][1] - iT[14] * a[9][4] + iT[23] * a[5][4] - iT[2] * a[3][5] + iT[9] * a[6][0] + iT[15] * a[8][5] - iT[22] * a[9][0] - iT[3] * a[2][3] - iT[8] * a[7][3] - iT[16] * a[5][2] + iT[21] * a[9][2] + iT[4] * a[4][5] + iT[7] * a[7][0] + iT[17] * a[1][0] - iT[20] * a[8][0] - iT[5] * a[2][4] - iT[6] * a[3][4] + iT[18] * a[0][4] + iT[19] * a[1][4] - t[1] + add) >> shift);
+    dst[18] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[9][0] + iT[11] * a[8][5] - iT[13] * a[3][5] + iT[24] * a[6][0] + iT[1] * a[5][1] - iT[10] * a[9][1] + iT[14] * a[7][4] + iT[23] * a[2][4] + iT[2] * a[0][3] + iT[9] * a[5][2] - iT[15] * a[6][2] - iT[22] * a[4][2] + iT[3] * a[1][2] + iT[8] * a[0][2] - iT[16] * a[2][2] - iT[21] * a[3][2] - iT[4] * a[8][1] + iT[7] * a[1][1] + iT[17] * a[4][4] + iT[20] * a[7][1] + iT[5] * a[9][5] - iT[6] * a[8][0] + iT[18] * a[3][0] - iT[19] * a[6][5] - t[0] + add) >> shift);
+    dst[20] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[0] * a[8][2] - iT[11] * a[9][3] + iT[13] * a[6][3] - iT[24] * a[3][2] + iT[1] * a[0][1] + iT[10] * a[5][4] - iT[14] * a[6][4] - iT[23] * a[4][4] + iT[2] * a[1][5] + iT[9] * a[0][5] - iT[15] * a[2][5] - iT[22] * a[3][5] - iT[3] * a[9][2] + iT[8] * a[5][2] + iT[16] * a[2][3] + iT[21] * a[7][3] + iT[4] * a[5][5] - iT[7] * a[9][5] + iT[17] * a[7][0] + iT[20] * a[2][0] + iT[5] * a[0][4] + iT[6] * a[5][1] - iT[18] * a[6][1] - iT[19] * a[4][1] + t[0] + add) >> shift);
+    dst[21] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[2][1] - iT[11] * a[7][1] - iT[13] * a[5][4] + iT[24] * a[9][4] - iT[1] * a[6][2] - iT[10] * a[4][2] + iT[14] * a[5][2] + iT[23] * a[0][3] - iT[2] * a[2][4] - iT[9] * a[7][4] - iT[15] * a[5][1] + iT[22] * a[9][1] - iT[3] * a[6][5] - iT[8] * a[4][5] + iT[16] * a[5][5] + iT[21] * a[0][0] - iT[4] * a[4][0] - iT[7] * a[7][5] - iT[17] * a[1][5] + iT[20] * a[8][5] - iT[5] * a[7][2] - iT[6] * a[4][3] + iT[18] * a[8][2] - iT[19] * a[1][2] + t[1] + add) >> shift);
+    dst[22] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[0] * a[6][1] - iT[11] * a[3][4] - iT[13] * a[9][1] + iT[24] * a[8][4] + iT[1] * a[4][3] + iT[10] * a[6][3] - iT[14] * a[0][2] - iT[23] * a[5][3] + iT[2] * a[7][0] + iT[9] * a[4][5] - iT[15] * a[8][0] + iT[22] * a[1][0] - iT[3] * a[3][1] + iT[8] * a[6][4] + iT[16] * a[8][1] - iT[21] * a[9][4] - iT[4] * a[2][3] - iT[7] * a[3][3] + iT[17] * a[0][3] + iT[20] * a[1][3] - iT[5] * a[7][5] - iT[6] * a[2][5] + iT[18] * a[9][0] - iT[19] * a[5][0] + t[1] + add) >> shift);
+    dst[23] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[0][3] - iT[11] * a[1][3] + iT[13] * a[3][3] + iT[24] * a[2][3] - iT[1] * a[8][0] + iT[10] * a[9][5] - iT[14] * a[6][5] + iT[23] * a[3][0] + iT[2] * a[8][2] - iT[9] * a[1][2] - iT[15] * a[4][3] - iT[22] * a[7][2] + iT[3] * a[0][5] + iT[8] * a[5][0] - iT[16] * a[6][0] - iT[21] * a[4][0] + iT[4] * a[8][4] - iT[7] * a[9][1] + iT[17] * a[6][1] - iT[20] * a[3][4] - iT[5] * a[5][4] - iT[6] * a[0][1] + iT[18] * a[4][4] + iT[19] * a[6][4] + t[0] + add) >> shift);
+    dst[26] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[3][0] - iT[11] * a[2][0] + iT[13] * a[1][0] + iT[24] * a[0][0] - iT[1] * a[2][5] - iT[10] * a[3][5] + iT[14] * a[0][5] + iT[23] * a[1][5] + iT[2] * a[4][4] + iT[9] * a[6][4] - iT[15] * a[0][1] - iT[22] * a[5][4] - iT[3] * a[4][1] - iT[8] * a[7][4] - iT[16] * a[1][4] + iT[21] * a[8][4] + iT[4] * a[2][2] + iT[7] * a[7][2] + iT[17] * a[5][3] - iT[20] * a[9][3] + iT[5] * a[3][3] - iT[6] * a[6][2] - iT[18] * a[8][3] + iT[19] * a[9][2] - t[1] + add) >> shift);
+    dst[27] = Clip3<TCoeff>(outputMinimum, outputMaximum, (-iT[0] * a[3][3] + iT[11] * a[6][2] + iT[13] * a[8][3] - iT[24] * a[9][2] - iT[1] * a[2][0] - iT[10] * a[3][0] + iT[14] * a[0][0] + iT[23] * a[1][0] - iT[2] * a[6][3] + iT[9] * a[3][2] + iT[15] * a[9][3] - iT[22] * a[8][2] - iT[3] * a[4][0] - iT[8] * a[6][0] + iT[16] * a[0][5] + iT[21] * a[5][0] - iT[4] * a[7][4] - iT[7] * a[2][4] + iT[17] * a[9][1] - iT[20] * a[5][1] - iT[5] * a[4][4] - iT[6] * a[7][1] - iT[18] * a[1][1] + iT[19] * a[8][1] - t[1] + add) >> shift);
+    dst[28] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[0] * a[0][4] + iT[11] * a[5][1] - iT[13] * a[6][1] - iT[24] * a[4][1] + iT[1] * a[9][3] - iT[10] * a[8][2] + iT[14] * a[3][2] - iT[23] * a[6][3] - iT[2] * a[1][0] - iT[9] * a[0][0] + iT[15] * a[2][0] + iT[22] * a[3][0] + iT[3] * a[8][1] - iT[8] * a[9][4] + iT[16] * a[6][4] - iT[21] * a[3][1] - iT[4] * a[5][2] - iT[7] * a[0][3] + iT[17] * a[4][2] + iT[20] * a[6][2] + iT[5] * a[1][5] - iT[6] * a[8][5] + iT[18] * a[7][5] + iT[19] * a[4][0] - t[0] + add) >> shift);
+    dst[30] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[0] * a[5][3] - iT[11] * a[9][3] + iT[13] * a[7][2] + iT[24] * a[2][2] + iT[1] * a[0][1] + iT[10] * a[1][1] - iT[14] * a[3][1] - iT[23] * a[2][1] + iT[2] * a[9][0] - iT[9] * a[5][0] - iT[15] * a[2][5] - iT[22] * a[7][5] - iT[3] * a[5][2] + iT[8] * a[9][2] - iT[16] * a[7][3] - iT[21] * a[2][3] - iT[4] * a[0][0] - iT[7] * a[1][0] + iT[17] * a[3][0] + iT[20] * a[2][0] - iT[5] * a[9][1] + iT[6] * a[5][1] + iT[18] * a[2][4] + iT[19] * a[7][4] + t[0] + add) >> shift);
+    dst[31] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( iT[0] * a[3][5] + iT[11] * a[2][5] - iT[13] * a[1][5] - iT[24] * a[0][5] - iT[1] * a[3][4] - iT[10] * a[2][4] + iT[14] * a[1][4] + iT[23] * a[0][4] + iT[2] * a[3][3] + iT[9] * a[2][3] - iT[15] * a[1][3] - iT[22] * a[0][3] - iT[3] * a[3][2] - iT[8] * a[2][2] + iT[16] * a[1][2] + iT[21] * a[0][2] + iT[4] * a[3][1] + iT[7] * a[2][1] - iT[17] * a[1][1] - iT[20] * a[0][1] - iT[5] * a[3][0] - iT[6] * a[2][0] + iT[18] * a[1][0] + iT[19] * a[0][0] + t[1] + add) >> shift);
+
+    dst[ 4] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[ 4] * b[0] + iT[14] * b[1] + iT[24] * b[2] + iT[29] * b[3] + iT[19] * b[4] + iT[ 9] * b[5] + add) >> shift);
+    dst[ 9] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[ 9] * b[0] + iT[29] * b[1] + iT[14] * b[2] - iT[ 4] * b[3] - iT[24] * b[4] - iT[19] * b[5] + add) >> shift);
+    dst[14] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[14] * b[0] + iT[19] * b[1] - iT[ 9] * b[2] - iT[24] * b[3] + iT[ 4] * b[4] + iT[29] * b[5] + add) >> shift);
+    dst[19] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[19] * b[0] + iT[ 4] * b[1] - iT[29] * b[2] + iT[ 9] * b[3] + iT[14] * b[4] - iT[24] * b[5] + add) >> shift);
+    dst[24] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[24] * b[0] - iT[ 9] * b[1] - iT[ 4] * b[2] + iT[19] * b[3] - iT[29] * b[4] + iT[14] * b[5] + add) >> shift);
+    dst[29] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[29] * b[0] - iT[24] * b[1] + iT[19] * b[2] - iT[14] * b[3] + iT[ 9] * b[4] - iT[ 4] * b[5] + add) >> shift);
+
+    dst[12] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[12]*c[0] + iT[25]*c[1] + add) >> shift);
+    dst[25] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[25]*c[0] - iT[12]*c[1] + add) >> shift);
+#else
     dst[ 0] = Clip3(outputMinimum, outputMaximum, (int)( iT[0] * a[1][0] - iT[11] * a[8][0] + iT[13] * a[7][0] + iT[24] * a[4][5] - iT[1] * a[8][5] + iT[10] * a[1][5] + iT[14] * a[4][0] + iT[23] * a[7][5] + iT[2] * a[1][1] - iT[9] * a[8][1] + iT[15] * a[7][1] + iT[22] * a[4][4] - iT[3] * a[8][4] + iT[8] * a[1][4] + iT[16] * a[4][1] + iT[21] * a[7][4] + iT[4] * a[1][2] - iT[7] * a[8][2] + iT[17] * a[7][2] + iT[20] * a[4][3] - iT[5] * a[8][3] + iT[6] * a[1][3] + iT[18] * a[4][2] + iT[19] * a[7][3] + t[0] + add) >> shift);
     dst[ 1] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[4][2] - iT[11] * a[6][2] + iT[13] * a[0][3] + iT[24] * a[5][2] + iT[1] * a[2][0] + iT[10] * a[7][0] + iT[14] * a[5][5] - iT[23] * a[9][5] + iT[2] * a[7][2] + iT[9] * a[2][2] - iT[15] * a[9][3] + iT[22] * a[5][3] - iT[3] * a[6][0] - iT[8] * a[4][0] + iT[16] * a[5][0] + iT[21] * a[0][5] - iT[4] * a[4][1] - iT[7] * a[6][1] + iT[17] * a[0][4] + iT[20] * a[5][1] + iT[5] * a[2][1] + iT[6] * a[7][1] + iT[18] * a[5][4] - iT[19] * a[9][4] + t[1] + add) >> shift);
     dst[ 2] = Clip3(outputMinimum, outputMaximum, (int)(-iT[0] * a[2][4] - iT[11] * a[3][4] + iT[13] * a[0][4] + iT[24] * a[1][4] + iT[1] * a[4][3] + iT[10] * a[7][2] + iT[14] * a[1][2] - iT[23] * a[8][2] + iT[2] * a[3][0] - iT[9] * a[6][5] - iT[15] * a[8][0] + iT[22] * a[9][5] - iT[3] * a[6][4] + iT[8] * a[3][1] + iT[16] * a[9][4] - iT[21] * a[8][1] + iT[4] * a[7][3] + iT[7] * a[4][2] - iT[17] * a[8][3] + iT[20] * a[1][3] - iT[5] * a[3][5] - iT[6] * a[2][5] + iT[18] * a[1][5] + iT[19] * a[0][5] + t[1] + add) >> shift);
@@ -1243,7 +1399,7 @@ void fastInverseDST7_B32(const TCoeff *src, TCoeff *dst, int shift, int line, in
 
     dst[12] = Clip3(outputMinimum, outputMaximum, (int)(iT[12]*c[0] + iT[25]*c[1] + add) >> shift);
     dst[25] = Clip3(outputMinimum, outputMaximum, (int)(iT[25]*c[0] - iT[12]*c[1] + add) >> shift);
-
+#endif
     src++;
     dst += 32;
   }
@@ -1265,7 +1421,11 @@ void fastForwardDCT8_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int
   int rnd_factor = 1 << (shift - 1);
   const TMatrixCoeff *iT = g_trCoreDCT8P4[TRANSFORM_FORWARD][0];
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  TCoeff c[4];
+#else
   int c[4];
+#endif
   TCoeff *pCoeff = dst;
   const int  reducedLine = line - iSkipLine;
   for (i = 0; i<reducedLine; i++)
@@ -1302,7 +1462,11 @@ void fastInverseDCT8_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int
 
   const TMatrixCoeff *iT = g_trCoreDCT8P4[TRANSFORM_INVERSE][0];
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  TCoeff c[4];
+#else
   int c[4];
+#endif
   const int  reducedLine = line - iSkipLine;
   for (i = 0; i<reducedLine; i++)
   {
@@ -1312,11 +1476,17 @@ void fastInverseDCT8_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int
     c[2] = src[3 * line] - src[2 * line];
     c[3] = iT[1] * src[1 * line];
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    dst[0] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[3] * c[0] + iT[2] * c[1] + c[3] + rnd_factor) >> shift);
+    dst[1] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[1] * (src[0 * line] - src[2 * line] - src[3 * line]) + rnd_factor) >> shift);
+    dst[2] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[3] * c[2] + iT[2] * c[0] - c[3] + rnd_factor) >> shift);
+    dst[3] = Clip3<TCoeff>(outputMinimum, outputMaximum, (iT[3] * c[1] - iT[2] * c[2] - c[3] + rnd_factor) >> shift);
+#else
     dst[0] = Clip3(outputMinimum, outputMaximum, (iT[3] * c[0] + iT[2] * c[1] + c[3] + rnd_factor) >> shift);
     dst[1] = Clip3(outputMinimum, outputMaximum, (iT[1] * (src[0 * line] - src[2 * line] - src[3 * line]) + rnd_factor) >> shift);
     dst[2] = Clip3(outputMinimum, outputMaximum, (iT[3] * c[2] + iT[2] * c[0] - c[3] + rnd_factor) >> shift);
     dst[3] = Clip3(outputMinimum, outputMaximum, (iT[3] * c[1] - iT[2] * c[2] - c[3] + rnd_factor) >> shift);
-
+#endif
     dst += 4;
     src++;
   }
@@ -1430,6 +1600,26 @@ void fastInverseDCT8_B16(const TCoeff *src, TCoeff *dst, int shift, int line, in
 
     t = iT[10] * src[5*line];
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    dst[ 1] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[ 2]*d[0] - iT[ 5]*d[1] - iT[ 8]*d[2] - iT[11]*d[3] - iT[14]*d[4] + add) >> shift);
+    dst[ 4] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[ 8]*d[0] + iT[14]*d[1] + iT[ 5]*d[2] - iT[ 2]*d[3] - iT[11]*d[4] + add) >> shift);
+    dst[ 7] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[14]*d[0] - iT[ 2]*d[1] + iT[11]*d[2] + iT[ 5]*d[3] - iT[ 8]*d[4] + add) >> shift);
+    dst[10] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[11]*d[0] - iT[ 8]*d[1] - iT[ 2]*d[2] + iT[14]*d[3] - iT[ 5]*d[4] + add) >> shift);
+    dst[13] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[ 5]*d[0] + iT[11]*d[1] - iT[14]*d[2] + iT[ 8]*d[3] - iT[ 2]*d[4] + add) >> shift);
+
+    dst[ 5] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[10] * (src[15 * line] + src[14 * line] - src[12 * line] - src[11 * line] + src[9 * line] + src[8 * line] - src[6 * line] - src[5 * line] + src[3 * line] + src[2 * line] - src[0 * line]) + add) >> shift);
+
+    dst[ 0] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0]*a[0] + iT[9]*b[0] + iT[1]*a[1] + iT[8]*b[1] + iT[2]*a[2] + iT[7]*b[2] + iT[3]*a[3] + iT[6]*b[3] + iT[4]*a[4] + iT[5]*b[4] + t + add ) >> shift );
+    dst[ 2] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[4]*c[0] - iT[5]*b[0] + iT[9]*c[1] - iT[0]*b[1] + iT[6]*c[2] + iT[3]*a[2] + iT[1]*c[3] + iT[8]*a[3] + iT[7]*a[4] + iT[2]*b[4] - t + add ) >> shift );
+    dst[ 3] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[6]*a[0] - iT[3]*b[0] - iT[2]*c[1] - iT[7]*a[1] - iT[9]*c[2] - iT[0]*a[2] - iT[4]*c[3] + iT[5]*b[3] + iT[1]*a[4] + iT[8]*b[4] - t + add ) >> shift );
+    dst[ 6] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[8]*a[0] + iT[1]*c[0] + iT[6]*c[1] - iT[3]*b[1] - iT[5]*a[2] - iT[4]*b[2] - iT[7]*c[3] - iT[2]*a[3] - iT[0]*c[4] + iT[9]*b[4] + t + add ) >> shift );
+    dst[ 8] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[4]*c[0] + iT[5]*a[0] - iT[0]*c[1] + iT[9]*b[1] - iT[3]*c[2] - iT[6]*a[2] + iT[1]*c[3] - iT[8]*b[3] + iT[2]*c[4] + iT[7]*a[4] - t + add ) >> shift );
+    dst[ 9] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[7]*c[0] - iT[2]*a[0] + iT[4]*a[1] + iT[5]*b[1] + iT[8]*c[2] - iT[1]*b[2] - iT[9]*a[3] - iT[0]*b[3] - iT[3]*c[4] + iT[6]*b[4] - t + add ) >> shift );
+    dst[11] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[9]*a[0] - iT[0]*b[0] + iT[8]*c[1] + iT[1]*a[1] - iT[2]*c[2] + iT[7]*b[2] - iT[6]*a[3] - iT[3]*b[3] + iT[5]*c[4] + iT[4]*a[4] + t + add ) >> shift );
+    dst[12] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[7]*c[0] - iT[2]*b[0] - iT[5]*c[1] - iT[4]*a[1] + iT[8]*a[2] + iT[1]*b[2] - iT[0]*a[3] - iT[9]*b[3] - iT[6]*c[4] + iT[3]*b[4] + t + add ) >> shift );
+    dst[14] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[3]*a[0] + iT[6]*b[0] - iT[7]*a[1] - iT[2]*b[1] + iT[0]*c[2] + iT[9]*a[2] - iT[4]*c[3] - iT[5]*a[3] + iT[8]*c[4] + iT[1]*a[4] - t + add ) >> shift );
+    dst[15] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[1]*c[0] + iT[8]*b[0] + iT[3]*c[1] - iT[6]*b[1] - iT[5]*c[2] + iT[4]*b[2] + iT[7]*c[3] - iT[2]*b[3] - iT[9]*c[4] + iT[0]*b[4] - t + add ) >> shift );
+#else
     dst[ 1] = Clip3(outputMinimum, outputMaximum, (int)( - iT[ 2]*d[0] - iT[ 5]*d[1] - iT[ 8]*d[2] - iT[11]*d[3] - iT[14]*d[4] + add) >> shift);
     dst[ 4] = Clip3(outputMinimum, outputMaximum, (int)(   iT[ 8]*d[0] + iT[14]*d[1] + iT[ 5]*d[2] - iT[ 2]*d[3] - iT[11]*d[4] + add) >> shift);
     dst[ 7] = Clip3(outputMinimum, outputMaximum, (int)( - iT[14]*d[0] - iT[ 2]*d[1] + iT[11]*d[2] + iT[ 5]*d[3] - iT[ 8]*d[4] + add) >> shift);
@@ -1448,7 +1638,7 @@ void fastInverseDCT8_B16(const TCoeff *src, TCoeff *dst, int shift, int line, in
     dst[12] = Clip3(outputMinimum, outputMaximum, (int)(   iT[7]*c[0] - iT[2]*b[0] - iT[5]*c[1] - iT[4]*a[1] + iT[8]*a[2] + iT[1]*b[2] - iT[0]*a[3] - iT[9]*b[3] - iT[6]*c[4] + iT[3]*b[4] + t + add ) >> shift );
     dst[14] = Clip3(outputMinimum, outputMaximum, (int)(   iT[3]*a[0] + iT[6]*b[0] - iT[7]*a[1] - iT[2]*b[1] + iT[0]*c[2] + iT[9]*a[2] - iT[4]*c[3] - iT[5]*a[3] + iT[8]*c[4] + iT[1]*a[4] - t + add ) >> shift );
     dst[15] = Clip3(outputMinimum, outputMaximum, (int)( - iT[1]*c[0] + iT[8]*b[0] + iT[3]*c[1] - iT[6]*b[1] - iT[5]*c[2] + iT[4]*b[2] + iT[7]*c[3] - iT[2]*b[3] - iT[9]*c[4] + iT[0]*b[4] - t + add ) >> shift );
-
+#endif
     src++;
     dst += 16;
   }
@@ -1603,6 +1793,42 @@ void fastInverseDCT8_B32(const TCoeff *src, TCoeff *dst, int shift, int line, in
     t[0] = iT[12] * src[19 * line] + iT[25] * src[ 6 * line];
     t[1] = iT[12] * src[ 6 * line] - iT[25] * src[19 * line];
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    dst[ 0] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[3][0] + iT[11] * a[6][5] + iT[13] * a[8][0] + iT[24] * a[9][5] + iT[1] * a[3][1] + iT[10] * a[6][4] + iT[14] * a[8][1] + iT[23] * a[9][4] + iT[2] * a[3][2] + iT[9] * a[6][3] + iT[15] * a[8][2] + iT[22] * a[9][3] + iT[3] * a[3][3] + iT[8] * a[6][2] + iT[16] * a[8][3] + iT[21] * a[9][2] + iT[4] * a[3][4] + iT[7] * a[6][1] + iT[17] * a[8][4] + iT[20] * a[9][1] + iT[5] * a[3][5] + iT[6] * a[6][0] + iT[18] * a[8][5] + iT[19] * a[9][0] + t[0] + add) >> shift);
+    dst[ 1] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[5][2] - iT[11] * a[0][3] - iT[13] * a[4][2] - iT[24] * a[6][2] - iT[1] * a[9][1] - iT[10] * a[8][4] - iT[14] * a[3][4] - iT[23] * a[6][1] - iT[2] * a[0][0] + iT[9] * a[5][5] - iT[15] * a[6][5] - iT[22] * a[4][5] + iT[3] * a[5][3] - iT[8] * a[0][2] - iT[16] * a[4][3] - iT[21] * a[6][3] - iT[4] * a[9][0] - iT[7] * a[8][5] - iT[17] * a[3][5] - iT[20] * a[6][0] - iT[5] * a[0][1] + iT[6] * a[5][4] - iT[18] * a[6][4] - iT[19] * a[4][4] + t[1] + add) >> shift);
+    dst[ 3] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[9][4] + iT[11] * a[5][4] - iT[13] * a[2][1] + iT[24] * a[7][1] + iT[1] * a[0][3] + iT[10] * a[1][3] - iT[14] * a[3][3] - iT[23] * a[2][3] - iT[2] * a[8][5] - iT[9] * a[9][0] - iT[15] * a[6][0] - iT[22] * a[3][5] + iT[3] * a[1][4] + iT[8] * a[0][4] - iT[16] * a[2][4] - iT[21] * a[3][4] + iT[4] * a[5][3] + iT[7] * a[9][3] + iT[17] * a[7][2] - iT[20] * a[2][2] - iT[5] * a[8][0] - iT[6] * a[1][0] + iT[18] * a[4][5] + iT[19] * a[7][0] - t[1] + add) >> shift);
+    dst[ 4] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[0] * a[3][2] - iT[11] * a[2][2] + iT[13] * a[1][2] + iT[24] * a[0][2] + iT[1] * a[6][0] + iT[10] * a[3][5] + iT[14] * a[9][0] + iT[23] * a[8][5] - iT[2] * a[2][3] - iT[9] * a[3][3] + iT[15] * a[0][3] + iT[22] * a[1][3] - iT[3] * a[7][0] + iT[8] * a[2][0] - iT[16] * a[9][5] - iT[21] * a[5][5] + iT[4] * a[4][4] + iT[7] * a[6][4] + iT[17] * a[0][1] - iT[20] * a[5][4] - iT[5] * a[7][4] - iT[6] * a[4][1] + iT[18] * a[8][4] + iT[19] * a[1][4] - t[0] + add) >> shift);
+    dst[ 5] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[3][5] + iT[11] * a[6][0] + iT[13] * a[8][5] + iT[24] * a[9][0] - iT[1] * a[6][5] - iT[10] * a[3][0] - iT[14] * a[9][5] - iT[23] * a[8][0] + iT[2] * a[7][4] - iT[9] * a[2][4] + iT[15] * a[9][1] + iT[22] * a[5][1] + iT[3] * a[7][1] + iT[8] * a[4][4] - iT[16] * a[8][1] - iT[21] * a[1][1] - iT[4] * a[6][2] - iT[7] * a[4][2] + iT[17] * a[5][2] - iT[20] * a[0][3] + iT[5] * a[3][2] + iT[6] * a[2][2] - iT[18] * a[1][2] - iT[19] * a[0][2] - t[0] + add) >> shift);
+    dst[ 8] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[9][3] + iT[11] * a[8][2] + iT[13] * a[3][2] + iT[24] * a[6][3] + iT[1] * a[1][5] + iT[10] * a[0][5] - iT[14] * a[2][5] - iT[23] * a[3][5] - iT[2] * a[1][3] - iT[9] * a[8][3] + iT[15] * a[7][3] + iT[22] * a[4][2] - iT[3] * a[9][5] - iT[8] * a[5][5] + iT[16] * a[2][0] - iT[21] * a[7][0] - iT[4] * a[1][1] - iT[7] * a[0][1] + iT[17] * a[2][1] + iT[20] * a[3][1] + iT[5] * a[5][1] + iT[6] * a[9][1] + iT[18] * a[7][4] - iT[19] * a[2][4] + t[1] + add) >> shift);
+    dst[ 9] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[2][1] + iT[11] * a[3][1] - iT[13] * a[0][1] - iT[24] * a[1][1] - iT[1] * a[7][3] + iT[10] * a[2][3] - iT[14] * a[9][2] - iT[23] * a[5][2] - iT[2] * a[4][0] - iT[9] * a[7][5] + iT[15] * a[1][5] + iT[22] * a[8][5] - iT[3] * a[3][4] - iT[8] * a[2][4] + iT[16] * a[1][4] + iT[21] * a[0][4] - iT[4] * a[6][3] - iT[7] * a[3][2] - iT[17] * a[9][3] - iT[20] * a[8][2] - iT[5] * a[4][5] - iT[6] * a[6][5] - iT[18] * a[0][0] + iT[19] * a[5][5] + t[0] + add) >> shift);
+    dst[10] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[0] * a[6][1] - iT[11] * a[4][1] + iT[13] * a[5][1] - iT[24] * a[0][4] + iT[1] * a[2][2] - iT[10] * a[7][2] - iT[14] * a[5][3] - iT[23] * a[9][3] + iT[2] * a[6][4] + iT[9] * a[4][4] - iT[15] * a[5][4] + iT[22] * a[0][1] - iT[3] * a[2][5] + iT[8] * a[7][5] + iT[16] * a[5][0] + iT[21] * a[9][0] - iT[4] * a[7][0] - iT[7] * a[4][5] + iT[17] * a[8][0] + iT[20] * a[1][0] + iT[5] * a[4][2] + iT[6] * a[7][3] - iT[18] * a[1][3] - iT[19] * a[8][3] + t[0] + add) >> shift);
+    dst[11] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[0] * a[1][3] - iT[11] * a[0][3] + iT[13] * a[2][3] + iT[24] * a[3][3] - iT[1] * a[9][1] - iT[10] * a[5][1] + iT[14] * a[2][4] - iT[23] * a[7][4] - iT[2] * a[8][0] - iT[9] * a[9][5] - iT[15] * a[6][5] - iT[22] * a[3][0] + iT[3] * a[0][2] - iT[8] * a[5][3] + iT[16] * a[6][3] + iT[21] * a[4][3] + iT[4] * a[5][0] - iT[7] * a[0][5] - iT[17] * a[4][0] - iT[20] * a[6][0] + iT[5] * a[9][4] + iT[6] * a[5][4] - iT[18] * a[2][1] + iT[19] * a[7][1] + t[1] + add) >> shift);
+    dst[13] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[0][0] + iT[11] * a[1][0] - iT[13] * a[3][0] - iT[24] * a[2][0] + iT[1] * a[5][4] - iT[10] * a[0][1] - iT[14] * a[4][4] - iT[23] * a[6][4] - iT[2] * a[9][3] - iT[9] * a[5][3] + iT[15] * a[2][2] - iT[22] * a[7][2] + iT[3] * a[8][3] + iT[8] * a[9][2] + iT[16] * a[6][2] + iT[21] * a[3][3] - iT[4] * a[1][4] - iT[7] * a[8][4] + iT[17] * a[7][4] + iT[20] * a[4][1] + iT[5] * a[0][5] + iT[6] * a[1][5] - iT[18] * a[3][5] - iT[19] * a[2][5] - t[1] + add) >> shift);
+    dst[14] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[4][2] + iT[11] * a[7][3] - iT[13] * a[1][3] - iT[24] * a[8][3] + iT[1] * a[4][1] + iT[10] * a[6][1] + iT[14] * a[0][4] - iT[23] * a[5][1] - iT[2] * a[3][0] - iT[9] * a[2][0] + iT[15] * a[1][0] + iT[22] * a[0][0] - iT[3] * a[6][3] - iT[8] * a[4][3] + iT[16] * a[5][3] - iT[21] * a[0][2] - iT[4] * a[7][5] - iT[7] * a[4][0] + iT[17] * a[8][5] + iT[20] * a[1][5] + iT[5] * a[6][4] + iT[6] * a[3][1] + iT[18] * a[9][4] + iT[19] * a[8][1] - t[0] + add) >> shift);
+    dst[15] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[7][4] + iT[11] * a[4][1] - iT[13] * a[8][4] - iT[24] * a[1][4] - iT[1] * a[2][2] - iT[10] * a[3][2] + iT[14] * a[0][2] + iT[23] * a[1][2] - iT[2] * a[2][1] + iT[9] * a[7][1] + iT[15] * a[5][4] + iT[22] * a[9][4] + iT[3] * a[7][5] - iT[8] * a[2][5] + iT[16] * a[9][0] + iT[21] * a[5][0] + iT[4] * a[2][0] + iT[7] * a[3][0] - iT[17] * a[0][0] - iT[20] * a[1][0] + iT[5] * a[2][3] - iT[6] * a[7][3] - iT[18] * a[5][2] - iT[19] * a[9][2] - t[0] + add) >> shift);
+    dst[16] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[0] * a[0][1] + iT[11] * a[5][4] - iT[13] * a[6][4] - iT[24] * a[4][4] + iT[1] * a[0][3] - iT[10] * a[5][2] + iT[14] * a[6][2] + iT[23] * a[4][2] - iT[2] * a[0][5] + iT[9] * a[5][0] - iT[15] * a[6][0] - iT[22] * a[4][0] - iT[3] * a[0][4] - iT[8] * a[1][4] + iT[16] * a[3][4] + iT[21] * a[2][4] + iT[4] * a[0][2] + iT[7] * a[1][2] - iT[17] * a[3][2] - iT[20] * a[2][2] - iT[5] * a[0][0] - iT[6] * a[1][0] + iT[18] * a[3][0] + iT[19] * a[2][0] - t[1] + add) >> shift);
+    dst[18] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[0][5] + iT[11] * a[1][5] - iT[13] * a[3][5] - iT[24] * a[2][5] - iT[1] * a[1][0] - iT[10] * a[0][0] + iT[14] * a[2][0] + iT[23] * a[3][0] - iT[2] * a[5][1] + iT[9] * a[0][4] + iT[15] * a[4][1] + iT[22] * a[6][1] - iT[3] * a[8][1] - iT[8] * a[1][1] + iT[16] * a[4][4] + iT[21] * a[7][1] - iT[4] * a[9][2] - iT[7] * a[5][2] + iT[17] * a[2][3] - iT[20] * a[7][3] - iT[5] * a[9][3] - iT[6] * a[8][2] - iT[18] * a[3][2] - iT[19] * a[6][3] + t[1] + add) >> shift);
+    dst[20] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[0] * a[4][0] - iT[11] * a[6][0] - iT[13] * a[0][5] + iT[24] * a[5][0] + iT[1] * a[6][5] + iT[10] * a[4][5] - iT[14] * a[5][5] + iT[23] * a[0][0] - iT[2] * a[6][1] - iT[9] * a[3][4] - iT[15] * a[9][1] - iT[22] * a[8][4] + iT[3] * a[4][4] + iT[8] * a[7][1] - iT[16] * a[1][1] - iT[21] * a[8][1] - iT[4] * a[3][3] - iT[7] * a[2][3] + iT[17] * a[1][3] + iT[20] * a[0][3] + iT[5] * a[7][2] - iT[6] * a[2][2] + iT[18] * a[9][3] + iT[19] * a[5][3] + t[0] + add) >> shift);
+    dst[21] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[1][2] + iT[11] * a[8][2] - iT[13] * a[7][2] - iT[24] * a[4][3] + iT[1] * a[1][5] + iT[10] * a[8][5] - iT[14] * a[7][5] - iT[23] * a[4][0] + iT[2] * a[5][2] + iT[9] * a[9][2] + iT[15] * a[7][3] - iT[22] * a[2][3] + iT[3] * a[5][5] + iT[8] * a[9][5] + iT[16] * a[7][0] - iT[21] * a[2][0] + iT[4] * a[8][1] + iT[7] * a[9][4] + iT[17] * a[6][4] + iT[20] * a[3][1] + iT[5] * a[8][4] + iT[6] * a[9][1] + iT[18] * a[6][1] + iT[19] * a[3][4] + t[1] + add) >> shift);
+    dst[23] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[8][4] + iT[11] * a[9][1] + iT[13] * a[6][1] + iT[24] * a[3][4] - iT[1] * a[8][2] - iT[10] * a[1][2] + iT[14] * a[4][3] + iT[23] * a[7][2] - iT[2] * a[0][1] - iT[9] * a[1][1] + iT[15] * a[3][1] + iT[22] * a[2][1] + iT[3] * a[5][0] + iT[8] * a[9][0] + iT[16] * a[7][5] - iT[21] * a[2][5] - iT[4] * a[9][5] - iT[7] * a[8][0] - iT[17] * a[3][0] - iT[20] * a[6][5] + iT[5] * a[5][2] - iT[6] * a[0][3] - iT[18] * a[4][2] - iT[19] * a[6][2] - t[1] + add) >> shift);
+    dst[24] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[0] * a[2][3] + iT[11] * a[7][3] + iT[13] * a[5][2] + iT[24] * a[9][2] + iT[1] * a[4][1] + iT[10] * a[7][4] - iT[14] * a[1][4] - iT[23] * a[8][4] - iT[2] * a[4][5] - iT[9] * a[7][0] + iT[15] * a[1][0] + iT[22] * a[8][0] + iT[3] * a[4][3] + iT[8] * a[6][3] + iT[16] * a[0][2] - iT[21] * a[5][3] - iT[4] * a[2][5] - iT[7] * a[3][5] + iT[17] * a[0][5] + iT[20] * a[1][5] + iT[5] * a[2][1] + iT[6] * a[3][1] - iT[18] * a[0][1] - iT[19] * a[1][1] - t[0] + add) >> shift);
+    dst[25] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[0] * a[4][5] - iT[11] * a[6][5] - iT[13] * a[0][0] + iT[24] * a[5][5] - iT[1] * a[3][1] - iT[10] * a[2][1] + iT[14] * a[1][1] + iT[23] * a[0][1] + iT[2] * a[7][2] + iT[9] * a[4][3] - iT[15] * a[8][2] - iT[22] * a[1][2] + iT[3] * a[6][2] + iT[8] * a[3][3] + iT[16] * a[9][2] + iT[21] * a[8][3] + iT[4] * a[2][4] - iT[7] * a[7][4] - iT[17] * a[5][1] - iT[20] * a[9][1] - iT[5] * a[4][0] - iT[6] * a[6][0] - iT[18] * a[0][5] + iT[19] * a[5][0] - t[0] + add) >> shift);
+    dst[26] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[8][0] + iT[11] * a[1][0] - iT[13] * a[4][5] - iT[24] * a[7][0] + iT[1] * a[5][4] + iT[10] * a[9][4] + iT[14] * a[7][1] - iT[23] * a[2][1] - iT[2] * a[1][2] - iT[9] * a[0][2] + iT[15] * a[2][2] + iT[22] * a[3][2] - iT[3] * a[9][2] - iT[8] * a[8][3] - iT[16] * a[3][3] - iT[21] * a[6][2] + iT[4] * a[0][4] - iT[7] * a[5][1] + iT[17] * a[6][1] + iT[20] * a[4][1] + iT[5] * a[8][5] + iT[6] * a[1][5] - iT[18] * a[4][0] - iT[19] * a[7][5] - t[1] + add) >> shift);
+    dst[28] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[0] * a[5][1] - iT[11] * a[9][1] - iT[13] * a[7][4] + iT[24] * a[2][4] + iT[1] * a[8][2] + iT[10] * a[9][3] + iT[14] * a[6][3] + iT[23] * a[3][2] - iT[2] * a[9][4] - iT[9] * a[8][1] - iT[15] * a[3][1] - iT[22] * a[6][4] + iT[3] * a[9][0] + iT[8] * a[5][0] - iT[16] * a[2][5] + iT[21] * a[7][5] - iT[4] * a[5][5] + iT[7] * a[0][0] + iT[17] * a[4][5] + iT[20] * a[6][5] + iT[5] * a[1][3] + iT[6] * a[0][3] - iT[18] * a[2][3] - iT[19] * a[3][3] + t[1] + add) >> shift);
+    dst[29] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[6][4] + iT[11] * a[3][1] + iT[13] * a[9][4] + iT[24] * a[8][1] - iT[1] * a[7][3] - iT[10] * a[4][2] + iT[14] * a[8][3] + iT[23] * a[1][3] - iT[2] * a[3][5] - iT[9] * a[2][5] + iT[15] * a[1][5] + iT[22] * a[0][5] + iT[3] * a[2][4] + iT[8] * a[3][4] - iT[16] * a[0][4] - iT[21] * a[1][4] + iT[4] * a[4][3] + iT[7] * a[7][2] - iT[17] * a[1][2] - iT[20] * a[8][2] - iT[5] * a[3][0] - iT[6] * a[6][5] - iT[18] * a[8][0] - iT[19] * a[9][5] + t[0] + add) >> shift);
+    dst[30] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[0] * a[7][2] + iT[11] * a[2][2] - iT[13] * a[9][3] - iT[24] * a[5][3] - iT[1] * a[6][0] - iT[10] * a[4][0] + iT[14] * a[5][0] - iT[23] * a[0][5] - iT[2] * a[4][2] - iT[9] * a[6][2] - iT[15] * a[0][3] + iT[22] * a[5][2] + iT[3] * a[2][0] - iT[8] * a[7][0] - iT[16] * a[5][5] - iT[21] * a[9][5] + iT[4] * a[7][1] - iT[7] * a[2][1] + iT[17] * a[9][4] + iT[20] * a[5][4] + iT[5] * a[6][1] + iT[6] * a[4][1] - iT[18] * a[5][1] + iT[19] * a[0][4] + t[0] + add) >> shift);
+    dst[31] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[0] * a[8][5] + iT[11] * a[1][5] - iT[13] * a[4][0] - iT[24] * a[7][5] - iT[1] * a[1][0] - iT[10] * a[8][0] + iT[14] * a[7][0] + iT[23] * a[4][5] - iT[2] * a[8][4] - iT[9] * a[1][4] + iT[15] * a[4][1] + iT[22] * a[7][4] + iT[3] * a[1][1] + iT[8] * a[8][1] - iT[16] * a[7][1] - iT[21] * a[4][4] + iT[4] * a[8][3] + iT[7] * a[1][3] - iT[17] * a[4][2] - iT[20] * a[7][3] - iT[5] * a[1][2] - iT[6] * a[8][2] + iT[18] * a[7][2] + iT[19] * a[4][3] + t[1] + add) >> shift);
+
+    dst[ 2] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[ 4] * b[0] + iT[ 9] * b[1] + iT[14] * b[2] + iT[19] * b[3] + iT[24] * b[4] + iT[29] * b[5] + add) >> shift);
+    dst[ 7] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[14] * b[0] - iT[29] * b[1] - iT[19] * b[2] - iT[ 4] * b[3] + iT[ 9] * b[4] + iT[24] * b[5] + add) >> shift);
+    dst[12] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[24] * b[0] + iT[14] * b[1] - iT[ 9] * b[2] - iT[29] * b[3] - iT[ 4] * b[4] + iT[19] * b[5] + add) >> shift);
+    dst[17] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[29] * b[0] + iT[ 4] * b[1] + iT[24] * b[2] - iT[ 9] * b[3] - iT[19] * b[4] + iT[14] * b[5] + add) >> shift);
+    dst[22] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[19] * b[0] - iT[24] * b[1] + iT[ 4] * b[2] + iT[14] * b[3] - iT[29] * b[4] + iT[ 9] * b[5] + add) >> shift);
+    dst[27] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[ 9] * b[0] + iT[19] * b[1] - iT[29] * b[2] + iT[24] * b[3] - iT[14] * b[4] + iT[ 4] * b[5] + add) >> shift);
+
+    dst[ 6] = Clip3<TCoeff>(outputMinimum, outputMaximum, (   iT[12] * c[0] + iT[25] * c[1] + add) >> shift);
+    dst[19] = Clip3<TCoeff>(outputMinimum, outputMaximum, ( - iT[25] * c[0] + iT[12] * c[1] + add) >> shift);
+#else
     dst[ 0] = Clip3(outputMinimum, outputMaximum, (int)(   iT[0] * a[3][0] + iT[11] * a[6][5] + iT[13] * a[8][0] + iT[24] * a[9][5] + iT[1] * a[3][1] + iT[10] * a[6][4] + iT[14] * a[8][1] + iT[23] * a[9][4] + iT[2] * a[3][2] + iT[9] * a[6][3] + iT[15] * a[8][2] + iT[22] * a[9][3] + iT[3] * a[3][3] + iT[8] * a[6][2] + iT[16] * a[8][3] + iT[21] * a[9][2] + iT[4] * a[3][4] + iT[7] * a[6][1] + iT[17] * a[8][4] + iT[20] * a[9][1] + iT[5] * a[3][5] + iT[6] * a[6][0] + iT[18] * a[8][5] + iT[19] * a[9][0] + t[0] + add) >> shift);
     dst[ 1] = Clip3(outputMinimum, outputMaximum, (int)(   iT[0] * a[5][2] - iT[11] * a[0][3] - iT[13] * a[4][2] - iT[24] * a[6][2] - iT[1] * a[9][1] - iT[10] * a[8][4] - iT[14] * a[3][4] - iT[23] * a[6][1] - iT[2] * a[0][0] + iT[9] * a[5][5] - iT[15] * a[6][5] - iT[22] * a[4][5] + iT[3] * a[5][3] - iT[8] * a[0][2] - iT[16] * a[4][3] - iT[21] * a[6][3] - iT[4] * a[9][0] - iT[7] * a[8][5] - iT[17] * a[3][5] - iT[20] * a[6][0] - iT[5] * a[0][1] + iT[6] * a[5][4] - iT[18] * a[6][4] - iT[19] * a[4][4] + t[1] + add) >> shift);
     dst[ 3] = Clip3(outputMinimum, outputMaximum, (int)(   iT[0] * a[9][4] + iT[11] * a[5][4] - iT[13] * a[2][1] + iT[24] * a[7][1] + iT[1] * a[0][3] + iT[10] * a[1][3] - iT[14] * a[3][3] - iT[23] * a[2][3] - iT[2] * a[8][5] - iT[9] * a[9][0] - iT[15] * a[6][0] - iT[22] * a[3][5] + iT[3] * a[1][4] + iT[8] * a[0][4] - iT[16] * a[2][4] - iT[21] * a[3][4] + iT[4] * a[5][3] + iT[7] * a[9][3] + iT[17] * a[7][2] - iT[20] * a[2][2] - iT[5] * a[8][0] - iT[6] * a[1][0] + iT[18] * a[4][5] + iT[19] * a[7][0] - t[1] + add) >> shift);
@@ -1638,6 +1864,7 @@ void fastInverseDCT8_B32(const TCoeff *src, TCoeff *dst, int shift, int line, in
     dst[ 6] = Clip3(outputMinimum, outputMaximum, (int)(   iT[12] * c[0] + iT[25] * c[1] + add) >> shift);
     dst[19] = Clip3(outputMinimum, outputMaximum, (int)( - iT[25] * c[0] + iT[12] * c[1] + add) >> shift);
 
+#endif
     src++;
     dst += 32;
   }
diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h
index c82f1006d..c455aec1a 100644
--- a/source/Lib/CommonLib/TypeDef.h
+++ b/source/Lib/CommonLib/TypeDef.h
@@ -56,6 +56,9 @@
 
 #define RETRAIN_CABAC                                     1 // CABAC initial values retrained on VTM-9.0rc1
 
+#define JVET_R0351_HIGH_BIT_DEPTH_SUPPORT                 1 // JVET-R0351: high bit depth coding support (syntax changes, no mathematical differences for CTCs)
+#define JVET_R0351_HIGH_BIT_DEPTH_ENABLED                 0 // JVET-R0351: high bit depth coding enabled (increases the accuracy of some calculations, e.g. transforms); when 1, RExt__HIGH_BIT_DEPTH_SUPPORT defaults to 1 below
+
 #define JVET_R0058                                        1 // JVET-R0058: the combination of RPR, subpictures, and scalability
 
 #define JVET_R0185_OLS_DPB_CLEANUP                        1 // JVET-R0185: Replace if( !vps_all_independent_layers_flag ) condition on vps_num_dpb_params syntax element with if(!each_layer_is_an_ols_flag)
@@ -327,8 +330,12 @@ typedef std::pair<int, int>  TrCost;
 
 // This can be enabled by the makefile
 #ifndef RExt__HIGH_BIT_DEPTH_SUPPORT
+#if JVET_R0351_HIGH_BIT_DEPTH_ENABLED
+#define RExt__HIGH_BIT_DEPTH_SUPPORT                      1 ///< defaults to 1 when JVET_R0351_HIGH_BIT_DEPTH_ENABLED is set: 1 = use larger data types to allow for up to 16-bit video (originally developed as part of N0188), 0 = use data type definitions for 8-10 bit video
+#else
 #define RExt__HIGH_BIT_DEPTH_SUPPORT                      0 ///< 0 (default) use data type definitions for 8-10 bit video, 1 = use larger data types to allow for up to 16-bit video (originally developed as part of N0188)
 #endif
+#endif
 
 // SIMD optimizations
 #define SIMD_ENABLE                                       1
diff --git a/source/Lib/CommonLib/WeightPrediction.cpp b/source/Lib/CommonLib/WeightPrediction.cpp
index cf20eb209..cab07bc6d 100644
--- a/source/Lib/CommonLib/WeightPrediction.cpp
+++ b/source/Lib/CommonLib/WeightPrediction.cpp
@@ -186,7 +186,11 @@ void WeightPrediction::addWeightBi(const CPelUnitBuf          &pcYuvSrc0,
     const int  w0       = wp0[compID].w;
     const int  offset   = wp0[compID].offset;
     const int  clipBD   = clpRng.bd;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    const int shiftNum = IF_INTERNAL_FRAC_BITS(clipBD);
+#else
     const int  shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipBD));
+#endif
     const int  shift    = wp0[compID].shift + shiftNum;
     const int  round    = (enableRounding[compID] && (shift > 0)) ? (1 << (shift - 1)) : 0;
     const int  w1       = wp1[compID].w;
@@ -243,7 +247,11 @@ void WeightPrediction::addWeightBiComponent(const CPelUnitBuf          &pcYuvSrc
   const int  w0       = wp0[compID].w;
   const int  offset   = wp0[compID].offset;
   const int  clipBD   = clpRng.bd;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const int shiftNum = IF_INTERNAL_FRAC_BITS(clipBD);
+#else
   const int  shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipBD));
+#endif
   const int  shift    = wp0[compID].shift + shiftNum;
   const int  round    = (enableRounding[compID] && (shift > 0)) ? (1 << (shift - 1)) : 0;
   const int  w1       = wp1[compID].w;
@@ -304,7 +312,11 @@ void  WeightPrediction::addWeightUni(const CPelUnitBuf          &pcYuvSrc0,
     const int  w0           = wp0[compID].w;
     const int  offset       = wp0[compID].offset;
     const int  clipBD       = clpRng.bd;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    const int shiftNum      = IF_INTERNAL_FRAC_BITS(clipBD);
+#else
     const int  shiftNum     = std::max<int>(2, (IF_INTERNAL_PREC - clipBD));
+#endif
     const int  shift        = wp0[compID].shift + shiftNum;
     const uint32_t iSrc0Stride  = pcYuvSrc0.bufs[compID].stride;
     const uint32_t iDstStride   = rpcYuvDst.bufs[compID].stride;
diff --git a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
index 12c44a2f8..447d02b5c 100644
--- a/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
+++ b/source/Lib/CommonLib/x86/AdaptiveLoopFilterX86.h
@@ -297,7 +297,11 @@ static void simdDeriveClassificationBlk(AlfClassifier **classifier, int **laplac
 template<X86_VEXT vext>
 static void simdFilter5x5Blk(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
   const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const Pel *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+#else
   const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+#endif
   int vbPos)
 
 {
@@ -484,7 +488,11 @@ static const uint16_t shuffleTab[4][2][8] = {
 template<X86_VEXT vext>
 static void simdFilter7x7Blk(AlfClassifier **classifier, const PelUnitBuf &recDst, const CPelUnitBuf &recSrc,
   const Area &blkDst, const Area &blk, const ComponentID compId, const short *filterSet,
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const Pel *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+#else
   const short *fClipSet, const ClpRng &clpRng, CodingStructure &cs, const int vbCTUHeight,
+#endif
   int vbPos)
 {
   CHECK((vbCTUHeight & (vbCTUHeight - 1)) != 0, "vbCTUHeight must be a power of 2");
diff --git a/source/Lib/CommonLib/x86/InterpolationFilterX86.h b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
index c4e093f85..6969ebcea 100644
--- a/source/Lib/CommonLib/x86/InterpolationFilterX86.h
+++ b/source/Lib/CommonLib/x86/InterpolationFilterX86.h
@@ -68,7 +68,11 @@ static void fullPelCopySSE( const ClpRng& clpRng, const void*_src, int srcStride
 {
   Tsrc* src = (Tsrc*)_src;
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  int headroom = IF_INTERNAL_FRAC_BITS(clpRng.bd);
+#else
   int headroom = IF_INTERNAL_PREC - clpRng.bd;
+#endif
   int headroom_offset = 1 << ( headroom - 1 );
   int offset   = IF_INTERNAL_OFFS;
   __m128i voffset  = _mm_set1_epi16( offset );
@@ -131,7 +135,11 @@ static void fullPelCopyAVX2( const ClpRng& clpRng, const void*_src, int srcStrid
 #ifdef USE_AVX2
   Tsrc* src = (Tsrc*)_src;
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  int headroom = IF_INTERNAL_FRAC_BITS(clpRng.bd);
+#else
   int headroom = IF_INTERNAL_PREC - clpRng.bd;
+#endif
   int offset   = 1 << ( headroom - 1 );
   int internal_offset = IF_INTERNAL_OFFS;
 
@@ -1184,7 +1192,11 @@ static void simdFilter( const ClpRng& clpRng, Pel const *src, int srcStride, Pel
   src -= ( N/2 - 1 ) * cStride;
 
   int offset;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  int headRoom = IF_INTERNAL_FRAC_BITS(clpRng.bd);
+#else
   int headRoom = std::max<int>( 2, ( IF_INTERNAL_PREC - clpRng.bd ) );
+#endif
   int shift    = IF_FILTER_PREC;
   // with the current settings (IF_INTERNAL_PREC = 14 and IF_FILTER_PREC = 6), though headroom can be
   // negative for bit depths greater than 14, shift will remain non-negative for bit depths of 8->20
@@ -1339,7 +1351,11 @@ void xWeightedGeoBlk_SSE(const PredictionUnit &pu, const uint32_t width, const u
 
   const char    log2WeightBase = 3;
   const ClpRng  clpRng = pu.cu->slice->clpRngs().comp[compIdx];
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const int32_t shiftWeighted = IF_INTERNAL_FRAC_BITS(clpRng.bd) + log2WeightBase;
+#else
   const int32_t shiftWeighted = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)) + log2WeightBase;
+#endif
   const int32_t offsetWeighted = (1 << (shiftWeighted - 1)) + (IF_INTERNAL_OFFS << log2WeightBase);
 
   int16_t wIdx = floorLog2(pu.lwidth()) - GEO_MIN_CU_LOG2;
diff --git a/source/Lib/DecoderLib/CABACReader.cpp b/source/Lib/DecoderLib/CABACReader.cpp
index 248ca9e3d..c972b82d2 100644
--- a/source/Lib/DecoderLib/CABACReader.cpp
+++ b/source/Lib/DecoderLib/CABACReader.cpp
@@ -3185,10 +3185,19 @@ int CABACReader::last_sig_coeff( CoeffCodingContext& cctx, TransformUnit& tu, Co
   return scanPos;
 }
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+static void check_coeff_conformance(const CoeffCodingContext& cctx, const TCoeff coeff) // range limits are queried from cctx instead of the fixed COEFF_MIN/COEFF_MAX used in the #else branch
+#else
 static void check_coeff_conformance(TCoeff coeff)
+#endif
 {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  CHECK( coeff < cctx.minCoeff() || coeff > cctx.maxCoeff(), // condition is true when coeff lies outside [cctx.minCoeff(), cctx.maxCoeff()]
+         "TransCoeffLevel outside allowable range" );
+#else
   CHECK( coeff < COEFF_MIN || coeff > COEFF_MAX,
          "TransCoeffLevel should be in the range [-32768, 32767]" );
+#endif
 }
 
 void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* coeff, const int stateTransTable, int& state )
@@ -3339,7 +3348,11 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co
     sumAbs               += AbsCoeff;
     coeff[ sigBlkPos[k] ] = ( signPattern & ( 1u << 31 ) ? -AbsCoeff : AbsCoeff );
     signPattern         <<= 1;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    check_coeff_conformance( cctx, coeff[ sigBlkPos[k] ] );
+#else
     check_coeff_conformance( coeff[ sigBlkPos[k] ] );
+#endif
   }
   if( numNonZero > numSigns )
   {
@@ -3347,7 +3360,11 @@ void CABACReader::residual_coding_subblock( CoeffCodingContext& cctx, TCoeff* co
     int AbsCoeff          = coeff[ sigBlkPos[ k ] ];
     sumAbs               += AbsCoeff;
     coeff[ sigBlkPos[k] ] = ( sumAbs & 1 ? -AbsCoeff : AbsCoeff );
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    check_coeff_conformance( cctx, coeff[ sigBlkPos[k] ] );
+#else
     check_coeff_conformance( coeff[ sigBlkPos[k] ] );
+#endif
   }
 }
 
@@ -3464,7 +3481,11 @@ void CABACReader::residual_coding_subblockTS( CoeffCodingContext& cctx, TCoeff*
           DTRACE( g_trace_ctx, D_SYNTAX_RESI, "ts_par_flag() bin=%d ctx=%d\n", parFlag, cctx.parityCtxIdAbsTS() );
           cctx.decimateNumCtxBins(1);
       }
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      coeff[ blkPos ] = (sign ? -1 : 1 ) * (TCoeff)(1 + parFlag + gt1Flag);
+#else
       coeff[ blkPos ] = (sign ? -1 : 1 ) * (1 + parFlag + gt1Flag);
+#endif
     }
     lastScanPosPass1 = nextSigPos;
   }
@@ -3538,7 +3559,11 @@ void CABACReader::residual_coding_subblockTS( CoeffCodingContext& cctx, TCoeff*
     int AbsCoeff          = coeff[ sigBlkPos[ k ] ];
     coeff[ sigBlkPos[k] ] = ( signPattern & 1 ? -AbsCoeff : AbsCoeff );
     signPattern         >>= 1;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+    check_coeff_conformance( cctx, coeff[ sigBlkPos[k] ] );
+#else
     check_coeff_conformance( coeff[ sigBlkPos[k] ] );
+#endif
   }
 }
 
diff --git a/source/Lib/DecoderLib/VLCReader.cpp b/source/Lib/DecoderLib/VLCReader.cpp
index 518bbb73d..d7d34965b 100644
--- a/source/Lib/DecoderLib/VLCReader.cpp
+++ b/source/Lib/DecoderLib/VLCReader.cpp
@@ -5614,7 +5614,11 @@ void HLSyntaxReader::alfFilter( AlfParam& alfParam, const bool isChroma, const i
   AlfFilterShape alfShape( isChroma ? 5 : 7 );
   const int numFilters = isChroma ? 1 : alfParam.numLumaFilters;
   short* coeff = isChroma ? alfParam.chromaCoeff[altIdx] : alfParam.lumaCoeff;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  Pel*   clipp = isChroma ? alfParam.chromaClipp[altIdx] : alfParam.lumaClipp;
+#else
   short* clipp = isChroma ? alfParam.chromaClipp[altIdx] : alfParam.lumaClipp;
+#endif
 
 
   // Filter coefficients
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
index f8021943d..8fadc3b33 100644
--- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
+++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.cpp
@@ -2255,7 +2255,11 @@ void EncAdaptiveLoopFilter::getBlkStats(AlfCovariance* alfCovariance, const AlfF
 
 
 {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  Pel ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues];
+#else
   int ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues];
+#endif
 
   const int numBins = AlfNumClippingValues[channel];
   int transposeIdx = 0;
@@ -2283,7 +2287,11 @@ void EncAdaptiveLoopFilter::getBlkStats(AlfCovariance* alfCovariance, const AlfF
       {
         weight = m_lumaLevelToWeightPLUT[org[j]];
       }
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      Intermediate_Int yLocal = org[j] - rec[j];
+#else
       int yLocal = org[j] - rec[j];
+#endif
       calcCovariance(ELocal, rec + j, recStride, shape, transposeIdx, channel, vbDistance);
       for( int k = 0; k < shape.numCoeff; k++ )
       {
@@ -2295,11 +2303,19 @@ void EncAdaptiveLoopFilter::getBlkStats(AlfCovariance* alfCovariance, const AlfF
             {
               if (m_alfWSSD)
               {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+                alfCovariance[classIdx].E[b0][b1][k][l] += weight * (ELocal[k][b0] * (double)ELocal[l][b1]);
+#else
                 alfCovariance[classIdx].E[b0][b1][k][l] += weight * (double)(ELocal[k][b0] * ELocal[l][b1]);
+#endif
               }
               else
               {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+                alfCovariance[classIdx].E[b0][b1][k][l] += ELocal[k][b0] * (double)ELocal[l][b1];
+#else
                 alfCovariance[classIdx].E[b0][b1][k][l] += ELocal[k][b0] * ELocal[l][b1];
+#endif
               }
             }
           }
@@ -2308,21 +2324,37 @@ void EncAdaptiveLoopFilter::getBlkStats(AlfCovariance* alfCovariance, const AlfF
         {
           if (m_alfWSSD)
           {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+            alfCovariance[classIdx].y[b][k] += weight * (ELocal[k][b] * (double)yLocal);
+#else
             alfCovariance[classIdx].y[b][k] += weight * (double)(ELocal[k][b] * yLocal);
+#endif
           }
           else
           {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+            alfCovariance[classIdx].y[b][k] += ELocal[k][b] * (double)yLocal;
+#else
             alfCovariance[classIdx].y[b][k] += ELocal[k][b] * yLocal;
+#endif
           }
         }
       }
       if (m_alfWSSD)
       {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+        alfCovariance[classIdx].pixAcc += weight * (yLocal * (double)yLocal);
+#else
         alfCovariance[classIdx].pixAcc += weight * (double)(yLocal * yLocal);
+#endif
       }
       else
       {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+        alfCovariance[classIdx].pixAcc += yLocal * (double)yLocal;
+#else
         alfCovariance[classIdx].pixAcc += yLocal * yLocal;
+#endif
       }
     }
     org += orgStride;
@@ -2348,7 +2380,11 @@ void EncAdaptiveLoopFilter::getBlkStats(AlfCovariance* alfCovariance, const AlfF
   }
 }
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+void EncAdaptiveLoopFilter::calcCovariance(Pel ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues], const Pel *rec, const int stride, const AlfFilterShape& shape, const int transposeIdx, const ChannelType channel, int vbDistance)
+#else
 void EncAdaptiveLoopFilter::calcCovariance(int ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues], const Pel *rec, const int stride, const AlfFilterShape& shape, const int transposeIdx, const ChannelType channel, int vbDistance)
+#endif
 {
   int clipTopRow = -4;
   int clipBotRow = 4;
@@ -2369,7 +2405,11 @@ void EncAdaptiveLoopFilter::calcCovariance(int ELocal[MAX_NUM_ALF_LUMA_COEFF][Ma
 
   int k = 0;
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const Pel curr = rec[0];
+#else
   const short curr = rec[0];
+#endif
 
   if( transposeIdx == 0 )
   {
@@ -2665,7 +2705,11 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
               for (int classIdx = 0; classIdx < MAX_NUM_ALF_CLASSES; classIdx++)
               {
                 short* pCoeff = m_coeffFinal;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+                Pel* pClipp   = m_clippFinal;
+#else
                 short* pClipp = m_clippFinal;
+#endif
                 for (int i = 0; i < MAX_NUM_ALF_LUMA_COEFF; i++)
                 {
                   m_filterTmp[i] = pCoeff[classIdx * MAX_NUM_ALF_LUMA_COEFF + i];
@@ -2743,7 +2787,11 @@ void  EncAdaptiveLoopFilter::alfEncoderCtb(CodingStructure& cs, AlfParam& alfPar
               else
               {
                 short *pCoeff;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+                Pel *pClipp;
+#else
                 short *pClipp;
+#endif
                 if (useNewFilter && filterSetIdx == NUM_FIXED_FILTER_SETS)
                 {
                   pCoeff = m_coeffFinal;
@@ -3147,7 +3195,11 @@ void EncAdaptiveLoopFilter::alfReconstructor(CodingStructure& cs, const PelUnitB
               const Area blkDst(xStart, yStart, w, h);
               short filterSetIndex = alfCtuFilterIndex[ctuIdx];
               short *coeff;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+              Pel *clip;
+#else
               short *clip;
+#endif
               if (filterSetIndex >= NUM_FIXED_FILTER_SETS)
               {
                 coeff = m_coeffApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS];
@@ -3196,7 +3248,11 @@ void EncAdaptiveLoopFilter::alfReconstructor(CodingStructure& cs, const PelUnitB
         Area blk(xPos, yPos, width, height);
         short filterSetIndex = alfCtuFilterIndex[ctuIdx];
         short *coeff;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+        Pel *clip;
+#else
         short *clip;
+#endif
         if (filterSetIndex >= NUM_FIXED_FILTER_SETS)
         {
           coeff = m_coeffApsLuma[filterSetIndex - NUM_FIXED_FILTER_SETS];
@@ -4147,7 +4203,11 @@ void EncAdaptiveLoopFilter::getBlkStatsCcAlf(AlfCovariance &alfCovariance, const
     vbPos = m_picHeight;
   }
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  Pel ELocal[MAX_NUM_CC_ALF_CHROMA_COEFF][1];
+#else
   int ELocal[MAX_NUM_CC_ALF_CHROMA_COEFF][1];
+#endif
 
   for (int i = 0; i < compArea.height; i++)
   {
@@ -4167,7 +4227,11 @@ void EncAdaptiveLoopFilter::getBlkStatsCcAlf(AlfCovariance &alfCovariance, const
         weight = m_lumaLevelToWeightPLUT[org[j]];
       }
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+      Intermediate_Int yLocal = org[j] - rec[compID][j];
+#else
       int yLocal = org[j] - rec[compID][j];
+#endif
 
       calcCovarianceCcAlf( ELocal, rec[COMPONENT_Y] + ( j << getComponentScaleX(compID, m_chromaFormat)), recStride[COMPONENT_Y], shape, vbDistance );
 
@@ -4181,11 +4245,19 @@ void EncAdaptiveLoopFilter::getBlkStatsCcAlf(AlfCovariance &alfCovariance, const
             {
               if (m_alfWSSD)
               {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+                alfCovariance.E[b0][b1][k][l] += weight * (ELocal[k][b0] * (double)ELocal[l][b1]);
+#else
                 alfCovariance.E[b0][b1][k][l] += weight * (double) (ELocal[k][b0] * ELocal[l][b1]);
+#endif
               }
               else
               {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+                alfCovariance.E[b0][b1][k][l] += ELocal[k][b0] * (double)ELocal[l][b1];
+#else
                 alfCovariance.E[b0][b1][k][l] += ELocal[k][b0] * ELocal[l][b1];
+#endif
               }
             }
           }
@@ -4194,21 +4266,37 @@ void EncAdaptiveLoopFilter::getBlkStatsCcAlf(AlfCovariance &alfCovariance, const
         {
           if (m_alfWSSD)
           {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+            alfCovariance.y[b][k] += weight * (ELocal[k][b] * (double)yLocal);
+#else
             alfCovariance.y[b][k] += weight * (double) (ELocal[k][b] * yLocal);
+#endif
           }
           else
           {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+            alfCovariance.y[b][k] += ELocal[k][b] * (double)yLocal;
+#else
             alfCovariance.y[b][k] += ELocal[k][b] * yLocal;
+#endif
           }
         }
       }
       if (m_alfWSSD)
       {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+        alfCovariance.pixAcc += weight * (yLocal * (double)yLocal);
+#else
         alfCovariance.pixAcc += weight * (double) (yLocal * yLocal);
+#endif
       }
       else
       {
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+        alfCovariance.pixAcc += yLocal * (double)yLocal;
+#else
         alfCovariance.pixAcc += yLocal * yLocal;
+#endif
       }
     }
     org += orgStride;
@@ -4248,7 +4336,11 @@ void EncAdaptiveLoopFilter::getBlkStatsCcAlf(AlfCovariance &alfCovariance, const
   }
 }
 
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+void EncAdaptiveLoopFilter::calcCovarianceCcAlf(Pel ELocal[MAX_NUM_CC_ALF_CHROMA_COEFF][1], const Pel *rec, const int stride, const AlfFilterShape& shape, int vbDistance)
+#else
 void EncAdaptiveLoopFilter::calcCovarianceCcAlf(int ELocal[MAX_NUM_CC_ALF_CHROMA_COEFF][1], const Pel *rec, const int stride, const AlfFilterShape& shape, int vbDistance)
+#endif
 {
   CHECK(shape.filterType != CC_ALF, "Bad CC ALF shape");
 
diff --git a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h
index 0aa905998..525aa16c1 100644
--- a/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h
+++ b/source/Lib/EncoderLib/EncAdaptiveLoopFilter.h
@@ -318,13 +318,21 @@ private:
   void   getFrameStat( AlfCovariance* frameCov, AlfCovariance** ctbCov, uint8_t* ctbEnableFlags, uint8_t* ctbAltIdx, const int numClasses, int altIdx );
   void   deriveStatsForFiltering( PelUnitBuf& orgYuv, PelUnitBuf& recYuv, CodingStructure& cs );
   void   getBlkStats(AlfCovariance* alfCovariace, const AlfFilterShape& shape, AlfClassifier** classifier, Pel* org, const int orgStride, Pel* rec, const int recStride, const CompArea& areaDst, const CompArea& area, const ChannelType channel, int vbCTUHeight, int vbPos);
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  void   calcCovariance(Pel ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues], const Pel *rec, const int stride, const AlfFilterShape& shape, const int transposeIdx, const ChannelType channel, int vbDistance);
+#else
   void   calcCovariance(int ELocal[MAX_NUM_ALF_LUMA_COEFF][MaxAlfNumClippingValues], const Pel *rec, const int stride, const AlfFilterShape& shape, const int transposeIdx, const ChannelType channel, int vbDistance);
+#endif
   void   deriveStatsForCcAlfFiltering(const PelUnitBuf &orgYuv, const PelUnitBuf &recYuv, const int compIdx,
                                       const int maskStride, const uint8_t filterIdc, CodingStructure &cs);
   void   getBlkStatsCcAlf(AlfCovariance &alfCovariance, const AlfFilterShape &shape, const PelUnitBuf &orgYuv,
                           const PelUnitBuf &recYuv, const UnitArea &areaDst, const UnitArea &area,
                           const ComponentID compID, const int yPos);
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  void   calcCovarianceCcAlf(Pel ELocal[MAX_NUM_CC_ALF_CHROMA_COEFF][1], const Pel* rec, const int stride, const AlfFilterShape& shape, int vbDistance);
+#else
   void   calcCovarianceCcAlf(int ELocal[MAX_NUM_CC_ALF_CHROMA_COEFF][1], const Pel* rec, const int stride, const AlfFilterShape& shape, int vbDistance);
+#endif
   void   mergeClasses(const AlfFilterShape& alfShape, AlfCovariance* cov, AlfCovariance* covMerged, int clipMerged[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_LUMA_COEFF], const int numClasses, short filterIndices[MAX_NUM_ALF_CLASSES][MAX_NUM_ALF_CLASSES]);
 
 
diff --git a/source/Lib/EncoderLib/VLCWriter.cpp b/source/Lib/EncoderLib/VLCWriter.cpp
index ec0a1bd15..4db625ea0 100644
--- a/source/Lib/EncoderLib/VLCWriter.cpp
+++ b/source/Lib/EncoderLib/VLCWriter.cpp
@@ -3291,7 +3291,11 @@ void HLSWriter::alfFilter( const AlfParam& alfParam, const bool isChroma, const
 {
   AlfFilterShape alfShape(isChroma ? 5 : 7);
   const short* coeff = isChroma ? alfParam.chromaCoeff[altIdx] : alfParam.lumaCoeff;
+#if JVET_R0351_HIGH_BIT_DEPTH_SUPPORT
+  const Pel* clipp = isChroma ? alfParam.chromaClipp[altIdx] : alfParam.lumaClipp;
+#else
   const short* clipp = isChroma ? alfParam.chromaClipp[altIdx] : alfParam.lumaClipp;
+#endif
   const int numFilters = isChroma ? 1 : alfParam.numLumaFilters;
 
   // vlc for all
-- 
GitLab