Commit 7405266d authored by Xiaoyu Xiu, committed by Xiang Li
Browse files

JVET-U0103: SIMD implementation for high bit depth coding

parent 1d3fca9d
......@@ -249,6 +249,40 @@ void removeHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStri
#undef REM_HF_OP
#undef REM_HF_OP_CLIP
}
#if RExt__HIGH_BIT_DEPTH_SUPPORT
/**
 * Weighted high-frequency removal, applied in place to a Pel buffer.
 *
 * Every sample visited by SIZE_AWARE_PER_EL_OP is rewritten as
 *
 *     dst = (dst * weight0 - src * weight1 + (1 << 15)) >> 16
 *
 * with
 *
 *     normalizer = ((1 << 16) + (bcwWeight > 0 ? (bcwWeight >> 1) : -(bcwWeight >> 1))) / bcwWeight
 *     weight0    = normalizer << g_BcwLog2WeightBase
 *     weight1    = (g_BcwWeightBase - bcwWeight) * normalizer
 *
 * i.e. the weights are scaled by 2^16 and the result is rounded (+ 1 << 15)
 * before being shifted back down by 16 bits.
 *
 * \param dst        buffer that is read and overwritten
 * \param dstStride  element offset added to dst by REM_HF_INC
 * \param src        buffer whose weighted samples are subtracted
 * \param srcStride  element offset added to src by REM_HF_INC
 * \param width      not referenced directly here; presumably consumed by
 *                   SIZE_AWARE_PER_EL_OP - TODO confirm against the macro
 * \param height     see width
 * \param shift      unused in this function
 * \param bcwWeight  weight used as divisor for the normalizer; must be non-zero
 */
void removeWeightHighFreq_HBD(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, int shift, int bcwWeight)
{
  const Intermediate_Int normalizer = ((1 << 16) + (bcwWeight > 0 ? (bcwWeight >> 1) : -(bcwWeight >> 1))) / bcwWeight;
  const Intermediate_Int weight0    = normalizer << g_BcwLog2WeightBase;
  const Intermediate_Int weight1    = (g_BcwWeightBase - bcwWeight) * normalizer;

// Pointer advance handed to SIZE_AWARE_PER_EL_OP. The last line must NOT end in
// a line continuation: a trailing backslash would splice the following #define
// into this macro and leave REM_HF_OP undefined.
#define REM_HF_INC  \
  src += srcStride; \
  dst += dstStride;

// Per-sample operation: fixed-point weighted difference with rounding.
#define REM_HF_OP( ADDR ) dst[ADDR] = static_cast<Pel>((dst[ADDR] * weight0 - src[ADDR] * weight1 + (1 << 15)) >> 16)

  SIZE_AWARE_PER_EL_OP(REM_HF_OP, REM_HF_INC);

#undef REM_HF_INC
#undef REM_HF_OP
// REM_HF_OP_CLIP is not defined inside this function; the #undef is kept so the
// macro state after this function is unchanged.
#undef REM_HF_OP_CLIP
}
/**
 * High-frequency removal, applied in place to a Pel buffer.
 *
 * Every sample visited by SIZE_AWARE_PER_EL_OP is rewritten as
 *
 *     dst = 2 * dst - src
 *
 * \param dst        buffer that is read and overwritten
 * \param dstStride  element offset added to dst by REM_HF_INC
 * \param src        buffer whose samples are subtracted
 * \param srcStride  element offset added to src by REM_HF_INC
 * \param width      not referenced directly here; presumably consumed by
 *                   SIZE_AWARE_PER_EL_OP - TODO confirm against the macro
 * \param height     see width
 */
void removeHighFreq_HBD(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height)
{
// Pointer advance handed to SIZE_AWARE_PER_EL_OP. The last line must NOT end in
// a line continuation: a trailing backslash would splice the following #define
// into this macro and leave REM_HF_OP undefined.
#define REM_HF_INC  \
  src += srcStride; \
  dst += dstStride;

// Per-sample operation.
#define REM_HF_OP( ADDR ) dst[ADDR] = 2 * dst[ADDR] - src[ADDR]

  SIZE_AWARE_PER_EL_OP(REM_HF_OP, REM_HF_INC);

#undef REM_HF_INC
#undef REM_HF_OP
// REM_HF_OP_CLIP is not defined inside this function; the #undef is kept so the
// macro state after this function is unchanged.
#undef REM_HF_OP_CLIP
}
#endif
#endif
template<typename T>
......@@ -299,10 +333,17 @@ PelBufferOps::PelBufferOps()
copyBuffer = copyBufferCore;
padding = paddingCore;
// Default bindings for the high-frequency removal function pointers; only
// compiled when ENABLE_SIMD_OPT_BCW is set.
#if ENABLE_SIMD_OPT_BCW
#if RExt__HIGH_BIT_DEPTH_SUPPORT
// High-bit-depth build: the "8" and "4" slots both point at the same _HBD
// routine, which takes Pel buffers.
removeWeightHighFreq8 = removeWeightHighFreq_HBD;
removeWeightHighFreq4 = removeWeightHighFreq_HBD;
removeHighFreq8 = removeHighFreq_HBD;
removeHighFreq4 = removeHighFreq_HBD;
#else
// Otherwise the "8" and "4" slots both point at the non-HBD routine.
removeWeightHighFreq8 = removeWeightHighFreq;
removeWeightHighFreq4 = removeWeightHighFreq;
removeHighFreq8 = removeHighFreq;
removeHighFreq4 = removeHighFreq;
#endif
#endif
profGradFilter = gradFilterCore <false>;
......
......@@ -393,6 +393,10 @@ private:
static Distortion xGetSSE_SIMD ( const DistParam& pcDtParam );
template<int iWidth, X86_VEXT vext>
static Distortion xGetSSE_NxN_SIMD( const DistParam& pcDtParam );
#if RExt__HIGH_BIT_DEPTH_SUPPORT
// Declared only when RExt__HIGH_BIT_DEPTH_SUPPORT is enabled; templated on the
// x86 vector extension level. Presumably the high-bit-depth counterpart of
// xGetSSE_SIMD - confirm against the definition.
template<X86_VEXT vext>
static Distortion xGetSSE_HBD_SIMD(const DistParam& pcDtParam);
#endif
template<X86_VEXT vext>
static Distortion xGetSAD_SIMD ( const DistParam& pcDtParam );
......@@ -400,12 +404,23 @@ private:
static Distortion xGetSAD_NxN_SIMD( const DistParam& pcDtParam );
template<X86_VEXT vext>
static Distortion xGetSAD_IBD_SIMD( const DistParam& pcDtParam );
// Exactly one of the two declarations below is compiled: the _HBD variant when
// RExt__HIGH_BIT_DEPTH_SUPPORT is enabled, the plain variant otherwise.
#if RExt__HIGH_BIT_DEPTH_SUPPORT
template<X86_VEXT vext>
static Distortion xGetHADs_HBD_SIMD(const DistParam& pcDtParam);
#else
template<X86_VEXT vext>
static Distortion xGetHADs_SIMD ( const DistParam& pcDtParam );
#endif
template< X86_VEXT vext >
static Distortion xGetSADwMask_SIMD( const DistParam& pcDtParam );
#if RExt__HIGH_BIT_DEPTH_SUPPORT
// Declared only when RExt__HIGH_BIT_DEPTH_SUPPORT is enabled, in addition to
// (not instead of) xGetSAD_SIMD and xGetSADwMask_SIMD. Presumably their
// high-bit-depth counterparts - confirm against the definitions.
template<X86_VEXT vext>
static Distortion xGetSAD_HBD_SIMD(const DistParam& pcDtParam);
template< X86_VEXT vext >
static Distortion xGetSADwMask_HBD_SIMD(const DistParam& pcDtParam);
#endif
#endif
public:
......
......@@ -53,6 +53,7 @@
// clang-format off
//########### place macros to be removed in next cycle below this line ###############
#define JVET_U0103_HIGH_BIT_DEPTH_SIMD 1 // JVET-U0103: SIMD implementation for high bit depth coding
#define JVET_S0078_NOOUTPUTPRIORPICFLAG 0 // JVET-S0078: Handling of NoOutputOfPriorPicsFlag in output process
......@@ -185,7 +186,11 @@ typedef std::pair<int, int> TrCost;
// SIMD optimizations
#define SIMD_ENABLE 1
// With JVET_U0103_HIGH_BIT_DEPTH_SIMD enabled, ENABLE_SIMD_OPT follows
// SIMD_ENABLE alone. Without it, SIMD is additionally switched off whenever
// RExt__HIGH_BIT_DEPTH_SUPPORT is set.
#if JVET_U0103_HIGH_BIT_DEPTH_SIMD
#define ENABLE_SIMD_OPT SIMD_ENABLE ///< SIMD optimizations, no impact on RD performance
#else
#define ENABLE_SIMD_OPT ( SIMD_ENABLE && !RExt__HIGH_BIT_DEPTH_SUPPORT ) ///< SIMD optimizations, no impact on RD performance
#endif
#define ENABLE_SIMD_OPT_MCIF ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the interpolation filter, no impact on RD performance
#define ENABLE_SIMD_OPT_BUFFER ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the buffer operations, no impact on RD performance
#define ENABLE_SIMD_OPT_DIST ( 1 && ENABLE_SIMD_OPT ) ///< SIMD optimization for the distortion calculations(SAD,SSE,HADAMARD), no impact on RD performance
......
This diff is collapsed.
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment