Commit 007e7600 authored by xiuxx

JVET-K0485: BIO simplifications:

Reference sample padding to keep the memory bandwidth of the BIO CUs the same as regular bi-pred
Simplified BIO gradient calculation
Two-stage early termination to skip unnecessary BIO operations
parent e617dffe
......@@ -144,6 +144,7 @@ LMChroma : 1
DMVR : 1
IMV : 2
DepQuant : 1
BIO : 1
# Fast tools
PBIntraFast : 1
......
......@@ -62,6 +62,55 @@ void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T
#undef ADD_AVG_CORE_INC
}
#if JVET_K0485_BIO
// Bi-prediction average with a gradient-based correction term.
//
// For every sample the correction is
//   b = ( tmpx * (gradX0 - gradX1) + tmpy * (gradY0 - gradY1) + 32 ) >> 6
// and the output is ClipPel( rightShift( src0 + src1 + b + offset, shift ) ).
//
// Samples are handled in groups of four columns: each group always writes
// four outputs, so columns up to the next multiple of four are touched even
// when width itself is not a multiple of four.
void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *pGradX0, const Pel *pGradX1, const Pel *pGradY0, const Pel*pGradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
{
  for (int row = 0; row < height; row++)
  {
    for (int groupStart = 0; groupStart < width; groupStart += 4)
    {
      for (int lane = 0; lane < 4; lane++)
      {
        const int col = groupStart + lane;

        // Gradient difference weighted by the two scalar factors.
        const int gradTerm = tmpx * (pGradX0[col] - pGradX1[col]) + tmpy * (pGradY0[col] - pGradY1[col]);
        // Round and scale the correction down by 6 bits.
        const int correction = (gradTerm + 32) >> 6;

        dst[col] = ClipPel(static_cast<int16_t>(rightShift((src0[col] + src1[col] + correction + offset), shift)), clpRng);
      }
    }

    // Step every plane to its next row.
    dst     += dstStride;
    src0    += src0Stride;
    src1    += src1Stride;
    pGradX0 += gradStride;
    pGradX1 += gradStride;
    pGradY0 += gradStride;
    pGradY1 += gradStride;
  }
}
// Sum of absolute differences between two width x height sample blocks.
// The accumulated sum is right-shifted by
// DISTORTION_PRECISION_ADJUSTMENT(bitDepth - 8) before being returned.
Distortion calcHighBDSADCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int bitDepth)
{
  Distortion sum = 0;

  for (int y = 0; y < height; y++, src0 += src0Stride, src1 += src1Stride)
  {
    for (int x = 0; x < width; x++)
    {
      const auto diff = src0[x] - src1[x];
      sum += abs(diff);
    }
  }

  const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(bitDepth - 8);
  return (sum >> distortionShift);
}
#endif
template<typename T>
void reconstructCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, const ClpRng& clpRng )
......@@ -97,6 +146,10 @@ PelBufferOps::PelBufferOps()
{
addAvg4 = addAvgCore<Pel>;
addAvg8 = addAvgCore<Pel>;
#if JVET_K0485_BIO
addBIOAvg4 = addBIOAvgCore;
calcHighBDSAD = calcHighBDSADCore;
#endif
reco4 = reconstructCore<Pel>;
reco8 = reconstructCore<Pel>;
......@@ -150,6 +203,45 @@ void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const
}
}
#if JVET_K0485_BIO
// Writes the rounded, clipped average of two prediction sample planes into
// this buffer (buf / stride).
//
// pYuvSrc0 / src0Stride : first source plane and its stride
// pYuvSrc1 / src1Stride : second source plane and its stride
// clpRng                : clipping range; its bit depth (bd) selects the shift
//
// Each output sample is
//   ClipPel( rightShift( src0 + src1 + offset, shiftNum ), clpRng )
// with shiftNum = max( 2, IF_INTERNAL_PREC - bd ) + 1 and
// offset = ( 1 << ( shiftNum - 1 ) ) + 2 * IF_INTERNAL_OFFS.
//
// In x86 SIMD builds, widths that are multiples of 8 or 4 are dispatched to
// g_pelBufOP.addAvg8 / addAvg4; every other case uses the scalar path below.
template<>
void AreaBuf<Pel>::avgPel(const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const ClpRng& clpRng)
{
  const Pel* src0 = pYuvSrc0;
  const Pel* src2 = pYuvSrc1;
  Pel* dest = buf;
  const unsigned destStride = stride;
  const int clipbd = clpRng.bd;
  const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + 1;
  const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
#if ENABLE_SIMD_OPT_BUFFER && defined(TARGET_SIMD_X86)
  if ((width & 7) == 0)
  {
    g_pelBufOP.addAvg8(src0, src0Stride, src2, src1Stride, dest, destStride, width, height, shiftNum, offset, clpRng);
  }
  else if ((width & 3) == 0)
  {
    g_pelBufOP.addAvg4(src0, src0Stride, src2, src1Stride, dest, destStride, width, height, shiftNum, offset, clpRng);
  }
  else
#endif
  {
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src0[ADDR] + src2[ADDR] + offset ), shiftNum ), clpRng )
    // The macro must end on its last statement WITHOUT a trailing backslash:
    // a continuation here would splice the SIZE_AWARE_PER_EL_OP invocation
    // into the macro body, leaving this scalar branch with no code at all.
#define ADD_AVG_INC     \
    src0 += src0Stride; \
    src2 += src1Stride; \
    dest += destStride;

    SIZE_AWARE_PER_EL_OP(ADD_AVG_OP, ADD_AVG_INC);

#undef ADD_AVG_OP
#undef ADD_AVG_INC
  }
}
#endif
template<>
void AreaBuf<Pel>::toLast( const ClpRng& clpRng )
{
......
......@@ -68,6 +68,10 @@ struct PelBufferOps
void ( *reco8 ) ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height, const ClpRng& clpRng );
void ( *linTf4 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
void ( *linTf8 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
#if JVET_K0485_BIO
// Kernel pointer for the BIO bi-prediction average: takes two source planes, four gradient planes
// (sharing gradStride), the scalar factors tmpx/tmpy, and the shift/offset/clipping parameters.
// Bound to addBIOAvgCore in the PelBufferOps constructor shown in this change.
void(*addBIOAvg4) (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *pGradX0, const Pel *pGradX1, const Pel *pGradY0, const Pel*pGradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng);
// Kernel pointer returning a distortion value for two width x height sample blocks at the given bit depth.
// Bound to calcHighBDSADCore in the PelBufferOps constructor shown in this change.
Distortion(*calcHighBDSAD) (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int bitDepth);
#endif
};
extern PelBufferOps g_pelBufOP;
......@@ -103,6 +107,9 @@ struct AreaBuf : public Size
void extendSingleBorderPel();
void extendBorderPel ( unsigned margin );
void addAvg ( const AreaBuf<const T> &other1, const AreaBuf<const T> &other2, const ClpRng& clpRng );
#if JVET_K0485_BIO
void avgPel (const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const ClpRng& clpRng);
#endif
void removeHighFreq ( const AreaBuf<T>& other, const bool bClip, const ClpRng& clpRng);
void updateHistogram ( std::vector<int32_t>& hist ) const;
......@@ -372,6 +379,16 @@ void AreaBuf<T>::addAvg( const AreaBuf<const T> &other1, const AreaBuf<const T>
template<>
void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng );
#if JVET_K0485_BIO
// Generic fallback for avgPel: every instantiation that reaches this body
// throws. A dedicated AreaBuf<Pel> specialisation is declared separately.
template<typename T>
void AreaBuf<T>::avgPel( const Pel* pYuvSrc0, const int src0Stride,
                         const Pel* pYuvSrc1, const int src1Stride,
                         const ClpRng& clpRng )
{
  THROW( "Type not supported" );
}
template<>
void AreaBuf<Pel>::avgPel(const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const ClpRng& clpRng);
#endif
template<typename T>
void AreaBuf<T>::linearTransform( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng )
......
This diff is collapsed.
......@@ -62,8 +62,12 @@ class Mv;
// ====================================================================================================================
#if JEM_TOOLS
// Number of elements in a BIO temporary buffer. With JVET_K0485_BIO enabled each
// dimension is MAX_CU_SIZE extended by JVET_K0485_BIO_EXTEND_SIZE on both sides.
// The whole product is parenthesised so the macro behaves as a single value in
// any surrounding expression (e.g. division or modulo by the buffer size).
#if JVET_K0485_BIO
#define BIO_TEMP_BUFFER_SIZE ( ( MAX_CU_SIZE+2*JVET_K0485_BIO_EXTEND_SIZE ) * ( MAX_CU_SIZE+2*JVET_K0485_BIO_EXTEND_SIZE ) )
#else
#define BIO_TEMP_BUFFER_SIZE ( ( MAX_CU_SIZE ) * ( MAX_CU_SIZE ) )
#endif
#endif
class InterPrediction : public WeightPrediction
{
......@@ -76,6 +80,11 @@ private:
#endif
#if JEM_TOOLS
#if JVET_K0485_BIO
Distortion m_bioDistThres;
Distortion m_bioSubBlkDistThres;
Distortion m_bioPredSubBlkDist[MAX_NUM_PARTS_IN_CTU];
#endif
int64_t m_piDotProduct1[BIO_TEMP_BUFFER_SIZE];
int64_t m_piDotProduct2[BIO_TEMP_BUFFER_SIZE];
int64_t m_piDotProduct3[BIO_TEMP_BUFFER_SIZE];
......@@ -104,12 +113,17 @@ protected:
Pel* m_pGradY0;
Pel* m_pGradX1;
Pel* m_pGradY1;
#if JVET_K0485_BIO
Pel* m_pBIOPadRef;
#endif
PelStorage m_tmpObmcBuf;
Pel* m_cYuvPredTempDMVR[MAX_NUM_COMPONENT];
#if !JVET_K0485_BIO
uint32_t m_uiaBIOShift[64];
#endif
#if JVET_J0090_MEMORY_BANDWITH_MEASURE
CacheModel* m_cacheModel;
#endif
......@@ -119,6 +133,10 @@ protected:
#define BIO_FILTER_LENGTH_MINUS_1 (BIO_FILTER_LENGTH-1)
#define BIO_FILTER_HALF_LENGTH_MINUS_1 ((BIO_FILTER_LENGTH>>1)-1)
#if JVET_K0485_BIO
void (*bioGradFilter)(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* pGradX, Pel* pGradY);
static void gradFilter (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* pGradX, Pel* pGradY);
#else
void xGradFilterX ( const Pel* piRefY, int iRefStride, Pel* piDstY, int iDstStride, int iWidth, int iHeight, int iMVyFrac, int iMVxFrac, const int bitDepth );
void xGradFilterY ( const Pel* piRefY, int iRefStride, Pel* piDstY, int iDstStride, int iWidth, int iHeight, int iMVyFrac, int iMVxFrac, const int bitDepth );
inline void gradFilter2DVer ( const Pel* piSrc, int iSrcStride, int iWidth, int iHeight, int iDstStride, Pel*& rpiDst, int iMv, const int iShift );
......@@ -127,14 +145,22 @@ protected:
inline void fracFilter2DVer ( const Pel* piSrc, int iSrcStride, int iWidth, int iHeight, int iDstStride, Pel*& rpiDst, int iMv, const int iShift );
inline void gradFilter1DHor ( const Pel* piSrc, int iSrcStride, int iWidth, int iHeight, int iDstStride, Pel*& rpiDst, int iMV, const int iShift );
inline void gradFilter1DVer ( const Pel* piSrc, int iSrcStride, int iWidth, int iHeight, int iDstStride, Pel*& rpiDst, int iMV, const int iShift );
#endif
inline int64_t divide64 ( int64_t numer, int64_t denom);
#if JVET_K0485_BIO
inline void calcBlkGradient(int sx, int sy, int64_t *arraysGx2, int64_t *arraysGxGy, int64_t *arraysGxdI, int64_t *arraysGy2, int64_t *arraysGydI,
int64_t &sGx2, int64_t &sGy2, int64_t &sGxGy, int64_t &sGxdI, int64_t &sGydI, int width, int height, int unitSize);
#else
inline void calcBlkGradient ( int sx, int sy, int64_t *arraysGx2, int64_t *arraysGxGy, int64_t *arraysGxdI, int64_t *arraysGy2, int64_t *arraysGydI, int64_t &sGx2, int64_t &sGy2, int64_t &sGxGy, int64_t &sGxdI, int64_t &sGydI, int iWidth, int iHeight);
Pel optical_flow_averaging ( int64_t s1, int64_t s2, int64_t s3, int64_t s5, int64_t s6,
Pel pGradX0, Pel pGradX1, Pel pGradY0, Pel pGradY1, Pel pSrcY0Temp, Pel pSrcY1Temp,
const int shiftNum, const int offset, const int64_t limit, const int64_t denom_min_1, const int64_t denom_min_2, const ClpRng& clpRng );
#endif
void applyBiOptFlow ( const PredictionUnit &pu, const CPelUnitBuf &pcYuvSrc0, const CPelUnitBuf &pcYuvSrc1, const int &iRefIdx0, const int &iRefIdx1, PelUnitBuf &pcYuvDst, const BitDepths &clipBitDepths);
#if JVET_K0485_BIO
bool xCalcBiPredSubBlkDist (const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths);
#endif
#endif
#if JEM_TOOLS
......@@ -151,6 +177,9 @@ protected:
);
#if JEM_TOOLS
#if JVET_K0485_BIO
void xPadRefFromFMC (const Pel* refBufPtr, int refBufStride, int width, int height, Pel* padRefPelPtr, int &padRefStride, bool isFracMC);
#endif
void xPredAffineBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng, const bool& bBIOApplied = false );
void xGetLICParams ( const CodingUnit& cu, const ComponentID compID, const Picture& refPic, const Mv& mv, int& shift, int& scale, int& offset );
void xLocalIlluComp ( const PredictionUnit& pu, const ComponentID compID, const Picture& refPic, const Mv& mv, const bool biPred, PelBuf& dstBuf );
......
......@@ -104,6 +104,11 @@
#define JVET_K0357_AMVR 1 // Adaptive motion vector resolution separated from JEM_TOOLS macro
#define JVET_K0485_BIO 0 // BIO simplifications
#if JVET_K0485_BIO
#define JVET_K0485_BIO_EXTEND_SIZE 1
#endif
#ifndef JEM_TOOLS
#define JEM_TOOLS 1 // Defines the inclusion of JEM tools into compiled executable
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment