Commit e9bf7ed5 authored by Xiang Li

Merge branch 'JVET_L0256_BIO' into 'master'

JVET_L0256: BIO

See merge request !32
parents d1194f46 0f747e97
Pipeline #231 passed with stage
......@@ -142,7 +142,8 @@ DepQuant : 1
IMV : 2
ALF : 1
GBi : 1
GBiFast : 1
GBiFast : 1
BIO : 1
# Fast tools
PBIntraFast : 1
......
......@@ -230,6 +230,9 @@ void EncApp::xInitLibCfg()
m_cEncLib.setAffineType ( m_AffineType );
#if !REMOVE_MV_ADAPT_PREC
m_cEncLib.setHighPrecisionMv (m_highPrecisionMv);
#endif
#if JVET_L0256_BIO
m_cEncLib.setBIO (m_BIO);
#endif
m_cEncLib.setDisableMotionCompression ( m_DisableMotionCompression );
m_cEncLib.setMTTMode ( m_MTT );
......
......@@ -827,8 +827,11 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
#if !REMOVE_MV_ADAPT_PREC
("HighPrecMv", m_highPrecisionMv, false, "High precision motion vectors for temporal merging (0:off, 1:on) [default: off]")
#endif
("Affine", m_Affine, false, "Enable affine prediction (0:off, 1:on) [default: off]")
( "AffineType", m_AffineType, true, "Enable affine type prediction (0:off, 1:on) [default: on]" )
("Affine", m_Affine, false, "Enable affine prediction (0:off, 1:on) [default: off]")
("AffineType", m_AffineType, true, "Enable affine type prediction (0:off, 1:on) [default: on]" )
#if JVET_L0256_BIO
("BIO", m_BIO, false, "Enable bi-directional optical flow")
#endif
("DisableMotCompression", m_DisableMotionCompression, false, "Disable motion data compression for all modes")
("IMV", m_ImvMode, 2, "Adaptive MV precision Mode (IMV)\n"
"\t0: disabled IMV\n"
......@@ -1943,6 +1946,9 @@ bool EncAppCfg::xCheckParameter()
#if !REMOVE_MV_ADAPT_PREC
xConfirmPara( m_highPrecisionMv, "High precision MV for temporal merging can only be used with NEXT profile" );
xConfirmPara( m_Affine, "Affine is only allowed with NEXT profile" );
#endif
#if JVET_L0256_BIO
xConfirmPara( m_BIO, "BIO only allowed with NEXT profile" );
#endif
xConfirmPara( m_DisableMotionCompression, "Disable motion data compression only allowed with NEXT profile" );
xConfirmPara( m_MTT, "Multi type tree is only allowed with NEXT profile" );
......@@ -3143,6 +3149,9 @@ void EncAppCfg::xPrintParameter()
if( !m_QTBT ) msg( VERBOSE, "IMVMaxCand:%d ", m_ImvMaxCand );
#if !REMOVE_MV_ADAPT_PREC
msg(VERBOSE, "HighPrecMv:%d ", m_highPrecisionMv);
#endif
#if JVET_L0256_BIO
msg( VERBOSE, "BIO:%d ", m_BIO );
#endif
msg( VERBOSE, "DisMDC:%d ", m_DisableMotionCompression );
msg( VERBOSE, "MTT:%d ", m_MTT );
......
......@@ -213,6 +213,9 @@ protected:
bool m_AffineType;
#if !REMOVE_MV_ADAPT_PREC
bool m_highPrecisionMv;
#endif
#if JVET_L0256_BIO
bool m_BIO;
#endif
bool m_DisableMotionCompression;
unsigned m_MTT;
......
......@@ -62,6 +62,138 @@ void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T
#undef ADD_AVG_CORE_INC
}
#if JVET_L0256_BIO
/** BIO-corrected bi-prediction average.
 *
 * For every sample a correction term is built from the horizontal and vertical
 * gradient differences of the two predictions, weighted by tmpx and tmpy and
 * halved with rounding. The correction is added to src0 + src1 + offset, the sum
 * is passed through rightShift() with \p shift, and the result is clipped with
 * ClipPel() before being stored in dst.
 *
 * \param src0, src1      the two prediction blocks and their strides
 * \param dst             output block and its stride
 * \param gradX0..gradY1  gradient blocks of both predictions, sharing gradStride
 * \param width, height   block size; columns are handled in groups of four, so
 *                        each started group of four columns is written completely
 * \param tmpx, tmpy      weights applied to the gradient differences
 * \param shift, offset   final normalisation parameters
 * \param clpRng          clipping range handed to ClipPel()
 */
void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
{
  for (int row = 0; row < height; row++)
  {
    for (int groupStart = 0; groupStart < width; groupStart += 4)
    {
      // one group of four neighbouring samples
      for (int col = groupStart; col < groupStart + 4; col++)
      {
        const int gradTerm   = tmpx * (gradX0[col] - gradX1[col]) + tmpy * (gradY0[col] - gradY1[col]);
        const int correction = (gradTerm + 1) >> 1;
        const int sum        = src0[col] + src1[col] + correction + offset;
        dst[col] = ClipPel((int16_t)rightShift(sum, shift), clpRng);
      }
    }

    // step every buffer to its next row
    src0   += src0Stride;
    src1   += src1Stride;
    dst    += dstStride;
    gradX0 += gradStride;
    gradX1 += gradStride;
    gradY0 += gradStride;
    gradY1 += gradStride;
  }
}
/** Computes horizontal and vertical gradients of a padded block.
 *
 * The interior (the block without a border of BIO_EXTEND_SIZE samples on each
 * side) receives central differences of the source, right-shifted by 4. The
 * gradient border is then filled by replication: first the left/right columns
 * of every interior row, then the top and bottom rows (full \p width samples).
 *
 * NOTE(review): the start offsets (one row plus one column) are hard-coded and
 * only line up with the loop bounds when BIO_EXTEND_SIZE is 1.
 *
 * \param pSrc, srcStride  source block including its border
 * \param width, height    size of the block including its border
 * \param gradStride       stride shared by both gradient buffers
 * \param gradX, gradY     output gradient buffers (including border)
 */
void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY)
{
  const int innerWidth  = width  - 2 * BIO_EXTEND_SIZE;
  const int innerHeight = height - 2 * BIO_EXTEND_SIZE;

  // Pass 1: central differences over the interior samples.
  {
    Pel* srcRow = pSrc  + srcStride  + 1;
    Pel* gxRow  = gradX + gradStride + 1;
    Pel* gyRow  = gradY + gradStride + 1;
    for (int row = 0; row < innerHeight; row++)
    {
      for (int col = 0; col < innerWidth; col++)
      {
        gyRow[col] = (srcRow[col + srcStride] - srcRow[col - srcStride]) >> 4;
        gxRow[col] = (srcRow[col + 1] - srcRow[col - 1]) >> 4;
      }
      srcRow += srcStride;
      gxRow  += gradStride;
      gyRow  += gradStride;
    }
  }

  // Pass 2: replicate the first/last interior column into the left/right border.
  {
    Pel* gxRow = gradX + gradStride + 1;
    Pel* gyRow = gradY + gradStride + 1;
    for (int row = 0; row < innerHeight; row++)
    {
      gxRow[-1]         = gxRow[0];
      gxRow[innerWidth] = gxRow[innerWidth - 1];
      gyRow[-1]         = gyRow[0];
      gyRow[innerWidth] = gyRow[innerWidth - 1];
      gxRow += gradStride;
      gyRow += gradStride;
    }
  }

  // Pass 3: replicate the first/last interior row into the top/bottom border.
  Pel* gxFirstRow = gradX + gradStride;
  Pel* gyFirstRow = gradY + gradStride;
  Pel* gxLastRow  = gxFirstRow + (innerHeight - 1) * gradStride;
  Pel* gyLastRow  = gyFirstRow + (innerHeight - 1) * gradStride;

  ::memcpy(gxFirstRow - gradStride, gxFirstRow, sizeof(Pel) * (width));
  ::memcpy(gxFirstRow + innerHeight * gradStride, gxLastRow, sizeof(Pel) * (width));
  ::memcpy(gyFirstRow - gradStride, gyFirstRow, sizeof(Pel) * (width));
  ::memcpy(gyFirstRow + innerHeight * gradStride, gyLastRow, sizeof(Pel) * (width));
}
void calcBIOParCore(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG)
{
for (int y = 0; y < heightG; y++)
{
for (int x = 0; x < widthG; x++)
{
int temp = (srcY0Temp[x] >> 6) - (srcY1Temp[x] >> 6);
int tempX = (gradX0[x] + gradX1[x]) >> 3;
int tempY = (gradY0[x] + gradY1[x]) >> 3;
dotProductTemp1[x] = tempX * tempX;
dotProductTemp2[x] = tempX * tempY;
dotProductTemp3[x] = -tempX * temp;
dotProductTemp5[x] = tempY * tempY;
dotProductTemp6[x] = -tempY * temp;
}
srcY0Temp += src0Stride;
srcY1Temp += src1Stride;
gradX0 += gradStride;
gradX1 += gradStride;
gradY0 += gradStride;
gradY1 += gradStride;
dotProductTemp1 += widthG;
dotProductTemp2 += widthG;
dotProductTemp3 += widthG;
dotProductTemp5 += widthG;
dotProductTemp6 += widthG;
}
}
/** Accumulates the five product arrays over one sub-block window.
 *
 * The window covers unitSize x unitSize entries starting at the array pointers,
 * enlarged by BIO_EXTEND_SIZE entries on every side, so entries above and to the
 * left of the pointers are read as well. Each array is laid out with a row pitch
 * of \p width. The sums are ADDED to sGx2, sGy2, sGxGy, sGxdI and sGydI; the
 * function never resets them.
 *
 * \p sx, \p sy and \p height are not used by this implementation.
 */
void calcBlkGradientCore(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize)
{
  const int first = -BIO_EXTEND_SIZE;
  const int last  = unitSize + BIO_EXTEND_SIZE;   // exclusive

  for (int dy = first; dy < last; dy++)
  {
    const int rowOffset = dy * width;
    for (int dx = first; dx < last; dx++)
    {
      const int idx = rowOffset + dx;
      sGx2  += arraysGx2[idx];
      sGy2  += arraysGy2[idx];
      sGxGy += arraysGxGy[idx];
      sGxdI += arraysGxdI[idx];
      sGydI += arraysGydI[idx];
    }
  }
}
#endif
#if ENABLE_SIMD_OPT_GBI && JVET_L0646_GBI
void removeWeightHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height, int shift, int gbiWeight)
{
......@@ -138,6 +270,13 @@ PelBufferOps::PelBufferOps()
linTf4 = linTfCore<Pel>;
linTf8 = linTfCore<Pel>;
#if JVET_L0256_BIO
addBIOAvg4 = addBIOAvgCore;
bioGradFilter = gradFilterCore;
calcBIOPar = calcBIOParCore;
calcBlkGradient = calcBlkGradientCore;
#endif
#if ENABLE_SIMD_OPT_GBI
removeWeightHighFreq8 = removeWeightHighFreq;
removeWeightHighFreq4 = removeWeightHighFreq;
......
......@@ -68,6 +68,12 @@ struct PelBufferOps
void ( *reco8 ) ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height, const ClpRng& clpRng );
void ( *linTf4 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
void ( *linTf8 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
#if JVET_L0256_BIO
void(*addBIOAvg4) (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng);
void(*bioGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY);
void(*calcBIOPar) (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG);
void(*calcBlkGradient)(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize);
#endif
#if ENABLE_SIMD_OPT_GBI
void ( *removeWeightHighFreq8) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
void ( *removeWeightHighFreq4) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
......
......@@ -318,6 +318,12 @@ static const int MAX_NUM_GT2_BINS_4x4SUBBLOCK = 4; ///< max
static const int MAX_NUM_REG_BINS_2x2SUBBLOCK = 8; ///< max number of context-coded bins (incl. gt2 bins) per 2x2 subblock (chroma)
static const int MAX_NUM_GT2_BINS_2x2SUBBLOCK = 2; ///< max number of gt2 bins per 2x2 subblock (chroma)
#endif
#if JVET_L0256_BIO
static const int BIO_EXTEND_SIZE = 1; ///< number of extra samples on each side of a block used by the BIO kernels
static const int BIO_TEMP_BUFFER_SIZE = (MAX_CU_SIZE + 2 * BIO_EXTEND_SIZE) * (MAX_CU_SIZE + 2 * BIO_EXTEND_SIZE); ///< element count of a BIO scratch buffer: (MAX_CU_SIZE plus the extension on both sides) squared
#endif
#if JVET_L0646_GBI
static const int GBI_NUM = 5; ///< the number of weight options
static const int GBI_DEFAULT = ((uint8_t)(GBI_NUM >> 1)); ///< Default weighting index representing for w=0.5
......@@ -397,6 +403,10 @@ static const int NTAPS_CHROMA = 4; ///< Numb
static const int MAX_LADF_INTERVALS = 5; /// max number of luma adaptive deblocking filter qp offset intervals
#endif
#if JVET_L0256_BIO
static const int NTAPS_BILINEAR = 2; ///< Number of taps for bilinear filter
#endif
// ====================================================================================================================
// Macro functions
// ====================================================================================================================
......
This diff is collapsed.
......@@ -64,6 +64,17 @@ class InterPrediction : public WeightPrediction
{
private:
#if JVET_L0256_BIO
Distortion m_bioDistThres;
Distortion m_bioSubBlkDistThres;
Distortion m_bioPredSubBlkDist[MAX_NUM_PARTS_IN_CTU];
int m_dotProduct1[BIO_TEMP_BUFFER_SIZE];
int m_dotProduct2[BIO_TEMP_BUFFER_SIZE];
int m_dotProduct3[BIO_TEMP_BUFFER_SIZE];
int m_dotProduct5[BIO_TEMP_BUFFER_SIZE];
int m_dotProduct6[BIO_TEMP_BUFFER_SIZE];
#endif
protected:
InterpolationFilter m_if;
......@@ -80,15 +91,37 @@ protected:
RdCost* m_pcRdCost;
int m_iRefListIdx;
#if JVET_L0256_BIO
Pel* m_gradX0;
Pel* m_gradY0;
Pel* m_gradX1;
Pel* m_gradY1;
bool m_subPuMC;
int rightShiftMSB(int numer, int denom);
void applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths);
bool xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* yuvSrc0, const int src0Stride, const Pel* yuvSrc1, const int src1Stride, const BitDepths &clipBitDepths);
void bioSampleExtendBilinearFilter(Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int dim, int fracX, int fracY, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng);
#endif
void xPredInterUni ( const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi
#if JVET_L0256_BIO
,const bool& bioApplied = false
#endif
);
void xPredInterBi ( PredictionUnit& pu, PelUnitBuf &pcYuvPred );
void xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
#if JVET_L0256_BIO
,const bool& bioApplied = false
#endif
);
#if JVET_L0256_BIO
void xWeightedAverage ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied );
#else
void xWeightedAverage ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs );
#endif
void xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng );
static bool xCheckIdenticalMotion( const PredictionUnit& pu );
......
......@@ -111,6 +111,28 @@ const TFilterCoeff InterpolationFilter::m_chromaFilter[CHROMA_INTERPOLATION_FILT
{ 0, 2, 63, -1 },
};
#if JVET_L0256_BIO
// Two-tap (bilinear) interpolation coefficients, one row per fractional position.
// Row k holds { 64 - 4*k, 4*k } for k = 0..15, so each row sums to 64.
// Rows are indexed by the luma fraction (see filterHor/filterVer with nFilterIdx == 1).
const TFilterCoeff InterpolationFilter::m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE][NTAPS_BILINEAR] =
{
{ 64, 0, },
{ 60, 4, },
{ 56, 8, },
{ 52, 12, },
{ 48, 16, },
{ 44, 20, },
{ 40, 24, },
{ 36, 28, },
{ 32, 32, },
{ 28, 36, },
{ 24, 40, },
{ 20, 44, },
{ 16, 48, },
{ 12, 52, },
{ 8, 56, },
{ 4, 60, },
};
#endif
// ====================================================================================================================
// Private member functions
// ====================================================================================================================
......@@ -443,7 +465,11 @@ void InterpolationFilter::filterVer(const ClpRng& clpRng, Pel const *src, int sr
* \param fmt Chroma format
* \param bitDepth Bit depth
*/
#if JVET_L0256_BIO
void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx )
#else
void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng )
#endif
{
if( frac == 0 )
{
......@@ -452,6 +478,13 @@ void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, i
else if( isLuma( compID ) )
{
CHECK( frac < 0 || frac >= ( LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE ), "Invalid fraction" );
#if JVET_L0256_BIO
if( nFilterIdx == 1 )
{
filterHor<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_bilinearFilter[frac]);
}
else
#endif
{
filterHor<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac] );
}
......@@ -481,7 +514,11 @@ void InterpolationFilter::filterHor( const ComponentID compID, Pel const *src, i
* \param fmt Chroma format
* \param bitDepth Bit depth
*/
#if JVET_L0256_BIO
void InterpolationFilter::filterVer( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx)
#else
void InterpolationFilter::filterVer( const ComponentID compID, Pel const *src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng )
#endif
{
if( frac == 0 )
{
......@@ -490,6 +527,13 @@ void InterpolationFilter::filterVer( const ComponentID compID, Pel const *src, i
else if( isLuma( compID ) )
{
CHECK( frac < 0 || frac >= ( LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE ), "Invalid fraction" );
#if JVET_L0256_BIO
if (nFilterIdx == 1)
{
filterVer<NTAPS_BILINEAR>(clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_bilinearFilter[frac]);
}
else
#endif
{
filterVer<NTAPS_LUMA>( clpRng, src, srcStride, dst, dstStride, width, height, isFirst, isLast, m_lumaFilter[frac] );
}
......
......@@ -56,6 +56,9 @@ class InterpolationFilter
{
static const TFilterCoeff m_lumaFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE][NTAPS_LUMA]; ///< Luma filter taps
static const TFilterCoeff m_chromaFilter[CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE][NTAPS_CHROMA]; ///< Chroma filter taps
#if JVET_L0256_BIO
static const TFilterCoeff m_bilinearFilter[LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS << VCEG_AZ07_MV_ADD_PRECISION_BIT_FOR_STORE][NTAPS_BILINEAR]; ///< bilinear filter taps
#endif
public:
template<bool isFirst, bool isLast>
static void filterCopy( const ClpRng& clpRng, const Pel *src, int srcStride, Pel *dst, int dstStride, int width, int height );
......@@ -87,8 +90,13 @@ public:
void _initInterpolationFilterX86();
#endif
#if JVET_L0256_BIO
void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0);
void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng, int nFilterIdx = 0);
#else
void filterHor(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng );
void filterVer(const ComponentID compID, Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int frac, bool isFirst, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng );
#endif
#if JVET_J0090_MEMORY_BANDWITH_MEASURE
void cacheAssign( CacheModel *cache ) { m_cacheModel = cache; }
#endif
......
......@@ -164,6 +164,10 @@ void RdCost::init()
m_afpDistortFunc[DF_SSE16N_WTD] = RdCost::xGetSSE16N_WTD;
#endif
#if JVET_L0256_BIO
m_afpDistortFunc[DF_SAD_INTERMEDIATE_BITDEPTH] = RdCost::xGetSAD;
#endif
#if ENABLE_SIMD_OPT_DIST
#ifdef TARGET_SIMD_X86
initRdCostX86();
......@@ -318,7 +322,11 @@ void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &c
rcDP.maximumDistortionForEarlyExit = std::numeric_limits<Distortion>::max();
}
#if JVET_L0256_BIO
void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode, int step, bool useHadamard, bool bioApplied )
#else
void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode, int step, bool useHadamard )
#endif
{
rcDP.bitDepth = bitDepth;
rcDP.compID = compID;
......@@ -339,6 +347,14 @@ void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY,
CHECK( useHadamard || rcDP.useMR || subShiftMode > 0, "only used in xDirectMCCost with these default parameters (so far...)" );
#if JVET_L0256_BIO
if ( bioApplied )
{
rcDP.distFunc = m_afpDistortFunc[ DF_SAD_INTERMEDIATE_BITDEPTH ];
return;
}
#endif
if( width == 12 )
{
rcDP.distFunc = m_afpDistortFunc[ DF_SAD12 ];
......
......@@ -154,7 +154,11 @@ public:
void setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRefY , int iRefStride, int bitDepth, ComponentID compID, int subShiftMode = 0, int step = 1, bool useHadamard = false );
void setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, ComponentID compID, bool useHadamard = false );
#if JVET_L0256_BIO
void setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false, bool bioApplied = false );
#else
void setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false );
#endif
double getMotionLambda ( bool bIsTransquantBypass ) { return m_dLambdaMotionSAD[(bIsTransquantBypass && m_costMode==COST_MIXED_LOSSLESS_LOSSY_CODING)?1:0]; }
void selectMotionLambda ( bool bIsTransquantBypass ) { m_motionLambda = getMotionLambda( bIsTransquantBypass ); }
......@@ -266,6 +270,10 @@ private:
static Distortion xGetSAD_SIMD ( const DistParam& pcDtParam );
template< int iWidth, X86_VEXT vext >
static Distortion xGetSAD_NxN_SIMD( const DistParam& pcDtParam );
#if ENABLE_SIMD_OPT_BIO
template< X86_VEXT vext >
static Distortion xGetSAD_IBD_SIMD(const DistParam& pcDtParam);
#endif
template< typename Torg, typename Tcur, X86_VEXT vext >
static Distortion xGetHADs_SIMD ( const DistParam& pcDtParam );
......
......@@ -1734,6 +1734,9 @@ SPSNext::SPSNext( SPS& sps )
, m_IMV ( false )
#if !REMOVE_MV_ADAPT_PREC
, m_highPrecMv ( false )
#endif
#if JVET_L0256_BIO
, m_BIO ( false )
#endif
, m_DisableMotionCompression ( false )
, m_LMChroma ( false )
......
......@@ -806,6 +806,9 @@ private:
bool m_IMV; // 9
#if !REMOVE_MV_ADAPT_PREC
bool m_highPrecMv;
#endif
#if JVET_L0256_BIO
bool m_BIO;
#endif
bool m_DisableMotionCompression; // 13
bool m_LMChroma; // 17
......@@ -880,6 +883,10 @@ public:
#if !REMOVE_MV_ADAPT_PREC
void setUseHighPrecMv(bool b) { m_highPrecMv = b; }
bool getUseHighPrecMv() const { return m_highPrecMv; }
#endif
#if JVET_L0256_BIO
void setUseBIO(bool b) { m_BIO = b; }
bool getUseBIO() const { return m_BIO; }
#endif
void setDisableMotCompress ( bool b ) { m_DisableMotionCompression = b; }
bool getDisableMotCompress () const { return m_DisableMotionCompression; }
......
......@@ -85,6 +85,8 @@
#define L0074_SUBBLOCK_DEBLOCKING 1
#define JVET_L0256_BIO 1
#define JVET_L0646_GBI 1 // Generalized bi-prediction (GBi)
#define JVET_L0628_4TAP_INTRA 1 // 4-tap intra-interpolation filtering with switching between Gaussian and DCT-IF filters for luma component
......@@ -264,6 +266,7 @@
#if ENABLE_SIMD_OPT_BUFFER && JVET_L0646_GBI
#define ENABLE_SIMD_OPT_GBI 1 ///< SIMD optimization for GBi
#endif
#define ENABLE_SIMD_OPT_BIO ( JVET_L0256_BIO && ENABLE_SIMD_OPT ) ///< SIMD optimization for BIO
// End of SIMD optimizations
......@@ -558,7 +561,13 @@ enum DFunc
DF_DEFAULT_ORI = DF_SSE_WTD+8,
#endif
#if JVET_L0256_BIO
DF_SAD_INTERMEDIATE_BITDEPTH = 63,
DF_TOTAL_FUNCTIONS = 64
#else
DF_TOTAL_FUNCTIONS = 63
#endif
};
/// motion vector predictor direction used in AMVP
......
This diff is collapsed.
......@@ -297,6 +297,45 @@ Distortion RdCost::xGetSAD_SIMD( const DistParam &rcDtParam )
return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
}
#if ENABLE_SIMD_OPT_BIO
/** SIMD sum of absolute differences between two blocks of 16-bit samples.
 *
 * Samples are widened to 32 bit before subtraction, so the absolute differences
 * are accumulated in four 32-bit lanes that are reduced at the end.
 *
 * Falls back to RdCost::xGetSAD() when the block is narrower than 4 samples,
 * when bitDepth exceeds 10, or when applyWeight is set.
 *
 * Rows are sub-sampled by (1 << subShift); the accumulated sum is scaled back by
 * the same factor and then reduced by DISTORTION_PRECISION_ADJUSTMENT(bitDepth).
 *
 * \param rcDtParam  distortion parameters (org/cur buffers, bit depth, sub-sampling)
 * \return           the SAD of the two blocks
 *
 * NOTE(review): columns are consumed four at a time, so the width is presumably
 * expected to be a multiple of 4 - confirm against callers.
 * The template parameter vext is not used inside the body.
 */
template< X86_VEXT vext >
Distortion RdCost::xGetSAD_IBD_SIMD(const DistParam &rcDtParam)
{
  if (rcDtParam.org.width < 4 || rcDtParam.bitDepth > 10 || rcDtParam.applyWeight)
    return RdCost::xGetSAD(rcDtParam);

  const short* src0 = (const short*)rcDtParam.org.buf;
  const short* src1 = (const short*)rcDtParam.cur.buf;
  // Columns come from the block width and rows from the block height. The original
  // code had the two swapped, which only gave correct results for square blocks.
  const int width  = rcDtParam.org.width;
  const int height = rcDtParam.org.height;
  int subShift = rcDtParam.subShift;
  int subStep = (1 << subShift);
  const int src0Stride = rcDtParam.org.stride * subStep;
  const int src1Stride = rcDtParam.cur.stride * subStep;

  __m128i vtotalsum32 = _mm_setzero_si128();
  __m128i vzero = _mm_setzero_si128();
  for (int y = 0; y < height; y += subStep)
  {
    for (int x = 0; x < width; x += 4)
    {
      // load four 16-bit samples from each block and sign-extend them to 32 bit
      __m128i vsrc1 = _mm_loadl_epi64((const __m128i*)(src0 + x));
      __m128i vsrc2 = _mm_loadl_epi64((const __m128i*)(src1 + x));
      vsrc1 = _mm_cvtepi16_epi32(vsrc1);
      vsrc2 = _mm_cvtepi16_epi32(vsrc2);
      vtotalsum32 = _mm_add_epi32(vtotalsum32, _mm_abs_epi32(_mm_sub_epi32(vsrc1, vsrc2)));
    }
    src0 += src0Stride;
    src1 += src1Stride;
  }

  // horizontal reduction of the four 32-bit partial sums into lane 0
  vtotalsum32 = _mm_hadd_epi32(vtotalsum32, vzero);
  vtotalsum32 = _mm_hadd_epi32(vtotalsum32, vzero);
  Distortion uiSum = _mm_cvtsi128_si32(vtotalsum32);

  // compensate for the skipped rows
  uiSum <<= subShift;
  return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
}
#endif
template< int iWidth, X86_VEXT vext >
Distortion RdCost::xGetSAD_NxN_SIMD( const DistParam &rcDtParam )
......@@ -2422,6 +2461,10 @@ void RdCost::_initRdCostX86()
m_afpDistortFunc[DF_HAD32] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
m_afpDistortFunc[DF_HAD64] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
m_afpDistortFunc[DF_HAD16N] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
#if ENABLE_SIMD_OPT_BIO
m_afpDistortFunc[DF_SAD_INTERMEDIATE_BITDEPTH] = RdCost::xGetSAD_IBD_SIMD<vext>;
#endif
}
template void RdCost::_initRdCostX86<SIMDX86>();
......
......@@ -794,6 +794,9 @@ void HLSyntaxReader::parseSPSNext( SPSNext& spsNext, const bool usePCM )
READ_FLAG( symbol, "imv_enable_flag" ); spsNext.setUseIMV ( symbol != 0 );
#if !REMOVE_MV_ADAPT_PREC
READ_FLAG( symbol, "high_precision_motion_vectors" ); spsNext.setUseHighPrecMv(symbol != 0);
#endif
#if JVET_L0256_BIO
READ_FLAG( symbol, "bio_enable_flag" ); spsNext.setUseBIO ( symbol != 0 );
#endif
READ_FLAG( symbol, "disable_motion_compression_flag" ); spsNext.setDisableMotCompress ( symbol != 0 );
READ_FLAG( symbol, "lm_chroma_enabled_flag" ); spsNext.setUseLMChroma ( symbol != 0 );
......
......@@ -199,6 +199,9 @@ protected:
bool m_AffineType;
#if !REMOVE_MV_ADAPT_PREC
bool m_highPrecMv;
#endif
#if JVET_L0256_BIO
bool m_BIO;
#endif
bool m_DisableMotionCompression;
unsigned m_MTTMode;
......@@ -632,6 +635,10 @@ public:
#if !REMOVE_MV_ADAPT_PREC
void setHighPrecisionMv ( bool b ) { m_highPrecMv = b; }
bool getHighPrecisionMv () { return m_highPrecMv; }
#endif
#if JVET_L0256_BIO
void setBIO(bool b) { m_BIO = b; }
bool getBIO() const { return m_BIO; }
#endif
void setDisableMotionCompression ( bool b ) { m_DisableMotionCompression = b; }
bool getDisableMotionCompression () const { return m_DisableMotionCompression; }
......
......@@ -843,6 +843,9 @@ void EncLib::xInitSPS(SPS &sps)
sps.getSpsNext().setUseIMV ( m_ImvMode != IMV_OFF );
#if !REMOVE_MV_ADAPT_PREC
sps.getSpsNext().setUseHighPrecMv ( m_highPrecMv );
#endif
#if JVET_L0256_BIO
sps.getSpsNext().setUseBIO ( m_BIO );
#endif
sps.getSpsNext().setUseAffine ( m_Affine );
sps.getSpsNext().setUseAffineType ( m_AffineType );
......
......@@ -534,6 +534,9 @@ void HLSWriter::codeSPSNext( const SPSNext& spsNext, const bool usePCM )
WRITE_FLAG( spsNext.getUseIMV() ? 1 : 0, "imv_enable_flag" );
#if !REMOVE_MV_ADAPT_PREC
WRITE_FLAG( spsNext.getUseHighPrecMv() ? 1 : 0, "high_precision_motion_vectors");
#endif
#if JVET_L0256_BIO
WRITE_FLAG( spsNext.getUseBIO() ? 1 : 0, "bio_enable_flag" );
#endif
WRITE_FLAG( spsNext.getDisableMotCompress() ? 1 : 0, "disable_motion_compression_flag" );
WRITE_FLAG( spsNext.getUseLMChroma() ? 1 : 0, "lm_chroma_enabled_flag" );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment