Commit e9bf7ed5 authored by Xiang Li

Merge branch 'JVET_L0256_BIO' into 'master'

JVET_L0256: BIO

See merge request jvet/VVCSoftware_VTM!32
parents d1194f46 0f747e97
Pipeline #231 passed with stage
......@@ -143,6 +143,7 @@ IMV : 2
ALF : 1
GBi : 1
GBiFast : 1
BIO : 1
# Fast tools
PBIntraFast : 1
......
......@@ -230,6 +230,9 @@ void EncApp::xInitLibCfg()
m_cEncLib.setAffineType ( m_AffineType );
#if !REMOVE_MV_ADAPT_PREC
m_cEncLib.setHighPrecisionMv (m_highPrecisionMv);
#endif
#if JVET_L0256_BIO
m_cEncLib.setBIO (m_BIO);
#endif
m_cEncLib.setDisableMotionCompression ( m_DisableMotionCompression );
m_cEncLib.setMTTMode ( m_MTT );
......
......@@ -828,7 +828,10 @@ bool EncAppCfg::parseCfg( int argc, char* argv[] )
("HighPrecMv", m_highPrecisionMv, false, "High precision motion vectors for temporal merging (0:off, 1:on) [default: off]")
#endif
("Affine", m_Affine, false, "Enable affine prediction (0:off, 1:on) [default: off]")
( "AffineType", m_AffineType, true, "Enable affine type prediction (0:off, 1:on) [default: on]" )
("AffineType", m_AffineType, true, "Enable affine type prediction (0:off, 1:on) [default: on]" )
#if JVET_L0256_BIO
("BIO", m_BIO, false, "Enable bi-directional optical flow")
#endif
("DisableMotCompression", m_DisableMotionCompression, false, "Disable motion data compression for all modes")
("IMV", m_ImvMode, 2, "Adaptive MV precision Mode (IMV)\n"
"\t0: disabled IMV\n"
......@@ -1943,6 +1946,9 @@ bool EncAppCfg::xCheckParameter()
#if !REMOVE_MV_ADAPT_PREC
xConfirmPara( m_highPrecisionMv, "High precision MV for temporal merging can only be used with NEXT profile" );
xConfirmPara( m_Affine, "Affine is only allowed with NEXT profile" );
#endif
#if JVET_L0256_BIO
xConfirmPara( m_BIO, "BIO only allowed with NEXT profile" );
#endif
xConfirmPara( m_DisableMotionCompression, "Disable motion data compression only allowed with NEXT profile" );
xConfirmPara( m_MTT, "Multi type tree is only allowed with NEXT profile" );
......@@ -3143,6 +3149,9 @@ void EncAppCfg::xPrintParameter()
if( !m_QTBT ) msg( VERBOSE, "IMVMaxCand:%d ", m_ImvMaxCand );
#if !REMOVE_MV_ADAPT_PREC
msg(VERBOSE, "HighPrecMv:%d ", m_highPrecisionMv);
#endif
#if JVET_L0256_BIO
msg( VERBOSE, "BIO:%d ", m_BIO );
#endif
msg( VERBOSE, "DisMDC:%d ", m_DisableMotionCompression );
msg( VERBOSE, "MTT:%d ", m_MTT );
......
......@@ -213,6 +213,9 @@ protected:
bool m_AffineType;
#if !REMOVE_MV_ADAPT_PREC
bool m_highPrecisionMv;
#endif
#if JVET_L0256_BIO
bool m_BIO;
#endif
bool m_DisableMotionCompression;
unsigned m_MTT;
......
......@@ -62,6 +62,138 @@ void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T
#undef ADD_AVG_CORE_INC
}
#if JVET_L0256_BIO
/**
 * Bi-prediction average with a gradient-based correction term.
 *
 * For every sample the correction
 *   b = ( tmpx * (gradX0 - gradX1) + tmpy * (gradY0 - gradY1) + 1 ) >> 1
 * is added to src0 + src1 + offset; the sum is right-shifted by `shift`
 * and clipped to `clpRng` before being stored in dst.
 *
 * Samples are handled in groups of four along x, so every started group is
 * written in full (columns up to the next multiple of 4 at or above `width`).
 *
 * \param src0,src1        the two prediction blocks (strides src0Stride / src1Stride)
 * \param dst              output block (stride dstStride)
 * \param gradX0,gradX1    horizontal gradients of the two predictions
 * \param gradY0,gradY1    vertical gradients of the two predictions
 * \param gradStride       common stride of all four gradient buffers
 * \param width,height     size of the area to process
 * \param tmpx,tmpy        multipliers applied to the gradient differences
 * \param shift,offset     final normalisation shift and the offset added before it
 * \param clpRng           clipping range applied to each output sample
 */
void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
{
  for (int row = 0; row < height; row++)
  {
    for (int groupStart = 0; groupStart < width; groupStart += 4)
    {
      // one group of four neighbouring samples
      for (int col = groupStart; col < groupStart + 4; col++)
      {
        const int gradTerm   = tmpx * (gradX0[col] - gradX1[col]) + tmpy * (gradY0[col] - gradY1[col]);
        const int correction = (gradTerm + 1) >> 1;
        const int sum        = src0[col] + src1[col] + correction + offset;
        dst[col] = ClipPel(static_cast<int16_t>(rightShift(sum, shift)), clpRng);
      }
    }

    // advance all buffers to the next row
    dst    += dstStride;
    src0   += src0Stride;
    src1   += src1Stride;
    gradX0 += gradStride;
    gradX1 += gradStride;
    gradY0 += gradStride;
    gradY1 += gradStride;
  }
}
/**
 * Computes horizontal and vertical gradients of a block and pads them by
 * replication.
 *
 * The gradients are central differences, (next - previous) >> 4, evaluated
 * for the (width - 2*BIO_EXTEND_SIZE) x (height - 2*BIO_EXTEND_SIZE) region
 * that starts one row and one column into the buffers. Afterwards the
 * column left and right of that region, and then the row above and below it,
 * are filled by copying the nearest computed (or already padded) values.
 *
 * \param pSrc        source samples (stride srcStride); the first computed
 *                    position is pSrc + srcStride + 1
 * \param width       full width of the gradient buffers, borders included
 * \param height      full height of the gradient buffers, borders included
 * \param gradStride  stride of gradX and gradY
 * \param gradX       output: horizontal gradient
 * \param gradY       output: vertical gradient
 */
void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY)
{
  const int innerWidth  = width - 2 * BIO_EXTEND_SIZE;
  const int innerHeight = height - 2 * BIO_EXTEND_SIZE;

  // Pass 1: central differences inside the region
  {
    const Pel* srcRow = pSrc + srcStride + 1;
    Pel*       gxRow  = gradX + gradStride + 1;
    Pel*       gyRow  = gradY + gradStride + 1;
    for (int row = 0; row < innerHeight; row++)
    {
      for (int col = 0; col < innerWidth; col++)
      {
        gyRow[col] = (srcRow[col + srcStride] - srcRow[col - srcStride]) >> 4;
        gxRow[col] = (srcRow[col + 1] - srcRow[col - 1]) >> 4;
      }
      gxRow  += gradStride;
      gyRow  += gradStride;
      srcRow += srcStride;
    }
  }

  // Pass 2: replicate the first / last computed column outwards
  {
    Pel* gxRow = gradX + gradStride + 1;
    Pel* gyRow = gradY + gradStride + 1;
    for (int row = 0; row < innerHeight; row++)
    {
      gxRow[-1]         = gxRow[0];
      gxRow[innerWidth] = gxRow[innerWidth - 1];
      gxRow += gradStride;

      gyRow[-1]         = gyRow[0];
      gyRow[innerWidth] = gyRow[innerWidth - 1];
      gyRow += gradStride;
    }
  }

  // Pass 3: replicate the first / last padded row upwards / downwards
  const auto rowBytes   = sizeof(Pel)*(width);
  Pel*       gxFirstRow = gradX + gradStride;
  Pel*       gyFirstRow = gradY + gradStride;

  ::memcpy(gxFirstRow - gradStride, gxFirstRow, rowBytes);
  ::memcpy(gxFirstRow + innerHeight*gradStride, gxFirstRow + (innerHeight - 1)*gradStride, rowBytes);
  ::memcpy(gyFirstRow - gradStride, gyFirstRow, rowBytes);
  ::memcpy(gyFirstRow + innerHeight*gradStride, gyFirstRow + (innerHeight - 1)*gradStride, rowBytes);
}
void calcBIOParCore(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG)
{
for (int y = 0; y < heightG; y++)
{
for (int x = 0; x < widthG; x++)
{
int temp = (srcY0Temp[x] >> 6) - (srcY1Temp[x] >> 6);
int tempX = (gradX0[x] + gradX1[x]) >> 3;
int tempY = (gradY0[x] + gradY1[x]) >> 3;
dotProductTemp1[x] = tempX * tempX;
dotProductTemp2[x] = tempX * tempY;
dotProductTemp3[x] = -tempX * temp;
dotProductTemp5[x] = tempY * tempY;
dotProductTemp6[x] = -tempY * temp;
}
srcY0Temp += src0Stride;
srcY1Temp += src1Stride;
gradX0 += gradStride;
gradX1 += gradStride;
gradY0 += gradStride;
gradY1 += gradStride;
dotProductTemp1 += widthG;
dotProductTemp2 += widthG;
dotProductTemp3 += widthG;
dotProductTemp5 += widthG;
dotProductTemp6 += widthG;
}
}
/**
 * Accumulates the five product arrays over one sub-block window.
 *
 * The window covers rows and columns -BIO_EXTEND_SIZE .. unitSize +
 * BIO_EXTEND_SIZE - 1 relative to the given array pointers, i.e. the
 * unitSize x unitSize sub-block plus a border of BIO_EXTEND_SIZE samples on
 * each side. The sums are ADDED to sGx2, sGy2, sGxGy, sGxdI and sGydI, so the
 * caller is expected to initialise them.
 *
 * \param sx, sy    not used by this implementation
 * \param arrays*   pointers to the top-left sample of the sub-block inside
 *                  each product array
 * \param width     row pitch of the product arrays
 * \param height    not used by this implementation
 * \param unitSize  sub-block size
 */
void calcBlkGradientCore(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize)
{
  const int winBegin = -BIO_EXTEND_SIZE;
  const int winEnd   = unitSize + BIO_EXTEND_SIZE;   // exclusive

  for (int row = winBegin; row < winEnd; row++)
  {
    // negative rows address the lines above the sub-block
    const int rowBase = row * width;
    for (int col = winBegin; col < winEnd; col++)
    {
      const int idx = rowBase + col;
      sGx2  += arraysGx2[idx];
      sGy2  += arraysGy2[idx];
      sGxGy += arraysGxGy[idx];
      sGxdI += arraysGxdI[idx];
      sGydI += arraysGydI[idx];
    }
  }
}
#endif
#if ENABLE_SIMD_OPT_GBI && JVET_L0646_GBI
void removeWeightHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height, int shift, int gbiWeight)
{
......@@ -138,6 +270,13 @@ PelBufferOps::PelBufferOps()
linTf4 = linTfCore<Pel>;
linTf8 = linTfCore<Pel>;
#if JVET_L0256_BIO
addBIOAvg4 = addBIOAvgCore;
bioGradFilter = gradFilterCore;
calcBIOPar = calcBIOParCore;
calcBlkGradient = calcBlkGradientCore;
#endif
#if ENABLE_SIMD_OPT_GBI
removeWeightHighFreq8 = removeWeightHighFreq;
removeWeightHighFreq4 = removeWeightHighFreq;
......
......@@ -68,6 +68,12 @@ struct PelBufferOps
void ( *reco8 ) ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height, const ClpRng& clpRng );
void ( *linTf4 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
void ( *linTf8 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
#if JVET_L0256_BIO
void(*addBIOAvg4) (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng);
void(*bioGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY);
void(*calcBIOPar) (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG);
void(*calcBlkGradient)(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize);
#endif
#if ENABLE_SIMD_OPT_GBI
void ( *removeWeightHighFreq8) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
void ( *removeWeightHighFreq4) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
......
......@@ -318,6 +318,12 @@ static const int MAX_NUM_GT2_BINS_4x4SUBBLOCK = 4; ///< max
static const int MAX_NUM_REG_BINS_2x2SUBBLOCK = 8; ///< max number of context-coded bins (incl. gt2 bins) per 2x2 subblock (chroma)
static const int MAX_NUM_GT2_BINS_2x2SUBBLOCK = 2; ///< max number of gt2 bins per 2x2 subblock (chroma)
#endif
#if JVET_L0256_BIO
static const int BIO_EXTEND_SIZE = 1;
static const int BIO_TEMP_BUFFER_SIZE = (MAX_CU_SIZE + 2 * BIO_EXTEND_SIZE) * (MAX_CU_SIZE + 2 * BIO_EXTEND_SIZE);
#endif
#if JVET_L0646_GBI
static const int GBI_NUM = 5; ///< the number of weight options
static const int GBI_DEFAULT = ((uint8_t)(GBI_NUM >> 1)); ///< Default weighting index representing for w=0.5
......@@ -397,6 +403,10 @@ static const int NTAPS_CHROMA = 4; ///< Numb
static const int MAX_LADF_INTERVALS = 5; /// max number of luma adaptive deblocking filter qp offset intervals
#endif
#if JVET_L0256_BIO
static const int NTAPS_BILINEAR = 2; ///< Number of taps for bilinear filter
#endif
// ====================================================================================================================
// Macro functions
// ====================================================================================================================
......
......@@ -55,6 +55,13 @@ InterPrediction::InterPrediction()
m_currChromaFormat( NUM_CHROMA_FORMAT )
, m_maxCompIDToPred ( MAX_NUM_COMPONENT )
, m_pcRdCost ( nullptr )
#if JVET_L0256_BIO
, m_gradX0(nullptr)
, m_gradY0(nullptr)
, m_gradX1(nullptr)
, m_gradY1(nullptr)
, m_subPuMC(false)
#endif
{
for( uint32_t ch = 0; ch < MAX_NUM_COMPONENT; ch++ )
{
......@@ -109,6 +116,13 @@ void InterPrediction::destroy()
m_filteredBlockTmp[i][c] = nullptr;
}
}
#if JVET_L0256_BIO
xFree(m_gradX0); m_gradX0 = nullptr;
xFree(m_gradY0); m_gradY0 = nullptr;
xFree(m_gradX1); m_gradX1 = nullptr;
xFree(m_gradY1); m_gradY1 = nullptr;
#endif
}
void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
......@@ -127,8 +141,13 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
{
for( uint32_t c = 0; c < MAX_NUM_COMPONENT; c++ )
{
#if JVET_L0256_BIO
int extWidth = MAX_CU_SIZE + (2 * BIO_EXTEND_SIZE + 2) + 16;
int extHeight = MAX_CU_SIZE + (2 * BIO_EXTEND_SIZE + 2) + 1;
#else
int extWidth = MAX_CU_SIZE + 16;
int extHeight = MAX_CU_SIZE + 1;
#endif
for( uint32_t i = 0; i < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS; i++ )
{
m_filteredBlockTmp[i][c] = ( Pel* ) xMalloc( Pel, ( extWidth + 4 ) * ( extHeight + 7 + 4 ) );
......@@ -149,6 +168,12 @@ void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC )
m_iRefListIdx = -1;
#if JVET_L0256_BIO
m_gradX0 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE);
m_gradY0 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE);
m_gradX1 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE);
m_gradY1 = (Pel*)xMalloc(Pel, BIO_TEMP_BUFFER_SIZE);
#endif
}
#if !JVET_J0090_MEMORY_BANDWITH_MEASURE
......@@ -264,6 +289,10 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R
int fstStep = (!verMC ? puHeight : puWidth);
int secStep = (!verMC ? puWidth : puHeight);
#if JVET_L0256_BIO
m_subPuMC = true;
#endif
for (int fstDim = fstStart; fstDim < fstEnd; fstDim += fstStep)
{
for (int secDim = secStart; secDim < secEnd; secDim += secStep)
......@@ -299,10 +328,16 @@ void InterPrediction::xSubPuMC( PredictionUnit& pu, PelUnitBuf& predBuf, const R
secDim = later - secStep;
}
}
#if JVET_L0256_BIO
m_subPuMC = false;
#endif
}
void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi
#if JVET_L0256_BIO
,const bool& bioApplied /*=false*/
#endif
)
{
const SPS &sps = *pu.cs->sps;
......@@ -332,11 +367,17 @@ void InterPrediction::xPredInterUni(const PredictionUnit& pu, const RefPicList&
const ComponentID compID = ComponentID( comp );
if ( pu.cu->affine )
{
#if JVET_L0256_BIO
CHECK( bioApplied, "BIO is not allowed with affine" );
#endif
xPredAffineBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx ), mv, pcYuvPred, bi, pu.cu->slice->clpRng( compID ) );
}
else
{
xPredInterBlk( compID, pu, pu.cu->slice->getRefPic( eRefPicList, iRefIdx ), mv[0], pcYuvPred, bi, pu.cu->slice->clpRng( compID )
#if JVET_L0256_BIO
,bioApplied
#endif
);
}
}
......@@ -347,6 +388,36 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
const PPS &pps = *pu.cs->pps;
const Slice &slice = *pu.cs->slice;
#if JVET_L0256_BIO
bool bioApplied = false;
if (pu.cs->sps->getSpsNext().getUseBIO())
{
if (pu.cu->affine || m_subPuMC)
{
bioApplied = false;
}
else
{
const bool biocheck0 = !(pps.getWPBiPred() && slice.getSliceType() == B_SLICE);
const bool biocheck1 = !(pps.getUseWP() && slice.getSliceType() == P_SLICE);
if (biocheck0
&& biocheck1
&& PU::isBiPredFromDifferentDir(pu)
&& !(pu.Y().height == 4 || (pu.Y().width == 4 && pu.Y().height == 8))
)
{
bioApplied = true;
}
}
#if JVET_L0646_GBI
if (pu.cu->cs->sps->getSpsNext().getUseGBi() && bioApplied && pu.cu->GBiIdx != GBI_DEFAULT)
{
bioApplied = false;
}
#endif
}
#endif
for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
{
......@@ -367,6 +438,9 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
if (pu.refIdx[0] >= 0 && pu.refIdx[1] >= 0)
{
xPredInterUni ( pu, eRefPicList, pcMbBuf, true
#if JVET_L0256_BIO
,bioApplied
#endif
);
}
else
......@@ -399,13 +473,19 @@ void InterPrediction::xPredInterBi(PredictionUnit& pu, PelUnitBuf &pcYuvPred)
}
else
{
#if JVET_L0256_BIO
xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), bioApplied );
#else
xWeightedAverage( pu, srcPred0, srcPred1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs() );
#endif
}
}
void InterPrediction::xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
)
#if JVET_L0256_BIO
,const bool& bioApplied /*=false*/
#endif
)
{
JVET_J0090_SET_REF_PICTURE( refPic, compID );
const ChromaFormat chFmt = pu.chromaFormat;
......@@ -446,24 +526,75 @@ void InterPrediction::xPredInterBlk ( const ComponentID& compID, const Predictio
refBuf = refPic->getRecoBuf( CompArea( compID, chFmt, offset, pu.blocks[compID].size() ) );
}
#if JVET_L0256_BIO
// backup data
int backupWidth = width;
int backupHeight = height;
Pel *backupDstBufPtr = dstBuf.buf;
int backupDstBufStride = dstBuf.stride;
if (bioApplied && compID == COMPONENT_Y)
{
width = width + 2 * BIO_EXTEND_SIZE + 2;
height = height + 2 * BIO_EXTEND_SIZE + 2;
// change MC output
dstBuf.stride = width;
dstBuf.buf = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + 2 * dstBuf.stride + 2;
}
#endif
if( yFrac == 0 )
{
#if JVET_L0256_BIO
m_if.filterHor(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng);
#else
m_if.filterHor(compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, width, height, xFrac, rndRes, chFmt, clpRng);
#endif
}
else if( xFrac == 0 )
{
#if JVET_L0256_BIO
m_if.filterVer(compID, (Pel*)refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng);
#else
m_if.filterVer(compID, (Pel*) refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, width, height, yFrac, true, rndRes, chFmt, clpRng);
#endif
}
else
{
PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][compID], pu.blocks[compID]);
#if JVET_L0256_BIO
tmpBuf.stride = dstBuf.stride;
#endif
int vFilterSize = isLuma(compID) ? NTAPS_LUMA : NTAPS_CHROMA;
#if JVET_L0256_BIO
m_if.filterHor(compID, (Pel*)refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng);
#else
m_if.filterHor(compID, (Pel*) refBuf.buf - ((vFilterSize >> 1) - 1) * refBuf.stride, refBuf.stride, tmpBuf.buf, tmpBuf.stride, width, height + vFilterSize - 1, xFrac, false, chFmt, clpRng);
#endif
JVET_J0090_SET_CACHE_ENABLE( false );
#if JVET_L0256_BIO
m_if.filterVer(compID, (Pel*)tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng);
#else
m_if.filterVer(compID, (Pel*) tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, width, height, yFrac, false, rndRes, chFmt, clpRng);
#endif
JVET_J0090_SET_CACHE_ENABLE( true );
}
#if JVET_L0256_BIO
if (bioApplied && compID == COMPONENT_Y)
{
refBuf.buf = refBuf.buf - refBuf.stride - 1;
dstBuf.buf = m_filteredBlockTmp[2 + m_iRefListIdx][compID] + dstBuf.stride + 1;
bioSampleExtendBilinearFilter(refBuf.buf, refBuf.stride, dstBuf.buf, dstBuf.stride, width - 2, height - 2, 1, xFrac, yFrac, rndRes, chFmt, clpRng);
// restore data
width = backupWidth;
height = backupHeight;
dstBuf.buf = backupDstBufPtr;
dstBuf.stride = backupDstBufStride;
}
#endif
}
void InterPrediction::xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng )
......@@ -612,8 +743,224 @@ int getMSB( unsigned x )
return msb;
}
#if JVET_L0256_BIO
void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths)
{
const int height = yuvDst.Y().height;
const int width = yuvDst.Y().width;
int heightG = height + 2 * BIO_EXTEND_SIZE;
int widthG = width + 2 * BIO_EXTEND_SIZE;
int offsetPos = widthG*BIO_EXTEND_SIZE + BIO_EXTEND_SIZE;
Pel* gradX0 = m_gradX0;
Pel* gradX1 = m_gradX1;
Pel* gradY0 = m_gradY0;
Pel* gradY1 = m_gradY1;
int stridePredMC = widthG + 2;
const Pel* srcY0 = m_filteredBlockTmp[2][COMPONENT_Y] + stridePredMC + 1;
const Pel* srcY1 = m_filteredBlockTmp[3][COMPONENT_Y] + stridePredMC + 1;
const int src0Stride = stridePredMC;
const int src1Stride = stridePredMC;
Pel* dstY = yuvDst.Y().buf;
const int dstStride = yuvDst.Y().stride;
const Pel* srcY0Temp = srcY0;
const Pel* srcY1Temp = srcY1;
for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
{
Pel* dstTempPtr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + stridePredMC + 1;
Pel* gradY = (refList == 0) ? m_gradY0 : m_gradY1;
Pel* gradX = (refList == 0) ? m_gradX0 : m_gradX1;
g_pelBufOP.bioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY);
Pel* padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 2;
for (int y = 0; y< height; y++)
{
padStr[-1] = padStr[0];
padStr[width] = padStr[width - 1];
padStr += stridePredMC;
}
padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 1;
::memcpy(padStr - stridePredMC, padStr, sizeof(Pel)*(widthG));
::memcpy(padStr + height*stridePredMC, padStr + (height - 1)*stridePredMC, sizeof(Pel)*(widthG));
}
const ClpRng& clpRng = pu.cu->cs->slice->clpRng(COMPONENT_Y);
const int bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)];
const int shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
const int limit = ((int)1 << (4 + IF_INTERNAL_PREC - bitDepth - 5));
int* dotProductTemp1 = m_dotProduct1;
int* dotProductTemp2 = m_dotProduct2;
int* dotProductTemp3 = m_dotProduct3;
int* dotProductTemp5 = m_dotProduct5;
int* dotProductTemp6 = m_dotProduct6;
g_pelBufOP.calcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG);
int xUnit = (width >> 2);
int yUnit = (height >> 2);
Pel *dstY0 = dstY;
gradX0 = m_gradX0; gradX1 = m_gradX1;
gradY0 = m_gradY0; gradY1 = m_gradY1;
for (int yu = 0; yu < yUnit; yu++)
{
for (int xu = 0; xu < xUnit; xu++)
{
if (m_bioPredSubBlkDist[yu*xUnit + xu] < m_bioSubBlkDistThres)
{
srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src1Stride + xu) << 2);
dstY0 = dstY + ((yu*dstStride + xu) << 2);
g_pelBufOP.addAvg4(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY0, dstStride, (1 << 2), (1 << 2), shiftNum, offset, clpRng);
continue;
}
int sGxdI = 0, sGydI = 0, sGxGy = 0, sGx2 = 0, sGy2 = 0;
int tmpx = 0, tmpy = 0;
dotProductTemp1 = m_dotProduct1 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp2 = m_dotProduct2 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp3 = m_dotProduct3 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp5 = m_dotProduct5 + offsetPos + ((yu*widthG + xu) << 2);
dotProductTemp6 = m_dotProduct6 + offsetPos + ((yu*widthG + xu) << 2);
g_pelBufOP.calcBlkGradient(xu << 2, yu << 2, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, sGx2, sGy2, sGxGy, sGxdI, sGydI, widthG, heightG, (1 << 2));
if (sGx2 > 0)
{
tmpx = rightShiftMSB(sGxdI << 3, sGx2);
tmpx = Clip3(-limit, limit, tmpx);
}
if (sGy2 > 0)
{
int mainsGxGy = sGxGy >> 12;
int secsGxGy = sGxGy & ((1 << 12) - 1);
int tmpData = tmpx * mainsGxGy;
tmpData = ((tmpData << 12) + tmpx*secsGxGy) >> 1;
tmpy = rightShiftMSB(((sGydI << 3) - tmpData), sGy2);
tmpy = Clip3(-limit, limit, tmpy);
}
srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*src0Stride + xu) << 2);
gradX0 = m_gradX0 + offsetPos + ((yu*widthG + xu) << 2);
gradX1 = m_gradX1 + offsetPos + ((yu*widthG + xu) << 2);
gradY0 = m_gradY0 + offsetPos + ((yu*widthG + xu) << 2);
gradY1 = m_gradY1 + offsetPos + ((yu*widthG + xu) << 2);
dstY0 = dstY + ((yu*dstStride + xu) << 2);
g_pelBufOP.addBIOAvg4(srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY0, dstStride, gradX0, gradX1, gradY0, gradY1, widthG, (1 << 2), (1 << 2), (int)tmpx, (int)tmpy, shiftNum, offset, clpRng);
} // xu
} // yu
}
void InterPrediction::bioSampleExtendBilinearFilter(Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int dim, int fracX, int fracY, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng)
{
Pel const* pSrc = NULL;
Pel* pDst = NULL;
int vFilterSize = NTAPS_LUMA;
int widthTmp = 0;
int heightTmp = 0;
for (int cand = 0; cand < 4; cand++) // top, left, bottom and right
{
if (cand == 0) // top
{
pSrc = src;
pDst = dst;
widthTmp = width;
heightTmp = dim;
}
else if (cand == 1) // left
{
pSrc = src + dim*srcStride;
pDst = dst + dim*dstStride;
widthTmp = dim;
heightTmp = height - 2 * dim;
}
else if (cand == 2) // bottom
{
pSrc = src + (height - dim)*srcStride;
pDst = dst + (height - dim)*dstStride;
widthTmp = width;
heightTmp = dim;
}
else if (cand == 3) // right
{
pSrc = src + dim*srcStride + width - dim;
pDst = dst + dim*dstStride + width - dim;
widthTmp = dim;
heightTmp = height - 2 * dim;
}
if (fracY == 0)
{
m_if.filterHor(COMPONENT_Y, pSrc, srcStride, pDst, dstStride, widthTmp, heightTmp, fracX, isLast, fmt, clpRng, 1);
}
else if (fracX == 0)
{
m_if.filterVer(COMPONENT_Y, pSrc, srcStride, pDst, dstStride, widthTmp, heightTmp, fracY, true, isLast, fmt, clpRng, 1);
}
else
{
PelBuf tmpBuf = PelBuf(m_filteredBlockTmp[0][COMPONENT_Y], Size(width, height));
tmpBuf.stride = width;
m_if.filterHor(COMPONENT_Y, pSrc - ((vFilterSize >> 1) - 1) * srcStride, srcStride, tmpBuf.buf, tmpBuf.stride, widthTmp, heightTmp + vFilterSize - 1, fracX, false, fmt, clpRng, 1);
m_if.filterVer(COMPONENT_Y, tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, pDst, dstStride, widthTmp,