Commit fe28cdb0 authored by xiuxx

Check in changes fixing the variable-naming problems, as per JVET-K1003

parent a8a2aef0
......@@ -63,7 +63,7 @@ void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T
}
#if JVET_L0256_BIO
void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *pGradX0, const Pel *pGradX1, const Pel *pGradY0, const Pel*pGradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
{
int b = 0;
......@@ -71,125 +71,125 @@ void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Str
{
for (int x = 0; x < width; x += 4)
{
b = tmpx * (pGradX0[x] - pGradX1[x]) + tmpy * (pGradY0[x] - pGradY1[x]);
b = tmpx * (gradX0[x] - gradX1[x]) + tmpy * (gradY0[x] - gradY1[x]);
b = ((b + 1) >> 1);
dst[x] = ClipPel((int16_t)rightShift((src0[x] + src1[x] + b + offset), shift), clpRng);
b = tmpx * (pGradX0[x + 1] - pGradX1[x + 1]) + tmpy * (pGradY0[x + 1] - pGradY1[x + 1]);
b = tmpx * (gradX0[x + 1] - gradX1[x + 1]) + tmpy * (gradY0[x + 1] - gradY1[x + 1]);
b = ((b + 1) >> 1);
dst[x + 1] = ClipPel((int16_t)rightShift((src0[x + 1] + src1[x + 1] + b + offset), shift), clpRng);
b = tmpx * (pGradX0[x + 2] - pGradX1[x + 2]) + tmpy * (pGradY0[x + 2] - pGradY1[x + 2]);
b = tmpx * (gradX0[x + 2] - gradX1[x + 2]) + tmpy * (gradY0[x + 2] - gradY1[x + 2]);
b = ((b + 1) >> 1);
dst[x + 2] = ClipPel((int16_t)rightShift((src0[x + 2] + src1[x + 2] + b + offset), shift), clpRng);
b = tmpx * (pGradX0[x + 3] - pGradX1[x + 3]) + tmpy * (pGradY0[x + 3] - pGradY1[x + 3]);
b = tmpx * (gradX0[x + 3] - gradX1[x + 3]) + tmpy * (gradY0[x + 3] - gradY1[x + 3]);
b = ((b + 1) >> 1);
dst[x + 3] = ClipPel((int16_t)rightShift((src0[x + 3] + src1[x + 3] + b + offset), shift), clpRng);
}
dst += dstStride; src0 += src0Stride; src1 += src1Stride;
pGradX0 += gradStride; pGradX1 += gradStride; pGradY0 += gradStride; pGradY1 += gradStride;
gradX0 += gradStride; gradX1 += gradStride; gradY0 += gradStride; gradY1 += gradStride;
}
}
// gradFilterCore: writes horizontal (gradX) and vertical (gradY) central
// differences of pSrc, each shifted right by 4, then replicates the outermost
// computed values into a one-sample border around them.
// NOTE: this listing is a rename diff, so each changed run of statements
// appears twice: the old spelling first, the new spelling directly after it.
//   pSrc, srcStride - source samples and their row pitch; processing starts
//                     one row down and one column in (pSrc + srcStride + 1).
//   width, height   - the loops cover (width - 2*EXT) x (height - 2*EXT)
//                     samples, EXT being the BIO extend-size constant;
//                     presumably these are the padded block dimensions
//                     (TODO confirm against the caller).
//   gradStride      - row pitch shared by both gradient buffers.
//   gradX, gradY    - output buffers; computed values start at row 1,
//                     column 1, and the border passes fill the row/column
//                     just outside them.
void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* pGradX, Pel* pGradY)
void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY)
{
// Skip the first row and first column: the differences below read one sample
// on every side of the current position.
Pel* piSrcTmp = pSrc + srcStride + 1;
Pel* piGradXTmp = pGradX + gradStride + 1;
Pel* piGradYTmp = pGradY + gradStride + 1;
Pel* srcTmp = pSrc + srcStride + 1;
Pel* gradXTmp = gradX + gradStride + 1;
Pel* gradYTmp = gradY + gradStride + 1;
// Pass 1: central differences for every sample of the inner region.
for (int y = 0; y < (height - 2 * JVET_L0256_BIO_EXTEND_SIZE); y++)
for (int y = 0; y < (height - 2 * BIO_EXTEND_SIZE); y++)
{
for (int x = 0; x < (width - 2 * JVET_L0256_BIO_EXTEND_SIZE); x++)
for (int x = 0; x < (width - 2 * BIO_EXTEND_SIZE); x++)
{
// Vertical gradient: row below minus row above. Horizontal gradient: right
// neighbour minus left neighbour. Both are scaled down by >> 4.
piGradYTmp[x] = (piSrcTmp[x + srcStride] - piSrcTmp[x - srcStride]) >> 4;
piGradXTmp[x] = (piSrcTmp[x + 1] - piSrcTmp[x - 1]) >> 4;
gradYTmp[x] = (srcTmp[x + srcStride] - srcTmp[x - srcStride]) >> 4;
gradXTmp[x] = (srcTmp[x + 1] - srcTmp[x - 1]) >> 4;
}
piGradXTmp += gradStride;
piGradYTmp += gradStride;
piSrcTmp += srcStride;
gradXTmp += gradStride;
gradYTmp += gradStride;
srcTmp += srcStride;
}
// Pass 2: for each computed row, copy the first computed column into the
// column to its left and the last computed column into the column to its right.
piGradXTmp = pGradX + gradStride + 1;
piGradYTmp = pGradY + gradStride + 1;
for (int y = 0; y < (height - 2 * JVET_L0256_BIO_EXTEND_SIZE); y++)
gradXTmp = gradX + gradStride + 1;
gradYTmp = gradY + gradStride + 1;
for (int y = 0; y < (height - 2 * BIO_EXTEND_SIZE); y++)
{
piGradXTmp[-1] = piGradXTmp[0];
piGradXTmp[width - 2 * JVET_L0256_BIO_EXTEND_SIZE] = piGradXTmp[width - 2 * JVET_L0256_BIO_EXTEND_SIZE - 1];
piGradXTmp += gradStride;
gradXTmp[-1] = gradXTmp[0];
gradXTmp[width - 2 * BIO_EXTEND_SIZE] = gradXTmp[width - 2 * BIO_EXTEND_SIZE - 1];
gradXTmp += gradStride;
piGradYTmp[-1] = piGradYTmp[0];
piGradYTmp[width - 2 * JVET_L0256_BIO_EXTEND_SIZE] = piGradYTmp[width - 2 * JVET_L0256_BIO_EXTEND_SIZE - 1];
piGradYTmp += gradStride;
gradYTmp[-1] = gradYTmp[0];
gradYTmp[width - 2 * BIO_EXTEND_SIZE] = gradYTmp[width - 2 * BIO_EXTEND_SIZE - 1];
gradYTmp += gradStride;
}
// Pass 3: copy the first computed row into the row above it and the last
// computed row into the row below it. The copies start at column 0 and span
// `width` samples, so the border columns filled in pass 2 are carried along.
piGradXTmp = pGradX + gradStride;
piGradYTmp = pGradY + gradStride;
::memcpy(piGradXTmp - gradStride, piGradXTmp, sizeof(Pel)*(width));
::memcpy(piGradXTmp + (height - 2 * JVET_L0256_BIO_EXTEND_SIZE)*gradStride, piGradXTmp + (height - 2 * JVET_L0256_BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
::memcpy(piGradYTmp - gradStride, piGradYTmp, sizeof(Pel)*(width));
::memcpy(piGradYTmp + (height - 2 * JVET_L0256_BIO_EXTEND_SIZE)*gradStride, piGradYTmp + (height - 2 * JVET_L0256_BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
gradXTmp = gradX + gradStride;
gradYTmp = gradY + gradStride;
::memcpy(gradXTmp - gradStride, gradXTmp, sizeof(Pel)*(width));
::memcpy(gradXTmp + (height - 2 * BIO_EXTEND_SIZE)*gradStride, gradXTmp + (height - 2 * BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
::memcpy(gradYTmp - gradStride, gradYTmp, sizeof(Pel)*(width));
::memcpy(gradYTmp + (height - 2 * BIO_EXTEND_SIZE)*gradStride, gradYTmp + (height - 2 * BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
}
// calcBIOParCore: for every sample of a widthG x heightG region, combines the
// two source blocks and their X/Y gradients into five per-sample products and
// stores them in the dotProductTemp* arrays.
// NOTE: this listing is a rename diff, so each changed run of statements
// appears twice: the old spelling first, the new spelling directly after it.
//   srcY0Temp, srcY1Temp    - the two source blocks (row pitches src0Stride
//                             and src1Stride).
//   gradX0/1, gradY0/1      - gradients of the two sources; all four share
//                             the row pitch gradStride.
//   dotProductTemp1,2,3,5,6 - outputs, stored with a row pitch of widthG
//                             (there is no output numbered 4 in this signature).
//   widthG, heightG         - extent of the region processed.
void calcBIOParCore(const Pel* pSrcY0Temp, const Pel* pSrcY1Temp, const Pel* pGradX0, const Pel* pGradX1, const Pel* pGradY0, const Pel* pGradY1, int* m_piDotProductTemp1, int* m_piDotProductTemp2, int* m_piDotProductTemp3, int* m_piDotProductTemp5, int* m_piDotProductTemp6, const int iSrc0Stride, const int iSrc1Stride, const int iGradStride, const int iWidthG, const int iHeightG)
void calcBIOParCore(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG)
{
for (int y = 0; y < iHeightG; y++)
for (int y = 0; y < heightG; y++)
{
for (int x = 0; x < iWidthG; x++)
for (int x = 0; x < widthG; x++)
{
// temp  : difference of the two sources, each shifted right by 6 first.
// tempX : sum of the two horizontal gradients, shifted right by 3.
// tempY : sum of the two vertical gradients, shifted right by 3.
// Outputs: 1 = tempX^2, 2 = tempX*tempY, 3 = -tempX*temp,
//          5 = tempY^2, 6 = -tempY*temp.
int temp = (pSrcY0Temp[x] >> 6) - (pSrcY1Temp[x] >> 6);
int tempX = (pGradX0[x] + pGradX1[x]) >> 3;
int tempY = (pGradY0[x] + pGradY1[x]) >> 3;
m_piDotProductTemp1[x] = tempX * tempX;
m_piDotProductTemp2[x] = tempX * tempY;
m_piDotProductTemp3[x] = -tempX * temp;
m_piDotProductTemp5[x] = tempY * tempY;
m_piDotProductTemp6[x] = -tempY * temp;
int temp = (srcY0Temp[x] >> 6) - (srcY1Temp[x] >> 6);
int tempX = (gradX0[x] + gradX1[x]) >> 3;
int tempY = (gradY0[x] + gradY1[x]) >> 3;
dotProductTemp1[x] = tempX * tempX;
dotProductTemp2[x] = tempX * tempY;
dotProductTemp3[x] = -tempX * temp;
dotProductTemp5[x] = tempY * tempY;
dotProductTemp6[x] = -tempY * temp;
}
// Advance every pointer to the next row. Inputs use their own strides; the
// output arrays are packed, so they advance by widthG.
pSrcY0Temp += iSrc0Stride;
pSrcY1Temp += iSrc1Stride;
pGradX0 += iGradStride;
pGradX1 += iGradStride;
pGradY0 += iGradStride;
pGradY1 += iGradStride;
m_piDotProductTemp1 += iWidthG;
m_piDotProductTemp2 += iWidthG;
m_piDotProductTemp3 += iWidthG;
m_piDotProductTemp5 += iWidthG;
m_piDotProductTemp6 += iWidthG;
srcY0Temp += src0Stride;
srcY1Temp += src1Stride;
gradX0 += gradStride;
gradX1 += gradStride;
gradY0 += gradStride;
gradY1 += gradStride;
dotProductTemp1 += widthG;
dotProductTemp2 += widthG;
dotProductTemp3 += widthG;
dotProductTemp5 += widthG;
dotProductTemp6 += widthG;
}
}
// calcBlkGradientCore: adds up five per-sample arrays over a square window and
// accumulates the totals into the reference parameters sGx2, sGy2, sGxGy,
// sGxdI and sGydI. The totals are added to (+=), not reset, so the caller
// controls their starting values.
// NOTE: this listing is a rename diff, so each changed run of statements
// appears twice: the old spelling first, the new spelling directly after it.
//   arraysG*  - input arrays, each pointing at the top-left sample of the
//               unit; rows are `width` ints apart.
//   unitSize  - side length of the unit; the window summed is the unit plus
//               the extend size on every side.
//   sx, sy, height - not referenced anywhere in this body.
void calcBlkGradientCore(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize)
{
int *pGx2 = arraysGx2;
int *pGy2 = arraysGy2;
int *pGxGy = arraysGxGy;
int *pGxdI = arraysGxdI;
int *pGydI = arraysGydI;
int *Gx2 = arraysGx2;
int *Gy2 = arraysGy2;
int *GxGy = arraysGxGy;
int *GxdI = arraysGxdI;
int *GydI = arraysGydI;
// Rewind every pointer by the extend size in rows (JVET_L0256_BIO_EXTEND_SIZE
// before this change, BIO_EXTEND_SIZE after it) so the row loop below starts
// on the extended rows above the unit.
pGx2 -= (JVET_L0256_BIO_EXTEND_SIZE*width);
pGy2 -= (JVET_L0256_BIO_EXTEND_SIZE*width);
pGxGy -= (JVET_L0256_BIO_EXTEND_SIZE*width);
pGxdI -= (JVET_L0256_BIO_EXTEND_SIZE*width);
pGydI -= (JVET_L0256_BIO_EXTEND_SIZE*width);
Gx2 -= (BIO_EXTEND_SIZE*width);
Gy2 -= (BIO_EXTEND_SIZE*width);
GxGy -= (BIO_EXTEND_SIZE*width);
GxdI -= (BIO_EXTEND_SIZE*width);
GydI -= (BIO_EXTEND_SIZE*width);
// y only counts rows (the pointers themselves advance by `width` per row);
// x is a real index and reaches the extended columns left and right of the unit.
for (int y = -JVET_L0256_BIO_EXTEND_SIZE; y < unitSize + JVET_L0256_BIO_EXTEND_SIZE; y++)
for (int y = -BIO_EXTEND_SIZE; y < unitSize + BIO_EXTEND_SIZE; y++)
{
for (int x = -JVET_L0256_BIO_EXTEND_SIZE; x < unitSize + JVET_L0256_BIO_EXTEND_SIZE; x++)
for (int x = -BIO_EXTEND_SIZE; x < unitSize + BIO_EXTEND_SIZE; x++)
{
sGx2 += pGx2[x];
sGy2 += pGy2[x];
sGxGy += pGxGy[x];
sGxdI += pGxdI[x];
sGydI += pGydI[x];
sGx2 += Gx2[x];
sGy2 += Gy2[x];
sGxGy += GxGy[x];
sGxdI += GxdI[x];
sGydI += GydI[x];
}
pGx2 += width;
pGy2 += width;
pGxGy += width;
pGxdI += width;
pGydI += width;
Gx2 += width;
Gy2 += width;
GxGy += width;
GxdI += width;
GydI += width;
}
}
#endif
......
......@@ -69,9 +69,9 @@ struct PelBufferOps
void ( *linTf4 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
void ( *linTf8 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
#if JVET_L0256_BIO
void(*addBIOAvg4) (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *pGradX0, const Pel *pGradX1, const Pel *pGradY0, const Pel*pGradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng);
void(*bioGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* pGradX, Pel* pGradY);
void(*calcBIOPar) (const Pel* pSrcY0Temp, const Pel* pSrcY1Temp, const Pel* pGradX0, const Pel* pGradX1, const Pel* pGradY0, const Pel* pGradY1, int* m_piDotProductTemp1, int* m_piDotProductTemp2, int* m_piDotProductTemp3, int* m_piDotProductTemp5, int* m_piDotProductTemp6, const int iSrc0Stride, const int iSrc1Stride, const int iGradStride, const int iWidthG, const int iHeightG);
void(*addBIOAvg4) (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng);
void(*bioGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY);
void(*calcBIOPar) (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG);
void(*calcBlkGradient)(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize);
#endif
#if ENABLE_SIMD_OPT_GBI
......
......@@ -297,6 +297,12 @@ static const int MAX_NUM_GT2_BINS_4x4SUBBLOCK = 4; ///< max
static const int MAX_NUM_REG_BINS_2x2SUBBLOCK = 8; ///< max number of context-coded bins (incl. gt2 bins) per 2x2 subblock (chroma)
static const int MAX_NUM_GT2_BINS_2x2SUBBLOCK = 2; ///< max number of gt2 bins per 2x2 subblock (chroma)
#endif
#if JVET_L0256_BIO
// Margin, in samples, that the BIO routines add on each side of a block
// (their loops run from -BIO_EXTEND_SIZE to unitSize + BIO_EXTEND_SIZE).
static const int BIO_EXTEND_SIZE = 1;
// Element count of a square of side MAX_CU_SIZE plus that margin on both
// sides; used as the length of the m_dotProduct* int arrays in InterPrediction.
static const int BIO_TEMP_BUFFER_SIZE = (MAX_CU_SIZE + 2 * BIO_EXTEND_SIZE) * (MAX_CU_SIZE + 2 * BIO_EXTEND_SIZE);
#endif
#if JVET_L0646_GBI
static const int GBI_NUM = 5; ///< the number of weight options
static const int GBI_DEFAULT = ((uint8_t)(GBI_NUM >> 1)); ///< Default weighting index representing for w=0.5
......
This diff is collapsed.
......@@ -60,10 +60,6 @@ class Mv;
// Class definition
// ====================================================================================================================
#if JVET_L0256_BIO
#define BIO_TEMP_BUFFER_SIZE ( MAX_CU_SIZE+2*JVET_L0256_BIO_EXTEND_SIZE ) * ( MAX_CU_SIZE+2*JVET_L0256_BIO_EXTEND_SIZE )
#endif
class InterPrediction : public WeightPrediction
{
private:
......@@ -73,11 +69,11 @@ private:
Distortion m_bioSubBlkDistThres;
Distortion m_bioPredSubBlkDist[MAX_NUM_PARTS_IN_CTU];
int m_piDotProduct1[BIO_TEMP_BUFFER_SIZE];
int m_piDotProduct2[BIO_TEMP_BUFFER_SIZE];
int m_piDotProduct3[BIO_TEMP_BUFFER_SIZE];
int m_piDotProduct5[BIO_TEMP_BUFFER_SIZE];
int m_piDotProduct6[BIO_TEMP_BUFFER_SIZE];
int m_dotProduct1[BIO_TEMP_BUFFER_SIZE];
int m_dotProduct2[BIO_TEMP_BUFFER_SIZE];
int m_dotProduct3[BIO_TEMP_BUFFER_SIZE];
int m_dotProduct5[BIO_TEMP_BUFFER_SIZE];
int m_dotProduct6[BIO_TEMP_BUFFER_SIZE];
#endif
protected:
......@@ -97,32 +93,32 @@ protected:
int m_iRefListIdx;
#if JVET_L0256_BIO
Pel* m_pGradX0;
Pel* m_pGradY0;
Pel* m_pGradX1;
Pel* m_pGradY1;
Pel* m_gradX0;
Pel* m_gradY0;
Pel* m_gradX1;
Pel* m_gradY1;
bool m_subPuMC;
int rightShiftMSB(int numer, int denom);
void applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &pcYuvSrc0, const CPelUnitBuf &pcYuvSrc1, const int &iRefIdx0, const int &iRefIdx1, PelUnitBuf &pcYuvDst, const BitDepths &clipBitDepths);
bool xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* pYuvSrc0, const int src0Stride, const Pel* pYuvSrc1, const int src1Stride, const BitDepths &clipBitDepths);
void applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf &yuvSrc0, const CPelUnitBuf &yuvSrc1, const int &refIdx0, const int &refIdx1, PelUnitBuf &yuvDst, const BitDepths &clipBitDepths);
bool xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel* yuvSrc0, const int src0Stride, const Pel* yuvSrc1, const int src1Stride, const BitDepths &clipBitDepths);
void bioSampleExtendBilinearFilter(Pel const* src, int srcStride, Pel *dst, int dstStride, int width, int height, int dim, int fracX, int fracY, bool isLast, const ChromaFormat fmt, const ClpRng& clpRng);
#endif
void xPredInterUni ( const PredictionUnit& pu, const RefPicList& eRefPicList, PelUnitBuf& pcYuvPred, const bool& bi
#if JVET_L0256_BIO
,const bool& bBIOApplied = false
,const bool& bioApplied = false
#endif
);
void xPredInterBi ( PredictionUnit& pu, PelUnitBuf &pcYuvPred );
void xPredInterBlk ( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv& _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng
#if JVET_L0256_BIO
,const bool& bBIOApplied = false
,const bool& bioApplied = false
#endif
);
#if JVET_L0256_BIO
void xWeightedAverage ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bBIOApplied );
void xWeightedAverage ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied );
#else
void xWeightedAverage ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs );
#endif
......
......@@ -323,7 +323,7 @@ void RdCost::setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &c
}
#if JVET_L0256_BIO
void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode, int step, bool useHadamard, bool bBIOApplied )
void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode, int step, bool useHadamard, bool bioApplied )
#else
void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode, int step, bool useHadamard )
#endif
......@@ -348,7 +348,7 @@ void RdCost::setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY,
CHECK( useHadamard || rcDP.useMR || subShiftMode > 0, "only used in xDirectMCCost with these default parameters (so far...)" );
#if JVET_L0256_BIO
if ( bBIOApplied )
if ( bioApplied )
{
rcDP.distFunc = m_afpDistortFunc[ DF_SAD_INTERMEDIATE_BITDEPTH ];
return;
......
......@@ -102,9 +102,6 @@ private:
// for distortion
static FpDistFunc m_afpDistortFunc[DF_TOTAL_FUNCTIONS]; // [eDFunc]
#if JVET_L0256_BIO
#endif
CostMode m_costMode;
double m_distortionWeight[MAX_NUM_COMPONENT]; // only chroma values are used.
double m_dLambda;
......@@ -158,7 +155,7 @@ public:
void setDistParam( DistParam &rcDP, const CPelBuf &org, const Pel* piRefY , int iRefStride, int bitDepth, ComponentID compID, int subShiftMode = 0, int step = 1, bool useHadamard = false );
void setDistParam( DistParam &rcDP, const CPelBuf &org, const CPelBuf &cur, int bitDepth, ComponentID compID, bool useHadamard = false );
#if JVET_L0256_BIO
void setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false, bool bBIOApplied = false );
void setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false, bool bioApplied = false );
#else
void setDistParam( DistParam &rcDP, const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShiftMode = 0, int step = 1, bool useHadamard = false );
#endif
......@@ -273,7 +270,7 @@ private:
static Distortion xGetSAD_SIMD ( const DistParam& pcDtParam );
template< int iWidth, X86_VEXT vext >
static Distortion xGetSAD_NxN_SIMD( const DistParam& pcDtParam );
#if JVET_L0256_BIO
#if ENABLE_SIMD_OPT_BIO
template< X86_VEXT vext >
static Distortion xGetSAD_IBD_SIMD(const DistParam& pcDtParam);
#endif
......
......@@ -76,9 +76,6 @@
#define L0074_SUBBLOCK_DEBLOCKING 1
#define JVET_L0256_BIO 1
#if JVET_L0256_BIO
#define JVET_L0256_BIO_EXTEND_SIZE 1
#endif
#define JVET_L0646_GBI 1 // Generalized bi-prediction (GBi)
......@@ -258,6 +255,7 @@
#if ENABLE_SIMD_OPT_BUFFER
#define ENABLE_SIMD_OPT_GBI 1 ///< SIMD optimization for GBi
#endif
#define ENABLE_SIMD_OPT_BIO ( JVET_L0256_BIO && ENABLE_SIMD_OPT ) ///< SIMD optimization for BIO
// End of SIMD optimizations
......
This diff is collapsed.
......@@ -297,7 +297,7 @@ Distortion RdCost::xGetSAD_SIMD( const DistParam &rcDtParam )
return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
}
#if JVET_L0256_BIO
#if ENABLE_SIMD_OPT_BIO
template< X86_VEXT vext >
Distortion RdCost::xGetSAD_IBD_SIMD(const DistParam &rcDtParam)
{
......@@ -308,14 +308,14 @@ Distortion RdCost::xGetSAD_IBD_SIMD(const DistParam &rcDtParam)
const short* src1 = (const short*)rcDtParam.cur.buf;
int width = rcDtParam.org.height;
int height = rcDtParam.org.width;
int iSubShift = rcDtParam.subShift;
int iSubStep = (1 << iSubShift);
const int src0Stride = rcDtParam.org.stride * iSubStep;
const int src1Stride = rcDtParam.cur.stride * iSubStep;
int subShift = rcDtParam.subShift;
int subStep = (1 << subShift);
const int src0Stride = rcDtParam.org.stride * subStep;
const int src1Stride = rcDtParam.cur.stride * subStep;
__m128i vtotalsum32 = _mm_setzero_si128();
__m128i vzero = _mm_setzero_si128();
for (int y = 0; y < height; y += iSubStep)
for (int y = 0; y < height; y += subStep)
{
for (int x = 0; x < width; x += 4)
{
......@@ -332,7 +332,7 @@ Distortion RdCost::xGetSAD_IBD_SIMD(const DistParam &rcDtParam)
vtotalsum32 = _mm_hadd_epi32(vtotalsum32, vzero);
Distortion uiSum = _mm_cvtsi128_si32(vtotalsum32);
uiSum <<= iSubShift;
uiSum <<= subShift;
return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
}
#endif
......@@ -2462,7 +2462,7 @@ void RdCost::_initRdCostX86()
m_afpDistortFunc[DF_HAD64] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
m_afpDistortFunc[DF_HAD16N] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
#if JVET_L0256_BIO
#if ENABLE_SIMD_OPT_BIO
m_afpDistortFunc[DF_SAD_INTERMEDIATE_BITDEPTH] = RdCost::xGetSAD_IBD_SIMD<vext>;
#endif
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment