Commit 1dd480e3 authored by Karsten Suehring

remove macro ENABLE_SIMD_OPT_BIO

parent 9c470123
......@@ -268,12 +268,10 @@ PelBufferOps::PelBufferOps()
linTf4 = linTfCore<Pel>;
linTf8 = linTfCore<Pel>;
#if ENABLE_SIMD_OPT_BIO
addBIOAvg4 = addBIOAvgCore;
bioGradFilter = gradFilterCore;
calcBIOPar = calcBIOParCore;
calcBlkGradient = calcBlkGradientCore;
#endif
#if ENABLE_SIMD_OPT_GBI
removeWeightHighFreq8 = removeWeightHighFreq;
......
......@@ -68,12 +68,10 @@ struct PelBufferOps
void ( *reco8 ) ( const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, int width, int height, const ClpRng& clpRng );
void ( *linTf4 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
void ( *linTf8 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip );
#if ENABLE_SIMD_OPT_BIO
void(*addBIOAvg4) (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng);
void(*bioGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY);
void(*calcBIOPar) (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG);
void(*calcBlkGradient)(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize);
#endif
#if ENABLE_SIMD_OPT_GBI
void ( *removeWeightHighFreq8) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
void ( *removeWeightHighFreq4) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight);
......
......@@ -996,148 +996,22 @@ bool InterPrediction::xCalcBiPredSubBlkDist(const PredictionUnit &pu, const Pel*
// Writes dst = clip( (src0 + src1 + b + offset) >> shift ) for every sample, where the
// correction term b is derived from the gradient differences weighted by tmpx / tmpy.
// When ENABLE_SIMD_OPT_BIO is non-zero the work is forwarded to g_pelBufOP.addBIOAvg4;
// otherwise the scalar loop below is used.
void InterPrediction::xAddBIOAvg4(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
{
#if ENABLE_SIMD_OPT_BIO
  g_pelBufOP.addBIOAvg4(src0, src0Stride, src1, src1Stride, dst, dstStride, gradX0, gradX1, gradY0, gradY1, gradStride, width, height, tmpx, tmpy, shift, offset, clpRng);
#else
  for (int row = 0; row < height; row++)
  {
    // Samples are handled in groups of four columns; a started group is always
    // processed completely, even if it reaches past 'width'.
    for (int groupStart = 0; groupStart < width; groupStart += 4)
    {
      for (int col = groupStart; col < groupStart + 4; col++)
      {
        // Gradient-based correction, halved with rounding.
        int corr = tmpx * (gradX0[col] - gradX1[col]) + tmpy * (gradY0[col] - gradY1[col]);
        corr = ((corr + 1) >> 1);
        dst[col] = ClipPel((int16_t)rightShift((src0[col] + src1[col] + corr + offset), shift), clpRng);
      }
    }

    // Step every buffer to its next row.
    dst    += dstStride;
    src0   += src0Stride;
    src1   += src1Stride;
    gradX0 += gradStride;
    gradX1 += gradStride;
    gradY0 += gradStride;
    gradY1 += gradStride;
  }
#endif
}
// Fills gradX / gradY with horizontal and vertical central differences of pSrc
// (each difference shifted right by 4) and then replicates the outermost computed
// values into the surrounding one-sample border.
// When ENABLE_SIMD_OPT_BIO is non-zero the work is forwarded to g_pelBufOP.bioGradFilter.
void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY)
{
#if ENABLE_SIMD_OPT_BIO
  g_pelBufOP.bioGradFilter(pSrc, srcStride, width, height, gradStride, gradX, gradY);
#else
  // Extent of the region whose gradients are actually computed.
  const auto innerW = width - 2 * BIO_EXTEND_SIZE;
  const auto innerH = height - 2 * BIO_EXTEND_SIZE;

  // Pass 1: central differences, starting one row down and one column in.
  // NOTE(review): the start offset is a literal 1 while the extents use BIO_EXTEND_SIZE;
  // this looks like it assumes BIO_EXTEND_SIZE == 1 - confirm.
  Pel* srcRow = pSrc + srcStride + 1;
  Pel* gxRow  = gradX + gradStride + 1;
  Pel* gyRow  = gradY + gradStride + 1;
  for (int row = 0; row < innerH; row++)
  {
    for (int col = 0; col < innerW; col++)
    {
      gyRow[col] = (srcRow[col + srcStride] - srcRow[col - srcStride]) >> 4;
      gxRow[col] = (srcRow[col + 1] - srcRow[col - 1]) >> 4;
    }
    gxRow  += gradStride;
    gyRow  += gradStride;
    srcRow += srcStride;
  }

  // Pass 2: copy the first / last computed column outwards by one sample on each row.
  gxRow = gradX + gradStride + 1;
  gyRow = gradY + gradStride + 1;
  for (int row = 0; row < innerH; row++)
  {
    gxRow[-1]     = gxRow[0];
    gxRow[innerW] = gxRow[innerW - 1];
    gxRow += gradStride;

    gyRow[-1]     = gyRow[0];
    gyRow[innerW] = gyRow[innerW - 1];
    gyRow += gradStride;
  }

  // Pass 3: duplicate the first and last computed rows (full 'width' samples)
  // into the row above and the row below.
  Pel* gxFirst = gradX + gradStride;
  Pel* gyFirst = gradY + gradStride;
  const auto rowBytes = sizeof(Pel)*(width);
  ::memcpy(gxFirst - gradStride, gxFirst, rowBytes);
  ::memcpy(gxFirst + (innerH)*gradStride, gxFirst + (innerH - 1)*gradStride, rowBytes);
  ::memcpy(gyFirst - gradStride, gyFirst, rowBytes);
  ::memcpy(gyFirst + (innerH)*gradStride, gyFirst + (innerH - 1)*gradStride, rowBytes);
#endif
}
// For each sample of a widthG x heightG area, stores five products built from the
// down-scaled sample difference (srcY0 - srcY1) and the down-scaled gradient sums
// into the dotProductTemp* arrays. The output arrays are written densely with a
// row pitch of widthG.
// When ENABLE_SIMD_OPT_BIO is non-zero the work is forwarded to g_pelBufOP.calcBIOPar.
void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG)
{
#if ENABLE_SIMD_OPT_BIO
  g_pelBufOP.calcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, gradStride, widthG, heightG);
#else
  for (int row = 0; row < heightG; row++)
  {
    for (int col = 0; col < widthG; col++)
    {
      const int sampleDiff = (srcY0Temp[col] >> 6) - (srcY1Temp[col] >> 6);
      const int gradSumX   = (gradX0[col] + gradX1[col]) >> 3;
      const int gradSumY   = (gradY0[col] + gradY1[col]) >> 3;

      dotProductTemp1[col] = gradSumX * gradSumX;
      dotProductTemp2[col] = gradSumX * gradSumY;
      dotProductTemp3[col] = -gradSumX * sampleDiff;
      dotProductTemp5[col] = gradSumY * gradSumY;
      dotProductTemp6[col] = -gradSumY * sampleDiff;
    }

    // Inputs advance by their own strides ...
    srcY0Temp += src0Stride;
    srcY1Temp += src1Stride;
    gradX0    += gradStride;
    gradX1    += gradStride;
    gradY0    += gradStride;
    gradY1    += gradStride;

    // ... while the outputs are packed, one row being widthG entries.
    dotProductTemp1 += widthG;
    dotProductTemp2 += widthG;
    dotProductTemp3 += widthG;
    dotProductTemp5 += widthG;
    dotProductTemp6 += widthG;
  }
#endif
}
// Adds up the entries of the five per-sample arrays over a square window of
// (unitSize + 2 * BIO_EXTEND_SIZE) samples per side and accumulates the totals into
// sGx2, sGy2, sGxGy, sGxdI and sGydI. The sums are added to whatever the reference
// parameters already hold; they are not reset here.
// In the scalar path sx, sy and height are not used; 'width' is the row pitch of the arrays.
// When ENABLE_SIMD_OPT_BIO is non-zero the work is forwarded to g_pelBufOP.calcBlkGradient.
void InterPrediction::xCalcBlkGradient(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize)
{
#if ENABLE_SIMD_OPT_BIO
  g_pelBufOP.calcBlkGradient(sx, sy, arraysGx2, arraysGxGy, arraysGxdI, arraysGy2, arraysGydI, sGx2, sGy2, sGxGy, sGxdI, sGydI, width, height, unitSize);
#else
  // Begin BIO_EXTEND_SIZE rows above the position the caller passed in.
  int *rowGx2  = arraysGx2  - (BIO_EXTEND_SIZE*width);
  int *rowGy2  = arraysGy2  - (BIO_EXTEND_SIZE*width);
  int *rowGxGy = arraysGxGy - (BIO_EXTEND_SIZE*width);
  int *rowGxdI = arraysGxdI - (BIO_EXTEND_SIZE*width);
  int *rowGydI = arraysGydI - (BIO_EXTEND_SIZE*width);

  for (int row = -BIO_EXTEND_SIZE; row < unitSize + BIO_EXTEND_SIZE; row++)
  {
    // Columns likewise reach BIO_EXTEND_SIZE samples beyond the unit on both sides.
    for (int col = -BIO_EXTEND_SIZE; col < unitSize + BIO_EXTEND_SIZE; col++)
    {
      sGx2  += rowGx2[col];
      sGy2  += rowGy2[col];
      sGxGy += rowGxGy[col];
      sGxdI += rowGxdI[col];
      sGydI += rowGydI[col];
    }

    rowGx2  += width;
    rowGy2  += width;
    rowGxGy += width;
    rowGxdI += width;
    rowGydI += width;
  }
#endif
}
void InterPrediction::xWeightedAverage(const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied )
......
......@@ -360,10 +360,8 @@ private:
static Distortion xGetSAD_SIMD ( const DistParam& pcDtParam );
template< int iWidth, X86_VEXT vext >
static Distortion xGetSAD_NxN_SIMD( const DistParam& pcDtParam );
#if ENABLE_SIMD_OPT_BIO
template< X86_VEXT vext >
static Distortion xGetSAD_IBD_SIMD(const DistParam& pcDtParam);
#endif
template< typename Torg, typename Tcur, X86_VEXT vext >
static Distortion xGetHADs_SIMD ( const DistParam& pcDtParam );
......
......@@ -240,7 +240,6 @@
#if ENABLE_SIMD_OPT_BUFFER
#define ENABLE_SIMD_OPT_GBI 1 ///< SIMD optimization for GBi
#endif
#define ENABLE_SIMD_OPT_BIO ( JVET_L0256_BIO && ENABLE_SIMD_OPT ) ///< SIMD optimization for BIO
// End of SIMD optimizations
......
......@@ -128,7 +128,6 @@ void addAvg_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int s
}
}
#if ENABLE_SIMD_OPT_BIO
template< X86_VEXT vext >
void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
{
......@@ -428,7 +427,6 @@ void calcBlkGradient_SSE(int sx, int sy, int *arraysGx2, int *arraysGxGy
sGxdI = _mm_cvtsi128_si32(mmGxdITotal);
sGydI = _mm_cvtsi128_si32(mmGydITotal);
}
#endif
template< X86_VEXT vext, int W >
void reco_SSE( const int16_t* src0, int src0Stride, const int16_t* src1, int src1Stride, int16_t *dst, int dstStride, int width, int height, const ClpRng& clpRng )
......@@ -798,12 +796,10 @@ void PelBufferOps::_initPelBufOpsX86()
addAvg8 = addAvg_SSE<vext, 8>;
addAvg4 = addAvg_SSE<vext, 4>;
#if ENABLE_SIMD_OPT_BIO
addBIOAvg4 = addBIOAvg4_SSE<vext>;
bioGradFilter = gradFilter_SSE<vext>;
calcBIOPar = calcBIOPar_SSE<vext>;
calcBlkGradient = calcBlkGradient_SSE<vext>;
#endif
reco8 = reco_SSE<vext, 8>;
reco4 = reco_SSE<vext, 4>;
......
......@@ -297,7 +297,6 @@ Distortion RdCost::xGetSAD_SIMD( const DistParam &rcDtParam )
return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
}
#if ENABLE_SIMD_OPT_BIO
template< X86_VEXT vext >
Distortion RdCost::xGetSAD_IBD_SIMD(const DistParam &rcDtParam)
{
......@@ -335,7 +334,6 @@ Distortion RdCost::xGetSAD_IBD_SIMD(const DistParam &rcDtParam)
uiSum <<= subShift;
return uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);
}
#endif
template< int iWidth, X86_VEXT vext >
Distortion RdCost::xGetSAD_NxN_SIMD( const DistParam &rcDtParam )
......@@ -2462,9 +2460,7 @@ void RdCost::_initRdCostX86()
m_afpDistortFunc[DF_HAD64] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
m_afpDistortFunc[DF_HAD16N] = RdCost::xGetHADs_SIMD<Pel, Pel, vext>;
#if ENABLE_SIMD_OPT_BIO
m_afpDistortFunc[DF_SAD_INTERMEDIATE_BITDEPTH] = RdCost::xGetSAD_IBD_SIMD<vext>;
#endif
}
template void RdCost::_initRdCostX86<SIMDX86>();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment