diff --git a/source/Lib/CommonLib/Buffer.cpp b/source/Lib/CommonLib/Buffer.cpp index 07ffa9a94ab29b9ce80e49aae1e75bbeb025c6c5..9fd64da8014b28cb3ae26cd538f7f7398e7c92b5 100644 --- a/source/Lib/CommonLib/Buffer.cpp +++ b/source/Lib/CommonLib/Buffer.cpp @@ -91,18 +91,30 @@ void addBIOAvgCore(const Pel* src0, int src0Stride, const Pel* src1, int src1Str } } +#if JVET_M0063_BDOF_FIX +void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth) +#else void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY) +#endif { Pel* srcTmp = pSrc + srcStride + 1; Pel* gradXTmp = gradX + gradStride + 1; Pel* gradYTmp = gradY + gradStride + 1; +#if JVET_M0063_BDOF_FIX + int shift1 = std::max<int>(2, (IF_INTERNAL_PREC - bitDepth)); +#endif for (int y = 0; y < (height - 2 * BIO_EXTEND_SIZE); y++) { for (int x = 0; x < (width - 2 * BIO_EXTEND_SIZE); x++) { +#if JVET_M0063_BDOF_FIX + gradYTmp[x] = (srcTmp[x + srcStride] - srcTmp[x - srcStride]) >> shift1; + gradXTmp[x] = (srcTmp[x + 1] - srcTmp[x - 1]) >> shift1; +#else gradYTmp[x] = (srcTmp[x + srcStride] - srcTmp[x - srcStride]) >> 4; gradXTmp[x] = (srcTmp[x + 1] - srcTmp[x - 1]) >> 4; +#endif } gradXTmp += gradStride; gradYTmp += gradStride; @@ -130,15 +142,29 @@ void gradFilterCore(Pel* pSrc, int srcStride, int width, int height, int gradStr ::memcpy(gradYTmp + (height - 2 * BIO_EXTEND_SIZE)*gradStride, gradYTmp + (height - 2 * BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width)); } +#if JVET_M0063_BDOF_FIX +void calcBIOParCore(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth) +#else void calcBIOParCore(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG) +#endif { +#if JVET_M0063_BDOF_FIX + int shift4 = std::min<int>(8, (bitDepth - 4)); + int shift5 = std::min<int>(5, (bitDepth - 7)); +#endif for (int y = 0; y < heightG; y++) { for (int x = 0; x < widthG; x++) { +#if JVET_M0063_BDOF_FIX + int temp = (srcY0Temp[x] >> shift4) - (srcY1Temp[x] >> shift4); + int tempX = (gradX0[x] + gradX1[x]) >> shift5; + int tempY = (gradY0[x] + gradY1[x]) >> shift5; +#else int temp = (srcY0Temp[x] >> 6) - (srcY1Temp[x] >> 6); int tempX = (gradX0[x] + gradX1[x]) >> 3; int tempY = (gradY0[x] + gradY1[x]) >> 3; +#endif dotProductTemp1[x] = tempX * tempX; dotProductTemp2[x] = tempX * tempY; dotProductTemp3[x] = -tempX * temp; diff --git a/source/Lib/CommonLib/Buffer.h b/source/Lib/CommonLib/Buffer.h index e16ab2d3f4a9b163cbbbe0f2b33bdb1c70ef416d..db9d69e5e59716d556a6554fceb0a242ce55b8bc 100644 --- a/source/Lib/CommonLib/Buffer.h +++ b/source/Lib/CommonLib/Buffer.h @@ -69,8 +69,13 @@ struct PelBufferOps void ( *linTf4 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip ); void ( *linTf8 ) ( const Pel* src0, int src0Stride, Pel *dst, int dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip ); void(*addBIOAvg4) (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng); +#if JVET_M0063_BDOF_FIX + void(*bioGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth); + void(*calcBIOPar) (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth); +#else void(*bioGradFilter) (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY); void(*calcBIOPar) (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG); +#endif void(*calcBlkGradient)(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize); #if ENABLE_SIMD_OPT_GBI void ( *removeWeightHighFreq8) ( Pel* src0, int src0Stride, const Pel* src1, int src1Stride, int width, int height, int shift, int gbiWeight); diff --git a/source/Lib/CommonLib/InterPrediction.cpp b/source/Lib/CommonLib/InterPrediction.cpp index 43deb866b992f6c87d36c332a8ba547536f1ba33..f9a9da2d516638c304e6b331e089392af0729388 100644 --- a/source/Lib/CommonLib/InterPrediction.cpp +++ b/source/Lib/CommonLib/InterPrediction.cpp @@ -875,7 +875,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf Pel* gradY = (refList == 0) ? m_gradY0 : m_gradY1; Pel* gradX = (refList == 0) ? m_gradX0 : m_gradX1; +#if JVET_M0063_BDOF_FIX + xBioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY, clipBitDepths.recon[toChannelType(COMPONENT_Y)]); +#else xBioGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY); +#endif Pel* padStr = m_filteredBlockTmp[2 + refList][COMPONENT_Y] + 2 * stridePredMC + 2; for (int y = 0; y< height; y++) { @@ -893,7 +897,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf const int bitDepth = clipBitDepths.recon[toChannelType(COMPONENT_Y)]; const int shiftNum = IF_INTERNAL_PREC + 1 - bitDepth; const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; +#if JVET_M0063_BDOF_FIX + const int limit = (bitDepth>12)? 2 : ((int)1 << (4 + IF_INTERNAL_PREC - bitDepth - 5)); +#else const int limit = ((int)1 << (4 + IF_INTERNAL_PREC - bitDepth - 5)); +#endif int* dotProductTemp1 = m_dotProduct1; int* dotProductTemp2 = m_dotProduct2; @@ -901,7 +909,11 @@ void InterPrediction::applyBiOptFlow(const PredictionUnit &pu, const CPelUnitBuf int* dotProductTemp5 = m_dotProduct5; int* dotProductTemp6 = m_dotProduct6; +#if JVET_M0063_BDOF_FIX + xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG, bitDepth); +#else xCalcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, widthG, widthG, heightG); +#endif int xUnit = (width >> 2); int yUnit = (height >> 2); @@ -1066,6 +1078,17 @@ void InterPrediction::xAddBIOAvg4(const Pel* src0, int src0Stride, const Pel* sr g_pelBufOP.addBIOAvg4(src0, src0Stride, src1, src1Stride, dst, dstStride, gradX0, gradX1, gradY0, gradY1, gradStride, width, height, tmpx, tmpy, shift, offset, clpRng); } +#if JVET_M0063_BDOF_FIX +void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth) +{ + g_pelBufOP.bioGradFilter(pSrc, srcStride, width, height, gradStride, gradX, gradY, bitDepth); +} + +void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth) +{ + g_pelBufOP.calcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, gradStride, widthG, heightG, bitDepth); +} +#else void InterPrediction::xBioGradFilter(Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY) { g_pelBufOP.bioGradFilter(pSrc, srcStride, width, height, gradStride, gradX, gradY); @@ -1075,6 +1098,7 @@ void InterPrediction::xCalcBIOPar(const Pel* srcY0Temp, const Pel* srcY1Temp, co { g_pelBufOP.calcBIOPar(srcY0Temp, srcY1Temp, gradX0, gradX1, gradY0, gradY1, dotProductTemp1, dotProductTemp2, dotProductTemp3, dotProductTemp5, dotProductTemp6, src0Stride, src1Stride, gradStride, widthG, heightG); } +#endif void InterPrediction::xCalcBlkGradient(int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize) { diff --git a/source/Lib/CommonLib/InterPrediction.h b/source/Lib/CommonLib/InterPrediction.h index 145431e2777417e854efa484d18fe1fba7418ad4..741cc655a871a28fd86ec8db87b0aace19a1c1af 100644 --- a/source/Lib/CommonLib/InterPrediction.h +++ b/source/Lib/CommonLib/InterPrediction.h @@ -118,8 +118,13 @@ protected: ); void xAddBIOAvg4 (const Pel* src0, int src0Stride, const Pel* src1, int src1Stride, Pel *dst, int dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, int gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng); +#if JVET_M0063_BDOF_FIX + void xBioGradFilter (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, int bitDepth); + void xCalcBIOPar (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, int bitDepth); +#else void xBioGradFilter (Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY); void xCalcBIOPar (const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG); +#endif void xCalcBlkGradient (int sx, int sy, int *arraysGx2, int *arraysGxGy, int *arraysGxdI, int *arraysGy2, int *arraysGydI, int &sGx2, int &sGy2, int &sGxGy, int &sGxdI, int &sGydI, int width, int height, int unitSize); void xWeightedAverage ( const PredictionUnit& pu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied ); void xPredAffineBlk( const ComponentID& compID, const PredictionUnit& pu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool& bi, const ClpRng& clpRng ); diff --git a/source/Lib/CommonLib/TypeDef.h b/source/Lib/CommonLib/TypeDef.h index 7be8dbd8b367997855609dd4cc68180bd6712fcc..cb70e1dcec69ea1ecc85fef4c8dd0ba875dba20c 100644 --- a/source/Lib/CommonLib/TypeDef.h +++ b/source/Lib/CommonLib/TypeDef.h @@ -52,6 +52,8 @@ #define JVET_M0116_ATMVP_LEFT_NB_FOR_OFFSET 1 // Only use left neighbor for ATMVP offset derivation, from M0273, M0240, M0116, M0338, M0204 +#define JVET_M0063_BDOF_FIX 1 // BDOF bitdepth bugfix + #define JVET_M0265_MV_ROUNDING_CLEANUP 1 // Unify MV roundings and make SW/WD allignment #define JVET_M0228_REMOVE_CPMV_COMPARE 1 // Remove CPMV comparisons for construnted affine merge candidates from JVET-M0228, M0166, M0477 diff --git a/source/Lib/CommonLib/x86/BufferX86.h b/source/Lib/CommonLib/x86/BufferX86.h index bdae6dcc51a4b2c2d87629bf3ff32b6e79e4bc64..e83a00ef15352cbe221bd78f9ae5eeeb7299fd71 100644 --- a/source/Lib/CommonLib/x86/BufferX86.h +++ b/source/Lib/CommonLib/x86/BufferX86.h @@ -162,7 +162,11 @@ void addBIOAvg4_SSE(const Pel* src0, int src0Stride, const Pel* src1, int src1St } template< X86_VEXT vext > +#if JVET_M0063_BDOF_FIX +void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth) +#else void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY) +#endif { __m128i vzero = _mm_setzero_si128(); Pel* srcTmp = src + srcStride + 1; @@ -171,6 +175,10 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri int widthInside = width - 2 * BIO_EXTEND_SIZE; int heightInside = height - 2 * BIO_EXTEND_SIZE; +#if JVET_M0063_BDOF_FIX + int shift1 = std::max<int>(2, (14 - bitDepth)); +#endif + assert((widthInside & 3) == 0); @@ -184,8 +192,13 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri __m128i mmPixLeft = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x - 1))); __m128i mmPixRight = _mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i*)(srcTmp + x + 1))); +#if JVET_M0063_BDOF_FIX + __m128i mmGradVer = _mm_sra_epi32(_mm_sub_epi32(mmPixBottom, mmPixTop), _mm_cvtsi32_si128(shift1)); + __m128i mmGradHor = _mm_sra_epi32(_mm_sub_epi32(mmPixRight, mmPixLeft), _mm_cvtsi32_si128(shift1)); +#else __m128i mmGradVer = _mm_srai_epi32(_mm_sub_epi32(mmPixBottom, mmPixTop), 4); __m128i mmGradHor = _mm_srai_epi32(_mm_sub_epi32(mmPixRight, mmPixLeft), 4); +#endif mmGradVer = _mm_packs_epi32(mmGradVer, vzero); mmGradHor = _mm_packs_epi32(mmGradHor, vzero); @@ -220,23 +233,41 @@ void gradFilter_SSE(Pel* src, int srcStride, int width, int height, int gradStri } template< X86_VEXT vext > +#if JVET_M0063_BDOF_FIX +void calcBIOPar_SSE(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG, const int bitDepth) +#else void calcBIOPar_SSE(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int* dotProductTemp1, int* dotProductTemp2, int* dotProductTemp3, int* dotProductTemp5, int* dotProductTemp6, const int src0Stride, const int src1Stride, const int gradStride, const int widthG, const int heightG) +#endif { +#if JVET_M0063_BDOF_FIX + int shift4 = std::min<int>(8, (bitDepth - 4)); + int shift5 = std::min<int>(5, (bitDepth - 7)); +#endif for (int y = 0; y < heightG; y++) { int x = 0; for (; x < ((widthG >> 3) << 3); x += 8) { +#if JVET_M0063_BDOF_FIX + __m128i mmSrcY0Temp = _mm_sra_epi16(_mm_loadu_si128((__m128i*)(srcY0Temp + x)), _mm_cvtsi32_si128(shift4)); + __m128i mmSrcY1Temp = _mm_sra_epi16(_mm_loadu_si128((__m128i*)(srcY1Temp + x)), _mm_cvtsi32_si128(shift4)); +#else __m128i mmSrcY0Temp = _mm_srai_epi16(_mm_loadu_si128((__m128i*)(srcY0Temp + x)), 6); __m128i mmSrcY1Temp = _mm_srai_epi16(_mm_loadu_si128((__m128i*)(srcY1Temp + x)), 6); +#endif __m128i mmGradX0 = _mm_loadu_si128((__m128i*)(gradX0 + x)); __m128i mmGradX1 = _mm_loadu_si128((__m128i*)(gradX1 + x)); __m128i mmGradY0 = _mm_loadu_si128((__m128i*)(gradY0 + x)); __m128i mmGradY1 = _mm_loadu_si128((__m128i*)(gradY1 + x)); __m128i mmTemp1 = _mm_sub_epi16(mmSrcY1Temp, mmSrcY0Temp); +#if JVET_M0063_BDOF_FIX + __m128i mmTempX = _mm_sra_epi16(_mm_add_epi16(mmGradX0, mmGradX1), _mm_cvtsi32_si128(shift5)); + __m128i mmTempY = _mm_sra_epi16(_mm_add_epi16(mmGradY0, mmGradY1), _mm_cvtsi32_si128(shift5)); +#else __m128i mmTempX = _mm_srai_epi16(_mm_add_epi16(mmGradX0, mmGradX1), 3); __m128i mmTempY = _mm_srai_epi16(_mm_add_epi16(mmGradY0, mmGradY1), 3); +#endif // m_piDotProductTemp1 __m128i mm_b = _mm_mulhi_epi16(mmTempX, mmTempX); @@ -291,16 +322,26 @@ void calcBIOPar_SSE(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX for (; x < ((widthG >> 2) << 2); x += 4) { +#if JVET_M0063_BDOF_FIX + __m128i mmSrcY0Temp = _mm_sra_epi16(_mm_loadl_epi64((__m128i*)(srcY0Temp + x)), _mm_cvtsi32_si128(shift4)); + __m128i mmSrcY1Temp = _mm_sra_epi16(_mm_loadl_epi64((__m128i*)(srcY1Temp + x)), _mm_cvtsi32_si128(shift4)); +#else __m128i mmSrcY0Temp = _mm_srai_epi16(_mm_loadl_epi64((__m128i*)(srcY0Temp + x)), 6); __m128i mmSrcY1Temp = _mm_srai_epi16(_mm_loadl_epi64((__m128i*)(srcY1Temp + x)), 6); +#endif __m128i mmGradX0 = _mm_loadl_epi64((__m128i*)(gradX0 + x)); __m128i mmGradX1 = _mm_loadl_epi64((__m128i*)(gradX1 + x)); __m128i mmGradY0 = _mm_loadl_epi64((__m128i*)(gradY0 + x)); __m128i mmGradY1 = _mm_loadl_epi64((__m128i*)(gradY1 + x)); __m128i mmTemp1 = _mm_sub_epi16(mmSrcY1Temp, mmSrcY0Temp); +#if JVET_M0063_BDOF_FIX + __m128i mmTempX = _mm_sra_epi16(_mm_add_epi16(mmGradX0, mmGradX1), _mm_cvtsi32_si128(shift5)); + __m128i mmTempY = _mm_sra_epi16(_mm_add_epi16(mmGradY0, mmGradY1), _mm_cvtsi32_si128(shift5)); +#else __m128i mmTempX = _mm_srai_epi16(_mm_add_epi16(mmGradX0, mmGradX1), 3); __m128i mmTempY = _mm_srai_epi16(_mm_add_epi16(mmGradY0, mmGradY1), 3); +#endif // m_piDotProductTemp1 __m128i mm_b = _mm_mulhi_epi16(mmTempX, mmTempX); @@ -340,9 +381,15 @@ void calcBIOPar_SSE(const Pel* srcY0Temp, const Pel* srcY1Temp, const Pel* gradX for (; x < widthG; x++) { +#if JVET_M0063_BDOF_FIX + int temp = (srcY0Temp[x] >> shift4) - (srcY1Temp[x] >> shift4); + int tempX = (gradX0[x] + gradX1[x]) >> shift5; + int tempY = (gradY0[x] + gradY1[x]) >> shift5; +#else int temp = (srcY0Temp[x] >> 6) - (srcY1Temp[x] >> 6); int tempX = (gradX0[x] + gradX1[x]) >> 3; int tempY = (gradY0[x] + gradY1[x]) >> 3; +#endif dotProductTemp1[x] = tempX * tempX; dotProductTemp2[x] = tempX * tempY; dotProductTemp3[x] = -tempX * temp;